def douban_recommendation(data, url, start_index=None):
    """Process one page of a user's recommendation feed (generator).

    The serialized feed in ``data`` is decoded with ``loads``. The last path
    segment of each of the first two author links gives ``user_id`` and a
    second identifier; the latter is bound to ``url``, so the ``url``
    argument passed by the caller is never read.

    * When ``start_index == 1`` the user is registered through
      ``DoubanUser.new``, using the feed title minus its last four
      characters as the name.
    * Each entry whose first ``db:attribute`` value is a key of
      ``DOUBAN_REC_CID`` is stored with ``douban_rec_new``; if
      ``DOUBAN_REC_PARSE`` has a handler for the mapped category, whatever
      that handler returns (when not ``None``) is yielded item by item.
    * After a non-empty page, if ``start_index`` is not ``None``, a
      ``(douban_recommendation, next_url, next_start)`` tuple is yielded,
      with ``next_start = start_index + 10``.
    * On an empty page, ``DoubanFetched.get_or_create(id=user_id)`` is
      saved and nothing is yielded.
    """
    feed = loads(data)
    entries = feed[u'entry']

    # Collect the link tails first, then convert, so every '@href' lookup
    # happens before any str() conversion.
    author_links = feed[u'author'][u'link'][:2]
    link_tails = [
        link['@href'].strip('/').rsplit('/', 1)[-1] for link in author_links
    ]
    user_id, url = [str(tail) for tail in link_tails]

    if start_index == 1:
        DoubanUser.new(user_id, url, feed[u'title'][u'$t'][:-4])

    if not entries:
        # Empty page: record the user as fetched and stop.
        DoubanFetched.get_or_create(id=user_id).save()
        return

    for entry in entries:
        raw_text = entry[u'content'][u'$t']
        title = raw_text.replace('\r', ' ').replace('\n', ' ').strip()

        attributes = entry[u'db:attribute']
        raw_cid = str(attributes[0][u'$t'])
        if raw_cid not in DOUBAN_REC_CID:
            continue
        cid = DOUBAN_REC_CID[raw_cid]

        rec_id = entry[u'id'][u'$t'].rsplit('/', 1)[1]
        published = int_by_string(entry[u'published'][u'$t'].split('+', 1)[0])
        douban_rec_new(rec_id, user_id, cid, title, published)

        # Imported lazily, only once a recognised category has been stored.
        from douban_parse import DOUBAN_REC_PARSE
        if cid not in DOUBAN_REC_PARSE:
            continue
        parsed = DOUBAN_REC_PARSE[cid](title, user_id)
        if parsed is not None:
            for item in parsed:
                yield item

    if start_index is not None:
        next_start = start_index + 10
        # NOTE(review): the query key is spelled 'max-result' here; confirm
        # against the remote API that 'max-results' was not intended.
        next_url = '%s&max-result=10&start-index=%s' % (
            URL_REC % user_id, next_start
        )
        yield douban_recommendation, next_url, next_start
# Example no. 2
# 0
def douban_recommendation(data, url, start_index=None):
    """Generator that handles a single page of a recommendation feed.

    Args:
        data: serialized feed; decoded with ``loads``.
        url: not read; the name is rebound to the last path segment of the
            second author link.
        start_index: paging offset of this page, or ``None``. A value of 1
            triggers ``DoubanUser.new``; any non-``None`` value makes the
            generator finish a non-empty page by yielding a follow-up
            ``(douban_recommendation, url, start_index + 10)`` tuple.

    Yields:
        Items produced by the ``DOUBAN_REC_PARSE`` handler of each stored
        entry (when the handler exists and returns something other than
        ``None``), then the optional follow-up tuple described above.

    When the feed has no entries, nothing is yielded and
    ``DoubanFetched.get_or_create(id=user_id)`` is saved instead.
    """
    payload = loads(data)
    entry_list = payload[u'entry']

    # Last path segment of each of the first two author links.
    tails = []
    for link in payload[u'author'][u'link'][:2]:
        href = link['@href']
        tails.append(href.strip('/').rsplit('/', 1)[-1])
    user_id, url = map(str, tails)

    if start_index == 1:
        # The name is the feed title without its trailing four characters.
        feed_title = payload[u'title'][u'$t']
        DoubanUser.new(user_id, url, feed_title[:-4])

    if entry_list:
        for record in entry_list:
            text = record[u'content'][u'$t']
            text = text.replace('\r', ' ')
            text = text.replace('\n', ' ')
            title = text.strip()

            first_attribute = record[u'db:attribute'][0]
            cid_key = str(first_attribute[u'$t'])
            if cid_key in DOUBAN_REC_CID:
                cid = DOUBAN_REC_CID[cid_key]

                record_url = record[u'id'][u'$t']
                record_id = record_url.rsplit('/', 1)[1]

                stamp = record[u'published'][u'$t'].split('+', 1)[0]
                stamp = int_by_string(stamp)

                douban_rec_new(record_id, user_id, cid, title, stamp)

                # Function-level import, executed only for stored entries.
                from douban_parse import DOUBAN_REC_PARSE
                if cid in DOUBAN_REC_PARSE:
                    handler = DOUBAN_REC_PARSE[cid]
                    produced = handler(title, user_id)
                    if produced is None:
                        continue
                    for item in produced:
                        yield item

        if start_index is not None:
            start = start_index + 10
            base = URL_REC % user_id
            # NOTE(review): query key is 'max-result' (singular); verify the
            # remote API does not expect 'max-results'.
            url = '%s&max-result=10&start-index=%s' % (base, start)
            yield douban_recommendation, url, start
    else:
        # Nothing on this page: persist the "fetched" marker for the user.
        DoubanFetched.get_or_create(id=user_id).save()