def __call__(self, data, url):
        """Parse one fetched page and store it through ``douban_feed_new``.

        This is a generator.  When a like-count span is present in ``data``
        it yields ``(parse_like, like_url, cid, rid)``; presumably the
        caller schedules that tuple as a follow-up fetch (confirm against
        the crawler loop).

        ``data`` is the page text and ``url`` the address it came from; the
        record id is taken from ``url_last(url)`` and the category id from
        ``self.cid``.

        Nothing is stored when ``self.user_id(data)`` returns ``None``.
        Owner or topic identifiers that cannot be converted to ``int`` are
        written as 0 in the feed row, and their raw values are saved
        separately in a ``DoubanFeedOwner`` row carrying the feed id.
        """
        rid = url_last(url)
        cid = self.cid

        title = self.title(data)

        rec_num = txt_wrap_by('<span class="rec-num">', '人</span>', data) or 0
        like_num = txt_wrap_by('<span class="fav-num" data-tid="', '</a>喜欢</span>', data) or 0
        if like_num:
            # Narrow the matched span down to the bare count.  Use the same
            # ``or 0`` fallback as the lookups above so a failed inner match
            # does not hand a non-count value to douban_feed_new.
            like_num = txt_wrap_by('<a href="#">', '人', like_num) or 0
            yield parse_like, URL_LIKE % (cid, rid), cid, rid

        # Raw (non-numeric) identifiers, kept only when they cannot be
        # resolved to integers below.
        _topic = _owner = 0

        owner_id = self.user_id(data)
        if owner_id is None:
            # NOTE(review): this early exit leaves ``url`` in EXIST_PARSE --
            # confirm that is intended.
            return

        try:
            owner_id = int(owner_id)
        except ValueError:
            # Not numeric: try to resolve it through DoubanUser.by_url, and
            # otherwise remember the raw value for DoubanFeedOwner.
            known_owner_id = DoubanUser.by_url(owner_id)
            if known_owner_id:
                owner_id = known_owner_id
            else:
                _owner = owner_id
                owner_id = 0

        topic_id = self.topic_id(data)
        if topic_id is None:
            # int(None) raises TypeError, which the ValueError handler below
            # would not catch.  Assumes self.topic_id can return None the
            # way self.user_id can -- TODO confirm.
            topic_id = 0
        else:
            try:
                topic_id = int(topic_id)
            except ValueError:
                _topic = topic_id
                topic_id = 0

        time = self.time(data)
        if time:
            time = int_by_string(time)

        feed_id = douban_feed_new(
            cid,
            rid,
            rec_num,
            like_num,
            title,
            self.htm(data),
            time,
            owner_id,
            topic_id
        )

        if _owner or _topic:
            DoubanFeedOwner(id=feed_id, topic=_topic, owner=_owner).save()

        #for user_id in user_id_by_txt(data):
        #    yield douban_recommendation_begin_tuple(user_id)

        if url in EXIST_PARSE:
            EXIST_PARSE.remove(url)
Example #2
0
    def __call__(self, data, url):
        """Extract a feed record from ``data`` and save it.

        Generator.  If the page contains a like-count span, one tuple
        ``(parse_like, URL_LIKE % (cid, rid), cid, rid)`` is yielded before
        anything is stored; presumably the caller queues it as another
        fetch (verify against the caller).

        The record is written with ``douban_feed_new``.  When
        ``self.user_id(data)`` returns ``None`` the method stops without
        storing anything.  An owner or topic value that ``int()`` rejects
        is stored as 0, and the raw value goes into a ``DoubanFeedOwner``
        row created with the new feed id.  On completion ``url`` is removed
        from ``EXIST_PARSE`` if present.
        """
        rid = url_last(url)
        cid = self.cid

        title = self.title(data)

        rec_num = txt_wrap_by('<span class="rec-num">', '人</span>', data) or 0
        like_num = txt_wrap_by('<span class="fav-num" data-tid="',
                               '</a>喜欢</span>', data) or 0
        if like_num:
            # Second pass pulls the count out of the span matched above.
            # Fall back to 0, matching the two lookups above, when the
            # count itself cannot be located.
            like_num = txt_wrap_by('<a href="#">', '人', like_num) or 0
            yield parse_like, URL_LIKE % (cid, rid), cid, rid

        # Hold the unresolved raw owner/topic values (0 means "resolved").
        _topic = _owner = 0

        owner_id = self.user_id(data)
        if owner_id is None:
            # NOTE(review): returning here skips the EXIST_PARSE cleanup at
            # the end of the method -- confirm that is intended.
            return

        try:
            owner_id = int(owner_id)
        except ValueError:
            # Non-numeric owner: look it up via DoubanUser.by_url first.
            resolved_owner = DoubanUser.by_url(owner_id)
            if resolved_owner:
                owner_id = resolved_owner
            else:
                _owner = owner_id
                owner_id = 0

        topic_id = self.topic_id(data)
        if topic_id is None:
            # int(None) raises TypeError rather than ValueError, so handle
            # a missing topic explicitly.  Assumes self.topic_id may return
            # None like self.user_id -- TODO confirm.
            topic_id = 0
        else:
            try:
                topic_id = int(topic_id)
            except ValueError:
                _topic = topic_id
                topic_id = 0

        time = self.time(data)
        if time:
            time = int_by_string(time)

        feed_id = douban_feed_new(cid, rid, rec_num, like_num, title,
                                  self.htm(data), time, owner_id, topic_id)

        if _owner or _topic:
            DoubanFeedOwner(id=feed_id, topic=_topic, owner=_owner).save()

        #for user_id in user_id_by_txt(data):
        #    yield douban_recommendation_begin_tuple(user_id)

        if url in EXIST_PARSE:
            EXIST_PARSE.remove(url)
def douban_recommendation(data, url, start_index=None):
    """Process one decoded page of a user's recommendation feed.

    ``data`` is decoded with ``loads``.  The user id and the user's url
    slug are taken from the last path segment of the first two author
    links.  When ``start_index`` is 1 the user is registered through
    ``DoubanUser.new``.

    For every entry whose category is listed in ``DOUBAN_REC_CID`` the
    recommendation is stored with ``douban_rec_new``; if a parser is
    registered for that category in ``DOUBAN_REC_PARSE``, whatever it
    returns is re-yielded item by item.

    This is a generator.  When ``start_index`` is given and the page had
    entries, the final item yielded is
    ``(douban_recommendation, next_url, next_start)`` with the start index
    advanced by 10.  A page without entries records the user through
    ``DoubanFetched`` instead and yields nothing.
    """
    feed = loads(data)
    entries = feed[u'entry']

    link_tails = [
        link['@href'].strip('/').rsplit('/', 1)[-1]
        for link in feed[u'author'][u'link'][:2]
    ]
    user_id, user_url = map(str, link_tails)

    if start_index == 1:
        # The last four characters of the feed title are dropped to get
        # the display name.
        DoubanUser.new(user_id, user_url, feed[u'title'][u'$t'][:-4])

    if not entries:
        DoubanFetched.get_or_create(id=user_id).save()
        return

    for entry in entries:
        # Flatten the content onto a single line.
        title = entry[u'content'][u'$t'].replace('\r', ' ').replace('\n', ' ').strip()

#        for uid in user_id_by_txt(title):
#            yield douban_recommendation_begin_tuple(uid)

        raw_cid = str(entry[u'db:attribute'][0][u'$t'])
        if raw_cid not in DOUBAN_REC_CID:
            continue

        cid = DOUBAN_REC_CID[raw_cid]
        rec_id = entry[u'id'][u'$t'].rsplit('/', 1)[1]
        # Keep only the part of the timestamp before any '+' suffix.
        published = int_by_string(entry[u'published'][u'$t'].split('+', 1)[0])
        douban_rec_new(rec_id, user_id, cid, title, published)

        # Imported at call time; presumably this avoids a circular import
        # with douban_parse -- confirm before hoisting.
        from douban_parse import DOUBAN_REC_PARSE
        if cid in DOUBAN_REC_PARSE:
            parsed = DOUBAN_REC_PARSE[cid](title, user_id)
            if parsed is not None:
                for item in parsed:
                    yield item

    if start_index is not None:
        next_start = start_index + 10
        next_url = '%s&max-result=10&start-index=%s' % (URL_REC % user_id, next_start)
        yield douban_recommendation, next_url, next_start
Example #4
0
def douban_recommendation(data, url, start_index=None):
    """Store the recommendations found in one page of a user's feed.

    The payload is decoded with ``loads``; the author's first two links
    provide the user id and url slug (last path segment of each).  A
    ``start_index`` of 1 additionally creates the user via
    ``DoubanUser.new``.

    Entries whose category appears in ``DOUBAN_REC_CID`` are saved with
    ``douban_rec_new`` and, when ``DOUBAN_REC_PARSE`` has a parser for the
    mapped category, the parser's results are yielded one by one.

    Generator.  After a non-empty page, if ``start_index`` is not None, a
    ``(douban_recommendation, url, start)`` tuple for the next ten entries
    is yielded.  An empty page only saves a ``DoubanFetched`` row for the
    user.
    """
    payload = loads(data)
    entry_list = payload[u'entry']

    author_links = payload[u'author'][u'link'][:2]
    user_id, profile_slug = map(
        str, [link['@href'].strip('/').rsplit('/', 1)[-1] for link in author_links])

    if start_index == 1:
        # Display name is the feed title minus its last four characters.
        display_name = payload[u'title'][u'$t'][:-4]
        DoubanUser.new(user_id, profile_slug, display_name)

    if entry_list:
        for entry in entry_list:
            raw_text = entry[u'content'][u'$t']
            # Collapse line breaks so the title is a single line.
            title = raw_text.replace('\r', ' ').replace('\n', ' ').strip()

            #            for uid in user_id_by_txt(title):
            #                yield douban_recommendation_begin_tuple(uid)

            category_key = str(entry[u'db:attribute'][0][u'$t'])
            if category_key in DOUBAN_REC_CID:
                cid = DOUBAN_REC_CID[category_key]
                entry_id = entry[u'id'][u'$t'].rsplit('/', 1)[1]
                # Drop anything after the first '+' before converting.
                stamp_text = entry[u'published'][u'$t'].split('+', 1)[0]
                stamp = int_by_string(stamp_text)
                douban_rec_new(entry_id, user_id, cid, title, stamp)

                # Function-level import; presumably kept here to dodge a
                # circular import -- confirm before moving it.
                from douban_parse import DOUBAN_REC_PARSE
                if cid in DOUBAN_REC_PARSE:
                    follow_ups = DOUBAN_REC_PARSE[cid](title, user_id)
                    if follow_ups is not None:
                        for follow_up in follow_ups:
                            yield follow_up

        if start_index is not None:
            following_index = start_index + 10
            base_url = URL_REC % user_id
            following_url = '%s&max-result=10&start-index=%s' % (
                base_url, following_index)
            yield douban_recommendation, following_url, following_index
    else:
        fetched_marker = DoubanFetched.get_or_create(id=user_id)
        fetched_marker.save()
Example #5
0
def wm_save(id, like, name, author, link, create_time, txt):
    """Return the ``SpiderWm`` row for ``id``, creating it if needed.

    If ``SpiderWm.get(wmid=id)`` finds a row, that row is returned
    unchanged.  Otherwise a new row is built from the arguments, saved and
    returned.

    ``create_time`` containing the character '前' (presumably a relative
    timestamp such as "N minutes ago" -- confirm) is replaced by the
    current ``time()``; any other value is converted with
    ``int_by_string``.  A falsy ``like`` is stored as 0.
    """
    existing = SpiderWm.get(wmid=id)
    if existing:
        return existing

    fetched_at = time()
    created = fetched_at if '前' in create_time else int_by_string(create_time)
    like_count = int(like or 0)

    record = SpiderWm(
        wmid=id, like=like_count, name=name, author=author,
        link=link, time=created, txt=txt
    )
    record.save()
    return record
Example #6
0
def wm_save(id, like, name, author, link, create_time, txt):
    """Fetch or create the ``SpiderWm`` record whose ``wmid`` is ``id``.

    An existing record is returned as-is.  Otherwise the arguments are
    normalised, a new record is saved, and that record is returned.

    Normalisation: ``like`` becomes ``int(like or 0)``; ``create_time``
    becomes the current ``time()`` when it contains '前' (presumably a
    relative "... ago" timestamp -- confirm) and ``int_by_string`` of the
    given text otherwise.
    """
    found = SpiderWm.get(wmid=id)
    if found:
        return found

    current = time()

    if '前' not in create_time:
        stamp = int_by_string(create_time)
    else:
        stamp = current

    fields = {
        'wmid': id,
        'like': int(like or 0),
        'name': name,
        'author': author,
        'link': link,
        'time': stamp,
        'txt': txt,
    }
    created = SpiderWm(**fields)
    created.save()
    return created