Esempio n. 1
0
def main():
    """Retry resolution of feeds whose group/owner could not be resolved.

    Generator. Walks every ``DoubanFeedOwner`` row (via ``ormiter``), loads
    the ``DoubanFeed`` with the same id and tries to resolve:

    * the group, only for feeds whose ``cid`` is ``CID_DOUBAN_FEED_TOPIC``,
      through ``DoubanGroup.by_url``;
    * the user, through ``DoubanUser.by_url`` unless the stored value is
      already an all-digit id.

    When both are resolved the feed is updated and saved and the
    ``DoubanFeedOwner`` row is deleted.

    Yields:
        ``(parse_group_htm, group_page_url)`` once per group URL that
        ``DoubanGroup.by_url`` could not resolve during this run.
    """
    from zweb.orm import ormiter

    # Group URLs already yielded in this run, so each is requested once.
    requested_groups = set()

    for pending in ormiter(DoubanFeedOwner):
        feed = DoubanFeed.get(pending.id)

        resolved_topic = None
        if feed.cid == CID_DOUBAN_FEED_TOPIC:
            group_url = feed.topic_id or pending.topic
            group = DoubanGroup.by_url(group_url)
            if group:
                resolved_topic = group
            elif group_url not in requested_groups:
                requested_groups.add(group_url)
                yield (
                    parse_group_htm,
                    'http://www.douban.com/group/%s/' % group_url,
                )

        resolved_user = feed.user_id or pending.owner
        looks_numeric = bool(resolved_user) and str(resolved_user).isdigit()
        if not looks_numeric:
            # Not a plain numeric id: look the value up as a user URL.
            resolved_user = DoubanUser.by_url(resolved_user)

        # Only feeds with both a resolved topic and a resolved user are
        # finalized; everything else stays queued in DoubanFeedOwner.
        if resolved_topic is None or not resolved_user:
            continue

        feed.topic_id = resolved_topic
        feed.user_id = resolved_user
        feed.save()
        pending.delete()
    def __call__(self, data, url):
        """Parse one fetched feed page and store it via ``douban_feed_new``.

        Generator. The record id comes from ``url_last(url)`` and the
        category id from ``self.cid``; title, owner, topic, time and body
        are extracted with the ``self.title`` / ``self.user_id`` /
        ``self.topic_id`` / ``self.time`` / ``self.htm`` callables.

        Args:
            data: Page content handed to ``txt_wrap_by`` and the extractor
                callables (presumably the page's HTML text -- confirm
                against the caller).
            url: URL the page was fetched from; removed from
                ``EXIST_PARSE`` once the feed has been stored.

        Yields:
            ``(parse_like, URL_LIKE % (cid, rid), cid, rid)`` when the page
            contains a ``fav-num`` span.

        Note:
            If ``self.user_id(data)`` returns ``None`` the generator stops
            right away: nothing is stored and ``url`` is left in
            ``EXIST_PARSE``.
        """
        rid = url_last(url)
        cid = self.cid
        title = self.title(data)

        rec_num = txt_wrap_by('<span class="rec-num">', '人</span>', data) or 0

        like_num = 0
        like_block = txt_wrap_by(
            '<span class="fav-num" data-tid="', '</a>喜欢</span>', data
        )
        if like_block:
            like_num = txt_wrap_by('<a href="#">', '人', like_block)
            yield parse_like, URL_LIKE % (cid, rid), cid, rid

        owner_id = self.user_id(data)
        if owner_id is None:
            return

        # Non-numeric owner/topic values that could not be resolved yet;
        # 0 means "nothing pending".
        unresolved_owner = unresolved_topic = 0

        try:
            owner_id = int(owner_id)
        except ValueError:
            known_owner = DoubanUser.by_url(owner_id)
            if known_owner:
                owner_id = known_owner
            else:
                unresolved_owner, owner_id = owner_id, 0

        topic_id = self.topic_id(data)
        try:
            topic_id = int(topic_id)
        except ValueError:
            unresolved_topic, topic_id = topic_id, 0

        posted_at = self.time(data)
        if posted_at:
            posted_at = int_by_string(posted_at)

        body_htm = self.htm(data)
        feed_id = douban_feed_new(
            cid,
            rid,
            rec_num,
            like_num,
            title,
            body_htm,
            posted_at,
            owner_id,
            topic_id,
        )

        if unresolved_owner or unresolved_topic:
            # Queue the raw values under the new feed's id.
            pending = DoubanFeedOwner(
                id=feed_id, topic=unresolved_topic, owner=unresolved_owner
            )
            pending.save()

        # Disabled: fan-out to per-user recommendation crawls.
        #for user_id in user_id_by_txt(data):
        #    yield douban_recommendation_begin_tuple(user_id)

        if url in EXIST_PARSE:
            EXIST_PARSE.remove(url)
def main():
    """Retry resolution of feeds whose group/owner could not be resolved.

    Generator. For each ``DoubanFeedOwner`` row returned by ``ormiter`` the
    matching ``DoubanFeed`` is loaded, then:

    * for feeds whose ``cid`` is ``CID_DOUBAN_FEED_TOPIC`` the group is
      looked up with ``DoubanGroup.by_url``;
    * the user is looked up with ``DoubanUser.by_url`` unless the stored
      value is already an all-digit id.

    A feed with both values resolved is updated and saved, and its
    ``DoubanFeedOwner`` row is deleted.

    Yields:
        ``(parse_group_htm, group_page_url)`` once per group URL that
        ``DoubanGroup.by_url`` could not resolve during this run.
    """
    from zweb.orm import ormiter

    # Group URLs already yielded in this run, so each is requested once.
    seen_group_urls = set()

    for row in ormiter(DoubanFeedOwner):
        feed = DoubanFeed.get(row.id)

        topic = None
        if feed.cid == CID_DOUBAN_FEED_TOPIC:
            group_url = feed.topic_id or row.topic
            group = DoubanGroup.by_url(group_url)
            if group:
                topic = group
            elif group_url not in seen_group_urls:
                seen_group_urls.add(group_url)
                yield (
                    parse_group_htm,
                    "http://www.douban.com/group/%s/" % group_url,
                )

        user = feed.user_id or row.owner
        is_numeric_id = bool(user) and str(user).isdigit()
        if not is_numeric_id:
            # Not a plain numeric id: look the value up as a user URL.
            user = DoubanUser.by_url(user)

        # Rows missing either a resolved topic or a resolved user stay
        # queued in DoubanFeedOwner.
        if topic is None or not user:
            continue

        feed.topic_id = topic
        feed.user_id = user
        feed.save()
        row.delete()
def douban_recommendation_begin_tuple(id):
    """Build the initial recommendation-crawl task for a user, at most once.

    Args:
        id: User identifier; it is converted with ``str`` before use.

    Returns:
        ``None`` when ``DoubanUser.by_url`` returns a truthy value for the
        id or the id is already in ``EXIST``. Otherwise the id is added to
        ``EXIST`` and the tuple
        ``(douban_recommendation, URL_REC % id, 1)`` is returned.
    """
    key = str(id)

    # The DoubanUser lookup runs first; EXIST is only consulted if it
    # returns a falsy value.
    if DoubanUser.by_url(key) or key in EXIST:
        return None

    EXIST.add(key)
    return douban_recommendation, URL_REC % key, 1
Esempio n. 5
0
def douban_recommendation_begin_tuple(id):
    """Return the first recommendation-crawl task for a not-yet-seen user.

    Args:
        id: User identifier; it is converted with ``str`` before use.

    Returns:
        ``None`` if ``DoubanUser.by_url`` returns a truthy value for the id
        or the id is already a member of ``EXIST``. Otherwise the id is
        recorded in ``EXIST`` and
        ``(douban_recommendation, URL_REC % id, 1)`` is returned.
    """
    user_key = str(id)

    # Short-circuit keeps the original order: DoubanUser lookup, then EXIST.
    if DoubanUser.by_url(user_key) or user_key in EXIST:
        return None

    EXIST.add(user_key)
    return douban_recommendation, URL_REC % user_key, 1
Esempio n. 6
0
    def __call__(self, data, url):
        """Parse one fetched feed page and store it via ``douban_feed_new``.

        Generator. The record id comes from ``url_last(url)`` and the
        category id from ``self.cid``; title, owner, topic, time and body
        are extracted with the ``self.title`` / ``self.user_id`` /
        ``self.topic_id`` / ``self.time`` / ``self.htm`` callables.

        Args:
            data: Page content handed to ``txt_wrap_by`` and the extractor
                callables (presumably the page's HTML text -- confirm
                against the caller).
            url: URL the page was fetched from; removed from
                ``EXIST_PARSE`` once the feed has been stored.

        Yields:
            ``(parse_like, URL_LIKE % (cid, rid), cid, rid)`` when the page
            contains a ``fav-num`` span.

        Note:
            If ``self.user_id(data)`` returns ``None`` the generator stops
            right away: nothing is stored and ``url`` is left in
            ``EXIST_PARSE``.
        """
        rid = url_last(url)
        cid = self.cid
        title = self.title(data)

        rec_num = txt_wrap_by('<span class="rec-num">', '人</span>', data) or 0

        like_num = 0
        fav_span = txt_wrap_by(
            '<span class="fav-num" data-tid="', '</a>喜欢</span>', data
        )
        if fav_span:
            like_num = txt_wrap_by('<a href="#">', '人', fav_span)
            yield parse_like, URL_LIKE % (cid, rid), cid, rid

        owner_id = self.user_id(data)
        if owner_id is None:
            return

        # Raw owner/topic values that still need resolving later;
        # 0 means "nothing pending".
        raw_owner = raw_topic = 0

        try:
            owner_id = int(owner_id)
        except ValueError:
            matched_user = DoubanUser.by_url(owner_id)
            if matched_user:
                owner_id = matched_user
            else:
                raw_owner, owner_id = owner_id, 0

        topic_id = self.topic_id(data)
        try:
            topic_id = int(topic_id)
        except ValueError:
            raw_topic, topic_id = topic_id, 0

        created = self.time(data)
        if created:
            created = int_by_string(created)

        htm = self.htm(data)
        feed_id = douban_feed_new(
            cid, rid, rec_num, like_num, title, htm, created, owner_id, topic_id
        )

        if raw_owner or raw_topic:
            # Queue the raw values under the new feed's id.
            DoubanFeedOwner(id=feed_id, topic=raw_topic, owner=raw_owner).save()

        # Disabled: fan-out to per-user recommendation crawls.
        #for user_id in user_id_by_txt(data):
        #    yield douban_recommendation_begin_tuple(user_id)

        if url in EXIST_PARSE:
            EXIST_PARSE.remove(url)