def main():
    """Walk every DoubanFeedOwner row and try to complete its DoubanFeed.

    This is a generator. For each pending row it looks up the matching
    DoubanFeed and tries to resolve the feed's topic (a Douban group) and
    its owner. When a group URL is unknown to ``DoubanGroup.by_url`` it
    yields ``(parse_group_htm, url)`` once per distinct group URL. When
    both the topic and the user are resolved, the feed is updated and saved
    and the pending row is deleted.
    """
    from zweb.orm import ormiter

    # Group URLs already yielded during this run, to avoid duplicates.
    requested_groups = set()

    for pending in ormiter(DoubanFeedOwner):
        resolved_topic = None
        feed = DoubanFeed.get(pending.id)

        if feed.cid == CID_DOUBAN_FEED_TOPIC:
            group_url = feed.topic_id or pending.topic
            group = DoubanGroup.by_url(group_url)
            if group:
                resolved_topic = group
            elif group_url not in requested_groups:
                requested_groups.add(group_url)
                yield parse_group_htm, 'http://www.douban.com/group/%s/' % group_url

        # NOTE(review): the original was collapsed onto one line; the user
        # lookup is assumed to run for every row (loop level) -- confirm.
        resolved_user = feed.user_id or pending.owner
        if not resolved_user or not str(resolved_user).isdigit():
            # Not an all-digit id: look it up through DoubanUser.by_url.
            resolved_user = DoubanUser.by_url(resolved_user)

        if resolved_topic is not None and resolved_user:
            feed.topic_id = resolved_topic
            feed.user_id = resolved_user
            feed.save()
            pending.delete()
def __call__(self, data, url):
    """Parse one fetched page and store it through ``douban_feed_new``.

    This is a generator. ``data`` is the page text and ``url`` the address
    it came from. It extracts the title, recommendation/like counts, owner,
    topic and time, then calls ``douban_feed_new``. If the owner or topic
    could not be turned into an integer id, a ``DoubanFeedOwner`` row
    holding the raw values is saved. It may yield a
    ``(parse_like, like_url, cid, rid)`` tuple. Finally ``url`` is removed
    from ``EXIST_PARSE`` if present.

    Stops early, before storing anything, when ``self.user_id(data)`` is None.
    """
    rid = url_last(url)
    cid = self.cid
    title = self.title(data)

    rec_num = txt_wrap_by('<span class="rec-num">', '人</span>', data) or 0

    like_num = txt_wrap_by(
        '<span class="fav-num" data-tid="', '</a>喜欢</span>', data
    ) or 0
    if like_num:
        # Narrow the matched fragment down to the text before '人'.
        like_num = txt_wrap_by('<a href="#">', '人', like_num)
        # NOTE(review): the original was collapsed onto one line; this yield
        # is assumed to belong inside the `if like_num` branch -- confirm.
        yield parse_like, URL_LIKE % (cid, rid), cid, rid

    # Raw (non-integer) owner / topic values that still need resolving.
    raw_topic = raw_owner = 0

    owner_id = self.user_id(data)
    if owner_id is None:
        return
    try:
        owner_id = int(owner_id)
    except ValueError:
        known_owner = DoubanUser.by_url(owner_id)
        if not known_owner:
            raw_owner = owner_id
        owner_id = known_owner or 0

    topic_id = self.topic_id(data)
    try:
        topic_id = int(topic_id)
    except ValueError:
        raw_topic, topic_id = topic_id, 0

    timestamp = self.time(data)
    if timestamp:
        timestamp = int_by_string(timestamp)

    htm = self.htm(data)
    feed_id = douban_feed_new(
        cid, rid, rec_num, like_num, title, htm, timestamp, owner_id, topic_id
    )

    if raw_owner or raw_topic:
        DoubanFeedOwner(id=feed_id, topic=raw_topic, owner=raw_owner).save()

    # Disabled in the original:
    # for user_id in user_id_by_txt(data):
    #     yield douban_recommendation_begin_tuple(user_id)

    if url in EXIST_PARSE:
        EXIST_PARSE.remove(url)
def main():
    """Walk every DoubanFeedOwner row and try to complete its DoubanFeed.

    This is a generator. For each pending row it looks up the matching
    DoubanFeed and tries to resolve the feed's topic (a Douban group) and
    its owner. When a group URL is unknown to ``DoubanGroup.by_url`` it
    yields ``(parse_group_htm, url)`` once per distinct group URL. When
    both the topic and the user are resolved, the feed is updated and saved
    and the pending row is deleted.
    """
    from zweb.orm import ormiter

    # Group URLs already yielded during this run, to avoid duplicates.
    seen_group_urls = set()

    for owner_row in ormiter(DoubanFeedOwner):
        topic = None
        feed = DoubanFeed.get(owner_row.id)

        if feed.cid == CID_DOUBAN_FEED_TOPIC:
            group_url = feed.topic_id or owner_row.topic
            group = DoubanGroup.by_url(group_url)
            if group:
                topic = group
            elif group_url not in seen_group_urls:
                seen_group_urls.add(group_url)
                yield parse_group_htm, "http://www.douban.com/group/%s/" % group_url

        # NOTE(review): the original was collapsed onto one line; the user
        # lookup is assumed to run for every row (loop level) -- confirm.
        user = feed.user_id or owner_row.owner
        if not user or not str(user).isdigit():
            # Not an all-digit id: look it up through DoubanUser.by_url.
            user = DoubanUser.by_url(user)

        if topic is not None and user:
            feed.topic_id = topic
            feed.user_id = user
            feed.save()
            owner_row.delete()
def douban_recommendation_begin_tuple(id):
    """Build the first crawl task for a user's recommendation page.

    ``id`` is converted to a string. Returns None when
    ``DoubanUser.by_url`` already resolves it or when it is already in
    ``EXIST``. Otherwise records it in ``EXIST`` and returns the tuple
    ``(douban_recommendation, URL_REC % id, 1)``.
    """
    user_key = str(id)

    # Checked in the same order as before: known user first, then EXIST.
    if DoubanUser.by_url(user_key) or user_key in EXIST:
        return None

    EXIST.add(user_key)
    return douban_recommendation, URL_REC % user_key, 1
def douban_recommendation_begin_tuple(id):
    """Build the first crawl task for a user's recommendation page.

    ``id`` is converted to a string. Returns None when
    ``DoubanUser.by_url`` already resolves it or when it is already in
    ``EXIST``. Otherwise records it in ``EXIST`` and returns the tuple
    ``(douban_recommendation, URL_REC % id, 1)``.
    """
    uid = str(id)

    # Checked in the same order as before: known user first, then EXIST.
    already_handled = DoubanUser.by_url(uid) or uid in EXIST
    if already_handled:
        return None

    EXIST.add(uid)
    return douban_recommendation, URL_REC % uid, 1
def __call__(self, data, url):
    """Parse one fetched page and store it through ``douban_feed_new``.

    This is a generator. ``data`` is the page text and ``url`` the address
    it came from. It extracts the title, recommendation/like counts, owner,
    topic and time, then calls ``douban_feed_new``. If the owner or topic
    could not be turned into an integer id, a ``DoubanFeedOwner`` row
    holding the raw values is saved. It may yield a
    ``(parse_like, like_url, cid, rid)`` tuple. Finally ``url`` is removed
    from ``EXIST_PARSE`` if present.

    Stops early, before storing anything, when ``self.user_id(data)`` is None.
    """
    rid = url_last(url)
    cid = self.cid
    title = self.title(data)

    rec_num = txt_wrap_by('<span class="rec-num">', '人</span>', data) or 0

    like_num = txt_wrap_by(
        '<span class="fav-num" data-tid="', '</a>喜欢</span>', data
    ) or 0
    if like_num:
        # Narrow the matched fragment down to the text before '人'.
        like_num = txt_wrap_by('<a href="#">', '人', like_num)
        # NOTE(review): the original was collapsed onto one line; this yield
        # is assumed to belong inside the `if like_num` branch -- confirm.
        yield parse_like, URL_LIKE % (cid, rid), cid, rid

    # Raw (non-integer) owner / topic values that still need resolving.
    pending_topic = pending_owner = 0

    owner_id = self.user_id(data)
    if owner_id is None:
        return
    try:
        owner_id = int(owner_id)
    except ValueError:
        looked_up = DoubanUser.by_url(owner_id)
        if not looked_up:
            pending_owner = owner_id
        owner_id = looked_up or 0

    topic_id = self.topic_id(data)
    try:
        topic_id = int(topic_id)
    except ValueError:
        pending_topic, topic_id = topic_id, 0

    created = self.time(data)
    if created:
        created = int_by_string(created)

    body_htm = self.htm(data)
    feed_id = douban_feed_new(
        cid, rid, rec_num, like_num, title, body_htm, created, owner_id, topic_id
    )

    if pending_owner or pending_topic:
        DoubanFeedOwner(id=feed_id, topic=pending_topic, owner=pending_owner).save()

    # Disabled in the original:
    # for user_id in user_id_by_txt(data):
    #     yield douban_recommendation_begin_tuple(user_id)

    if url in EXIST_PARSE:
        EXIST_PARSE.remove(url)