def handler(self, root, users, urlPack):  # @UnusedVariable
    """Process one page of user search results and queue the next page.

    Every user whose AUTHOR record is not reported as recently updated
    (``365 * 86400`` is passed as the freshness window, presumably seconds)
    is published, saved as an AUTHOR object, and forwarded to the
    ``KuaiShouAuthorDetailPipeCspub`` pipe.  While the current ``pcursor``
    is at most 10 the user is also forwarded to the
    ``KuaiShouAuthorVideosPipeCspub`` pipe.

    When ``users`` is a non-empty list, the handler sleeps 10 seconds and
    publishes a keyword message carrying ``pcursor + 1``.

    :param root: unused.
    :param users: iterable of user dicts; each must contain ``'user_id'``.
    :param urlPack: request descriptor; ``getKey('pcursor')``,
        ``extra['keyword']`` and ``extra['pcursor']`` are read from it.
    """
    log.debug("return users len:[{}]".format(len(users)))

    for user in users:
        user_id = user['user_id']
        key = dbtools.gen_object_key('AUTHOR', 'kuaishou', user_id)
        if self.db.isObjectUpdatedRecently(const.getTable('AUTHOR'), key,
                                           365 * 86400):
            # Author is fresh enough; nothing to do for this user.
            continue

        log.debug("search result, pcursor={}, user_id={}".format(
            urlPack.getKey('pcursor'), user_id))
        self.publish(Message(const.DATA_TYPE_AUTHOR, user_id))

        # Persist the raw search entry as an AUTHOR object.
        obj = dbtools.MongoObject()
        obj.setMeta(const.DATA_TYPE_AUTHOR, const_kuaishou.DATA_PROVIDER,
                    user_id)
        obj.setData(user)
        obj.save()
        object_id = obj.getLastObjectId()
        log.debug("KuaiShouSearchUserSaver Inserting obj {}".format(object_id))
        # NOTE(review): an AUTHOR object is counted under DATA_TYPE_VIDEO
        # here -- confirm this stat type is intended.
        self.addStatObject(object_id, const_kuaishou.DATA_TYPE_VIDEO)

        # Author detail
        detail_pipe = self.pipe.stat.getPipeByName(
            'KuaiShouAuthorDetailPipeCspub')
        detail_pipe.addMessageObject(Message('AUTHOR', user_id))

        # Only the first pages of a search also trigger a video crawl.
        if int(urlPack.getKey('pcursor')) <= 10:
            videos_pipe = self.pipe.stat.getPipeByName(
                'KuaiShouAuthorVideosPipeCspub')
            videos_pipe.addMessageObject(Message('AUTHOR', user_id))

    # A non-empty page means there may be more results: request the next one.
    if isinstance(users, list) and users:
        time.sleep(10)
        msg = Message(const_kuaishou.DATA_TYPE_KEYWORD,
                      urlPack.extra['keyword'])
        msg.addKey('pcursor', int(urlPack.extra['pcursor']) + 1)
        self.publish(msg)
        log.debug("publish to next page: {}".format(self.pipe.name))
    return
def handler(self, root, data, urlPack):  # @UnusedVariable
    """Store every video of a Huoshan main-feed response and publish stale authors.

    Each entry of ``data["data"]`` is wrapped in a VIDEO object and saved
    unless ``self.db.isItemUpdatedRecently`` reports its key as recent.
    The entry's author is then checked with a ``3 * 86400`` window
    (presumably seconds, i.e. three days); authors not updated within it
    are recorded via ``addStatObject`` and published on ``self.pipe``.

    :param root: unused.
    :param data: response dict; ``data["data"]`` is a sequence of entries
        exposing ``['data']['id']`` and ``['data']['author']['id']``.
    :param urlPack: unused.
    """
    items = data["data"]
    log.debug("huoshan main_feed_up saver handler, len={}".format(
        len(items)))

    for info in items:
        vid = str(info['data']['id'])
        uid = str(info['data']['author']['id'])

        # Store the video directly.
        obj = dbtools.MongoObject()
        obj.setMeta("VIDEO", const_huoshan.DATA_PROVIDER, vid)
        obj.setUserId(uid)
        obj.setData(info)
        if not self.db.isItemUpdatedRecently(obj.key):
            obj.save()
            log.debug(
                "Inserting obj from HuoshanMainFeedUp video: {}".format(
                    obj.getLastObjectId()))
        else:
            # NOTE(review): obj was not saved on this path -- confirm that
            # getLastObjectId() yields something meaningful here.
            log.debug(
                "HuoshanMainFeedUp video: {} already inserted".format(
                    obj.getLastObjectId()))

        # If the author has not been updated for more than three days,
        # publish the uid.
        authorKey = dbtools.gen_object_key('AUTHOR',
                                           const_huoshan.DATA_PROVIDER, uid)
        if not self.db.isItemUpdatedRecently(authorKey, 3 * 86400):
            self.addStatObject(authorKey, const_huoshan.DATA_TYPE_AUTHOR)
            msg = Message(const_huoshan.DATA_TYPE_AUTHOR, uid)
            self.pipe.publish(msg)
        else:
            log.debug("author updated recently")
    return
def handler(self, root, data, urlPack):  # @UnusedVariable
    """Save the videos of a KuaiShou main-feed response and publish stale authors.

    For each entry of ``data["feeds"]`` a VIDEO object is built and saved
    unless its key is reported as recently updated.  The entry's author is
    then checked against a ``3 * 86400`` window; when not recent, an AUTHOR
    object is saved, the author key is added to the stats and an author
    message is published on ``self.pipe``.

    :param root: unused.
    :param data: response dict; entries of ``data["feeds"]`` expose
        ``'photo_id'`` and ``'user_id'``.
    :param urlPack: unused.
    """
    feeds = data["feeds"]
    log.debug("kuaishou main feed saver handler, len={}".format(len(feeds)))

    provider = const_kuaishou.DATA_PROVIDER
    for feed in feeds:
        video_id = str(feed['photo_id'])
        author_id = str(feed['user_id'])

        # Store the video directly.
        video = dbtools.MongoObject()
        video.setMeta("VIDEO", provider, video_id)
        video.setUserId(author_id)
        video.setData(feed)
        video_is_fresh = self.db.isItemUpdatedRecently(video.key)
        if not video_is_fresh:
            video.save()
            log.debug("Inserting obj from KuaishouMainFeed: {}".format(
                video.getLastObjectId()))

        # If the author has not been updated for more than three days,
        # publish the uid.
        author_key = dbtools.gen_object_key('AUTHOR', provider, author_id)
        if self.db.isItemUpdatedRecently(author_key, 3 * 86400):
            log.notice("kuaishou author updated recently")
            continue

        author = dbtools.MongoObject()
        author.setMeta("AUTHOR", provider, author_id)
        author.save()
        self.addStatObject(author_key, const_kuaishou.DATA_TYPE_AUTHOR)
        self.pipe.publish(Message(const_kuaishou.DATA_TYPE_AUTHOR, author_id))
    return
def onReceiveMsg(self, msg):
    """Queue a video-detail request for a VIDEO message that is not stored yet.

    :param msg: incoming message; its ``msgType`` and ``msgData`` are read.
    :return: ``True`` when a request was queued through ``self.add``;
        ``False`` when the message is not a VIDEO message or the video is
        already present in the VIDEO table.
    """
    log.debug("KuaiShouVideoDetailProvider receive {}".format(msg))

    # Messages of any other type are ignored; return an explicit False
    # instead of falling off the end so every path returns a bool.
    if msg.msgType != const_kuaishou.DATA_TYPE_VIDEO:
        return False

    key = dbtools.gen_object_key(const.DATA_TYPE_VIDEO, 'kuaishou',
                                 msg.msgData)
    if self.db.getOne(const.getTable('VIDEO'), key, '_key_') is not None:
        log.debug("vid:{} has already inserted".format(msg.msgData))
        return False

    urlPack = urlprovider.UrlPack(priority=0, url=self.url)
    urlPack.setForm(self.form.format(vid=msg.msgData))
    urlPack.fillMsg(msg, self.pipe)
    self.add(urlPack)
    return True
def handler(self, root, data, urlPack):  # @UnusedVariable
    """Publish stale comment authors and request the next page of comments.

    Every commenter in ``data["data"]["comments"]`` whose AUTHOR key is not
    reported as updated within ``3 * 86400`` (presumably seconds) is
    published as an author message.  When ``data['extra']['has_more']`` is
    truthy, a VIDEO message for the same ``vid`` is published with
    ``offset + 1`` attached as extra.

    :param root: unused.
    :param data: response dict; ``data["data"]["comments"]`` entries expose
        ``["user"]["id"]``, and ``data['extra']['has_more']`` is read.
    :param urlPack: request descriptor; the ``"vid"`` and ``"offset"`` keys
        are read through ``getKey``.
    """
    comments = data["data"]["comments"]
    vid = urlPack.getKey("vid")
    offset = urlPack.getKey("offset")

    for comment in comments:
        uid = comment["user"]["id"]
        log.debug("HuoshanVideoComments get one uid: {}".format(uid))
        authorKey = dbtools.gen_object_key('AUTHOR',
                                           const_huoshan.DATA_PROVIDER, uid)
        if not self.db.isItemUpdatedRecently(authorKey, 3 * 86400):
            msg = Message(const_huoshan.DATA_TYPE_AUTHOR, uid)
            self.publish(msg)
        else:
            log.debug("huoshan user_id:{} has already updated".format(uid))

    if data['extra']['has_more']:
        msg = Message(const.DATA_TYPE_VIDEO, vid)
        # Coerce before incrementing: a string offset would make
        # ``offset + 1`` raise TypeError.  No-op when it is already an int.
        # TODO confirm the type getKey() returns for "offset".
        msg.setExtra('offset', int(offset) + 1)
        self.publish(msg)
    return