def handler(self, root, users, urlPack):  # @UnusedVariable
    """Persist newly seen Kuaishou authors from a search page and queue the next page.

    For every user whose AUTHOR record was not updated recently, the handler
    publishes an AUTHOR message, saves the raw user payload as a Mongo object,
    registers the saved object for statistics and forwards the author to the
    author-detail pipe (and, while ``pcursor`` is at most 10, to the
    author-videos pipe as well). If the page contained at least one user, it
    then sleeps and publishes a KEYWORD message for ``pcursor + 1``.

    :param root: not used by this handler.
    :param users: list of user dicts; each one must provide ``'user_id'``.
    :param urlPack: request descriptor; ``getKey('pcursor')``,
        ``extra['keyword']`` and ``extra['pcursor']`` are read from it.
    :return: None
    """
    # Validate before touching ``users``: the original only checked the type
    # after len() and the loop had already run, so a non-list payload crashed
    # before the check could ever apply.
    if not isinstance(users, list):
        log.debug("search result is not a list, skipped: [{}]".format(type(users)))
        return

    log.debug("return users len:[{}]".format(len(users)))
    for user in users:
        user_id = user['user_id']
        key = dbtools.gen_object_key('AUTHOR', 'kuaishou', user_id)
        # 365 * 86400 is presumably a freshness window in seconds (one year)
        # -- confirm against isObjectUpdatedRecently.
        if self.db.isObjectUpdatedRecently(const.getTable('AUTHOR'), key, 365 * 86400):
            continue

        log.debug("search result, pcursor={}, user_id={}".format(urlPack.getKey('pcursor'), user_id))
        self.publish(Message(const.DATA_TYPE_AUTHOR, user_id))

        obj = dbtools.MongoObject()
        obj.setMeta(const.DATA_TYPE_AUTHOR, const_kuaishou.DATA_PROVIDER, user_id)
        obj.setData(user)
        obj.save()
        log.debug("KuaiShouSearchUserSaver Inserting obj {}".format(obj.getLastObjectId()))
        # NOTE(review): the saved object is an AUTHOR but it is counted under
        # DATA_TYPE_VIDEO -- looks like a copy/paste slip; confirm before changing.
        self.addStatObject(obj.getLastObjectId(), const_kuaishou.DATA_TYPE_VIDEO)

        # Ask the author-detail pipe to crawl this author.
        detail_pipe = self.pipe.stat.getPipeByName('KuaiShouAuthorDetailPipeCspub')
        detail_pipe.addMessageObject(Message('AUTHOR', user_id))

        # Only authors found on the first result pages also get their videos crawled.
        if int(urlPack.getKey('pcursor')) <= 10:
            videos_pipe = self.pipe.stat.getPipeByName('KuaiShouAuthorVideosPipeCspub')
            videos_pipe.addMessageObject(Message('AUTHOR', user_id))

    # A non-empty page means there may be more results: request the next page.
    if users:
        time.sleep(10)
        msg = Message(const_kuaishou.DATA_TYPE_KEYWORD, urlPack.extra['keyword'])
        msg.addKey('pcursor', int(urlPack.extra['pcursor']) + 1)
        self.publish(msg)
        log.debug("publish to next page: {}".format(self.pipe.name))
    return
Exemple #2
0
    def handler(self, root, data, urlPack):  # @UnusedVariable
        """Store Huoshan feed videos and publish authors that need a refresh.

        Each entry of ``data["data"]`` is wrapped in a VIDEO Mongo object and
        saved unless ``self.db.isItemUpdatedRecently`` reports its key as
        recent. The video's author is then published as an AUTHOR message
        (and registered for statistics) when the author key was not updated
        within the ``3 * 86400`` window.

        :param root: not used by this handler.
        :param data: dict whose ``"data"`` entry is a sequence of feed items;
            each item provides ``['data']['id']`` and
            ``['data']['author']['id']``.
        :param urlPack: not used by this handler.
        :return: None
        """
        log.debug("huoshan main_feed_up saver handler, len={}".format(
            len(data["data"])))
        for info in data["data"]:
            vid = str(info['data']['id'])
            uid = str(info['data']['author']['id'])

            # Store the video itself directly.
            obj = dbtools.MongoObject()
            obj.setMeta("VIDEO", const_huoshan.DATA_PROVIDER, vid)
            obj.setUserId(uid)
            obj.setData(info)
            if not self.db.isItemUpdatedRecently(obj.key):
                obj.save()
                log.debug(
                    "Inserting obj from HuoshanMainFeedUp video: {}".format(
                        obj.getLastObjectId()))
            else:
                # Nothing was saved on this path, so identify the video by its
                # key instead of a last-inserted id that was never produced.
                log.debug(
                    "HuoshanMainFeedUp video: {} already inserted".format(
                        obj.key))

            # If the author has not been updated for more than three days,
            # publish the uid.
            authorKey = dbtools.gen_object_key('AUTHOR',
                                               const_huoshan.DATA_PROVIDER,
                                               uid)
            if not self.db.isItemUpdatedRecently(authorKey, 3 * 86400):
                self.addStatObject(authorKey, const_huoshan.DATA_TYPE_AUTHOR)
                msg = Message(const_huoshan.DATA_TYPE_AUTHOR, uid)
                self.pipe.publish(msg)
            else:
                log.debug("author updated recently")

        return
Exemple #3
0
    def handler(self, root, data, urlPack):  # @UnusedVariable
        """Save Kuaishou main-feed videos and schedule stale authors.

        Every item of ``data["feeds"]`` becomes a VIDEO Mongo object that is
        saved when its key was not updated recently. For the item's author, a
        bare AUTHOR object is saved, the author key is registered for
        statistics and an AUTHOR message is published, unless the author key
        was updated within the ``3 * 86400`` window.

        :param root: not used by this handler.
        :param data: dict with a ``"feeds"`` sequence; each feed provides
            ``'photo_id'`` and ``'user_id'``.
        :param urlPack: not used by this handler.
        :return: None
        """
        feeds = data["feeds"]
        log.debug("kuaishou main feed saver handler, len={}".format(
            len(feeds)))

        for feed in feeds:
            photo_id = str(feed['photo_id'])
            author_id = str(feed['user_id'])

            # Store the video itself directly.
            video = dbtools.MongoObject()
            video.setMeta("VIDEO", const_kuaishou.DATA_PROVIDER, photo_id)
            video.setUserId(author_id)
            video.setData(feed)
            video_is_recent = self.db.isItemUpdatedRecently(video.key)
            if not video_is_recent:
                video.save()
                log.debug("Inserting obj from KuaishouMainFeed: {}".format(
                    video.getLastObjectId()))

            # If the author has not been updated for more than three days,
            # publish the uid.
            author_key = dbtools.gen_object_key(
                'AUTHOR', const_kuaishou.DATA_PROVIDER, author_id)
            if self.db.isItemUpdatedRecently(author_key, 3 * 86400):
                log.notice("kuaishou author updated recently")
                continue

            author_obj = dbtools.MongoObject()
            author_obj.setMeta("AUTHOR", const_kuaishou.DATA_PROVIDER,
                               author_id)
            author_obj.save()
            self.addStatObject(author_key, const_kuaishou.DATA_TYPE_AUTHOR)
            self.pipe.publish(
                Message(const_kuaishou.DATA_TYPE_AUTHOR, author_id))

        return
Exemple #4
0
 def onReceiveMsg(self, msg):
     """Queue a video-detail request for a VIDEO message not yet stored.

     :param msg: incoming message; ``msgType`` and ``msgData`` are read.
     :return: True when a UrlPack was added for the video, False when the
         message is not a Kuaishou VIDEO message or the video key already
         exists in the VIDEO table.
     """
     log.debug("KuaiShouVideoDetailProvider receive {}".format(msg))

     # Anything other than a video message is ignored.
     if msg.msgType != const_kuaishou.DATA_TYPE_VIDEO:
         return False

     video_key = dbtools.gen_object_key(const.DATA_TYPE_VIDEO, 'kuaishou', msg.msgData)
     existing = self.db.getOne(const.getTable('VIDEO'), video_key, '_key_')
     if existing is not None:
         log.debug("vid:{} has already inserted".format(msg.msgData))
         return False

     # Unknown video: build the request from the provider's url/form template.
     pack = urlprovider.UrlPack(priority=0, url=self.url)
     pack.setForm(self.form.format(vid=msg.msgData))
     pack.fillMsg(msg, self.pipe)
     self.add(pack)
     return True
Exemple #5
0
    def handler(self, root, data, urlPack):  # @UnusedVariable
        """Publish commenters of a Huoshan video and request the next comment page.

        Every commenter whose AUTHOR key was not updated within the
        ``3 * 86400`` window is published as an AUTHOR message. When
        ``data['extra']['has_more']`` is truthy, a VIDEO message for the same
        ``vid`` is published with its ``offset`` extra advanced by one.

        :param root: not used by this handler.
        :param data: dict providing ``["data"]["comments"]`` (each comment
            has ``["user"]["id"]``) and ``['extra']['has_more']``.
        :param urlPack: request descriptor; ``getKey("vid")`` and
            ``getKey("offset")`` are read from it.
        :return: None
        """
        comments = data["data"]["comments"]
        vid = urlPack.getKey("vid")
        offset = urlPack.getKey("offset")

        for comment in comments:
            uid = comment["user"]["id"]
            log.debug("HuoshanVideoComments get one uid: {}".format(uid))
            authorKey = dbtools.gen_object_key('AUTHOR',
                                               const_huoshan.DATA_PROVIDER,
                                               uid)
            if not self.db.isItemUpdatedRecently(authorKey, 3 * 86400):
                msg = Message(const_huoshan.DATA_TYPE_AUTHOR, uid)
                self.publish(msg)
            else:
                log.debug("huoshan user_id:{} has already updated".format(uid))

        if data['extra']['has_more']:
            msg = Message(const.DATA_TYPE_VIDEO, vid)
            # Coerce before incrementing so an offset that arrives as a
            # numeric string still advances (a plain ``offset + 1`` would
            # raise TypeError); int offsets behave exactly as before.
            msg.setExtra('offset', int(offset) + 1)
            self.publish(msg)
        return