Exemple #1
0
    def handler(self, root, data, urlPack):  # @UnusedVariable
        """Save each video of the feed and publish authors that look stale.

        For every entry in ``data["feeds"]`` a VIDEO object is built and saved
        unless ``self.db.isItemUpdatedRecently`` reports its key as recent.
        The entry's author key is then checked against a ``3 * 86400`` window;
        when it is not recent, a bare AUTHOR object is saved, the key is
        passed to ``self.addStatObject`` and an AUTHOR message is published on
        ``self.pipe``.

        ``root`` and ``urlPack`` are not used here.
        """
        feeds = data["feeds"]
        log.debug("kuaishou main feed saver handler, len={}".format(
            len(feeds)))
        for feed in feeds:
            video_id = str(feed['photo_id'])
            user_id = str(feed['user_id'])

            # Store the video itself straight away.
            video = dbtools.MongoObject()
            video.setMeta("VIDEO", const_kuaishou.DATA_PROVIDER, video_id)
            video.setUserId(user_id)
            video.setData(feed)
            video_is_recent = self.db.isItemUpdatedRecently(video.key)
            if not video_is_recent:
                video.save()
                log.debug("Inserting obj from KuaishouMainFeed: {}".format(
                    video.getLastObjectId()))

            # Publish the uid when the author has gone more than three days
            # without an update.
            author_key = dbtools.gen_object_key(
                'AUTHOR', const_kuaishou.DATA_PROVIDER, user_id)
            if self.db.isItemUpdatedRecently(author_key, 3 * 86400):
                log.notice("kuaishou author updated recently")
                continue

            author = dbtools.MongoObject()
            author.setMeta("AUTHOR", const_kuaishou.DATA_PROVIDER, user_id)
            author.save()
            self.addStatObject(author_key, const_kuaishou.DATA_TYPE_AUTHOR)
            self.pipe.publish(
                Message(const_kuaishou.DATA_TYPE_AUTHOR, user_id))

        return
Exemple #2
0
    def handler(self, root, data, urlPack):  # @UnusedVariable
        """Save each huoshan feed video and publish authors that look stale.

        Every entry of ``data["data"]`` is stored as a VIDEO object unless
        ``self.db.isItemUpdatedRecently`` reports its key as recent. The
        entry's author key is then checked against a ``3 * 86400`` window
        and, when it is not recent, passed to ``self.addStatObject`` and
        published as an AUTHOR message on ``self.pipe``.

        ``root`` and ``urlPack`` are not used here.
        """
        log.debug("huoshan main_feed_up saver handler, len={}".format(
            len(data["data"])))
        for info in data["data"]:
            vid = str(info['data']['id'])
            uid = str(info['data']['author']['id'])

            # Store the video directly.
            obj = dbtools.MongoObject()
            obj.setMeta("VIDEO", const_huoshan.DATA_PROVIDER, vid)
            obj.setUserId(uid)
            obj.setData(info)
            if not self.db.isItemUpdatedRecently(obj.key):
                obj.save()
                log.debug(
                    "Inserting obj from HuoshanMainFeedUp video: {}".format(
                        obj.getLastObjectId()))
            else:
                # NOTE(review): nothing was saved on this path, so
                # getLastObjectId() may not identify this video -- confirm.
                log.debug(
                    "HuoshanMainFeedUp video: {} already inserted".format(
                        obj.getLastObjectId()))

            # Publish the uid when the author has gone more than three days
            # without an update.
            authorKey = dbtools.gen_object_key('AUTHOR',
                                               const_huoshan.DATA_PROVIDER,
                                               uid)
            if not self.db.isItemUpdatedRecently(authorKey, 3 * 86400):
                self.addStatObject(authorKey, const_huoshan.DATA_TYPE_AUTHOR)
                msg = Message(const_huoshan.DATA_TYPE_AUTHOR, uid)
                self.pipe.publish(msg)
            else:
                log.debug("author updated recently")

        return
    def handleChallengeInfo(self, root, dataDict, urlPack):  # @UnusedVariable
        """Save and publish qualifying topics, then request the next page.

        Each item of ``dataDict['challenge_list']`` contributes its
        ``'challenge_info'`` dict. Items whose ``user_count`` is below 10 are
        skipped. The remaining ones are saved to the topic table, published
        and passed to ``self.addStatObject`` unless
        ``self.db.isObjectUpdatedRecently`` reports them as recent within an
        ``86400`` window.

        When ``dataDict['has_more']`` is positive, a keyword message is
        published with the cursor advanced by 20.

        ``root`` is not used here.
        """
        for entry in dataDict['challenge_list']:
            info = entry['challenge_info']
            if info['user_count'] < 10:
                continue

            topic = dbtools.MongoObject(self.db)
            topic.setMeta(const_douyin.DATA_TYPE_TOPIC,
                          const_douyin.DATA_PROVIDER, info["cid"])
            topic.setData(info)

            # Skip topics already updated within the last day.
            if self.db.isObjectUpdatedRecently(
                    const_douyin.MONGO_TABLE_TOPIC, topic.key, 86400):
                continue

            topic.save(const_douyin.MONGO_TABLE_TOPIC)
            log.debug(
                "DouyinTopicByKeywordSaver Inserting obj _key_={}, user_count={}"
                .format(topic.key, info['user_count']))
            self.publish(Message(const_douyin.DATA_TYPE_TOPIC, info["cid"]))
            self.addStatObject(topic.getLastObjectId(), "TOPIC")

        if not dataDict['has_more'] > 0:
            return

        next_page = Message(const_douyin.DATA_TYPE_TOPIC_KEYWORD,
                            urlPack.getKey('keyword'))
        next_page.setExtra('keyword', urlPack.getKey('keyword'))
        next_page.setExtra('cursor', urlPack.getKey('cursor', 0) + 20)
        self.publish(next_page)
        return
 def handleChallengeInfo(self, root, data, urlPack):  # @UnusedVariable
     """Publish a TOPIC message for ``data["cid"]`` unless it is recent.

     A ``MongoObject`` is built only to derive the topic key; nothing is
     saved here. ``root`` and ``urlPack`` are not used.
     """
     # NOTE(review): ``data`` is passed as a second positional argument with
     # no placeholder in the message -- confirm the logger prints it.
     log.debug("handleChallengeInfo", data)

     topic = dbtools.MongoObject(self.db)
     topic.setMeta(const_douyin.DATA_TYPE_TOPIC, const_douyin.DATA_PROVIDER, data["cid"])

     recently_updated = self.db.isObjectUpdatedRecently(const_douyin.MONGO_TABLE_TOPIC, topic.key)
     if recently_updated:
         return
     topic_msg = Message(const_douyin.DATA_TYPE_TOPIC, data["cid"])
     self.publish(topic_msg)
     return
 def handler(self, root, users, urlPack):  # @UnusedVariable
     """Process one page of user search results and request the next page.

     For every user whose AUTHOR key is not reported as recent within a
     ``365 * 86400`` window, an AUTHOR message is published, the user is
     saved, and a message is queued on the 'KuaiShouAuthorDetailPipeCspub'
     pipe. While ``pcursor`` is at most 10, a message is also queued on the
     'KuaiShouAuthorVideosPipeCspub' pipe.

     When ``users`` is a non-empty list, the handler sleeps 10 seconds and
     publishes a keyword message for ``pcursor + 1``.

     ``root`` is not used here.
     """
     log.debug("return users len:[{}]".format(len(users)))
     for entry in users:
         author_key = dbtools.gen_object_key('AUTHOR', 'kuaishou', entry['user_id'])
         if self.db.isObjectUpdatedRecently(const.getTable('AUTHOR'), author_key, 365 * 86400):
             continue

         log.debug("search result, pcursor={}, user_id={}".format(urlPack.getKey('pcursor'), entry['user_id']))
         self.publish(Message(const.DATA_TYPE_AUTHOR, entry['user_id']))

         author = dbtools.MongoObject()
         author.setMeta(const.DATA_TYPE_AUTHOR, const_kuaishou.DATA_PROVIDER, entry['user_id'])
         author.setData(entry)
         author.save()
         log.debug("KuaiShouSearchUserSaver Inserting obj {}".format(author.getLastObjectId()))
         # NOTE(review): an AUTHOR object is registered under DATA_TYPE_VIDEO
         # here -- confirm this is intentional.
         self.addStatObject(author.getLastObjectId(), const_kuaishou.DATA_TYPE_VIDEO)

         # Queue the author on the detail pipe.
         detail_pipe = self.pipe.stat.getPipeByName('KuaiShouAuthorDetailPipeCspub')
         detail_pipe.addMessageObject(Message('AUTHOR', entry['user_id']))

         # Queue the author's videos only while pcursor is at most 10.
         if int(urlPack.getKey('pcursor')) <= 10:
             videos_pipe = self.pipe.stat.getPipeByName('KuaiShouAuthorVideosPipeCspub')
             videos_pipe.addMessageObject(Message('AUTHOR', entry['user_id']))

     if type(users) is not list or not len(users) > 0:
         return

     time.sleep(10)
     next_page = Message(const_kuaishou.DATA_TYPE_KEYWORD, urlPack.extra['keyword'])
     next_page.addKey('pcursor', int(urlPack.extra['pcursor']) + 1)
     self.publish(next_page)
     log.debug("publish to next page: {}".format(self.pipe.name))
     return
    def handleUserDetail(self, root, data, urlPack):  # @UnusedVariable
        """Save every aweme of an author page and request the next page.

        Each item of ``data["aweme_list"]`` is saved to the video table and
        passed to ``self.addStatObject``. When ``data["has_more"]`` equals 1,
        an AUTHOR message carrying ``data["max_cursor"]`` is published for
        the author of the last aweme processed.

        A page with ``has_more == 1`` but an empty ``aweme_list`` gives no
        author id to publish for. That case is logged and skipped instead of
        failing on an unbound local.

        ``root`` and ``urlPack`` are not used here.
        """
        cursor = data["max_cursor"]
        aweme_list = data["aweme_list"]
        # Author of the last aweme seen; stays None when the page is empty.
        uid = None
        for aweme in aweme_list:
            vid = aweme["aweme_id"]
            uid = aweme["author_user_id"]
            obj = dbtools.MongoObject()
            obj.setMeta(const_douyin.DATA_TYPE_VIDEO,
                        const_douyin.DATA_PROVIDER,
                        vid,
                        version=const_douyin.DATA_VERSION)
            obj.setData(aweme)
            obj.setUserId(uid)
            obj.save(const_douyin.MONGO_TABLE_VIDEO)
            log.debug("DouyinAuthorVideoSaver Inserting obj {}".format(
                obj.getLastObjectId()))
            self.addStatObject(obj.getLastObjectId(),
                               const_douyin.DATA_TYPE_VIDEO)

        if data["has_more"] != 1:
            log.debug("DouyinAuthorVideoSaver: no more!")
        elif uid is None:
            # Without any aweme there is no author id for the next page.
            log.debug("DouyinAuthorVideoSaver: has_more set but aweme_list "
                      "is empty, next page not published")
        else:
            msg = Message(const_douyin.DATA_TYPE_AUTHOR, uid)
            msg.setExtra("cursor", cursor)
            self.publish(msg)

        return
Exemple #7
0
 def handleChallengeInfo(self, root, data, urlPack):  # @UnusedVariable
     """Save ``data`` as a topic object keyed by ``data["cid"]``.

     The type, provider and table come from attributes on ``self``.
     ``root`` and ``urlPack`` are not used here.
     """
     log.debug(data)
     topic = dbtools.MongoObject()
     meta = (self.DATA_TYPE_TOPIC, self.DATA_PROVIDER, data["cid"])
     topic.setMeta(*meta)
     topic.setData(data)
     topic.save(self.MONGO_TABLE_TOPIC)
     return
 def handleAwemeList(self, root, data, urlPack):  # @UnusedVariable
     """Save every aweme in ``data`` and publish a VIDEO message for each.

     Each item is saved as a VIDEO object keyed by its ``"aweme_id"``, passed
     to ``self.addStatObject`` and then published. ``root`` and ``urlPack``
     are not used here.
     """
     for aweme in data:
         video = dbtools.MongoObject()
         video.setMeta(
             const_douyin.DATA_TYPE_VIDEO,
             const_douyin.DATA_PROVIDER,
             aweme["aweme_id"],
             version=self.DATA_VERSION,
         )
         video.setData(aweme)
         video.save()
         log.debug("DouyinTopicSaver Insert obj {}".format(video.getLastObjectId()))
         self.addStatObject(video.getLastObjectId(), const_douyin.DATA_TYPE_VIDEO)
         video_msg = Message(const_douyin.DATA_TYPE_VIDEO, aweme["aweme_id"])
         self.publish(video_msg)
     return
 def handleAwemeDetail(self, root, data, urlPack):  # @UnusedVariable
     """Save one or more aweme detail records as VIDEO objects.

     ``data`` may be a single dict or an iterable of dicts. A single dict is
     wrapped in a list so both shapes share the loop. Each record is saved to
     the video table and passed to ``self.addStatObject``.

     ``root`` and ``urlPack`` are not used here.
     """
     # isinstance rather than an exact type comparison, so a dict subclass is
     # also treated as one record instead of being iterated key by key.
     if isinstance(data, dict):
         data = [data]
     for aweme in data:
         obj = dbtools.MongoObject()
         obj.setMeta(const_douyin.DATA_TYPE_VIDEO, const_douyin.DATA_PROVIDER, aweme["aweme_id"], version=const_douyin.DATA_VERSION)
         obj.setData(aweme)
         obj.save(const_douyin.MONGO_TABLE_VIDEO)
         log.debug("DouyinVideoDetailSaver Inserting obj {}".format(obj.getLastObjectId()))
         self.addStatObject(obj.getLastObjectId(), const_douyin.DATA_TYPE_VIDEO)
     return
    def handler(self, root, data, urlPack):  # @UnusedVariable
        """Save the videos of one tag-feed page and request the next page.

        Every entry of ``data["feeds"]`` is stamped with the current time
        under ``self.pipe.name``, saved as a VIDEO object carrying the user
        id and the ``topic_id`` from ``urlPack``, and passed to
        ``self.addStatObject``. Its author is published unless
        ``self.db.isItemUpdatedRecently`` reports the author key as recent
        within a ``3 * 86400`` window.

        Unless ``data["pcursor"]`` equals ``"no_more"``, a tag message for
        the next cursor is published, followed by a 60 second sleep.

        ``root`` is not used here.
        """
        feeds = data["feeds"]
        pcursor = data["pcursor"]
        tag = urlPack.getKey("tag")
        log.debug(
            "KuaiShouVideoListSaver tag:{}, feed length: {}, pcursor: {}".
            format(tag, len(feeds), pcursor))

        for feed in feeds:
            # Stamp the record with the current time under this pipe's name.
            feed[self.pipe.name] = int(time.time())

            video = dbtools.MongoObject()
            video.setMeta(const_kuaishou.DATA_TYPE_VIDEO,
                          const_kuaishou.DATA_PROVIDER,
                          feed["photo_id"],
                          version=const_kuaishou.DATA_VERSION)
            video.setData(feed)
            video.setUserId(feed['user_id'])
            video.setTopicId(urlPack.getKey("topic_id"))
            video.save()
            log.debug("KuaiShouTagFeedSaver Inserting obj {}, tag={}".format(
                video.getLastObjectId(), tag))
            self.addStatObject(video.getLastObjectId(),
                               const_kuaishou.DATA_TYPE_VIDEO)

            author_id = feed['user_id']
            author = dbtools.MongoObject(db=self.db)
            author.setMeta(const_kuaishou.DATA_TYPE_AUTHOR,
                           const_kuaishou.DATA_PROVIDER, author_id)
            if self.db.isItemUpdatedRecently(author.key, 3 * 86400):
                log.debug("skip user_id:{}".format(author_id))
                continue
            self.publish(Message(const_kuaishou.DATA_TYPE_AUTHOR, author_id))

        if pcursor != "no_more":
            next_page = Message(const_kuaishou.DATA_TYPE_TAG_NAME, tag)
            next_page.setExtra("topic_id", urlPack.getKey("topic_id"))
            next_page.setExtra("pcursor", pcursor)
            self.publish(next_page)
            time.sleep(60)

        return
Exemple #11
0
    def handler(self, root, data, urlPack):  # @UnusedVariable
        """Save one huoshan video detail record.

        The author id is read from ``data['author']['id']`` and the
        ``"author"`` entry is then removed from ``data`` in place, before
        ``data`` is stored on the VIDEO object. The saved object is passed to
        ``self.addStatObject``.

        ``root`` and ``urlPack`` are not used here.
        """
        author_id = data['author']['id']
        # The author sub-document is dropped from the stored payload; only
        # its id is kept, through setUserId below.
        del data["author"]

        video = dbtools.MongoObject()
        video.setMeta(
            const.DATA_TYPE_VIDEO,
            const_huoshan.DATA_PROVIDER,
            data["id"],
        )
        video.setData(data)
        video.setUserId(author_id)
        video.save()
        log.debug("HuoshanVideoDetailSaver Inserting obj {}".format(video.getLastObjectId()))
        self.addStatObject(video.getLastObjectId(), const.DATA_TYPE_VIDEO)

        return
Exemple #12
0
 def handler(self, root, data, urlPack):  # @UnusedVariable
     """Save one or more huoshan author records.

     ``data`` may be a single dict or an iterable of dicts. A single dict is
     wrapped in a list so both shapes share the loop. Each record is saved to
     the table returned by ``const.getTable(const.DATA_TYPE_AUTHOR)`` and
     passed to ``self.addStatObject``.

     ``root`` and ``urlPack`` are not used here.
     """
     # isinstance rather than an exact type comparison, so a dict subclass is
     # also treated as one record instead of being iterated key by key.
     if isinstance(data, dict):
         data = [data]
     for user in data:
         obj = dbtools.MongoObject()
         obj.setMeta(const.DATA_TYPE_AUTHOR, const_huoshan.DATA_PROVIDER,
                     user["id"])
         obj.setData(user)
         obj.save(const.getTable(const.DATA_TYPE_AUTHOR))
         log.debug("HuoShanAuthorDetailSaver Inserting obj {}".format(
             obj.getLastObjectId()))
         self.addStatObject(obj.getLastObjectId(), const.DATA_TYPE_AUTHOR)
     return
Exemple #13
0
    def handler(self, root, data, urlPack):  # @UnusedVariable
        """Publish authors and unseen videos from one shared-tag feed page.

        Every entry of ``data["feeds"]`` is stamped with the current time
        under ``self.pipe.name``. Its author is saved and published unless
        ``self.db.isItemUpdatedRecently`` reports the author key as recent
        within a ``3 * 86400`` window. Its video is published when
        ``self.db.getOne`` returns a falsy result for the video key.

        Unless ``data["pcursor"]`` equals ``"no_more"``, a tag message for
        the next cursor is published, followed by a 60 second sleep.

        ``root`` is not used here.
        """
        feeds = data["feeds"]
        pcursor = data["pcursor"]
        tag = urlPack.getKey("tag")
        log.debug("KuaiShouShareTagSaver tag:{}, feed length: {}, pcursor: {}".
                  format(tag, len(feeds), pcursor))

        for feed in feeds:
            # Stamp the record with the current time under this pipe's name.
            feed[self.pipe.name] = int(time.time())

            # Author part: the whole feed entry is stored as the author data.
            author_id = feed['userId']
            author = dbtools.MongoObject(db=self.db)
            author.setMeta(const_kuaishou.DATA_TYPE_AUTHOR,
                           const_kuaishou.DATA_PROVIDER, author_id)
            author.setData(feed)
            if self.db.isItemUpdatedRecently(author.key, 3 * 86400):
                log.debug("skip user_id:{}".format(author_id))
            else:
                author.save()
                self.publish(
                    Message(const_kuaishou.DATA_TYPE_AUTHOR, author_id))

            # Video part: the object is only used to derive the lookup key.
            video_id = feed["photoId"]
            video = dbtools.MongoObject(db=self.db)
            video.setMeta(const_kuaishou.DATA_TYPE_VIDEO,
                          const_kuaishou.DATA_PROVIDER, video_id)
            existing = self.db.getOne(const.getTable(const.DATA_TYPE_VIDEO),
                                      video.key)
            if not existing:
                self.publish(
                    Message(const_kuaishou.DATA_TYPE_VIDEO, video_id))

        if pcursor != "no_more":
            next_page = Message(const_kuaishou.DATA_TYPE_TAG_NAME, tag)
            next_page.setExtra("topic_id", urlPack.getKey("topic_id"))
            next_page.setExtra("pcursor", pcursor)
            self.publish(next_page)
            time.sleep(60)

        return
Exemple #14
0
 def handleAwemeList(self, root, data, urlPack):  # @UnusedVariable
     """Save one or more aweme records as VIDEO objects.

     ``data`` may be a single dict or an iterable of dicts. A single dict is
     wrapped in a list so both shapes share the loop. Each record is saved
     with its ``"author_user_id"`` as user id and passed to
     ``self.addStatObject``.

     ``root`` and ``urlPack`` are not used here.
     """
     # isinstance rather than an exact type comparison, so a dict subclass is
     # also treated as one record instead of being iterated key by key.
     if isinstance(data, dict):
         data = [data]
     for music in data:
         obj = dbtools.MongoObject()
         obj.setMeta(const_douyin.DATA_TYPE_VIDEO,
                     const_douyin.DATA_PROVIDER, music["aweme_id"])
         obj.setData(music)
         obj.setUserId(music["author_user_id"])
         obj.save()
         log.debug("DouyinVideoSaver Inserting obj {}".format(
             obj.getLastObjectId()))
         self.addStatObject(obj.getLastObjectId(),
                            const_douyin.DATA_TYPE_VIDEO)
     return
 def handleUserDetail(self, root, data, urlPack):  # @UnusedVariable
     """Save the author in ``data["user"]`` and publish an AUTHOR message.

     The user dict is saved to the author table, passed to
     ``self.addStatObject`` and then published on ``self.pipe`` keyed by its
     ``"uid"``. ``root`` and ``urlPack`` are not used here.
     """
     user = data["user"]
     uid = user["uid"]

     author = dbtools.MongoObject()
     author.setMeta(
         const_douyin.DATA_TYPE_AUTHOR,
         const_douyin.DATA_PROVIDER,
         uid,
         version=const_douyin.DATA_VERSION,
     )
     author.setData(user)
     author.save(const_douyin.MONGO_TABLE_AUTHOR)
     log.debug("DouyinAuthorDetailSaver Inserting obj {}".format(
         author.getLastObjectId()))
     self.addStatObject(author.getLastObjectId(),
                        const_douyin.DATA_TYPE_AUTHOR)

     author_msg = Message(const_douyin.DATA_TYPE_AUTHOR, uid)
     self.pipe.publish(author_msg)
     return
Exemple #16
0
 def handler(self, root, data, urlPack):  # @UnusedVariable
     """Save one or more kuaishou author detail records.

     ``data`` may be a single dict or an iterable of dicts. A single dict is
     wrapped in a list so both shapes share the loop. Each record is keyed by
     ``info["profile"]["user_id"]``, saved to the author table and passed to
     ``self.addStatObject``.

     ``root`` and ``urlPack`` are not used here.
     """
     # isinstance rather than an exact type comparison, so a dict subclass is
     # also treated as one record instead of being iterated key by key.
     if isinstance(data, dict):
         data = [data]
     for info in data:
         obj = dbtools.MongoObject()
         obj.setMeta(const_kuaishou.DATA_TYPE_AUTHOR,
                     const_kuaishou.DATA_PROVIDER,
                     info["profile"]["user_id"],
                     version=const_kuaishou.DATA_VERSION)
         obj.setData(info)
         obj.save(const_kuaishou.MONGO_TABLE_AUTHOR)
         log.debug("KuaiShouAuthorDetailSaver Inserting obj {}".format(
             obj.getLastObjectId()))
         self.addStatObject(obj.getLastObjectId(),
                            const_kuaishou.DATA_TYPE_AUTHOR)
     return
Exemple #17
0
 def handleChallangeDetail(self, root, data, urlPack):  # @UnusedVariable
     """Normalise and save one topic detail record.

     ``data["cha_name"]`` and ``data["desc"]`` are stripped in place, then
     ``data`` is saved to the topic table keyed by ``data["cid"]`` and passed
     to ``self.addStatObject``. ``root`` and ``urlPack`` are not used here.
     """
     log.debug(data)

     # Strip surrounding whitespace from the two text fields, in place.
     for field in ("cha_name", "desc"):
         data[field] = data[field].strip()

     topic = dbtools.MongoObject()
     topic.setMeta(
         const_douyin.DATA_TYPE_TOPIC,
         const_douyin.DATA_PROVIDER,
         data["cid"],
         version=self.DATA_VERSION,
     )
     topic.setData(data)
     topic.save(const_douyin.MONGO_TABLE_TOPIC)
     self.addStatObject(topic.getLastObjectId(),
                        const_douyin.DATA_TYPE_TOPIC_DETAIL)
     log.debug("DouyinTopicDetailSaver Insert obj {}".format(
         topic.getLastObjectId()))
     return
 def handler(self, root, dataList, urlPack):  # @UnusedVariable
     """Save every video of an author page and request the next page.

     For each entity in ``dataList``, the author id is taken from
     ``entity['data']['author']['id']`` and the ``"author"`` entry is removed
     from the data dict in place. The rest is then saved as a VIDEO object
     and passed to ``self.addStatObject``.

     When ``root['extra']['has_more']`` is truthy, an AUTHOR message carrying
     ``root['extra']['max_time']`` is published for the author of the last
     entity processed. An empty ``dataList`` gives no author id to publish
     for. That case is logged and skipped instead of failing on an unbound
     local.

     ``urlPack`` is not used here.
     """
     # Author of the last entity seen; stays None when dataList is empty.
     authorId = None
     for entity in dataList:
         data = entity['data']
         authorId = data['author']['id']
         del data["author"]
         obj = dbtools.MongoObject()
         obj.setMeta(const.DATA_TYPE_VIDEO, const_huoshan.DATA_PROVIDER, data["id"])
         obj.setData(data)
         obj.setUserId(authorId)
         obj.save()
         log.debug("HuoshanAuthorVideoListSaver Inserting obj {}".format(obj.getLastObjectId()))
         self.addStatObject(obj.getLastObjectId(), const.DATA_TYPE_VIDEO)
     if root['extra']['has_more']:
         if authorId is None:
             # Without any entity there is no author id for the next page.
             log.debug("HuoshanAuthorVideoListSaver: has_more set but "
                       "dataList is empty, next page not published")
         else:
             msg = Message(const.DATA_TYPE_AUTHOR, authorId)
             msg.setExtra('max_time', root['extra']['max_time'])
             self.publish(msg)
     return
 def handler(self, root, data, urlPack):  # @UnusedVariable
     """Save every tag of a search result and publish a tag message for each.

     Each entry of ``data["tags"]`` is saved as a TOPIC object keyed by the
     md5 of its stripped tag name and passed to ``self.addStatObject``. A tag
     message carrying that key as ``topic_id`` is then published. The handler
     sleeps 40 seconds after every tag except the last one.

     ``root`` and ``urlPack`` are not used here.
     """
     tags = data["tags"]
     log.debug("got tag len={}".format(len(tags)))
     last_index = len(tags) - 1
     for index, tag_info in enumerate(tags):
         tag_name = tag_info["tag"].strip()
         topic_id = util.md5(tag_name)

         topic = dbtools.MongoObject()
         topic.setMeta(const.DATA_TYPE_TOPIC, const_kuaishou.DATA_PROVIDER, topic_id, version=const_kuaishou.DATA_VERSION)
         topic.setData(tag_info)
         topic.save()
         log.debug("KuaiShouSearchTagSaver Inserting obj {}, tag={}".format(topic.getLastObjectId(), tag_name))
         self.addStatObject(topic.getLastObjectId(), const.DATA_TYPE_TOPIC)

         # NOTE(review): the message carries the unstripped tag while the
         # key is derived from the stripped one -- confirm this is intended.
         tag_msg = Message(const_kuaishou.DATA_TYPE_TAG_NAME, tag_info["tag"])
         tag_msg.setExtra("topic_id", topic_id)
         self.publish(tag_msg)

         # Pause between tags, but not after the final one.
         if index != last_index:
             time.sleep(40)
     return
 def handler(self, root, data, urlPack):  # @UnusedVariable
     """Save the videos of one author page and request the next page.

     Every entry of ``data["feeds"]`` is stamped with the current time under
     ``self.pipe.name``, saved to the video table and passed to
     ``self.addStatObject``. When the page is not empty, an AUTHOR message
     carrying ``data["pcursor"]`` is published for the user id of the last
     feed processed.

     ``root`` and ``urlPack`` are not used here.
     """
     feeds = data["feeds"]
     pcursor = data["pcursor"]

     for feed in feeds:
         # Stamp the record with the current time under this pipe's name.
         feed[self.pipe.name] = int(time.time())

         video = dbtools.MongoObject()
         video.setMeta(const_kuaishou.DATA_TYPE_VIDEO, const_kuaishou.DATA_PROVIDER, feed["photo_id"])
         video.setData(feed)
         video.setUserId(feed['user_id'])
         video.save(const_kuaishou.MONGO_TABLE_VIDEO)
         log.debug("KuaiShouAuthorVideoListSaver Inserting obj {}".format(video.getLastObjectId()))
         self.addStatObject(video.getLastObjectId(), const_kuaishou.DATA_TYPE_VIDEO)
         last_author_id = feed['user_id']

     log.debug("KuaiShouAuthorVideoListSaver feed length: {}, pcursor: {}".format(len(feeds), pcursor))

     # An empty page ends the pagination; nothing more is published.
     if not len(feeds) > 0:
         return

     next_page = Message(const_kuaishou.DATA_TYPE_AUTHOR, last_author_id)
     next_page.setExtra("pcursor", pcursor)
     self.publish(next_page)
     return
    def handler(self, root, data, urlPack):  # @UnusedVariable
        """Save one or more kuaishou video detail records.

        ``data`` may be a single dict or an iterable of dicts. A single dict
        is wrapped in a list so both shapes share the loop. Each record is
        stamped with the current time under ``self.pipe.name``, saved to the
        video table and passed to ``self.addStatObject``.

        ``root`` and ``urlPack`` are not used here.
        """
        # isinstance rather than an exact type comparison, so a dict subclass
        # is also treated as one record instead of being iterated key by key.
        if isinstance(data, dict):
            data = [data]

        for info in data:
            info[self.pipe.name] = int(time.time())
            obj = dbtools.MongoObject()
            obj.setMeta(const_kuaishou.DATA_TYPE_VIDEO,
                        const_kuaishou.DATA_PROVIDER,
                        info["photo_id"],
                        version=const_kuaishou.DATA_VERSION)
            obj.setData(info)
            obj.setUserId(info['user_id'])
            obj.save(const_kuaishou.MONGO_TABLE_VIDEO)
            log.debug("KuaiShouVideoDetailSaver Inserting obj {}".format(
                obj.getLastObjectId()))
            self.addStatObject(obj.getLastObjectId(),
                               const_kuaishou.DATA_TYPE_VIDEO)

        return
 def handleUserDetail(self, root, data, urlPack):  # @UnusedVariable
     """Save the author embedded in each item of ``data``.

     For every item, ``info["author"]`` is saved to the author table keyed by
     its ``"uid"`` and passed to ``self.addStatObject``, unless
     ``obj.db.isItemUpdatedRecently`` reports the key as recent.

     Any exception raised while handling an item is logged through
     ``log.fatal`` and re-raised, so processing stops at the first failure.

     ``root`` and ``urlPack`` are not used here.
     """
     for info in data:
         try:
             user = info["author"]
             uid = user["uid"]
             obj = dbtools.MongoObject()
             obj.setMeta(const_douyin.DATA_TYPE_AUTHOR,
                         const_douyin.DATA_PROVIDER,
                         uid,
                         version=const_douyin.DATA_VERSION)
             obj.setData(user)
             # NOTE(review): the recency check goes through obj.db, not
             # self.db as in sibling handlers -- confirm both are the same.
             if not obj.db.isItemUpdatedRecently(obj.key):
                 obj.save(const_douyin.MONGO_TABLE_AUTHOR)
                 log.debug(
                     "DouyinAuthorDetailSaver Inserting obj {}".format(
                         obj.getLastObjectId()))
                 self.addStatObject(obj.getLastObjectId(),
                                    const_douyin.DATA_TYPE_AUTHOR)
             else:
                 log.debug("uid:{} is already inserted".format(uid))
         except Exception as e:
             log.fatal("{}".format(e))
             # A bare raise re-raises the active exception with its original
             # traceback intact.
             raise
     return