コード例 #1
0
def favourite_cleaner(item):
    """Normalise the time and URL fields of a favourite entry in place.

    ``behot_time`` and ``repin_time`` are passed through ``time_to_date``
    (missing values default to 0); ``source_url`` and ``media_url`` are
    prefixed with ``URL_HOST``; ``image_url`` is prefixed with ``'http:'``.
    Missing URL fields are treated as empty strings.

    :param item: dict describing one favourite entry; it is mutated.
    :return: the same ``item`` object.
    """
    for time_key in ('behot_time', 'repin_time'):
        item[time_key] = time_to_date(item.get(time_key, 0))
    for url_key in ('source_url', 'media_url'):
        item[url_key] = URL_HOST + item.get(url_key, '')
    # presumably the API returns protocol-relative image URLs -- TODO confirm
    item['image_url'] = 'http:' + item.get('image_url', '')
    return item
コード例 #2
0
        def data_cb(data, uid, cate, c_txt, rp_txt):
            """
            Callback for the API that fetches a user's published articles,
            videos and micro-headlines (weitoutiao).

            Resolves the item's id and publish time, then optionally posts a
            comment and/or a repost through ``self.account``, limited by the
            counters and time windows taken from the enclosing scope
            (``comment_count``, ``repost_count``, ``comment_start_time``,
            ``comment_end_time``, ``repost_start_time``, ``repost_end_time``).

            :param data: raw JSON data returned by the Toutiao API
            :param uid: uid of the current user
            :param cate: crawl mode, one of ARTICLE, VIDEO, WEITT
            :param c_txt: text of the comment interaction, None if there is none
            :param rp_txt: text of the repost-and-comment interaction, None if there is none
            :return: 1 when the item has no id or time, or when its time falls
                outside a configured window; otherwise None (falls off the end)
            """

            # NOTE(review): `global` binds module-level names; if c_count and
            # r_count are locals of the enclosing function this would need
            # `nonlocal` -- confirm against the surrounding code.
            global c_count
            global r_count

            # Set to 1 once weitt_cleaner has been applied to `data`, so it is
            # not applied a second time further down.
            shake = 0
            if cate == WEITT:
                id_key = 'wid'
                t_key = 'create_time'
            else:
                id_key = 'item_id'
                t_key = 'behot_time'
            group_id = data.get(id_key)
            c_time = data.get(t_key)
            if not c_time and cate == WEITT:
                # No top-level time on a WEITT item: flatten it and retry.
                data = weitt_cleaner(data)
                shake = 1
                c_time = data.get(t_key)
                if not c_time:
                    # NOTE(review): raises AttributeError when 'comment_base'
                    # is absent from the cleaned data -- confirm it is always
                    # present on this path.
                    c_time = time_to_date(data.get('comment_base').get(t_key))
            else:
                # NOTE(review): this branch is also taken when c_time is
                # missing and cate is not WEITT; int(None) would raise
                # TypeError in that case.
                c_time = time_to_date(int(c_time))
            if not group_id:
                if not shake:
                    data = weitt_cleaner(data)
                group_id = data.get('wid')
            if not group_id or not c_time:
                return 1
            if c_txt:
                if c_count < comment_count:
                    # The time window applies only when both bounds are set.
                    if all([comment_start_time, comment_end_time]):
                        if (comment_start_time <= c_time <= comment_end_time):
                            self.account.post_comment(c_txt, group_id)
                            c_count += 1
                        else:
                            # Outside the comment window: return early, which
                            # also skips the repost step below.
                            return 1
                    else:
                        self.account.post_comment(c_txt, group_id)
                        c_count += 1
            if rp_txt:
                if r_count < repost_count:
                    # The time window applies only when both bounds are set.
                    if all([repost_start_time, repost_end_time]):
                        if repost_start_time <= c_time <= repost_end_time:
                            self.account.repost(rp_txt, group_id, uid)
                            r_count += 1
                        else:
                            return 1
                    else:
                        self.account.repost(rp_txt, group_id, uid)
                        r_count += 1
コード例 #3
0
ファイル: timer.py プロジェクト: steveobd/TTBot
 def run(self):
     """Run due timer jobs in a loop and return once no jobs remain.

     ``self.jobs`` maps a time key to either one job entry (a dict) or a
     list of entries. An entry may hold ``func``, ``args``, ``kwargs``,
     ``callback``, ``looping``, ``frequency``, ``args_func`` and
     ``kwargs_func``. On every pass each key that compares ``<=`` to
     ``time_to_date(time.time())`` is executed: ``func(*args, **kwargs)``
     is called and its result is handed to ``callback`` when that is
     callable. A ``looping`` entry is re-scheduled ``3600 / frequency``
     seconds after its key, with ``args`` / ``kwargs`` first passed through
     ``args_func`` / ``kwargs_func`` when those are callable.

     Compared with the previous version:

     * ``callback`` is optional, like every other entry field, instead of
       raising ``KeyError`` when it is absent;
     * re-scheduled entries that land on the same time key, or on a key
       that already holds jobs, are merged into a list instead of
       overwriting each other.

     NOTE(review): the loop contains no sleep, so it polls continuously
     while jobs are pending.
     """
     while 1:
         ctime = time_to_date(time.time())
         pops = []
         # next_time -> list of entries re-scheduled for that time
         adds = {}
         for k, v in self.jobs.items():
             if k <= ctime:
                 if isinstance(v, dict):
                     v = [v]
                 for func_entry in v:
                     kwargs = func_entry.get('kwargs') or {}
                     args = func_entry.get('args') or ()
                     func = func_entry.get('func')
                     res = func(*args, **kwargs)
                     callback = func_entry.get('callback')
                     if callback and callable(callback):
                         callback(res)
                     looping = func_entry.get('looping')
                     frequency = func_entry.get('frequency')
                     args_func = func_entry.get('args_func')
                     kwargs_func = func_entry.get('kwargs_func')
                     if looping:
                         if args_func and callable(args_func):
                             args = args_func(args)
                         if kwargs_func and callable(kwargs_func):
                             kwargs = kwargs_func(kwargs)
                         k_ts = datetime_to_timestamp(k)
                         # frequency is the number of runs per hour
                         offset_time = 3600 / int(frequency)
                         next_time = time_to_date(k_ts + offset_time)
                         adds.setdefault(next_time, []).append({
                             'func': func, 'args': args,
                             'kwargs': kwargs, 'callback': callback,
                             'looping': looping, 'frequency': frequency,
                             'args_func': args_func,
                             'kwargs_func': kwargs_func,
                         })
                 pops.append(k)
                 logger.info(f'定时器任务 时间:{k} {len(v)}个 已经完成. ')
         # Mutate self.jobs only after iteration over it has finished.
         for k in pops:
             self.jobs.pop(k)
         for next_time, entries in adds.items():
             existing = self.jobs.get(next_time)
             if isinstance(existing, dict):
                 entries = [existing] + entries
             elif existing:
                 entries = list(existing) + entries
             # Keep the single-dict shape when there is nothing to merge.
             i = {next_time: entries[0] if len(entries) == 1 else entries}
             self.jobs.update(i)
             logger.info(f'新增定时器任务:{i.keys()}')
         if not self.jobs:
             logger.info(f'定时器任务已全部执行完毕,退出定时器.')
             return
コード例 #4
0
def data_cleaner(item):
    """Normalise the URL and time fields of a feed entry in place.

    ``media_url`` and ``source_url`` are prefixed with ``URL_HOST``,
    ``behot_time`` is passed through ``time_to_date`` (``None`` when the key
    is missing), and ``media_avatar_url`` and ``image_url`` are prefixed with
    ``'http:'``. Missing URL fields are treated as empty strings.

    :param item: dict describing one feed entry; it is mutated.
    :return: the same ``item`` object.
    """
    for url_key in ('media_url', 'source_url'):
        item[url_key] = URL_HOST + item.get(url_key, '')
    item['behot_time'] = time_to_date(item.get('behot_time'))
    for image_key in ('media_avatar_url', 'image_url'):
        item[image_key] = 'http:' + item.get(image_key, '')
    return item
コード例 #5
0
def published_data_cleaner(item):
    """Normalise a published-item record in place and add its article URL.

    ``behot_time`` is passed through ``time_to_date`` (``None`` when the key
    is missing), ``image_url`` is prefixed with ``'http:'``, ``media_url``
    and ``source_url`` are prefixed with ``URL_HOST``, and ``url`` is built
    from ``URL_ARTICLE_ITEM`` using the record's ``item_id`` (empty string
    when missing).

    :param item: dict describing one published item; it is mutated.
    :return: the same ``item`` object.
    """
    raw_time = item.get('behot_time')
    item['behot_time'] = time_to_date(raw_time)
    item['image_url'] = 'http:' + item.get('image_url', '')
    for url_key in ('media_url', 'source_url'):
        item[url_key] = URL_HOST + item.get(url_key, '')
    article_id = item.get('item_id', '')
    item['url'] = URL_ARTICLE_ITEM.format(item_id=article_id)
    return item
コード例 #6
0
ファイル: grabber.py プロジェクト: steveobd/TTBot
 def __grab(self, uid, mode):
     """Crawl one user's published data and record the crawl as finished.

     Skips the user when ``FINISHED_TABLE`` already holds a row for this
     ``uid`` / ``mode`` pair. Otherwise the ids of the accounts the user
     follows are pushed onto ``self.ID_queue``, the user's published data is
     fetched for the requested mode(s), and a completion row is saved.

     :param uid: id of the user to crawl.
     :param mode: one of ARTICLE, VIDEO, WEITT, or ``'all'`` for all three.
     :return: 1, both when the user was skipped and when it was crawled.
     :raises ValueError: if ``mode`` is not a recognised value (raised after
         the followings have already been queued).
     """
     if not self.db.connected:
         self.db.connect()

     query = {'uid': uid, 'mode': mode}
     check = self.db.select(query, tname=FINISHED_TABLE, c_map=False)
     if check:
         logger.info(f'当前用户uid:{uid} 已经被爬取过 [{check[0].get("done_time")}].')
         return 1

     user = TTUser(uid)
     followings = user.get_followings(MDB=1)
     for following in followings:
         self.ID_queue.put_nowait(following.get('user_id'))
     logger.info(f'加载 用户ID:{uid} 关注的uid {len(followings)} 个进入队列.')
     logger.info(f'开始爬取用户ID:{uid} 的头条数据.MODE[{mode}]')

     # Resolve the requested mode into the list of modes to crawl.
     single_modes = [ARTICLE, VIDEO, WEITT]
     if mode in single_modes:
         targets = [mode]
     elif mode == 'all':
         targets = single_modes
     else:
         raise ValueError(f'头条用户链式抓取模式 mode 参数值错误:{mode}')
     for target in targets:
         user.get_published(ALL=True, MDB=1, MODE=target)

     finished_row = {
         'uid': uid,
         'mode': mode,
         'done_time': time_to_date(time.time()),
     }
     self.db.save(finished_row, tname=FINISHED_TABLE)
     return 1
コード例 #7
0
def weitt_cleaner(item):
    """Flatten a raw micro-headline (weitoutiao) record into a single dict.

    The record is read from ``item['concern_talk_cell']`` when that is
    present and truthy, otherwise from ``item['stream_cell']``. The cell's
    ``id`` is stored under ``'wid'`` and the cell's embedded JSON payload
    (``packed_json_str`` or ``raw_data`` respectively) is merged on top of
    it. For a ``concern_talk_cell`` the payload's ``create_time`` is also
    passed through ``time_to_date``; the ``stream_cell`` payload is merged
    as-is.

    A missing or empty payload is treated as an empty JSON object instead of
    raising from ``json.loads``. Malformed JSON still raises.

    :param item: raw dict returned by the API.
    :return: a new dict; empty when neither cell is present.
    """
    result = {}
    data = item.get('concern_talk_cell')
    _data = item.get('stream_cell')
    if data:
        result['wid'] = data.get('id')
        # ``or '{}'`` guards against a None/empty payload.
        json_data = json.loads(data.get('packed_json_str') or '{}')
        result.update(json_data)
        result['create_time'] = time_to_date(json_data.get('create_time'))
    elif _data:
        result['wid'] = _data.get('id')
        raw_data = json.loads(_data.get('raw_data') or '{}')
        result.update(raw_data)
    return result
コード例 #8
0
def articles_cleaner(item):
    """Add ``my_``-prefixed converted copies of an article's time fields.

    For each of ``modify_time``, ``create_time`` and ``verify_time`` the
    value (0 when missing) is passed through ``time_to_date`` and stored
    under ``my_<field>``; the original fields are left untouched.

    :param item: dict describing one article; it is mutated.
    :return: the same ``item`` object.
    """
    time_fields = ('modify_time', 'create_time', 'verify_time')
    for field in time_fields:
        raw_value = item.get(field, 0)
        item[f'my_{field}'] = time_to_date(raw_value)
    return item
コード例 #9
0
 def deadline_out(last_time_raw, time_by_minute):
     """Tell whether ``time_by_minute`` is at or before ``last_time_raw``.

     ``time_by_minute`` is converted with ``time_to_date`` before the
     comparison; the conversion is skipped when ``last_time_raw`` is None.

     :param last_time_raw: reference time to compare against, or None.
     :param time_by_minute: value handed to ``time_to_date``.
     :return: True when the converted time is ``<= last_time_raw``;
         otherwise None (including when ``last_time_raw`` is None).
     """
     if last_time_raw is not None and time_to_date(time_by_minute) <= last_time_raw:
         return True
     return None