def run(self, workflow_input):
    """Run every processor in order and return the workflow result.

    Each processor in ``self._processors`` is called as
    ``processor.run(workflow_input, tmp_result, workflow_output)``, where
    ``tmp_result`` and ``workflow_output`` are dicts created fresh for this
    call and shared by all processors.

    Args:
        workflow_input: Passed unchanged to every processor.

    Returns:
        The value stored under ``'result'`` in ``workflow_output``.

    Raises:
        AssertionError: If no processor stored a ``'result'`` entry.
    """
    workflow_output = {}
    tmp_result = {}
    for processor in self._processors:
        processor_name = processor.__class__.__name__
        logger.info('start to run processor: {}'.format(processor_name))
        processor.run(workflow_input, tmp_result, workflow_output)
        logger.info('processor: {} run finish'.format(processor_name))

    # Raised explicitly instead of using ``assert`` so the check is not
    # stripped under ``python -O``; the exception type and message are kept
    # so existing callers see the same failure.
    if 'result' not in workflow_output:
        raise AssertionError('can not find result in workflow_output!')
    return workflow_output['result']
def run(self, workflow_input, tmp_result, workflow_output):
    """Collect, merge and score the focuses of every configured user.

    For each music type in ``users_config`` the focuses of its users are
    fetched through ``weibo_api``. A failed fetch is logged and retried up
    to three times, with ``use_cache`` switched off on retries. Focuses
    whose title is blacklisted are dropped, and focuses sharing a title are
    merged into the first one seen, which receives the combined
    ``related_users``.

    Args:
        workflow_input: Not used by this processor.
        tmp_result: Receives ``'focuses'`` (music type -> list of focuses)
            and ``'scores'`` (music type -> list of ``recent_read`` values).
        workflow_output: Not used by this processor.
    """
    focuses = {}
    for music_type, users in users_config.items():
        # Fetch the raw focuses of every user of this music type.
        fetched = []
        for user_id, user_name in users:
            # One initial attempt plus at most three retries.
            for attempt in range(4):
                if attempt:
                    logger.info(
                        'start to retry, current retry time is: {}'.format(
                            attempt))
                try:
                    # Only the first attempt is allowed to use the cache.
                    use_cache = attempt == 0
                    user = weibo_api.get_user_info(
                        user_id, user_name, use_cache)
                    fetched.extend(
                        weibo_api.get_focuses_by_user(user, use_cache))
                    logger.info(
                        'fetch user: {} data success'.format(user_id))
                    break
                except Exception as error:
                    logger.exception(
                        'fetch user: {} data error! {}'.format(
                            user_id, error))
                    time.sleep(1)
            # Pause before moving on to the next user.
            time.sleep(1)

        # Drop blacklisted titles and merge focuses that share a title.
        blocked_titles = blacklist_config['all'] + blacklist_config[music_type]
        merged = {}
        for focus in fetched:
            title = focus.title
            if title in blocked_titles:
                continue
            if title in merged:
                kept = merged[title]
                kept.related_users.extend(focus.related_users)
                # Remove duplicate related users after the merge.
                kept.related_users = list(set(kept.related_users))
            else:
                merged[title] = focus
        focuses[music_type] = list(merged.values())

    # One score per focus, in the same order as the focus lists.
    scores = {}
    for music_type, each_focuses in focuses.items():
        scores[music_type] = [focus.recent_read for focus in each_focuses]

    tmp_result['focuses'] = focuses
    tmp_result['scores'] = scores
def start(self):
    """Run all registered workflows over and over; never returns.

    On every pass, ``self._workflows[i]`` is run with
    ``self._workflow_inputs[i]``. An exception raised by a workflow is
    logged and the pass continues with the next workflow. After each full
    pass the loop sleeps for ``self._interval`` seconds.
    """
    while True:
        for index, workflow in enumerate(self._workflows):
            workflow_input = self._workflow_inputs[index]
            workflow_name = workflow.__class__.__name__
            try:
                logger.info(
                    'start to run workflow: {}'.format(workflow_name))
                result = workflow.run(workflow_input)
                logger.info('workflow: {} run finish, result: {}'.format(
                    workflow_name, result))
            except Exception as error:
                # Keep the scheduler alive: log and go on to the next one.
                logger.exception('error for workflow: {}, {}'.format(
                    workflow_name, error))
        time.sleep(self._interval)
def run(self, workflow_input, tmp_result, workflow_output):
    """Collect recent posts of every configured user and screenshot them.

    For each music type in ``users_config`` the posts of its users are
    fetched through ``weibo_api``; the matching page elements are located
    with ``firefox_api`` and paired with the posts by position. Posts not
    newer than ``self._before_data`` days are skipped; every remaining post
    is screenshotted into ``self._images_dir``. A failed user is logged and
    retried up to three times, with ``use_cache`` switched off on retries.

    Args:
        workflow_input: Not used by this processor.
        tmp_result: Receives ``'posts'`` (music type -> list of posts).
        workflow_output: Not used by this processor.
    """
    posts = {}
    for music_type, users in users_config.items():
        type_posts = posts[music_type] = []
        for user_id, user_name in users:
            # One initial attempt plus at most three retries.
            for attempt in range(4):
                if attempt:
                    logger.info(
                        'start to retry, current retry time is: {}'.format(
                            attempt))
                try:
                    # Only the first attempt is allowed to use the cache.
                    use_cache = attempt == 0
                    # Fetch the current user's info and their posts.
                    user = weibo_api.get_user_info(
                        user_id, user_name, use_cache)
                    user_posts = weibo_api.get_posts_by_user(user, use_cache)

                    # Filter out old posts and screenshot the new ones.
                    fresh_posts = []
                    page_elements = firefox_api.find_elements_in_page(
                        USER_POSTS_URL_FORMATTER.format(user.id),
                        POSTS_CSS_SELECTOR)
                    for index, post_element in enumerate(page_elements):
                        # Page element without a matching fetched post.
                        if index >= len(user_posts):
                            continue
                        post = user_posts[index]
                        posted_at = post.time
                        cutoff = datetime.now() - timedelta(
                            days=self._before_data)
                        # Skip old posts.
                        if posted_at <= cutoff:
                            continue
                        image_path = '{}/{}.png'.format(
                            self._images_dir, post.id)
                        # The post itself only keeps the bare file name.
                        post.image_path = '{}.png'.format(post.id)
                        firefox_api.screenshot(post_element, image_path)
                        fresh_posts.append(post)

                    # Only published once the whole user succeeded, so a
                    # retried user never contributes posts twice.
                    type_posts.extend(fresh_posts)
                    logger.info(
                        'fetch user: {} data success'.format(user_id))
                    break
                except Exception as error:
                    logger.exception(
                        'fetch user: {} data error! {}'.format(
                            user_id, error))
                    time.sleep(1)
            # Pause before moving on to the next user.
            time.sleep(1)

    tmp_result['posts'] = posts
def run(self, workflow_input, tmp_result, workflow_output):
    """Collect recent, de-duplicated videos of every configured user.

    For each music type in ``users_config`` the videos of its users are
    fetched through ``weibo_api``. Videos already seen for that music type,
    or not newer than ``self._before_data`` days, are skipped. The cover of
    every kept video is downloaded into ``self._image_dir`` and the video's
    ``cover_path`` is replaced by the downloaded file name. A failed user is
    logged and retried up to three times, with ``use_cache`` switched off on
    retries.

    Args:
        workflow_input: Not used by this processor.
        tmp_result: Receives ``'videos'`` (music type -> list of videos) and
            ``'scores'`` (music type -> list of ``view_cnt`` values, in the
            same order as the videos).
        workflow_output: Not used by this processor.
    """
    videos = {}
    scores = {}
    for music_type, users in users_config.items():
        type_videos = videos[music_type] = []
        type_scores = scores[music_type] = []
        seen_ids = set()  # de-duplicates videos within this music type
        for user_id, user_name in users:
            # One initial attempt plus at most three retries.
            for attempt in range(4):
                if attempt:
                    logger.info(
                        'start to retry, current retry time is: {}'.format(
                            attempt))
                try:
                    # Only the first attempt is allowed to use the cache.
                    use_cache = attempt == 0
                    user = weibo_api.get_user_info(
                        user_id, user_name, use_cache)
                    user_videos = weibo_api.get_videos_by_user(
                        user, use_cache)
                    for video in user_videos:
                        if video.id in seen_ids:
                            continue
                        if video.time <= datetime.now() - timedelta(
                                days=self._before_data):
                            continue
                        cover_file = '{}.jpg'.format(video.id)
                        _download_img(
                            video.cover_path,
                            '{}/{}'.format(self._image_dir, cover_file))
                        video.cover_path = cover_file
                        type_videos.append(video)
                        type_scores.append(video.view_cnt)
                        # Recorded last, so a video whose download failed
                        # is picked up again on the retry.
                        seen_ids.add(video.id)
                    logger.info(
                        'fetch user: {} data success'.format(user_id))
                    break
                except Exception as error:
                    logger.exception(
                        'fetch user: {} data error! {}'.format(
                            user_id, error))
                    time.sleep(1)
            # Pause before moving on to the next user.
            time.sleep(1)

    tmp_result['videos'] = videos
    tmp_result['scores'] = scores