def check_status(self):
    # Could this be a problem here, polling so often that we exceed the API rate limit?
    def is_done(status_dict, task_count):
        if not status_dict:
            return False
        if not all(status_dict.values()):
            # any falsy value means some worker reported a failed download
            raise WorkerFailed("download-failed")
        return len(status_dict) == task_count

    key_list = ["%s_status" % key for key in self.key_list]
    status_dict = CacheAdpter.get_many(key_list, self.cache_db)
    has_done = is_done(status_dict, len(self.key_list))
    time_interval = 0.1
    # Give this a timeout; on timeout, treat the download as failed.
    # (Global flow control also needs consideration, e.g. whether today's keyword reports have all been downloaded.)
    timeout = len(self.adgroup_id_list) * 60  # timeout scales with the number of adgroups to download (ideally also capped by a maximum)
    st_time = time.time()
    while not has_done:
        time.sleep(time_interval)
        if time.time() - st_time >= timeout:
            # log which suffixed status keys never showed up before giving up
            log.error("timeout: total %s, finished %s, undone task=%s"
                      % (len(self.key_list), len(status_dict),
                         list(set(key_list) - set(status_dict.keys()))))
            raise WorkerTimeout("download-timeout")
        status_dict = CacheAdpter.get_many(key_list, self.cache_db)
        has_done = is_done(status_dict, len(self.key_list))
    # After confirming this run's task status, clear these flags so they don't affect the next check.
    CacheAdpter.delete_many(key_list, self.cache_db)
    return True
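# The rate-limit worry in the comment above can be addressed without changing the
# protocol: poll less aggressively as the wait grows. A minimal sketch using
# exponential backoff with a cap; poll_with_backoff, base_interval and
# max_interval are hypothetical names, not part of this codebase.
def poll_with_backoff(check_fn, timeout, base_interval=0.1, max_interval=5.0):
    """Call check_fn() until it returns True or `timeout` seconds elapse.

    The sleep interval doubles after each unsuccessful poll, capped at
    max_interval, so a long download does not generate a fixed
    ten-polls-per-second stream of cache lookups. Returns False on timeout so
    the caller can decide whether to raise WorkerTimeout.
    """
    interval = base_interval
    st_time = time.time()
    while not check_fn():
        if time.time() - st_time >= timeout:
            return False
        time.sleep(interval)
        interval = min(interval * 2, max_interval)
    return True
# check_status's while-loop could delegate to a helper like this instead of
# sleeping a fixed 0.1s between cache reads.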
def allot_2_workers(self, sub_prj_list, db_name):
    log.info('start: send msgs to workers')
    # First mark each task's status as 'working'.
    prj_stat_dict = {}
    data_keys = []
    for prj in sub_prj_list:
        prj['statu'] = 'working'
        prj_stat_dict[prj['data_key'] + '_statu'] = 'working'
        data_keys.append(prj['data_key'])
    CacheAdpter.set_many(prj_stat_dict, db_name, 180)
    CacheAdpter.delete_many(data_keys, db_name)
    # Distribute the tasks.
    for prj in sub_prj_list:
        # Hand out the work: call the worker's RPC asynchronously on a daemon
        # thread so one slow or unreachable worker does not block the rest.
        try:
            nt = NewThread(JAPI(host='%s:%s' % (prj['host'], prj['port'])).worker_work,
                           prj_dict=prj, is_sync=False)
            nt.setDaemon(True)
            nt.start()
        except Exception as e:
            log.error('error=%s,prj=%s' % (e, prj))
            continue
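# For the hand-off above to complete, each worker must eventually write a
# completion flag under the "<data_key>_status" key that check_status polls
# (assuming prj['data_key'] corresponds to the entries in self.key_list).
# A minimal sketch of that worker-side write, reusing only the CacheAdpter
# signatures already seen above; worker_work_sketch, download_report and the
# 180-second TTL are illustrative assumptions, not code from this project.
def worker_work_sketch(prj_dict, cache_db):
    try:
        download_report(prj_dict)  # hypothetical download step
        ok = True
    except Exception as e:
        log.error('worker error=%s,prj=%s' % (e, prj_dict))
        ok = False
    # check_status treats any falsy value as a failure (raises WorkerFailed)
    # and reports done only once every "<data_key>_status" key is present.
    CacheAdpter.set_many({prj_dict['data_key'] + '_status': ok}, cache_db, 180)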