def upload_test(self):
    start_time = time.time()
    q = Manager().Queue()
    plist = []
    for i in range(self.upload_user):
        proc = Process(target=self.upload_one_user, args=(q,))
        plist.append(proc)
    for proc in plist:
        proc.start()
    for proc in plist:
        proc.join()
    while True:
        if q.empty():
            break
        else:
            if q.get() == 0:
                self.upload_success += 1
            else:
                self.upload_fail += 1
    use_time = time.time() - start_time
    table = PrettyTable(["key", "value"])
    table.add_row(["One File Size (M)", self.upload_file_size])
    table.add_row(["All File Size (M)", self.upload_file_size * self.upload_number * self.upload_user])
    table.add_row(["Process Count(user)", self.upload_user])
    table.add_row(["Upload Count", self.upload_number * self.upload_user])
    table.add_row(["Interval Time(s)", self.upload_time])
    table.add_row(["Success count", self.upload_success])
    table.add_row(["Fail count", self.upload_fail])
    table.add_row(["Success ratio (%)", (round(self.upload_success / float(self.upload_number * self.upload_user), 4) * 100)])
    table.add_row(["Use time (s)", "%.2f" % use_time])
    print table
def main():
    arg_parser = argparse.ArgumentParser(description='bbd compressing program')
    arg_parser.add_argument('-compress_from_dir', type=str, default='.',
                            help='directory where needs to be compressed')
    arg_parser.add_argument('-compress_to_dir', type=str, default='.',
                            help='directory where puts compressed file')
    arg_parser.add_argument('-compress_method', default='bz2', choices=['bz2', 'gz'],
                            help='the method of compressing, support bz2 and gz, bz2 is default')
    arg_parser.add_argument('-compress_dir_match', default=None,
                            help='regular expression that matches which directories can be compressed')
    arg_parser.add_argument('-compress_file_match', default=None,
                            help='regular expression that matches which files can be compressed')
    args = arg_parser.parse_args()

    kwargs = dict()
    kwargs['compress_from_dir'] = os.path.abspath(args.compress_from_dir)
    kwargs['compress_to_dir'] = os.path.abspath(args.compress_to_dir)
    kwargs['compress_method'] = args.compress_method
    kwargs['compress_dir_match'] = args.compress_dir_match
    kwargs['compress_file_match'] = args.compress_file_match

    print('Operating parameters are as follows:')
    print('\t' + '\n\t'.join(['{}: {}'.format(k, v) for k, v in kwargs.items()]))

    if check_compress_proc_is_alive():
        return

    if kwargs['compress_from_dir'] == kwargs['compress_to_dir']:
        print(kwargs['compress_from_dir'], kwargs['compress_to_dir'])
        compress_to_dir = os.path.join(kwargs['compress_to_dir'], 'flume_compressed_data')
        kwargs['compress_to_dir'] = compress_to_dir
        os.makedirs(compress_to_dir, exist_ok=True)

    max_worker = cpu_count() if cpu_count() <= 8 else 8
    pool_cls = Pool
    compressed_queue = Manager().Queue()

    print('using multi processes to compress files')
    path_mgr = PathUtils(**kwargs)
    # The original read kwargs['target_dir'], a key that is never set above;
    # 'compress_to_dir' is the key actually populated.
    compressed_data_dir = Path(kwargs['compress_to_dir']) / 'bbd_compressed_data_dir'
    compress_method = kwargs['compress_method']
    for file_path in path_mgr.match_need_compress_files():
        from_path = str(file_path.absolute())
        to_path = str((compressed_data_dir / file_path.name).absolute())
        compressed_queue.put((from_path, to_path, compress_method))

    if compressed_queue.empty():
        print('there is no file need to be compressed, waiting for next checking')
        return

    multi_workers(max_worker=max_worker, pool_cls=pool_cls, work=compress_file,
                  compressed_queue=compressed_queue)
def upload_begin(self):
    plist = []
    q = Manager().Queue()
    with open(self.list_path, 'r') as fp:
        for i in fp:
            if not i:
                break
            md5_crc32 = i.strip()[:41]
            if md5_crc32 not in self.tmp_list and len(md5_crc32) == 41:
                self.tmp_list.append(md5_crc32)
                self.upload_num += 1
    print self.upload_num
    for md5_crc32_list in self.chunks(self.tmp_list, self.work_count):
        proc = Process(target=self.upload_file, args=(q, md5_crc32_list,))
        plist.append(proc)
    for proc in plist:
        proc.start()
    for proc in plist:
        proc.join()
    while True:
        if q.empty():
            break
        else:
            r = q.get()
            if r == 0:
                self.success += 1
            elif r == 1:
                self.fail += 1
            elif r == 2:
                self.download_fail += 1
            else:
                pass
    use_time = time.time() - self.start_time
    table = PrettyTable(["key", "value"])
    table.add_row(["Upload Count", len(set(self.tmp_list))])
    table.add_row(["Success count", self.success])
    table.add_row(["Fail count", self.fail])
    table.add_row(["Download Fail", self.download_fail])
    table.add_row(["Use time (s)", "%.2f" % use_time])
    print table
def startServer(host, port, options):
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    s.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
    s.bind((host, port))
    s.listen(0)
    queue = Manager().Queue()
    while True:
        print "main: waiting for connection"
        conn, addr = s.accept()
        print 'main: Connected by', addr
        data = conn.recv(1024)
        print 'received port request'
        p = Process(target=serverNewClient, args=(queue, options,))
        p.start()
        while queue.empty():
            time.sleep(0.05)
            print "queue is still empty"
        port = queue.get()
        conn.sendall(str(port) + '\r\n')
        print "assigned port %d to new client" % port
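# A minimal sketch of what the serverNewClient worker used above might look like;
# it is not shown in the original, so the body here is an assumption. The only
# contract the snippet relies on is that the child puts its listening port into
# the shared Manager().Queue() so the parent can send it back to the client.
import socket

def serverNewClient(queue, options):
    srv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    srv.bind(('', 0))                 # port 0 lets the OS pick a free ephemeral port
    srv.listen(1)
    queue.put(srv.getsockname()[1])   # report the assigned port to the parent
    conn, addr = srv.accept()         # then serve the new client on that port
    conn.close()
    srv.close()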
# Submit 2 tasks to the pool
# f1 = pool.submit(return_future, "hello")
# with Manager() as mgr:
#     l = mgr.list()
# l = []
# l.append(0)
### q = Queue()  # use a plain Queue when spawning with Process directly, not with a pool
q = Manager().Queue()  # use Manager().Queue() when working with a pool
# *
# f2 = pool.submit(return_future, q)
# *
### p = Process(target=return_future, args=(q,))
### p.start()
# p.join()
while 1:
    tim = time.time()
    print('c')
    print('main:', os.getpid())
    time.sleep(0.4)
    if not e.is_set():  # if the event is not set, submit one more task to the pool
        f2 = pool.submit(return_future, q, e)
    if not q.empty():
        value = q.get(True)
        d = value[0] + 1
        print
        print("d=", d)
        print
        if d == 11:
            break
            # pool.terminate()
    print(time.time() - tim)
def getData(q):
    while True:
        print('move data %s out from queue' % (q.get()))
        time.sleep(2)

if __name__ == '__main__':
    q = Manager().Queue()
    pool = Pool(8)
    for i in range(8):
        pool.apply_async(generateData, args=(q,))
        # generator = Process(target=generateData, args=(q,))
    pool.close()
    mover = Process(target=getData, args=(q,))
    # generator.start()
    mover.start()
    pool.join()
    # generator.join()
    while True:
        if q.empty() == True:
            break
    mover.terminate()  # because the while loop in getData never exits on its own
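# Why Manager().Queue() and not multiprocessing.Queue() here: a plain Queue can only
# be shared through inheritance and cannot be pickled into pool workers, while a
# manager proxy can be passed as an argument. A minimal, self-contained sketch of the
# same producer/consumer pattern (generate_data is a hypothetical stand-in for the
# generateData producer, which is not shown in the original):
from multiprocessing import Manager, Pool

def generate_data(q):
    for i in range(5):
        q.put(i)  # producer: push work items through the manager proxy

if __name__ == '__main__':
    q = Manager().Queue()
    with Pool(4) as pool:
        pool.apply_async(generate_data, (q,))
        pool.close()
        pool.join()
    while not q.empty():
        print('got', q.get())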
def calc_factor_loading(cls, start_date, end_date=None, month_end=True, save=False, **kwargs):
    """
    Calculate the factor loadings of the sample stocks on the given dates and save them to the factor database.
    Parameters:
    --------
    :param start_date: datetime-like, str
        start date, format: YYYY-MM-DD or YYYYMMDD
    :param end_date: datetime-like, str
        end date; if None, only the factor loadings on start_date are calculated, format: YYYY-MM-DD or YYYYMMDD
    :param month_end: bool, default True
        if True, only the factor loadings at month end are calculated
    :param save: bool, default False
        whether to save to the factor database
    :param kwargs:
        'multi_proc': bool, True = use multiple processes, False = use a single process, default False
    :return: dict
        factor loadings
    """
    # Get the trading-day series and the stock basics table
    start_date = Utils.to_date(start_date)
    if end_date is not None:
        end_date = Utils.to_date(end_date)
        trading_days_series = Utils.get_trading_days(start=start_date, end=end_date)
    else:
        trading_days_series = Utils.get_trading_days(end=start_date, ndays=1)
    all_stock_basics = CDataHandler.DataApi.get_secu_basics()
    # Iterate over the trading days and calculate the DASTD factor loadings
    dict_dastd = None
    for calc_date in trading_days_series:
        if month_end and (not Utils.is_month_end(calc_date)):
            continue
        logging.info('[%s] Calc DASTD factor loading.' % Utils.datetimelike_to_str(calc_date))
        # Iterate over the stocks and calculate each stock's DASTD factor value
        s = (calc_date - datetime.timedelta(days=risk_ct.DASTD_CT.listed_days)).strftime('%Y%m%d')
        stock_basics = all_stock_basics[all_stock_basics.list_date < s]
        ids = []     # list of stock codes
        dastds = []  # list of DASTD factor values
        if 'multi_proc' not in kwargs:
            kwargs['multi_proc'] = False
        if not kwargs['multi_proc']:
            # Single process: calculate the DASTD factor values sequentially
            for _, stock_info in stock_basics.iterrows():
                logging.info("[%s] Calc %s's DASTD factor loading." % (calc_date.strftime('%Y-%m-%d'), stock_info.symbol))
                dastd_data = cls._calc_factor_loading(stock_info.symbol, calc_date)
                if dastd_data is not None:
                    ids.append(dastd_data['code'])
                    dastds.append(dastd_data['dastd'])
        else:
            # Multiple processes: calculate the DASTD factor values in parallel
            q = Manager().Queue()  # queue for inter-process communication, holds the factor loadings computed by each process
            p = Pool(4)            # process pool, at most 4 concurrent processes
            for _, stock_info in stock_basics.iterrows():
                p.apply_async(cls._calc_factor_loading_proc, args=(stock_info.symbol, calc_date, q,))
            p.close()
            p.join()
            while not q.empty():
                dastd_data = q.get(True)
                ids.append(dastd_data['code'])
                dastds.append(dastd_data['dastd'])
        date_label = Utils.get_trading_days(start=calc_date, ndays=2)[1]
        dict_dastd = {'date': [date_label] * len(ids), 'id': ids, 'factorvalue': dastds}
        if save:
            Utils.factor_loading_persistent(cls._db_file, Utils.datetimelike_to_str(calc_date, dash=False), dict_dastd, ['date', 'id', 'factorvalue'])
        # Pause for 180 seconds
        logging.info('Suspending for 180s.')
        time.sleep(180)
    return dict_dastd
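# The per-stock worker handed to apply_async above is not shown in this snippet.
# A minimal sketch of the pattern it has to follow (calc_dastd is a hypothetical
# stand-in for the real single-stock computation): compute the factor for one
# symbol and push a small dict into the shared Manager queue, which the parent
# drains only after p.close() / p.join().
def _calc_factor_loading_proc(symbol, calc_date, q):
    dastd_data = calc_dastd(symbol, calc_date)  # hypothetical single-stock computation
    if dastd_data is not None:
        q.put({'code': dastd_data['code'], 'dastd': dastd_data['dastd']})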
def calc_factor_loading(cls, start_date, end_date=None, month_end=True, save=False, **kwargs): """ 计算指定日期的样本个股的因子载荷, 并保存至因子数据库 Parameters: -------- :param start_date: datetime-like, str 开始日期, 格式: YYYY-MM-DD or YYYYMMDD :param end_date: datetime-like, str 结束日期, 如果为None, 则只计算start_date日期的因子载荷, 格式: YYYY-MM-DD or YYYYMMDD :param month_end: bool, 默认为True 如果为True, 则只计算月末时点的因子载荷 :param save: bool, 默认True 是否保存至因子数据库 :param kwargs: :return: dict 因子载荷 """ # 取得交易日序列及股票基本信息表 start_date = Utils.to_date(start_date) if end_date is not None: end_date = Utils.to_date(end_date) trading_days_series = Utils.get_trading_days(start=start_date, end=end_date) else: trading_days_series = Utils.get_trading_days(end=start_date, ndays=1) all_stock_basics = CDataHandler.DataApi.get_secu_basics() # 遍历交易日序列, 计算筹码分布因子载荷 dict_beta = {} dict_hsigma = {} for calc_date in trading_days_series: if month_end and (not Utils.is_month_end(calc_date)): continue logging.info('[%s] Calc BETA factor loading.' % Utils.datetimelike_to_str(calc_date)) # 遍历个股, 计算个股BETA因子值 s = (calc_date - datetime.timedelta(days=180)).strftime('%Y%m%d') stock_basics = all_stock_basics[all_stock_basics.list_date < s] ids = [] # 个股代码list betas = [] # BETA因子值 hsigmas = [] # HSIGMA因子值 # 采用单进程计算BETA因子和HSIGMA因子值, # for _, stock_info in stock_basics.iterrows(): # logging.info("[%s] Calc %s's BETA and HSIGMA factor data." % (calc_date.strftime('%Y-%m-%d'), stock_info.symbol)) # beta_data = cls._calc_factor_loading(stock_info.symbol, calc_date) # if beta_data is not None: # ids.append(beta_data['code']) # betas.append(beta_data['beta']) # hsigmas.append(beta_data['hsigma']) # 采用多进程并行计算BETA因子和HSIGMA因子值 q = Manager().Queue() # 队列, 用于进程间通信, 存储每个进程计算的因子载荷 p = Pool(4) # 进程池, 最多同时开启4个进程 for _, stock_info in stock_basics.iterrows(): p.apply_async(cls._calc_factor_loading_proc, args=( stock_info.symbol, calc_date, q, )) p.close() p.join() while not q.empty(): beta_data = q.get(True) ids.append(beta_data['code']) betas.append(beta_data['beta']) hsigmas.append(beta_data['hsigma']) date_label = Utils.get_trading_days(calc_date, ndays=2)[1] dict_beta = { 'date': [date_label] * len(ids), 'id': ids, 'factorvalue': betas } dict_hsigma = { 'date': [date_label] * len(ids), 'id': ids, 'factorvalue': hsigmas } if save: Utils.factor_loading_persistent( cls._db_file, Utils.datetimelike_to_str(calc_date, dash=False), dict_beta, ['date', 'id', 'factorvalue']) hsigma_path = os.path.join(factor_ct.FACTOR_DB.db_path, risk_ct.HSIGMA_CT.db_file) Utils.factor_loading_persistent( hsigma_path, Utils.datetimelike_to_str(calc_date, dash=False), dict_hsigma, ['date', 'id', 'factorvalue']) # 休息180秒 logging.info('Suspending for 180s.') time.sleep(180) return dict_beta
def calc_factor_loading(cls, start_date, end_date=None, month_end=True, save=False, **kwargs): """ 计算指定日期的样本个股的因子载荷,并保存至因子数据库 Parameters -------- :param start_date: datetime-like, str 开始日期,格式:YYYY-MM-DD or YYYYMMDD :param end_date: datetime-like, str 结束日期,如果为None,则只计算start_date日期的因子载荷,格式:YYYY-MM-DD or YYYYMMDD :param month_end: bool,默认True 如果为True,则只计算月末时点的因子载荷 :param save: bool,默认False 是否保存至因子数据库 :param kwargs: 'multi_proc': bool, True=采用多进程并行计算, False=采用单进程计算, 默认为False :return: 因子载荷,DataFrame -------- 因子载荷,DataFrame 0. date: 日期 1. id: 证券symbol 2. m0: 隔夜时段动量 3. m1: 第一个小时动量 4. m2: 第二个小时动量 5. m3: 第三个小时动量 6. m4: 第四个小时动量 7. m_normal: 传统动量 """ # 取得交易日序列及股票基本信息表 start_date = Utils.to_date(start_date) if end_date is not None: end_date = Utils.to_date(end_date) trading_days_series = Utils.get_trading_days(start=start_date, end=end_date) else: trading_days_series = Utils.get_trading_days(end=start_date, ndays=1) # all_stock_basics = CDataHandler.DataApi.get_secu_basics() # 遍历交易日序列,计算日内动量因子值 dict_intraday_momentum = None for calc_date in trading_days_series: if month_end and (not Utils.is_month_end(calc_date)): continue # 计算日内各时段动量因子 dict_intraday_momentum = {'date': [], 'id': [], 'm0': [], 'm1': [], 'm2': [], 'm3': [], 'm4': [], 'm_normal': []} # 遍历个股,计算个股日内动量值 s = (calc_date - datetime.timedelta(days=90)).strftime('%Y%m%d') stock_basics = Utils.get_stock_basics(s) if 'multi_proc' not in kwargs: kwargs['multi_proc'] = False if not kwargs['multi_proc']: # 采用单进程进行计算 for _, stock_info in stock_basics.iterrows(): momentum_data = cls._calc_factor_loading(stock_info.symbol, calc_date) if momentum_data is not None: logging.info("[%s] %s's intraday momentum = (%0.4f,%0.4f,%0.4f,%0.4f,%0.4f,%0.4f)" % (calc_date.strftime('%Y-%m-%d'),stock_info.symbol, momentum_data.m0, momentum_data.m1, momentum_data.m2, momentum_data.m3, momentum_data.m4, momentum_data.m_normal)) dict_intraday_momentum['id'].append(Utils.code_to_symbol(stock_info.symbol)) dict_intraday_momentum['m0'].append(round(momentum_data.m0, 6)) dict_intraday_momentum['m1'].append(round(momentum_data.m1, 6)) dict_intraday_momentum['m2'].append(round(momentum_data.m2, 6)) dict_intraday_momentum['m3'].append(round(momentum_data.m3, 6)) dict_intraday_momentum['m4'].append(round(momentum_data.m4, 6)) dict_intraday_momentum['m_normal'].append(round(momentum_data.m_normal, 6)) else: # 采用多进程并行计算日内动量因子载荷 q = Manager().Queue() # 队列,用于进程间通信,存储每个进程计算的因子载荷 p = Pool(4) # 进程池,最多同时开启4个进程 for _, stock_info in stock_basics.iterrows(): p.apply_async(cls._calc_factor_loading_proc, args=(stock_info.symbol, calc_date, q,)) p.close() p.join() while not q.empty(): momentum_data = q.get(True) dict_intraday_momentum['id'].append(momentum_data[0]) dict_intraday_momentum['m0'].append(round(momentum_data[1], 6)) dict_intraday_momentum['m1'].append(round(momentum_data[2], 6)) dict_intraday_momentum['m2'].append(round(momentum_data[3], 6)) dict_intraday_momentum['m3'].append(round(momentum_data[4], 6)) dict_intraday_momentum['m4'].append(round(momentum_data[5], 6)) dict_intraday_momentum['m_normal'].append(round(momentum_data[6], 6)) date_label = Utils.get_trading_days(calc_date, ndays=2)[1] dict_intraday_momentum['date'] = [date_label] * len(dict_intraday_momentum['id']) # 保存因子载荷至因子数据库 if save: # Utils.factor_loading_persistent(cls._db_file, calc_date.strftime('%Y%m%d'), dict_intraday_momentum) cls._save_factor_loading(cls._db_file, Utils.datetimelike_to_str(calc_date, dash=False), dict_intraday_momentum, 'periodmomentum', factor_type='raw') # 计算日内各时段动量因子的Rank IC值向量, 并保存 
cls._calc_periodmomentum_ic(calc_date, 'month') # 计算最优化权重 if alphafactor_ct.INTRADAYMOMENTUM_CT['optimized']: cls._optimize_periodmomentum_weight(calc_date) # 计算合成日内动量因子 if alphafactor_ct.INTRADAYMOMENTUM_CT['synthesized']: logging.info('[%s] calc synthetic intraday momentum factor loading.' % Utils.datetimelike_to_str(calc_date)) dict_intraday_momentum = {'date': [], 'id': [], 'factorvalue': []} # 读取日内个时段动量因子值 # period_momentum_path = os.path.join(SETTINGS.FACTOR_DB_PATH, alphafactor_ct.INTRADAYMOMENTUM_CT.db_file, 'raw/periodmomentum') # df_factor_loading = Utils.read_factor_loading(period_momentum_path, Utils.datetimelike_to_str(calc_date, False)) df_factor_loading = cls._get_factor_loading(cls._db_file, Utils.datetimelike_to_str(calc_date, dash=False), factor_name='periodmomentum', factor_type='raw', drop_na=False) if df_factor_loading.shape[0] <= 0: logging.info("[%s] It doesn't exist intraday momentum factor loading." % Utils.datetimelike_to_str(calc_date)) return df_factor_loading.fillna(0, inplace=True) # 读取因子最优权重 factor_weight = cls._get_factor_weight(calc_date) if factor_weight is None: logging.info("[%s] It doesn't exist factor weight.") return # 计算合成动量因子, 合成之前先对日内各时段动量因子进行去极值和标准化处理 arr_factor_loading = np.array(df_factor_loading[['m0', 'm1', 'm2', 'm3', 'm4']]) arr_factor_loading = Utils.normalize_data(arr_factor_loading, treat_outlier=True) arr_factor_weight = np.array(factor_weight.drop('date')).reshape((5, 1)) arr_synthetic_factor = np.dot(arr_factor_loading, arr_factor_weight) dict_intraday_momentum['date'] = list(df_factor_loading['date']) dict_intraday_momentum['id'] = list(df_factor_loading['id']) dict_intraday_momentum['factorvalue'] = list(arr_synthetic_factor.astype(float).round(6).reshape((arr_synthetic_factor.shape[0],))) # 标准化合成动量因子 df_std_intradaymonmentum = Utils.normalize_data(pd.DataFrame(dict_intraday_momentum), columns='factorvalue', treat_outlier=True, weight='eq') # 保存合成因子 if save: # Utils.factor_loading_persistent(synthetic_db_file, Utils.datetimelike_to_str(calc_date, False), dict_intraday_momentum) cls._save_factor_loading(cls._db_file, Utils.datetimelike_to_str(calc_date, dash=False), dict_intraday_momentum, 'IntradayMomentum', factor_type='raw', columns=['date', 'id', 'factorvalue']) cls._save_factor_loading(cls._db_file, Utils.datetimelike_to_str(calc_date, dash=False), df_std_intradaymonmentum, 'IntradayMomentum', factor_type='standardized', columns=['date', 'id', 'factorvalue']) # 休息360秒 logging.info('Suspending for 360s.') time.sleep(360) return dict_intraday_momentum
class Queue_server(object):
    '''
    Initialize the official-account queue
    @param Tuple wx_lists  list of official accounts
    '''
    def __init__(self, wx_lists=()):
        self.__queue = Manager().Queue(-1)
        self.init_wx_lists(wx_lists)
        self.__fail_list = Manager().list()

    '''
    Initialize the official-account queue
    @param Tuple wx_lists  list of official accounts
    '''
    def init_wx_lists(self, wx_lists=()):
        for wx in wx_lists:
            self.put(wx)

    '''
    Add an element
    @param mixed value  the element to add
    '''
    def put(self, value):
        self.__queue.put(value)

    '''
    Pop an element
    @return mixed
    '''
    def get(self):
        if not self.empty():
            return self.__queue.get()
        return False

    '''
    Get the queue
    @return mixed
    '''
    def get_wx_lists_queue(self):
        return self.__queue

    '''
    Get the queue size
    @return int
    '''
    def get_size(self):
        return self.__queue.qsize()

    '''
    Whether the queue is empty
    @return bool
    '''
    def empty(self):
        return self.__queue.empty()

    '''
    Record failed data
    @param tuple wx_data  official-account info
    @return bool
    '''
    def put_fail_wx(self, wx_data):
        self.__fail_list.append(wx_data)

    '''
    Print the failure list
    '''
    def print_fail_list(self, flush=None):
        if len(self.__fail_list) > 0:
            for fail in self.__fail_list:
                self.put(fail)
                print 'the fail wx : {0}'.format(fail)
            if not flush:
                self.__fail_list = Manager().list()
        elif flush:
            print 'all success'

    # Check whether there were failures
    def is_have_failed(self):
        # failed accounts are re-queued, so a non-empty queue means there were failures
        return not self.empty()
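# A short usage sketch for the Queue_server wrapper above (the account names are
# made up for illustration): the constructor pre-fills the managed queue, workers
# drain it with get(), and failed items can be re-queued via print_fail_list().
if __name__ == '__main__':
    qs = Queue_server(wx_lists=('account_a', 'account_b'))
    while True:
        wx = qs.get()  # returns False once the queue is empty
        if wx is False:
            break
        print 'crawling {0}'.format(wx)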
class MPResult(object):
    """
    Sync result between processes
    """
    MATCH = {}  # id -> instance

    def __init__(self, result):
        from multiprocessing import Manager
        # Test result instance
        self.result = result
        # Result queue
        self.queue = Manager().Queue()

    def __getattr__(self, item):
        return getattr(self.result, item)

    @staticmethod
    def pack_result_storage(storage):
        """
        Pack result from storage
        """
        return [(get_master_id(s[0]), s[1]) for s in storage]

    def unpack_result_storage(self, storage):
        """
        Unpack result from storage
        """
        unpack_storage = []
        for master_id, message in storage:
            unpack_storage.append(
                (self.MATCH[master_id], message),
            )
        return unpack_storage

    def match(self, suite):
        """
        Match id of master process to instance
        """
        self.MATCH[get_suite_master_id(suite)] = suite

        def match(s):
            for o in s:
                if isinstance(o, BaseSuite):
                    self.MATCH[get_suite_master_id(o)] = o
                    match(o)
                else:
                    self.MATCH[get_case_master_id(o)] = o

        match(suite)

    def save_result(self):
        """
        Save result in queue
        """
        self.queue.put(
            (
                (
                    self.pack_result_storage(self.result.errors),
                    self.pack_result_storage(self.result.skipped),
                    self.pack_result_storage(self.result.failures),
                ),
                self.result.testsRun,
            ),
        )

    def make_result(self):
        """
        Merge result from queue to result instance
        """
        while not self.queue.empty():
            (errors, skipped, failures), run_tests = self.queue.get()
            self.result.errors.extend(self.unpack_result_storage(errors))
            self.result.skipped.extend(self.unpack_result_storage(skipped))
            self.result.failures.extend(self.unpack_result_storage(failures))
            self.result.testsRun += run_tests
def query_request_new(kb_ids, issue, kb_vers_map):
    """
    tips: cache layout: cache.set(box, (kb, 0/1), expire_time)
    requirement:
        (1) Walk the box list and compare it with the boxes in the cache; if a box is idle,
            assign it a knowledge base; if no box is idle, look in the cache for a box that
            this KB already uses.
        (2) Take the BOX address of every knowledge base in the request, send each request
            from a separate process, and put the results into the queue.
    param: kb_ids  list
    return:
    """
    # Fetch the KBs that are already in the cache
    logger.debug('start get add')
    start_get_add = time()
    kb_add_dict = cache.get_many(kb_ids)
    # keep no_kbs as a list so it can be sliced below
    no_kbs = list(set(kb_ids) - set(kb_add_dict.keys()))
    boxs = cache.get('boxs')
    box_addr_dict = cache.get('box_infos')
    box_kb_dict = cache.get_many(boxs)
    box_kb_rest = list(filter(lambda x: (x[1][0] in boxs) and (not x[1][1]), box_kb_dict.items()))
    boxs_idle = list(filter(lambda x: not cache.get(x), boxs))
    logger.debug('boxs_idle:%s' % boxs_idle)
    # Assign boxes to the KBs that are not in the cache yet
    boxs_free = []
    if kb_add_dict:
        boxs_free = set(dict(boxs).keys()) - set(dict(kb_add_dict.values()).keys())
    else:
        boxs_free = set(dict(boxs).keys())
    if len(boxs_free) < len(no_kbs):
        rest_kbs = no_kbs[len(boxs_free):]
        # keep kb_ids as a list so it can be indexed below
        kb_ids = list(set(kb_ids) - set(rest_kbs))
    # Write to the cache
    boxs_free_info = filter(lambda x: x[0] in boxs_free, boxs)
    temp_kb_box_list = list(zip(no_kbs, boxs_free_info))
    cache_ret = map(lambda x: cache.set(x[0], x[1], 30 * 60), temp_kb_box_list)
    logger.debug('cache_ret:%s' % list(cache_ret))
    kb_add_dict = cache.get_many(kb_ids)
    logger.debug('kb_add_dict:%s' % kb_add_dict)
    logger.debug('------get address time:%.5f' % (time() - start_get_add))

    logger.debug('start box-request ')
    start_request = time()
    num = len(kb_ids)
    q = Manager().Queue()
    p_list = []
    for i in range(0, num):
        kb = kb_ids[i]
        version = kb_vers_map[kb]
        add = kb_add_dict[kb][1]
        logger.debug('Target:%s Add:%s' % (kb, add))
        temp_p = Process(target=_writer, args=(q, kb, version, add, issue))
        p_list.append(temp_p)
        temp_p.start()
    for pr in p_list:
        pr.join()
    logger.debug('------box-request time:%.5f' % (time() - start_request))

    start_get_msg = time()
    i = 0
    ret = {'no_box': [], 'ans': [], 'not_match': [], 'fail': []}
    while not q.empty():
        msg = q.get()
        if 'not_match' in msg.keys():
            ret['not_match'].append(msg['not_match'])
        elif 'fail' in msg.keys():
            ret['fail'].append(msg['fail'])
        else:
            ret['ans'].append(msg)
        logger.debug('------%d msg:%s' % (i, msg))
        i += 1
    logger.debug('------get answers time:%.5f' % (time() - start_get_msg))
    # Write to zk asynchronously
    # set_box_zk.delay(temp_kb_box_list)
    return ret
def find_pro_ite(ite,url,m_page,max_asin): head_csv={'A': 'ID', 'B': 'Type', 'C': 'SKU', 'D': 'Name', 'E': 'Published', 'F': 'Is featured?', 'G': 'Visibility in catalog', 'H': 'Short description', 'I': 'Description', 'J': 'Date sale price starts', 'K': 'Date sale price ends', 'L': 'Tax status', 'M': 'Tax class', 'N': 'In stock?', 'O': 'Stock', 'P': 'Low stock amount', 'Q': 'Backorders allowed?', 'R': 'Sold individually?', 'S': 'Weight (kg)', 'T': 'Length (cm)', 'U': 'Width (cm)', 'V': 'Height (cm)', 'W': 'Allow customer reviews?', 'X': 'Purchase note', 'Y': 'Sale price', 'Z': 'Regular price', 'AA': 'Categories', 'AB': 'Tags', 'AC': 'Shipping class', 'AD': 'Images', 'AE': 'Download limit', 'AF': 'Download expiry days', 'AG': 'Parent', 'AH': 'Grouped products', 'AI': 'Upsells', 'AJ': 'Cross-sells', 'AK': 'External URL', 'AM': 'Button text', 'AL': 'Position', 'AN': 'Attribute 1 name', 'AO': 'Attribute 1 value(s)', 'AP': 'Attribute 1 visible', 'AQ': 'Attribute 1 global', 'AR': 'Attribute 2 name', 'AS': 'Attribute 2 value(s)', 'AT': 'Attribute 2 visible', 'AU': 'Attribute 2 global', 'AV': 'Meta: pf_size_chart', 'AW': 'Meta: _primary_term_product_cat' } headers= { 'Accept-Language':'en-us,en;q=0.9', 'accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', 'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36', 'cookie':r'session-id-time=2082787201l; session-id=143-7283193-0434042; ubid-main=135-4071739-0100158; i18n-prefs=USD; session-token=F1LPasjCZIhIQ3/UGQzCK5nsUrdENnnmnEtR39OUktmBknNz2vdXXWxPOt7TEoa6vmDlLxrS6DtUP45Q//el4TorL3P/dxmPofxCIQNxJTdAm4VegAQpnjMFvd1iytSxmCo8A8EpGmHFurR6fehGlCCvQl1+XDM9qcGXvlg6bdKcz2LQv0xkdqye+tM3mHl+vQ8A39yGrNXr0Zdf0zh4t5AWSZAEtibFA7ijzLXESlwl85N8bV4MacnWQl46mWYM; csm-hit=tb:s-M0ECGT4J6MCDVSNCDN6F|1601001176170&t:1601001180260&adb:adblk_no' } #相关设置 url=url csv_path="./pro/{ite}.csv".format(ite=ite) asin_path="./asin/{ite}.txt".format(ite=ite)#可不修改 #新建或者清空asin文件 open(asin_path,"w",encoding="utf8",newline="") #采集asin for i in find_x(): if m_page == -1: pass elif i > m_page: print('采集完{i}页的asin'.format(i=i)) break else: pass try: #获取下一页url和本页面的产品url bs=find_bs(url,headers) print('成功获取bs') page_url,asins=find_page_url(bs,asin_path) print('成功获取产品asin和下一页') str_asins="".join(asins) print("page_url-----",page_url) print("asins------",str_asins) except Exception as e: print(e) print("采集完毕!") print("最后采集页面是",url) break url=page_url #写入表头 with open(csv_path,"w",encoding='utf8',newline='') as f: f_csv=csv.DictWriter(f,head_csv) f_csv.writerow(head_csv) with open(asin_path,'r',encoding='utf8') as f: q_asin_list=set() all_asin=[] q_asins=Manager().Queue() q = Manager().Queue() q_asin=Manager().Queue() for asin in f: t=5 main_asin=re.sub("\n",'',asin) try: find_pro(csv_path,main_asin,headers,head_csv,t,q_asin,q_asin_list,q,q_asins,all_asin,max_asin) print('采集完{asin}'.format(asin=main_asin)) except Exception as e: print('采集{asin}失败,跳过'.format(asin=main_asin),e) while q.empty()==False: q.get() print('成功清空q')
FPS_clock = pygame.time.Clock()
game_state = state.GameState()
game_gui = gui.GUI(game_state)
game_event_handler = event_handler.EventLogic(game_state, game_gui)
game_gui.add_handler(game_event_handler)
game_gui.draw(game_state.get_state())
pygame.display.update()

commandQueue = Manager().Queue()
listeningProcess = Process(target=voice_listener, args=(game_event_handler, commandQueue,))

while True:
    game_gui.draw(game_state.get_state())
    game_event_handler.event_handler()
    if game_state.get_state() == "SSH season voice mode" or game_state.get_state() == "Web season voice mode":
        if not game_event_handler.queue.empty():
            val = game_event_handler.queue.get()
            if val:
                listeningProcess.start()
            else:
                listeningProcess.terminate()
                listeningProcess.join()
                listeningProcess = Process(target=voice_listener, args=(game_event_handler, commandQueue,))
        if not commandQueue.empty():
            voice_command = commandQueue.get()
            try:
                game_event_handler.pipi.say(voice_command % game_gui.bool_to_text[str(game_gui.light_to_string[voice_command])])
            except KeyError:
                pass
    pygame.display.update()
    FPS_clock.tick(30)
class HostScanner:
    def __init__(self, function, number, method, port_range=None, ip_range=None,
                 verbose=False, write_json=False, json_file='./result.json'):
        self.function = function
        self.number = number
        self.method = method
        self.port_range = port_range
        self.ip_range = ip_range
        self.write_json = write_json
        self._verbose = verbose
        self.json_file = json_file
        self._PoolExecutor = ThreadPoolExecutor if self.method == 'thread' else ProcessPoolExecutor
        self.que = Manager().Queue(10)

    def _update_json_file(self):
        if self._verbose:
            print(f"Write the results in the queue to {self.json_file}")
        json_update = {}
        while not self.que.empty():
            json_update.update(self.que.get())
        if os.path.exists(self.json_file):
            with open(self.json_file, 'r') as fr:
                json_content = json.loads(fr.read())
            json_content.update(json_update)
            with open(self.json_file, 'w') as fw:
                fw.write(json.dumps(json_content, indent=4))
        else:
            with open(self.json_file, 'w') as fw:
                fw.write(json.dumps(json_update, indent=4))

    def _ping_host_ip(self, ip):
        if self._verbose:
            print("pid is %s" % os.getpid())
        try:
            res = subprocess.call('ping -c 2 -t 2 %s' % ip, shell=True, stdout=subprocess.PIPE)
            status = 'Active' if res == 0 else 'Inactive'
            print(f'{ip} {status}')
            if self.write_json:
                if self.method == 'proc':
                    with WRITE_LOCK_PROC:
                        if self.que.full():
                            self._update_json_file()
                elif self.method == 'thread':
                    with WRITE_LOCK_THREAD:
                        if self.que.full():
                            self._update_json_file()
                self.que.put({ip: status})
        except Exception as e:
            print('Failed to get status for {}: {}'.format(ip, e))

    def _scan_host_port(self, port):
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.connect((self.ip_range, port))
            print(f'{port} OPEN')
            if self.write_json:
                if self.method == 'proc':
                    with WRITE_LOCK_PROC:
                        if self.que.full():
                            self._update_json_file()
                elif self.method == 'thread':
                    with WRITE_LOCK_THREAD:
                        if self.que.full():
                            self._update_json_file()
                self.que.put({port: 'OPEN'})
        except Exception:
            # Ignore the failed port
            pass
        finally:
            s.close()

    def _runMultiWorks(self):
        with self._PoolExecutor(self.number) as Executor:
            if self.function == 'tcp':
                print(f'The scanned host is {self.ip_range}')
                Executor.map(self._scan_host_port, list(self.port_range))
            elif self.function == 'ping':
                Executor.map(self._ping_host_ip, self.ip_range)

    def run(self):
        # In order to support repeated debugging,
        # delete the generated json file at the start of each run
        if os.path.exists(self.json_file):
            os.remove(self.json_file)
        if self._verbose:
            print('Start')
            print('*' * 20)
        start_time = time.time()
        self._runMultiWorks()
        end_time = time.time()
        if self._verbose:
            print('*' * 20)
            print('End')
            print("Total time spent: %0.2f" % (end_time - start_time))
        if self.write_json:
            print("Writing into {}".format(self.json_file))
            self._update_json_file()
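# A short usage sketch for the HostScanner class above (the host list is made up,
# and the module-level locks WRITE_LOCK_PROC / WRITE_LOCK_THREAD are assumed to be
# defined elsewhere in the original project):
if __name__ == '__main__':
    scanner = HostScanner(function='ping',
                          number=8,          # worker count
                          method='thread',   # 'thread' or 'proc'
                          ip_range=['192.168.1.%d' % i for i in range(1, 11)],
                          verbose=True,
                          write_json=True)
    scanner.run()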
def query_request(kb_ids, issue, kb_vers_map): """ requirement: (1)cache中查找知识库的地址,cache中没有的,则为其在cache中没有被其他kb占用的box中选取BOX,并异步写入其TARGET (2)遍历/B/vm/下的子节点,找到空闲节点,为其赋一个知识库,单独起进程发送请求,请求结果放入queue param: kb_ids 列表 return: """ # 取cache中已有的kb logger.debug('start get add') start_get_add = time() kb_add_dict = cache.get_many(kb_ids) no_kbs = set(kb_ids) - set(kb_add_dict.keys()) logger.debug('no_kbs:%s' % no_kbs) # 为cache中没有的kb赋予box boxs = [('/B/83c4ee846cf2/B70/', '192.168.30.187:8000/70'), ('/B/83c4ee846cf2/B74/', '192.168.30.187:8000/74'), ('/B/83c4ee846cf2/B73/', '192.168.30.187:8000/73'), ('/B/83c4ee846cf2/B72/', '192.168.30.187:8000/72'), ('/B/83c4ee846cf2/B71/', '192.168.30.187:8000/71'), ('/B/83c4ee846cf2/B30/', '192.168.30.187:8000/30'), ('/B/83c4ee846cf2/B23/', '192.168.30.187:8000/23'), ('/B/83c4ee846cf2/B22/', '192.168.30.187:8000/22'), ('/B/83c4ee846cf2/B21/', '192.168.30.187:8000/21'), ('/B/83c4ee846cf2/B20/', '192.168.30.187:8000/20'), ('/B/83c4ee846cf2/B27/', '192.168.30.187:8000/27'), ('/B/83c4ee846cf2/B26/', '192.168.30.187:8000/26'), ('/B/83c4ee846cf2/B25/', '192.168.30.187:8000/25'), ('/B/83c4ee846cf2/B24/', '192.168.30.187:8000/24'), ('/B/83c4ee846cf2/B66/', '192.168.30.187:8000/66'), ('/B/83c4ee846cf2/B67/', '192.168.30.187:8000/67'), ('/B/83c4ee846cf2/B64/', '192.168.30.187:8000/64'), ('/B/83c4ee846cf2/B29/', '192.168.30.187:8000/29'), ('/B/83c4ee846cf2/B65/', '192.168.30.187:8000/65'), ('/B/83c4ee846cf2/B28/', '192.168.30.187:8000/28'), ('/B/83c4ee846cf2/B68/', '192.168.30.187:8000/68'), ('/B/83c4ee846cf2/B69/', '192.168.30.187:8000/69'), ('/B/83c4ee846cf2/B5/', '192.168.30.187:8000/5'), ('/B/83c4ee846cf2/B4/', '192.168.30.187:8000/4'), ('/B/83c4ee846cf2/B81/', '192.168.30.187:8000/81'), ('/B/83c4ee846cf2/B3/', '192.168.30.187:8000/3'), ('/B/83c4ee846cf2/B80/', '192.168.30.187:8000/80'), ('/B/83c4ee846cf2/B2/', '192.168.30.187:8000/2'), ('/B/83c4ee846cf2/B83/', '192.168.30.187:8000/83'), ('/B/83c4ee846cf2/B9/', '192.168.30.187:8000/9'), ('/B/83c4ee846cf2/B82/', '192.168.30.187:8000/82'), ('/B/83c4ee846cf2/B8/', '192.168.30.187:8000/8'), ('/B/83c4ee846cf2/B85/', '192.168.30.187:8000/85'), ('/B/83c4ee846cf2/B7/', '192.168.30.187:8000/7'), ('/B/83c4ee846cf2/B84/', '192.168.30.187:8000/84'), ('/B/83c4ee846cf2/B6/', '192.168.30.187:8000/6'), ('/B/83c4ee846cf2/B40/', '192.168.30.187:8000/40'), ('/B/83c4ee846cf2/B41/', '192.168.30.187:8000/41'), ('/B/83c4ee846cf2/B32/', '192.168.30.187:8000/32'), ('/B/83c4ee846cf2/B31/', '192.168.30.187:8000/31'), ('/B/83c4ee846cf2/B34/', '192.168.30.187:8000/34'), ('/B/83c4ee846cf2/B33/', '192.168.30.187:8000/33'), ('/B/83c4ee846cf2/B36/', '192.168.30.187:8000/36'), ('/B/83c4ee846cf2/B35/', '192.168.30.187:8000/35'), ('/B/83c4ee846cf2/B38/', '192.168.30.187:8000/38'), ('/B/83c4ee846cf2/B37/', '192.168.30.187:8000/37'), ('/B/83c4ee846cf2/B75/', '192.168.30.187:8000/75'), ('/B/83c4ee846cf2/B76/', '192.168.30.187:8000/76'), ('/B/83c4ee846cf2/B39/', '192.168.30.187:8000/39'), ('/B/83c4ee846cf2/B77/', '192.168.30.187:8000/77'), ('/B/83c4ee846cf2/B78/', '192.168.30.187:8000/78'), ('/B/83c4ee846cf2/B79/', '192.168.30.187:8000/79'), ('/B/83c4ee846cf2/B1/', '192.168.30.187:8000/1'), ('/B/83c4ee846cf2/B19/', '192.168.30.187:8000/19'), ('/B/83c4ee846cf2/B17/', '192.168.30.187:8000/17'), ('/B/83c4ee846cf2/B18/', '192.168.30.187:8000/18'), ('/B/83c4ee846cf2/B90/', '192.168.30.187:8000/90'), ('/B/83c4ee846cf2/B51/', '192.168.30.187:8000/51'), ('/B/83c4ee846cf2/B11/', '192.168.30.187:8000/11'), ('/B/83c4ee846cf2/B52/', '192.168.30.187:8000/52'), 
('/B/83c4ee846cf2/B12/', '192.168.30.187:8000/12'), ('/B/83c4ee846cf2/B50/', '192.168.30.187:8000/50'), ('/B/83c4ee846cf2/B10/', '192.168.30.187:8000/10'), ('/B/83c4ee846cf2/B15/', '192.168.30.187:8000/15'), ('/B/83c4ee846cf2/B16/', '192.168.30.187:8000/16'), ('/B/83c4ee846cf2/B13/', '192.168.30.187:8000/13'), ('/B/83c4ee846cf2/B14/', '192.168.30.187:8000/14'), ('/B/83c4ee846cf2/B49/', '192.168.30.187:8000/49'), ('/B/83c4ee846cf2/B48/', '192.168.30.187:8000/48'), ('/B/83c4ee846cf2/B47/', '192.168.30.187:8000/47'), ('/B/83c4ee846cf2/B46/', '192.168.30.187:8000/46'), ('/B/83c4ee846cf2/B45/', '192.168.30.187:8000/45'), ('/B/83c4ee846cf2/B44/', '192.168.30.187:8000/44'), ('/B/83c4ee846cf2/B43/', '192.168.30.187:8000/43'), ('/B/83c4ee846cf2/B42/', '192.168.30.187:8000/42'), ('/B/83c4ee846cf2/B88/', '192.168.30.187:8000/88'), ('/B/83c4ee846cf2/B89/', '192.168.30.187:8000/89'), ('/B/83c4ee846cf2/B86/', '192.168.30.187:8000/86'), ('/B/83c4ee846cf2/B87/', '192.168.30.187:8000/87'), ('/B/83c4ee846cf2/B60/', '192.168.30.187:8000/60'), ('/B/83c4ee846cf2/B61/', '192.168.30.187:8000/61'), ('/B/83c4ee846cf2/B62/', '192.168.30.187:8000/62'), ('/B/83c4ee846cf2/B63/', '192.168.30.187:8000/63'), ('/B/83c4ee846cf2/B58/', '192.168.30.187:8000/58'), ('/B/83c4ee846cf2/B57/', '192.168.30.187:8000/57'), ('/B/83c4ee846cf2/B59/', '192.168.30.187:8000/59'), ('/B/83c4ee846cf2/B54/', '192.168.30.187:8000/54'), ('/B/83c4ee846cf2/B53/', '192.168.30.187:8000/53'), ('/B/83c4ee846cf2/B56/', '192.168.30.187:8000/56'), ('/B/83c4ee846cf2/B55/', '192.168.30.187:8000/55'), ('/B/d204c1d12b8a/B70/', '192.168.30.186:8000/70'), ('/B/d204c1d12b8a/B74/', '192.168.30.186:8000/74'), ('/B/d204c1d12b8a/B73/', '192.168.30.186:8000/73'), ('/B/d204c1d12b8a/B72/', '192.168.30.186:8000/72'), ('/B/d204c1d12b8a/B71/', '192.168.30.186:8000/71'), ('/B/d204c1d12b8a/B30/', '192.168.30.186:8000/30'), ('/B/d204c1d12b8a/B23/', '192.168.30.186:8000/23'), ('/B/d204c1d12b8a/B22/', '192.168.30.186:8000/22'), ('/B/d204c1d12b8a/B21/', '192.168.30.186:8000/21'), ('/B/d204c1d12b8a/B20/', '192.168.30.186:8000/20'), ('/B/d204c1d12b8a/B27/', '192.168.30.186:8000/27'), ('/B/d204c1d12b8a/B26/', '192.168.30.186:8000/26'), ('/B/d204c1d12b8a/B25/', '192.168.30.186:8000/25'), ('/B/d204c1d12b8a/B24/', '192.168.30.186:8000/24'), ('/B/d204c1d12b8a/B66/', '192.168.30.186:8000/66'), ('/B/d204c1d12b8a/B67/', '192.168.30.186:8000/67'), ('/B/d204c1d12b8a/B64/', '192.168.30.186:8000/64'), ('/B/d204c1d12b8a/B29/', '192.168.30.186:8000/29'), ('/B/d204c1d12b8a/B65/', '192.168.30.186:8000/65'), ('/B/d204c1d12b8a/B28/', '192.168.30.186:8000/28'), ('/B/d204c1d12b8a/B68/', '192.168.30.186:8000/68'), ('/B/d204c1d12b8a/B69/', '192.168.30.186:8000/69'), ('/B/d204c1d12b8a/B5/', '192.168.30.186:8000/5'), ('/B/d204c1d12b8a/B4/', '192.168.30.186:8000/4'), ('/B/d204c1d12b8a/B81/', '192.168.30.186:8000/81'), ('/B/d204c1d12b8a/B3/', '192.168.30.186:8000/3'), ('/B/d204c1d12b8a/B80/', '192.168.30.186:8000/80'), ('/B/d204c1d12b8a/B2/', '192.168.30.186:8000/2'), ('/B/d204c1d12b8a/B83/', '192.168.30.186:8000/83'), ('/B/d204c1d12b8a/B9/', '192.168.30.186:8000/9'), ('/B/d204c1d12b8a/B82/', '192.168.30.186:8000/82'), ('/B/d204c1d12b8a/B8/', '192.168.30.186:8000/8'), ('/B/d204c1d12b8a/B85/', '192.168.30.186:8000/85'), ('/B/d204c1d12b8a/B7/', '192.168.30.186:8000/7'), ('/B/d204c1d12b8a/B84/', '192.168.30.186:8000/84'), ('/B/d204c1d12b8a/B6/', '192.168.30.186:8000/6'), ('/B/d204c1d12b8a/B40/', '192.168.30.186:8000/40'), ('/B/d204c1d12b8a/B41/', '192.168.30.186:8000/41'), ('/B/d204c1d12b8a/B32/', 
'192.168.30.186:8000/32'), ('/B/d204c1d12b8a/B31/', '192.168.30.186:8000/31'), ('/B/d204c1d12b8a/B34/', '192.168.30.186:8000/34'), ('/B/d204c1d12b8a/B33/', '192.168.30.186:8000/33'), ('/B/d204c1d12b8a/B36/', '192.168.30.186:8000/36'), ('/B/d204c1d12b8a/B35/', '192.168.30.186:8000/35'), ('/B/d204c1d12b8a/B38/', '192.168.30.186:8000/38'), ('/B/d204c1d12b8a/B37/', '192.168.30.186:8000/37'), ('/B/d204c1d12b8a/B75/', '192.168.30.186:8000/75'), ('/B/d204c1d12b8a/B76/', '192.168.30.186:8000/76'), ('/B/d204c1d12b8a/B39/', '192.168.30.186:8000/39'), ('/B/d204c1d12b8a/B77/', '192.168.30.186:8000/77'), ('/B/d204c1d12b8a/B78/', '192.168.30.186:8000/78'), ('/B/d204c1d12b8a/B79/', '192.168.30.186:8000/79'), ('/B/d204c1d12b8a/B1/', '192.168.30.186:8000/1'), ('/B/d204c1d12b8a/B19/', '192.168.30.186:8000/19'), ('/B/d204c1d12b8a/B17/', '192.168.30.186:8000/17'), ('/B/d204c1d12b8a/B18/', '192.168.30.186:8000/18'), ('/B/d204c1d12b8a/B90/', '192.168.30.186:8000/90'), ('/B/d204c1d12b8a/B51/', '192.168.30.186:8000/51'), ('/B/d204c1d12b8a/B11/', '192.168.30.186:8000/11'), ('/B/d204c1d12b8a/B52/', '192.168.30.186:8000/52'), ('/B/d204c1d12b8a/B12/', '192.168.30.186:8000/12'), ('/B/d204c1d12b8a/B50/', '192.168.30.186:8000/50'), ('/B/d204c1d12b8a/B10/', '192.168.30.186:8000/10'), ('/B/d204c1d12b8a/B15/', '192.168.30.186:8000/15'), ('/B/d204c1d12b8a/B16/', '192.168.30.186:8000/16'), ('/B/d204c1d12b8a/B13/', '192.168.30.186:8000/13'), ('/B/d204c1d12b8a/B14/', '192.168.30.186:8000/14'), ('/B/d204c1d12b8a/B49/', '192.168.30.186:8000/49'), ('/B/d204c1d12b8a/B48/', '192.168.30.186:8000/48'), ('/B/d204c1d12b8a/B47/', '192.168.30.186:8000/47'), ('/B/d204c1d12b8a/B46/', '192.168.30.186:8000/46'), ('/B/d204c1d12b8a/B45/', '192.168.30.186:8000/45'), ('/B/d204c1d12b8a/B44/', '192.168.30.186:8000/44'), ('/B/d204c1d12b8a/B43/', '192.168.30.186:8000/43'), ('/B/d204c1d12b8a/B42/', '192.168.30.186:8000/42'), ('/B/d204c1d12b8a/B88/', '192.168.30.186:8000/88'), ('/B/d204c1d12b8a/B89/', '192.168.30.186:8000/89'), ('/B/d204c1d12b8a/B86/', '192.168.30.186:8000/86'), ('/B/d204c1d12b8a/B87/', '192.168.30.186:8000/87'), ('/B/d204c1d12b8a/B60/', '192.168.30.186:8000/60'), ('/B/d204c1d12b8a/B61/', '192.168.30.186:8000/61'), ('/B/d204c1d12b8a/B62/', '192.168.30.186:8000/62'), ('/B/d204c1d12b8a/B63/', '192.168.30.186:8000/63'), ('/B/d204c1d12b8a/B58/', '192.168.30.186:8000/58'), ('/B/d204c1d12b8a/B57/', '192.168.30.186:8000/57'), ('/B/d204c1d12b8a/B59/', '192.168.30.186:8000/59'), ('/B/d204c1d12b8a/B54/', '192.168.30.186:8000/54'), ('/B/d204c1d12b8a/B53/', '192.168.30.186:8000/53'), ('/B/d204c1d12b8a/B56/', '192.168.30.186:8000/56'), ('/B/d204c1d12b8a/B55/', '192.168.30.186:8000/55')] boxs_free = [] if kb_add_dict: boxs_free = set(dict(boxs).keys()) - set( dict(kb_add_dict.values()).keys()) else: boxs_free = set(dict(boxs).keys()) if len(boxs_free) < len(no_kbs): rest_kbs = no_kbs[len(boxs_free):] kb_ids = set(kb_ids) - set(rest_kbs) # 写入cache boxs_free_info = filter(lambda x: x[0] in boxs_free, boxs) temp_kb_box_list = list(zip(no_kbs, boxs_free_info)) cache_ret = map(lambda x: cache.set(x[0], x[1], 30 * 60), temp_kb_box_list) logger.debug('cache_ret:%s' % list(cache_ret)) kb_add_dict = cache.get_many(kb_ids) logger.debug('kb_add_dict:%s' % kb_add_dict) logger.debug('------get address time:%.5f' % (time() - start_get_add)) logger.debug('start box-request ') start_request = time() num = len(kb_ids) q = Manager().Queue() p_list = [] for i in range(0, num): kb = kb_ids[i] version = kb_vers_map[kb] add = kb_add_dict[kb][1] logger.debug('Target:%s 
Add:%s' % (kb, add)) temp_p = Process(target=_writer, args=(q, kb, version, add, issue)) p_list.append(temp_p) temp_p.start() for pr in p_list: pr.join() logger.debug('------box-request time:%.5f' % (time() - start_request)) start_get_msg = time() i = 0 ret = {'no_box': [], 'ans': [], 'not_match': [], 'fail': []} while not q.empty(): msg = q.get() if 'not_match' in msg.keys(): ret['not_match'].append(msg['not_match']) elif 'fail' in msg.keys(): ret['fail'].append(msg['fail']) else: ret['ans'].append(msg) logger.debug('------%d msg:%s' % (i, msg)) i += 1 logger.debug('------get answers time:%.5f' % (time() - start_get_msg)) # 异步写入zk # set_box_zk.delay(temp_kb_box_list) return ret
def query_request_0(kb_ids, issue, kb_vers_map):
    """
    requirement:
        (1) Look up the vm nodes under /B/ in zk.
        (2) Walk the children of /B/vm/, find idle nodes, assign each one a knowledge base,
            send the request from a separate process, and put the result into the queue.
    param: kb_ids  list
    return:
    """
    try:
        zk = KazooClient(hosts=ZOOKEEPER['HOST'])
        zk.start()
    except Exception as e:
        err_log.error(e)
        raise Exception(1910)
    _node_list = zk.get_children('/B/')
    logger.debug('vm:%s' % _node_list)
    q = Manager().Queue()
    ret = {'no_box': [], 'ans': [], 'not_match': [], 'fail': []}
    p_list = []
    random.shuffle(_node_list)
    for vm in _node_list:
        box_list = zk.get_children('/B/' + vm + '/')
        random.shuffle(box_list)
        for box in box_list:
            node = '/B/' + vm + '/' + box + '/'
            _str, _ = zk.get(node)
            _dict = json.loads(_str)
            if _dict['status'] == '0':
                target = kb_ids.pop()
                logger.debug('------Target:%s Add:%s' % (target, _dict['Add']))
                temp_p = Process(target=_writer, args=(q, target, kb_vers_map[target], _dict['Add'], issue))
                p_list.append(temp_p)
                temp_p.start()
            if not kb_ids:
                break
        if not kb_ids:
            break
    else:
        if kb_ids:
            ret['no_box'] = kb_ids
    for pr in p_list:
        pr.join()
    zk.stop()
    i = 0
    while not q.empty():
        msg = q.get()
        if 'not_match' in msg.keys():
            ret['not_match'].append(msg['not_match'])
        elif 'fail' in msg.keys():
            ret['fail'].append(msg['fail'])
        else:
            ret['ans'].append(msg)
        logger.debug('------%d msg:%s' % (i, msg))
        i += 1
    logger.debug('get answers finished')
    return ret
# Set up Processes
number_of_processes = 16
for i in range(number_of_processes):
    worker = MD5Cracker(work_queue, global_namespace)
    worker.start()
    workers.append(worker)

print "Target Hash: {}".format(hash)

maxChars = 13
while_count = 1
for baseWidth in range(1, maxChars + 1):
    while global_namespace.finished is False:
        if work_queue.empty():
            print "checking passwords width [" + str(baseWidth) + "]"
            # a work item is width, position, baseString
            work_queue.put({'width': baseWidth, 'position': 0, 'baseString': ""})
            break
        else:
            if while_count % 10 == 0:
                global_namespace.count = 0
                while_count = 1
            else:
                print "{:,d} passwords/sec".format(global_namespace.count / while_count)
                while_count += 1
            print "Queue Size: {}".format(work_queue.qsize())
def main(): import argparse import logging import os import yaml parser = argparse.ArgumentParser() parser.add_argument('classifier') parser.add_argument('--postprocess', action="store_true", help='Run postprocessing, close blobs and remove noise') parser.add_argument('videolist', help='A file listed all the videos to be indexed') parser.add_argument('cores', type=int, help='Number of processes of paralellism') args = parser.parse_args() logging.basicConfig(level=logging.WARNING, format="%(asctime)s - %(message)s") classifier = zipfile.ZipFile(args.classifier) global forest0, svmmodels, training_bosts, hist0 forest0, hist0, forest1, hist1, training_bosts, svmmodels, prior = \ load_from_classifier(classifier) classifier.close() KEY_FRAME_PERIOD = 2 # in seconds #queue = Queue.Queue() #data_queue = Queue.Queue() queue = Manager().Queue() data_queue = Manager().Queue() for processes in [4]: video_list = open(args.videolist, 'r') log_file = open('statistics%d.txt' % processes, 'w') fps = 0 fps_count = 0 for video_file in video_list: video_file = video_file.strip() name = os.path.splitext(video_file)[0] file_path = os.path.join(VIDEO_RESOURCE, video_file) log_file.write(file_path+"\n") capture = cv.CaptureFromFile(file_path) frame_rate = cv.GetCaptureProperty(capture, cv.CV_CAP_PROP_FPS) total_frames = cv.GetCaptureProperty(capture, cv.CV_CAP_PROP_FRAME_COUNT) log_file.write("frame rate: %.3f, total frames: %d\n" % (frame_rate, total_frames)) start_time0 = time.time() key_frame_counter = 0 frame = cv.QueryFrame(capture) os.makedirs("tmp") while frame: cv.SaveImage("tmp/" + name + "%d.png" % key_frame_counter, frame) for i in xrange(int(KEY_FRAME_PERIOD * frame_rate)): frame = cv.QueryFrame(capture) key_frame_counter += 1 for i in xrange(key_frame_counter): data_queue.put(i) start_time = time.time() ps = [] for group in xrange(processes): p = Process(target = calculate_class, args=(name, queue, data_queue, )) #p = threading.Thread(target = calculate_class, args=(name, queue, data_queue, )) p.start() ps.append(p) for p in ps: p.join() elapse_time = time.time() - start_time accuracy_file = open('360.txt', 'w') while not queue.empty(): q_entry = queue.get() frame_counter = q_entry[0] ILP = q_entry[1] accuracy_file.write('%d' % frame_counter) for class_index, score in enumerate(ILP): accuracy_file.write(',%.02f' % score) accuracy_file.write('\n') accuracy_file.close() os.system("rm -rf tmp") log_file.write("decoding time: %.2f, total time: %.2f, key frames: %d, frame per sec: %.3f\n" \ % (start_time - start_time0, elapse_time, key_frame_counter, key_frame_counter / elapse_time)) fps += key_frame_counter / elapse_time fps_count += 1 #time.sleep(10) video_list.close() log_file.write("average fps: %.3f\n" % (fps/fps_count)) log_file.close()
def sub_cmd_multisearch(args):
    if not (args.m and args.sc):
        exit(1)
    config = xq.get_strategy_config(args.sc)
    pprint.pprint(config)
    module_name = config["module_name"].replace("/", ".")
    class_name = config["class_name"]
    symbol = config['symbol']

    md = DBMD(args.m, kl.KLINE_DATA_TYPE_JSON)
    start_time, end_time = get_time_range(md, symbol, args.r)

    count = args.count
    cpus = cpu_count()
    print("count: %s, cpus: %s" % (count, cpus))

    result_q = Manager().Queue()  # only a Queue from Manager works together with Pool
    task_q = Manager().Queue()    # only a Queue from Manager works together with Pool
    for index in range(count):
        task_q.put(index)

    print('Parent process %s.' % os.getpid())
    p = Pool(cpus)
    for i in range(cpus):
        # p.apply_async(child_process_test, args=(i, task_q, result_q))
        p.apply_async(child_process, args=(i, task_q, result_q, args.m, config, module_name, class_name, start_time, end_time))
    print('Waiting for all subprocesses done...')
    p.close()

    start_time = datetime.now()
    result = []
    while len(result) < count:
        if result_q.empty():
            time.sleep(1)
        else:
            value = result_q.get()
            print("result value: ", value)
            result.append(value)
        sys.stdout.write(
            " %d/%d, cost: %s, progress: %g%% \r" % (
                len(result), count, datetime.now() - start_time,
                round((len(result) / count) * 100, 2)
            )
        )
        sys.stdout.flush()
    print("")
    # print("result queue(len: %s)" % (result_q.qsize()))

    p.join()
    print('All subprocesses done.')

    sorted_rs = sorted(result, key=lambda x: x[1][0], reverse=True)
    for r in sorted_rs:
        # print("r: ", r)
        info = "%6s %30s %s " % r
        print(info)
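# The child_process worker submitted above is defined elsewhere. A minimal sketch of
# the task_q/result_q hand-off it relies on (the "..." body and the result tuple
# layout are assumptions for illustration only): each worker drains indices from the
# shared task queue and pushes one result per index into the result queue, which the
# parent polls until it has collected `count` items.
import queue

def child_process(worker_id, task_q, result_q, *run_args):
    while True:
        try:
            index = task_q.get_nowait()  # grab the next pending task, if any
        except queue.Empty:
            break
        # ... run the strategy/backtest for this index here ...
        result_q.put((worker_id, (index,), "ok"))  # shape mirrors "%6s %30s %s" above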
def query_request_z(kb_ids, issue, kb_vers_map): """ requirement: (1)cache中查找知识库的地址,cache中没有的,则为其在cache中没有被其他kb占用的box中选取BOX,并异步写入其TARGET (2)遍历/B/vm/下的子节点,找到空闲节点,为其赋一个知识库,单独起进程发送请求,请求结果放入queue param: kb_ids 列表 return: """ # 取cache中已有的kb # start_get_add = time() # kb_add_dict = cache.get_many(kb_ids) # no_kbs = set(kb_ids) - set(kb_add_dict.keys()) # logger.debug('no_kbs:%s' % no_kbs) # 为cache中没有的kb赋予box if cache.ttl("boxs") == 0: cache.set("boxs", str(_acquire_zk_node()), timeout=None) boxs = eval(cache.get("boxs")) add_dict = list(dict(boxs).values()) # boxs_free = [] # if kb_add_dict: # boxs_free = set(dict(boxs).keys()) - set(dict(kb_add_dict.values()).keys()) # else: # boxs_free = set(dict(boxs).keys()) # if len(boxs_free) < len(no_kbs): # rest_kbs = no_kbs[len(boxs_free):] # kb_ids = set(kb_ids) - set(rest_kbs) # 写入cache # boxs_free_info = filter(lambda x: x[0] in boxs_free, boxs) # temp_kb_box_list = list(zip(no_kbs, boxs_free_info)) # cache_ret = map(lambda x: cache.set(x[0], x[1], 30 * 60), temp_kb_box_list) # logger.debug('cache_ret:%s' % list(cache_ret)) # kb_add_dict = cache.get_many(kb_ids) # logger.debug('kb_add_dict:%s' % kb_add_dict) # logger.debug('------get address time:%.5f' % (time() - start_get_add)) # start_request = time() num = len(kb_ids) global lock_label while True: if lock_label: lock_label = False break global top_label seed = int(top_label) temp = (seed + 1) % len(boxs) top_label = temp lock_label = True logger.debug(seed) q = Manager().Queue() p_list = [] for i in range(0, num): kb = kb_ids[i] version = kb_vers_map[kb] # add = kb_add_dict[kb][1] add = add_dict[(seed + i) % len(boxs)] # logger.debug('Target:%s Add:%s' % (kb, add)) temp_p = Process(target=_writer, args=(q, kb, version, add, issue)) p_list.append(temp_p) temp_p.start() for pr in p_list: pr.join() # logger.debug('test.q is ' + q.empty()) # logger.debug('------box-request time:%.5f' % (time() - start_request)) # start_get_msg = time() i = 0 ret = {'no_box': [], 'ans': [], 'not_match': [], 'fail': []} while not q.empty(): msg = q.get() if 'not_match' in msg.keys(): ret['not_match'].append(msg['not_match']) elif 'fail' in msg.keys(): ret['fail'].append(msg['fail']) else: ret['ans'].append(msg) logger.debug('------%d msg:%s' % (i, msg)) i += 1 # logger.debug('------get answers time:%.5f' % (time() - start_get_msg)) # 异步写入zk # set_box_zk.delay(temp_kb_box_list) return ret
def calc_factor_loading(cls, start_date, end_date=None, month_end=True, save=False, **kwargs): """ 计算指定日期的样本个股的因子载荷, 并保存至因子数据库 Parameters: -------- :param start_date: datetime-like, str 开始日期, 格式: YYYY-MM-DD or YYYYMMDD :param end_date: datetime-like, str 结束日期, 如果为None, 则只计算start_date日期的因子载荷, 格式: YYYY-MM-DD or YYYYMMDD :param month_end: bool, 默认为True 如果为True, 则只计算月末时点的因子载荷 :param save: bool, 默认为True 是否保存至因子数据库 :param kwargs: 'multi_proc': bool, True=采用多进程, False=采用单进程, 默认为False :return: dict 因子载荷数据 """ # 取得交易日序列及股票基本信息表 start_date = Utils.to_date(start_date) if end_date is not None: end_date = Utils.to_date(end_date) trading_days_series = Utils.get_trading_days(start=start_date, end=end_date) else: trading_days_series = Utils.get_trading_days(end=start_date, ndays=1) all_stock_basics = CDataHandler.DataApi.get_secu_basics() # 遍历交易日序列, 计算LIQUIDITY因子载荷 dict_raw_liquidity = None for calc_date in trading_days_series: if month_end and (not Utils.is_month_end(calc_date)): continue dict_stom = None dict_stoq = None dict_stoa = None dict_raw_liquidity = None logging.info('[%s] Calc Liquidity factor loading.' % Utils.datetimelike_to_str(calc_date)) # 遍历个股,计算个股LIQUIDITY因子值 s = (calc_date - datetime.timedelta(days=risk_ct.LIQUIDITY_CT.listed_days)).strftime('%Y%m%d') stock_basics = all_stock_basics[all_stock_basics.list_date < s] ids = [] stoms = [] stoqs = [] stoas = [] raw_liquidities = [] if 'multi_proc' not in kwargs: kwargs['multi_proc'] = False if not kwargs['multi_proc']: # 采用单进程计算LIQUIDITY因子值 for _, stock_info in stock_basics.iterrows(): logging.debug("[%s] Calc %s's LIQUIDITY factor loading." % (Utils.datetimelike_to_str(calc_date, dash=True), stock_info.symbol)) liquidity_data = cls._calc_factor_loading(stock_info.symbol, calc_date) if liquidity_data is not None: ids.append(liquidity_data['code']) stoms.append(liquidity_data['stom']) stoqs.append(liquidity_data['stoq']) stoas.append(liquidity_data['stoa']) raw_liquidities.append(liquidity_data['liquidity']) else: # 采用多进程计算LIQUIDITY因子值 q = Manager().Queue() p = Pool(4) for _, stock_info in stock_basics.iterrows(): p.apply_async(cls._calc_factor_loading_proc, args=(stock_info.symbol, calc_date, q,)) p.close() p.join() while not q.empty(): liquidity_data = q.get(True) ids.append(liquidity_data['code']) stoms.append(liquidity_data['stom']) stoqs.append(liquidity_data['stoq']) stoas.append(liquidity_data['stoa']) raw_liquidities.append(liquidity_data['liquidity']) date_label = Utils.get_trading_days(start=calc_date, ndays=2)[1] dict_stom = dict({'date': [date_label]*len(ids), 'id': ids, 'factorvalue': stoms}) dict_stoq = dict({'date': [date_label]*len(ids), 'id': ids, 'factorvalue': stoqs}) dict_stoa = dict({'date': [date_label]*len(ids), 'id': ids, 'factorvalue': stoas}) dict_raw_liquidity = dict({'date': [date_label]*len(ids), 'id': ids, 'factorvalue': raw_liquidities}) # 读取Size因子值, 将流动性因子与Size因子正交化 size_factor_path = os.path.join(factor_ct.FACTOR_DB.db_path, risk_ct.SIZE_CT.db_file) df_size = Utils.read_factor_loading(size_factor_path, Utils.datetimelike_to_str(calc_date, dash=False)) df_size.drop(columns='date', inplace=True) df_size.rename(columns={'factorvalue': 'size'}, inplace=True) df_liquidity = pd.DataFrame(dict({'id': ids, 'liquidity': raw_liquidities})) df_liquidity = pd.merge(left=df_liquidity, right=df_size, how='inner', on='id') arr_liquidity = Utils.normalize_data(Utils.clean_extreme_value(np.array(df_liquidity['liquidity']).reshape((len(df_liquidity), 1)))) arr_size = 
Utils.normalize_data(Utils.clean_extreme_value(np.array(df_liquidity['size']).reshape((len(df_liquidity), 1)))) model = sm.OLS(arr_liquidity, arr_size) results = model.fit() df_liquidity['liquidity'] = results.resid df_liquidity.drop(columns='size', inplace=True) df_liquidity.rename(columns={'liquidity': 'factorvalue'}, inplace=True) df_liquidity['date'] = date_label # 保存因子载荷 if save: str_date = Utils.datetimelike_to_str(calc_date, dash=False) factor_header = ['date', 'id', 'factorvalue'] Utils.factor_loading_persistent(cls._db_file, 'stom_{}'.format(str_date), dict_stom, factor_header) Utils.factor_loading_persistent(cls._db_file, 'stoq_{}'.format(str_date), dict_stoq, factor_header) Utils.factor_loading_persistent(cls._db_file, 'stoa_{}'.format(str_date), dict_stoa, factor_header) Utils.factor_loading_persistent(cls._db_file, 'rawliquidity_{}'.format(str_date), dict_raw_liquidity, factor_header) Utils.factor_loading_persistent(cls._db_file, str_date, df_liquidity.to_dict('list'), factor_header) # 暂停180秒 # logging.info('Suspending for 180s.') # time.sleep(180) return dict_raw_liquidity
def calc_factor_loading(cls, start_date, end_date=None, month_end=True, save=False, **kwargs): """ 计算指定日期的样本个股的因子载荷,并保存至因子数据库 Parameters -------- :param start_date: datetime-like, str 开始日期 :param end_date: datetime-like, str,默认None 结束日期,如果为None,则只计算start_date日期的因子载荷 :param month_end: bool,默认True 只计算月末时点的因子载荷,该参数只在end_date不为None时有效,并且不论end_date是否为None,都会计算第一天的因子载荷 :param save: 是否保存至因子数据库,默认为False :param kwargs: 'multi_proc': bool, True=采用多进程并行计算, False=采用单进程计算, 默认为False :return: 因子载荷,DataFrame -------- 因子载荷,DataFrame 0: id, 证券ID 1: factorvalue, 因子载荷 如果end_date=None,返回start_date对应的因子载荷数据 如果end_date!=None,返回最后一天的对应的因子载荷数据 如果没有计算数据,返回None """ # 1.取得交易日序列及股票基本信息表 start_date = Utils.to_date(start_date) if end_date is not None: end_date = Utils.to_date(end_date) trading_days_series = Utils.get_trading_days(start=start_date, end=end_date) else: trading_days_series = Utils.get_trading_days(end=start_date, ndays=1) # all_stock_basics = CDataHandler.DataApi.get_secu_basics() # 2.遍历交易日序列,计算APM因子载荷 dict_apm = None for calc_date in trading_days_series: dict_apm = {'date': [], 'id': [], 'factorvalue': []} if month_end and (not Utils.is_month_end(calc_date)): continue # 2.1.遍历个股,计算个股APM.stat统计量,过去20日收益率,分别放进stat_lst,ret20_lst列表中 s = (calc_date - datetime.timedelta(days=90)).strftime('%Y%m%d') stock_basics = Utils.get_stock_basics(s) stat_lst = [] ret20_lst = [] symbol_lst = [] if 'multi_proc' not in kwargs: kwargs['multi_proc'] = False if not kwargs['multi_proc']: # 采用单进程计算 for _, stock_info in stock_basics.iterrows(): stat_i = cls._calc_factor_loading(stock_info.symbol, calc_date) ret20_i = Utils.calc_interval_ret(stock_info.symbol, end=calc_date, ndays=20) if stat_i is not None and ret20_i is not None: stat_lst.append(stat_i) ret20_lst.append(ret20_i) symbol_lst.append( Utils.code_to_symbol(stock_info.symbol)) logging.info('APM of %s = %f' % (stock_info.symbol, stat_i)) else: # 采用多进程并行计算 q = Manager().Queue() p = Pool(4) # 最多同时开启4个进程 for _, stock_info in stock_basics.iterrows(): p.apply_async(cls._calc_factor_loading_proc, args=( stock_info.symbol, calc_date, q, )) p.close() p.join() while not q.empty(): apm_value = q.get(True) symbol_lst.append(apm_value[0]) stat_lst.append(apm_value[1]) ret20_lst.append(apm_value[2]) assert len(stat_lst) == len(ret20_lst) assert len(stat_lst) == len(symbol_lst) # 2.2.构建APM因子 # 2.2.1.将统计量stat对动量因子ret20j进行截面回归:stat_j = \beta * Ret20_j + \epsilon_j # 残差向量即为对应个股的APM因子 # 截面回归之前,先对stat统计量和动量因子进行去极值和标准化处理 stat_arr = np.array(stat_lst).reshape((len(stat_lst), 1)) ret20_arr = np.array(ret20_lst).reshape((len(ret20_lst), 1)) stat_arr = Utils.clean_extreme_value(stat_arr) stat_arr = Utils.normalize_data(stat_arr) ret20_arr = Utils.clean_extreme_value(ret20_arr) ret20_arr = Utils.normalize_data(ret20_arr) # 回归分析 # ret20_arr = sm.add_constant(ret20_arr) apm_model = sm.OLS(stat_arr, ret20_arr) apm_result = apm_model.fit() apm_lst = list(np.around(apm_result.resid, 6)) # amp因子载荷精确到6位小数 assert len(apm_lst) == len(symbol_lst) # 2.2.2.构造APM因子字典,并持久化 date_label = Utils.get_trading_days(calc_date, ndays=2)[1] dict_apm = { 'date': [date_label] * len(symbol_lst), 'id': symbol_lst, 'factorvalue': apm_lst } df_std_apm = Utils.normalize_data(pd.DataFrame(dict_apm), columns='factorvalue', treat_outlier=True, weight='eq') if save: # Utils.factor_loading_persistent(cls._db_file, calc_date.strftime('%Y%m%d'), dict_apm) cls._save_factor_loading(cls._db_file, Utils.datetimelike_to_str(calc_date, dash=False), dict_apm, 'APM', factor_type='raw', columns=['date', 'id', 'factorvalue']) 
            cls._save_factor_loading(cls._db_file, Utils.datetimelike_to_str(calc_date, dash=False), df_std_apm, 'APM', factor_type='standardized', columns=['date', 'id', 'factorvalue'])

        # # 2.3. Construct the PureAPM factor
        # # Convert stat_arr into a DataFrame; at this point stat_arr has already been winsorized and standardized
        # df_stat = DataFrame(stat_arr, index=symbol_lst, columns=['stat'])
        # # Get the purifying dependent factors
        # df_dependent_factor = cls.get_dependent_factors(calc_date)
        # # Join df_stat with the dependent factors
        # df_data = pd.concat([df_stat, df_dependent_factor], axis=1, join='inner')
        # # OLS regression to purify the APM factor
        # arr_data = np.array(df_data)
        # pure_apm_model = sm.OLS(arr_data[:, 0], arr_data[:, 1:])
        # pure_apm_result = pure_apm_model.fit()
        # pure_apm_lst = list(np.around(pure_apm_result.resid, 6))
        # pure_symbol_lst = list(df_data.index)
        # assert len(pure_apm_lst) == len(pure_symbol_lst)
        # # Build the pure_apm factor dict and persist it
        # dict_pure_apm = {'date': [date_label]*len(pure_symbol_lst), 'id': pure_symbol_lst, 'factorvalue': pure_apm_lst}
        # pure_apm_db_file = os.path.join(factor_ct.FACTOR_DB.db_path, factor_ct.APM_CT.pure_apm_db_file)
        # if save:
        #     Utils.factor_loading_persistent(pure_apm_db_file, calc_date.strftime('%Y%m%d'), dict_pure_apm)
        # # sleep for 360 seconds
        # logging.info('Suspended for 360s.')
        # time.sleep(360)
    return dict_apm
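The step the comments above describe is a cross-sectional regression of the stat statistic on the 20-day return, keeping the residuals as the factor values. Below is a minimal sketch of that residual-as-factor step on synthetic data; the array names and sizes are made up, and the real pipeline winsorizes and standardizes through its own Utils helpers first.

# Minimal sketch of the residual-as-factor step, using synthetic data.
import numpy as np
import statsmodels.api as sm

rng = np.random.default_rng(0)
n_stocks = 500
ret20 = rng.normal(size=(n_stocks, 1))               # stand-in for the 20-day momentum factor
stat = 0.3 * ret20 + rng.normal(size=(n_stocks, 1))  # stand-in for the APM.stat statistic

model = sm.OLS(stat, ret20)        # no constant, matching the snippet above
result = model.fit()
apm = np.around(result.resid, 6)   # residuals are the factor loadings
print(apm[:5])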
def main():
    import argparse
    import logging
    import os
    import yaml
    import cv

    global processes
    global forest0, svmmodels, training_bosts, hist0

    parser = argparse.ArgumentParser()
    parser.add_argument('classifier')
    parser.add_argument('cores', type=int, help='Number of processes of parallelism')
    parser.add_argument('--postprocess', action="store_true", help='Run postprocessing, close blobs and remove noise')
    args = parser.parse_args()

    logging.basicConfig(level=logging.WARNING, format="%(asctime)s - %(message)s")

    classifier = zipfile.ZipFile(args.classifier)
    forest0, hist0, forest1, hist1, training_bosts, svmmodels, prior = \
        load_from_classifier(classifier)
    classifier.close()

    processes = args.cores
    pool = Pool(processes=processes)

    KEY_FRAME_PERIOD = 2  # in seconds
    q = Manager().Queue()
    total_frame = 0
    new_flag = True
    while True:
        if not new_flag:
            print "wait..."
            time.sleep(1)
        stream_list = get_list(CLOUDLET_RESOURCE, STREAM_RESOURCE)
        new_flag = False
        prev_stream = None
        for stream in stream_list:
            # skip streams that are not denatured videos, or that already carry the "pstf" marker
            if stream.get("stream_description").find("denatured") == -1 or stream.get("stream_description").find("video") == -1 or stream.get("stream_description").find("pstf") != -1:
                prev_stream = stream
                continue

            ILP_max = []
            for i in xrange(len(CLASSES)):
                ILP_max.append(0)
            ILP_list = []
            for i in xrange(len(CLASSES)):
                ILP_list.append([])

            path, name = stream.get("path").replace("mnt", "cloudletstore").rsplit('/', 1)
            print os.path.join(path, name)
            path_p, name_p = prev_stream.get("path").replace("mnt", "cloudletstore").rsplit('/', 1)
            print os.path.join(path_p, name_p)
            statinfo = os.stat(os.path.join(path_p, name_p))
            prev_stream = stream
            if statinfo.st_size == 0:
                continue
            new_flag = True

            frame_rate = 30
            capture = cv.CaptureFromFile(os.path.join(path, name))
            frame_rate = cv.GetCaptureProperty(capture, cv.CV_CAP_PROP_FPS)
            total_frames = cv.GetCaptureProperty(capture, cv.CV_CAP_PROP_FRAME_COUNT)
            frame = cv.QueryFrame(capture)
            print frame_rate, total_frames
            print capture

            start_time = time.time()
            key_frame_counter_base = 0
            while frame:
                process_num = 0
                while frame:
                    cv.SaveImage("indexing" + "%d.png" % process_num, frame)
                    for i in xrange(int(KEY_FRAME_PERIOD * frame_rate)):
                        frame = cv.QueryFrame(capture)
                    process_num += 1
                    if process_num == processes:
                        break
                pool.map(calculate_class, [(q, x) for x in xrange(key_frame_counter_base, key_frame_counter_base + process_num)])
                while not q.empty():
                    q_entry = q.get()
                    key_frame_counter = q_entry[0]
                    ILP = q_entry[1]
                    for class_index, score in enumerate(ILP):
                        if score > SCORE_THRESHOLD:
                            ILP_list[class_index].append((key_frame_counter * int(KEY_FRAME_PERIOD * frame_rate) + 1, score))
                            print (CLASSES[class_index], "%.02f" % score),
                            if score > ILP_max[class_index]:
                                ILP_max[class_index] = score
                print
                key_frame_counter_base += process_num

            for class_index, frame_list in enumerate(ILP_list):
                if not frame_list:
                    continue
                frame_list_split = split_frame_list(frame_list, int(KEY_FRAME_PERIOD * frame_rate) * 2)
                for frame_list, local_max_score in frame_list_split:
                    tag_entry = {}
                    tag_entry["tag"] = CLASSES[class_index] + ":%d" % (ILP_max[class_index] * 100)
                    tag_entry["tag_value"] = local_max_score
                    tag_entry["offset"] = frame_list[0] / frame_rate
                    tag_entry["duration"] = (frame_list[-1] - frame_list[0]) / frame_rate
                    tag_entry["segment"] = stream.get("segment")
                    print tag_entry
                    ret_dict = post(CLOUDLET_RESOURCE, TAG_RESOURCE, tag_entry)

            if stream.get("stream_description").find("pstf") == -1:
                stream_entry = {"stream_description": stream.get("stream_description") + "pstf;"}
                ret_dict = put(CLOUDLET_RESOURCE, stream.get("resource_uri"), stream_entry)

            elapse_time = time.time() - start_time
            print "max score:"
            print [(CLASSES[class_index], "%.02f" % score) for class_index, score in enumerate(ILP_max)]
            print "total time: %.2f, key frames: %d, frame per sec: %.2f" \
                % (elapse_time, key_frame_counter_base, key_frame_counter_base / elapse_time)
            print
            print('exit')
            break

if __name__ == '__main__':
    print('main process starting')
    # build a task queue
    # pageQueue = Queue(300)
    pageQueue = Manager().Queue(300)
    for i in range(1, 50):
        pageQueue.put(i)
    # build a result queue to store the fetched responses
    # dataQueue = Queue()
    dataQueue = Manager().Queue()
    # print to check whether the queues are empty
    print(pageQueue.empty(), dataQueue.empty())
    downloadProcess = []
    for i in range(0, 3):
        p1 = Process(target=getdata, args=(pageQueue, dataQueue))
        p1.start()
        downloadProcess.append(p1)
    for process in downloadProcess:
        process.join()
    print('total', dataQueue.empty(), dataQueue.qsize())
    parseProcess = []
    for i in range(0, 3):
        parse1 = Process(target=parsedata, args=(dataQueue, ))
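The __main__ block above hands a queue of page numbers to several downloader processes and collects their output in a second queue; getdata and parsedata themselves are not shown here. Below is a self-contained sketch of the same two-queue hand-off with trivial fetch/parse stand-ins; get_nowait is used so competing workers cannot block on a queue that empties between the empty() check and the get().

# Self-contained sketch of the two-queue producer/consumer hand-off above.
from multiprocessing import Manager, Process
import queue

def fetch(task_q, result_q):
    # stand-in for getdata(): pull a page number and pretend to download it
    while True:
        try:
            page = task_q.get_nowait()
        except queue.Empty:
            break
        result_q.put('page-%d' % page)

def parse(result_q):
    # stand-in for parsedata(): consume whatever the downloaders produced
    while True:
        try:
            item = result_q.get_nowait()
        except queue.Empty:
            break
        print('parsed', item)

if __name__ == '__main__':
    task_q = Manager().Queue(300)
    result_q = Manager().Queue()
    for i in range(1, 10):
        task_q.put(i)
    downloaders = [Process(target=fetch, args=(task_q, result_q)) for _ in range(3)]
    for p in downloaders:
        p.start()
    for p in downloaders:
        p.join()
    parsers = [Process(target=parse, args=(result_q,)) for _ in range(3)]
    for p in parsers:
        p.start()
    for p in parsers:
        p.join()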
def launch_expeditions(self, task_request_list, moon_name_list=None):
    global expedition
    # ---[ 1 ]------------------------------------------------------
    self.log.show('Checking Moon list sent by user')
    working_moons = []
    if not moon_name_list:
        self.log.show('Traveling to available Moons on Orbit')
        working_moons = self.orbit.values()
    else:
        self.log.show('Traveling to ' + str(moon_name_list))
        working_moons = [self.orbit.get_moon(moon_name) for moon_name in moon_name_list]

    # ---[ 2 ]------------------------------------------------------
    self.log.show('Build Thread-safe Queues with no maximum size')
    recv_queue = Manager().Queue()  # len(task_request_list)
    send_queue = Manager().Queue()  # len(task_request_list)

    # ---[ 3 ]------------------------------------------------------
    self.log.show('Enqueue tasks on "send_queue" object')
    for task_obj in task_request_list:
        send_queue.put_nowait(str(task_obj))  # "normal" objects are not thread safe!
    self.log.show('send_queue = ' + str(send_queue.qsize()) + '/' + str(len(task_request_list)) + ' tasks')

    # ---[ 4 ]------------------------------------------------------
    self.log.show('Starting up Process Pool')
    pool = Pool(processes=len(working_moons))
    for moon in working_moons:
        # running_expeditions.append(Process(target=expedition, args=(self.name, moon.name, moon.ip, moon.port, taskrequest_queue, taskresponse_queue,)))  # Process object
        pool.apply_async(func=expedition, args=(self.name, moon.name, moon.ip, moon.port, send_queue, recv_queue,))

    # ---[ 5 ]------------------------------------------------------
    pool.close()
    pool.join()
    self.log.show('recv_queue = ' + str(recv_queue.qsize()) + '/' + str(len(task_request_list)) + ' tasks')
    tmp = []
    while not recv_queue.empty():
        tmp.append(recv_queue.get())
    self.log.show('closing queue')
    self.log.show('return results')
    return tmp
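launch_expeditions pre-loads a send queue, starts one apply_async worker per target, and drains the receive queue only after close()/join(). A reduced sketch of that send/receive pattern is below; echo_worker is a made-up stand-in for expedition that simply copies tasks from one queue to the other.

# Reduced sketch of the send/recv queue pattern above.
from multiprocessing import Manager, Pool
import queue

def echo_worker(worker_name, send_q, recv_q):
    # stand-in for expedition(): consume tasks until the send queue is empty
    while True:
        try:
            task = send_q.get_nowait()
        except queue.Empty:
            return
        recv_q.put('%s handled %s' % (worker_name, task))

if __name__ == '__main__':
    send_q, recv_q = Manager().Queue(), Manager().Queue()
    for task in ['t1', 't2', 't3', 't4']:
        send_q.put_nowait(task)
    pool = Pool(processes=2)
    for name in ['moon-a', 'moon-b']:
        pool.apply_async(func=echo_worker, args=(name, send_q, recv_q))
    pool.close()
    pool.join()                      # results are drained only after all workers finish
    results = []
    while not recv_q.empty():
        results.append(recv_q.get())
    print(results)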
def update_heatmap(slaid, model, mpp_inve, li_ij_tissue, wh_inve, num_ij, n_proc, batch_size, transphorm, use_gpu, is_debug, slide_id_ij_input_output_last_tissue, one_over_n_sliding=1):
    # make zero heatmaps
    im_heatmap_ij = make_zero_heatmap(num_ij, one_over_n_sliding)
    im_count_ij = make_zero_heatmap(num_ij, one_over_n_sliding)
    # set the model to evaluation mode
    model.eval()
    # for each tissue position
    if n_proc > 0:
        li_xy_inve_tissue = [tuple(map(lambda a, b: a * b, ij_tissue, wh_inve)) for ij_tissue in li_ij_tissue]
        queue_size = batch_size * n_proc
        queue = Manager().Queue(queue_size)
        pool = Pool(n_proc)
        split_points = []
        for i in range(n_proc):
            split_points.append(li_xy_inve_tissue[i::n_proc])
        result = pool.map_async(
            read_region_tissue,
            [(queue, slaid, mpp_inve, wh_inve, li_xy_inve, transphorm) for li_xy_inve in split_points])
        li_ij, li_patch_inve = [], []
        while True:
            if queue.empty():
                if not result.ready():
                    time.sleep(0.5)
                elif result.ready() and 0 == len(li_patch_inve):
                    break
            else:
                patch_inve, i, j = queue.get()
                li_ij.append((i, j))
                li_patch_inve.append(patch_inve)
            if len(li_patch_inve) == batch_size or \
                    (result.ready() and queue.empty() and len(li_patch_inve) > 0):
                batch = Variable(torch.FloatTensor(np.stack(li_patch_inve)), volatile=True)
                if use_gpu:
                    batch = batch.cuda()
                start_time = time.time()
                output = model(batch)
                elapsed_batch = time.time() - start_time
                output = output.cpu().data.numpy()[:, 0]
                logging.debug(f'elapsed time for computing one batch is {elapsed_batch:.3f}')
                n_img_in_this_batch = batch.size(0)
                for ii in range(n_img_in_this_batch):
                    i, j = li_ij[ii]
                    im_heatmap_ij[j, i] = output[ii]
                logging.debug(queue.qsize())
                li_ij, li_patch_inve = [], []
        if not result.successful():
            logging.debug('[!] Error: something wrong in result.')
        pool.close()
        pool.join()
    else:
        im_heatmap_ij = update_heatmap_no_parallel(
            slaid, model, im_heatmap_ij, mpp_inve, transphorm, li_ij_tissue, wh_inve,
            batch_size, use_gpu, is_debug, slide_id_ij_input_output_last_tissue)
    return im_heatmap_ij, im_count_ij
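update_heatmap overlaps inference with I/O by polling a bounded Manager().Queue() that the map_async workers keep filling, flushing a batch whenever it is full or the producers have finished. A slimmed-down sketch of that polling loop follows; produce_patches is a dummy producer in place of read_region_tissue, and a running sum stands in for the model.

# Slimmed-down sketch of the map_async + polling-loop pattern above.
import time
from multiprocessing import Manager, Pool

def produce_patches(args):
    # stand-in producer: each item squared plays the role of an expensive patch read
    q, items = args
    for item in items:
        q.put(item * item)

if __name__ == '__main__':
    n_proc, batch_size = 2, 4
    q = Manager().Queue(batch_size * n_proc)   # bounded, so producers cannot run far ahead
    pool = Pool(n_proc)
    chunks = [list(range(i, 20, n_proc)) for i in range(n_proc)]
    result = pool.map_async(produce_patches, [(q, chunk) for chunk in chunks])

    batch, total = [], 0
    while True:
        if q.empty():
            if not result.ready():
                time.sleep(0.1)        # producers still running, wait for more data
            elif not batch:
                break                  # producers done and nothing left to flush
        else:
            batch.append(q.get())
        if len(batch) == batch_size or (result.ready() and q.empty() and batch):
            total += sum(batch)        # the real code runs the model on the batch here
            batch = []
    pool.close()
    pool.join()
    print(total)                       # 0^2 + 1^2 + ... + 19^2 = 2470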