class _Worker(object):
    def __init__(self, protocol=None):
        self.protocol = protocol
        self.pool = ProcessPoolExecutor(max_workers=1)
        self.pool.submit(id, 42).result()  # start the worker process

    def run(self, func, *args, **kwargs):
        """Synchronous remote function call"""
        input_payload = dumps((func, args, kwargs), protocol=self.protocol)
        result_payload = self.pool.submit(
            call_func, input_payload, self.protocol).result()
        result = loads(result_payload)
        if isinstance(result, BaseException):
            raise result
        return result

    def memsize(self):
        workers_pids = [p.pid if hasattr(p, "pid") else p
                        for p in list(self.pool._processes)]
        num_workers = len(workers_pids)
        if num_workers == 0:
            return 0
        elif num_workers > 1:
            raise RuntimeError("Unexpected number of workers: %d" % num_workers)
        return psutil.Process(workers_pids[0]).memory_info().rss

    def close(self):
        self.pool.shutdown(wait=True)
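The `_Worker` class above relies on a module-level `call_func` helper that is not shown. A minimal sketch of what such a helper could look like, assuming `dumps`/`loads` come from `pickle` (or a pickle-compatible library such as cloudpickle); returning exceptions as values mirrors the `isinstance(result, BaseException)` check in `_Worker.run`:

from pickle import dumps, loads  # the original may use cloudpickle instead


def call_func(input_payload, protocol=None):
    """Unpickle (func, args, kwargs), run it, and pickle the outcome.

    Runs inside the single worker process owned by _Worker; exceptions are
    returned as values so the parent process can re-raise them.
    """
    func, args, kwargs = loads(input_payload)
    try:
        result = func(*args, **kwargs)
    except BaseException as e:
        result = e
    return dumps(result, protocol=protocol)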
def on_message(self, message):
    print len(message)
    result = yield tornado.gen.Task(self.process_message, message)
    return
    pool = ProcessPoolExecutor()
    fut = pool.submit(call_process, message)
    ret = yield fut
    pool.shutdown()
def splice_gmaps(threadpool, tilefolder, tempfiles, name): processpool = ProcessPoolExecutor() caption = "Rendering Zoom Layers {}".format(name) loadingbar = Bar(caption=caption) loadingbar.set_progress(0, caption) pygame.display.update() side = 1600 zoom_levels = 4 factor = 2 ** (zoom_levels - 1) masterside = side * factor plates = generate_plate_coords(factor, tempfiles) master_surface = pygame.Surface((masterside, masterside)) done = 0 total = len(tempfiles) + len(plates) * sum((4 ** x for x in range(zoom_levels))) fraction = 100 / total def render_base_to_master(task): imgdata, size, location = task.result() tempsurf = pygame.image.frombuffer(imgdata, size, "RGB") master_surface.blit(tempsurf, location) tasks = [] for masterpos, pieces in plates.items(): master_surface.fill((132, 170, 248)) for x, y in pieces: task = processpool.submit(unpack, tempfiles, x, y, ((x % factor) * side, (y % factor) * side)) tasks.append(threadpool.submit(render_base_to_master, task)) tasks.append(task) current_area = masterside for task in tasks: task.result() done += 0.5 loadingbar.set_progress(done * fraction, caption + " %4d of %4d" % (done, total)) for z in range(zoom_levels): tasks = [] pieces = masterside // current_area x_off = masterpos[0] * pieces y_off = masterpos[1] * pieces for xp in range(pieces): for yp in range(pieces): temp = pygame.Surface.subsurface(master_surface, (xp * current_area, yp * current_area, current_area, current_area)) filename = "screen_{}_{}_{}.png".format(z + 1, x_off + xp, y_off + yp) data = pygame.image.tostring(temp, "RGB") tasks.append(processpool.submit(render_plate, data, tilefolder, temp.get_size(), side, filename)) for task in tasks: task.result() done += 1 loadingbar.set_progress(done * fraction, caption + " %4d of %4d" % (done, total)) current_area //= 2 processpool.shutdown()
def post(self):
    file = self.request.files['file'][0]
    hark.client.login()
    hark.client.createSession(default_hark_config)
    log.info("Uploading asynchronously")
    pool = ProcessPoolExecutor(max_workers=2)
    future = pool.submit(async_upload, file)
    yield future
    pool.shutdown()
    log.info("Rendering visualization page")
    self.render('visualize.html')
class ConcurrentDownloader(BaseDownloader, ConcurrentMixin):
    """Concurrent ProcessPoolExecutor downloader

    :param pool_size: size of the ProcessPoolExecutor
    :param timeout: request timeout in seconds
    """
    def __init__(
            self, worker_class,
            worker_kwargs=None, pool_size=5, middlewares=None,):

        # configure executor
        self.pool_size = pool_size
        self.executor = ProcessPoolExecutor(max_workers=self.pool_size)

        # prepare worker params
        self.worker_params = {
            'worker_class': worker_class,
            'worker_kwargs': worker_kwargs or {},
        }

        # ctrl-c support for python2.x
        # trap sigint
        signal.signal(signal.SIGINT, lambda s, f: s)

        super(ConcurrentDownloader, self).__init__(
            middlewares=middlewares
        )

    def get(self, requests):
        for request in requests:
            # delegate request processing to the executor
            future = self.executor.submit(
                _run_download_worker, self.worker_params, request,
            )

            # build Planned object
            done_future = Planned()

            # when the executor finishes the request - fire done_future
            future.add_done_callback(
                partial(self._done, request, done_future)
            )

            yield done_future

    def get_workers_count(self):
        return self.pool_size

    def stop(self):
        self.executor.shutdown()
def runParallelTqdm(func, arglist, workers=1): """Handle multiple tasks with tqdm bar in parallel. The function to be run must include keyword argument "vid", which should be passed to tqdm's position. Args: func (callable): The function you want to run in parallel example: func(**kwarg, vid) arglist (dict/list of dict): arguments for specified function. should be a list of keyword dictionaries. workers (int, optional): The number of processes run in parallel At least 1, won't exceed the number of cpu cores. Returns: [list]: returns of your function in the same order of the arglist """ if not isinstance(arglist, list): arglist = [arglist] workers = min(max(workers, 1), os.cpu_count()) slotManager = Manager() opened = slotManager.list(range(workers - 1, -1, -1)) filled = slotManager.dict() pb = tqdm(total=len(arglist), desc="Overall", leave=True, position=workers, ascii=(os.name == "nt"), unit="task", mininterval=0.2) executor = ProcessPoolExecutor(max_workers=workers) tasks = [ executor.submit(_worker, func, args, opened, filled) for args in arglist ] for _ in as_completed(tasks): # Adjust Overall progress bar position if len(executor._pending_work_items) < workers: pb.clear() pb.pos = (-max(filled.values()) - 1) if filled else 0 pb.refresh() pb.update(1) executor.shutdown(wait=True) pb.close() return [task.result() for task in tasks]
async def main():
    executor = ProcessPoolExecutor(4, initializer=reader.initializer,
                                   initargs=(log, db_path, 'mainnet', 1.0, True))
    #await run_times(executor, 4, show=False)
    #await run_times(executor, 1)
    await run_times(executor, 2**3)
    await run_times(executor, 2**5)
    await run_times(executor, 2**7)
    #await run_times(executor, 2**9)
    #await run_times(executor, 2**11)
    #await run_times(executor, 2**13)
    executor.shutdown(True)
def unzipfile(tofilepath):
    for root, dirs, files in os.walk(tofilepath):
        pool = ProcessPoolExecutor(max_workers=20)
        for file in files:
            # add a filter condition
            if file[-4:] == '.bz2':
                filefullname = os.path.join(root, file)
                unzipfile = filefullname[:-4]
                print unzipfile
                if not os.path.exists(unzipfile):
                    # Skip extraction if the unzipped file already exists;
                    # the command below also checks whether the file exists.
                    obj = pool.submit(runshell, filefullname)
        pool.shutdown(wait=True)
def monitor_points(filename, time2wait=1):
    '''
    Read a file with this format:
        --- ROOM monitor point --
    such as:
        Blumar/Sealand2/SM3B/Biofiltros/Biofiltros1/Oxygen/Saturation
    called:
        |--> database
        |--> room
    namespace = Blumar/Sealand2/SM3B/Biofiltros/Biofiltros1/Oxygen/Saturation
                |_____________________________________________> monitor
    then a dict is formed such as:
        {database|room: [monitor1, monitor2, monitorn]}
    then for each ROOM execute a thread to check if the room is stuck:
    goto: is_room_stuck
    '''
    fpt = open(filename, 'r')
    lines = fpt.readlines()
    all_threads = {}
    temp = []
    for data in lines:
        if data.__contains__('ROOM'):
            if len(temp) != 0:
                all_threads.update({database + "|" + room: temp})
                temp = []
        elif not data.__contains__('#'):
            data = data.rsplit('\n')[0]
            namespace = data.split("/")
            database = namespace[0]
            room = namespace[2]
            monitor = namespace[1] + "/" + namespace[2] + "/" + namespace[3] + "/" + \
                namespace[4] + "/" + namespace[5] + "/" + namespace[6]
            temp.append(monitor)
    all_threads.update({database + "|" + room: temp})
    executor = ProcessPoolExecutor(max_workers=len(all_threads))
    tasks_results = []
    for k, v in all_threads.iteritems():
        task = executor.submit(is_room_stuck, k, v, time2wait)
        if task.result() is True:
            msg = 'Check ROOM %s , it seems to be off' % k
            send_email(room=k, mess=msg)
    executor.shutdown(wait=True)
class Client:
    def __init__(self, dispatcher):
        self._dispatcher = dispatcher
        self._executor = ProcessPoolExecutor()

    def shutdown(self, wait):
        self._executor.shutdown(wait=wait)

    async def invoke(self, path, *args):
        return await asyncio.get_event_loop().run_in_executor(
            self._executor, self._dispatcher.invoke, path, *args
        )
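A hedged usage sketch for the `Client` wrapper above; the `EchoDispatcher` class and the argument values are hypothetical, and because the executor is a `ProcessPoolExecutor`, both the dispatcher and the arguments must be picklable:

import asyncio


class EchoDispatcher:
    """Hypothetical dispatcher: resolves a path to a callable and invokes it."""
    def invoke(self, path, *args):
        return (path, args)


async def demo():
    client = Client(EchoDispatcher())
    try:
        print(await client.invoke("math/add", 1, 2))
    finally:
        client.shutdown(wait=True)

# asyncio.run(demo())  # run under `if __name__ == "__main__":` when using spawn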
def main():
    start = time.perf_counter()
    processor = ProcessPoolExecutor(NUM_PROCESSES)
    futures = []

    def render(*args):
        futures.append(processor.submit(render_image, *args))

    for filename in generate_filenames(CORE, CORE_EXCLUDE):
        render(filename, 0.7, 0.1)

    # Misc stuff
    for filename in generate_filenames(MISC_STUFF):
        render(filename, 0.6, 0.05)

    # Base entities
    for filename in generate_filenames(BASE_ENTITIES, ENTITY_EXCLUDE):
        render(filename, 0.7, 0.05)

    # Entities that need more color
    for filename in generate_filenames(BRIGHT_ENTITIES, ENTITY_EXCLUDE):
        render(filename, 0.7, 0.10)

    # Terrain
    for filename in generate_filenames(TERRAIN, TERRAIN_EXCLUDE):
        render(filename, 1, 0.4)

    # Ore
    for filename in generate_filenames(ORE, ORE_EXCLUDE):
        render(filename, 0.7, 0.2)

    # Wait for all tasks to complete or for the first one to raise an exception
    result = wait(futures, return_when=FIRST_EXCEPTION)

    # Cancel pending tasks after one failed with an exception
    for pending in result.not_done:
        pending.cancel()

    # Wait for the processor to complete all pending tasks that could not be canceled.
    processor.shutdown()

    # Retrieve the result for all tasks; this will re-raise the exception if a
    # task failed and cause it to be printed to the console
    for done in result.done:
        done.result()

    print(f"Done in {time.perf_counter() - start:.1f}s")
def run(process_num, *filename):
    # Split the input into multiple subtasks and compute the results
    # Instantiate a process pool with process_num processes
    executor = ProcessPoolExecutor(process_num)
    start = time.time()
    fs = []  # list of futures
    print(filename[0])
    with open(filename[0], 'r') as f:
        for each_line in f.readlines():
            fs.append(
                executor.submit(weakfilescan, each_line.replace(os.linesep, '')))
    wait(fs)  # wait for the computation to finish
    end = time.time()
    duration = end - start
    print("total cost: %.2fs" % duration)
    executor.shutdown()  # tear down the process pool
def multi_process_submit(
    func,
    items,
    max_workers=10,
):
    executor = ProcessPoolExecutor(max_workers=max_workers)
    future_list = []
    for item in items:
        future_list.append(
            executor.submit(func, *item["args"], **item["kwargs"])
            # .add_done_callback()
        )
    done_iter = as_completed(future_list)
    executor.shutdown(wait=True)
    return done_iter
def build_lut(cqa_train_data):
    print("Building lookup table for question and answer tokens")
    pool = ProcessPoolExecutor(max_workers=8)
    questions = list(pool.map(tokenize, cqa_train_data, chunksize=1000))
    pool.shutdown()
    print("Finished")
    maxlen = max([len(q) for q in questions])
    unique_tokens = set([t for q in questions for t in q])
    ques2idx = {word: idx + 1 for idx, word in enumerate(unique_tokens)}  # save 0 for padding
    answers = set([q['answer'] for q in cqa_train_data])
    ans2idx = {ans: idx for idx, ans in enumerate(answers)}
    return ans2idx, ques2idx, maxlen
def run_experiments(net):
    csvfile = open(network_path[int(sys.argv[1])] + ".csv", 'a')
    logwriter = csv.writer(csvfile, delimiter=',', quotechar='|',
                           quoting=csv.QUOTE_MINIMAL)
    executor = ProcessPoolExecutor(max_workers=32)
    d_bound = shiva.max_degree(net)
    max_degree = shiva.max_degree(net)
    total_triangles = shiva.total_triangles(net)

    original_lps = [None] * 5
    for d in range(5):
        D = d_bound / (2**d)
        original_lps[d] = executor.submit(shiva.linear_program_solve, net, D)

    # Note: [[None] * 5] * 5 would alias the same inner list five times,
    # so build independent rows instead.
    sample_lps = [[None] * 5 for _ in range(5)]
    for d in range(5):
        D = d_bound / (2**d)
        for k in range(5):
            p = 1 / (2**(k + 1))
            sample_lps[d][k] = experiment(executor, net, D, p)

    for d in range(5):
        D = d_bound / (2**d)
        for k in range(5):
            p = 1 / (2**(k + 1))
            if sample_lps[d][k] == -1:
                sample_lp = -1
            else:
                sample_lp = sum(x.result() for x in sample_lps[d][k]) \
                    / len(sample_lps[d][k])
            logwriter.writerow([
                max_degree,
                total_triangles,
                D,
                p,
                original_lps[d].result(),
                sample_lp,
                original_lps[d].result() / sample_lp,
            ])
    executor.shutdown(wait=True)
def _eval_proc(self, jc_entry: JobComponentEntry, train_output_file: str): job_detail = jc_entry.job['job_detail'] eval_args = job_detail.get('eval_args', job_detail.get('evaluation_args')) if eval_args and job_detail.get('test_data_args'): try: if self.is_local: eval_output_file = eval_args.get('output_file', '').strip() if not eval_output_file: comp_name = re.sub(r'\s+', '_', self.job_name) eval_output_file = ComponentOutput.MODEL_EVALUATION_OUTPUT + "-" + comp_name + "_local" ppool = ProcessPoolExecutor(1) print("{}: start evaluation process".format(self.job_name)) ppool.submit(local_eval_func, self.job_name, jc_entry.job, jc_entry.pack_path, jc_entry.export_path, train_output_file, eval_output_file).result() print("{}: evaluation process finished".format( self.job_name)) ppool.shutdown(wait=True) else: eval_output_file = eval_args.get('output_file', '').strip() if not eval_output_file: comp_name = re.sub(r'\s+', '_', self.job_name) eval_output_file = ComponentOutput.MODEL_EVALUATION_OUTPUT + "-" + comp_name + "_tfjob" eval_job = copy.deepcopy(jc_entry.job) if eval_args.get('num_test_samples', 0) <= 0: print( "{}: 'num_test_samples' is not set, auto fallback to 1-worker evaluation" .format(self.job_name)) eval_job['num_workers'] = 1 tfjob_launcher = EvalTFJobLauncher( self.job_name + " Evaluation TFjob launcher", args=[ "--job", json.dumps(eval_job), "--pack-path", jc_entry.pack_path, "--export-path", jc_entry.export_path, "--output-file", eval_output_file, "--upstream-output-file", train_output_file ]) tfjob_launcher.run() except Exception as e: print("{}: WARING: evaluation model failed: {}\n{}".format( self.job_name, e, traceback.format_exc())) else: print("{}: skip evaluation step".format(self.job_name))
def get_first_order(G): print("1st order: ") global EV, VE, EV_over_delta, VE_over_delta, node_nbr, node_degree EV = G.EV VE = G.VE EV_over_delta = G.EV_over_delta VE_over_delta = G.VE_over_delta node_nbr = G.node_nbr node_degree = G.node_degree processes_num = 80 pool = ProcessPoolExecutor(max_workers=processes_num) process_list = [] nodes = np.copy(G.nodes) split_num = min(processes_num, int(len(nodes) / 100)) + 1 print("split_num", split_num) np.random.shuffle(nodes) nodes = np.array_split(nodes, split_num) print("Start get first order") for node in nodes: process_list.append(pool.submit(get_first_order_part, node)) alias_n2n_1st = {} node2ff_1st = {} for p in as_completed(process_list): alias_t1, alias_t2 = p.result() alias_n2n_1st.update(alias_t1) node2ff_1st.update(alias_t2) pool.shutdown(wait=True) print("start turn dict to list") nodes = np.copy(G.nodes) alias_n2n_1st_list = [[] for n in nodes] node2ff_1st_list = [[] for n in nodes] for n in nodes: alias_n2n_1st_list[n] = alias_n2n_1st[n] node2ff_1st_list[n] = node2ff_1st[n] return alias_n2n_1st_list, node2ff_1st_list
class LBRYSessionManager(SessionManager): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.query_executor = None self.websocket = None self.metrics = ServerLoadData() self.metrics_loop = None self.running = False if self.env.websocket_host is not None and self.env.websocket_port is not None: self.websocket = AdminWebSocket(self) self.search_cache = self.bp.search_cache self.search_cache['search'] = lrucache(10000) self.search_cache['resolve'] = lrucache(10000) async def process_metrics(self): while self.running: data = self.metrics.to_json_and_reset( {'sessions': self.session_count()}) if self.websocket is not None: self.websocket.send_message(data) await asyncio.sleep(1) async def start_other(self): self.running = True args = dict(initializer=reader.initializer, initargs=(self.logger, 'claims.db', self.env.coin.NET, self.env.database_query_timeout, self.env.track_metrics)) if self.env.max_query_workers is not None and self.env.max_query_workers == 0: self.query_executor = ThreadPoolExecutor(max_workers=1, **args) else: self.query_executor = ProcessPoolExecutor( max_workers=self.env.max_query_workers or max(os.cpu_count(), 4), **args) if self.websocket is not None: await self.websocket.start() if self.env.track_metrics: self.metrics_loop = asyncio.create_task(self.process_metrics()) async def stop_other(self): self.running = False if self.env.track_metrics: self.metrics_loop.cancel() if self.websocket is not None: await self.websocket.stop() self.query_executor.shutdown()
def process(pixel_path, base_data_dir, session):
    conn = pg.open(CONFIG["Database"]["URI"])
    print("Query Images.....")
    prep_stmt = conn.query(
        "SELECT snapshot_id, detection_id, runguid::text, imagepath, view_matrix, proj_matrix, handle, pos::bytea, rot::bytea, bbox,"
        "ngv_box3dpolygon(bbox3d)::bytea as fullbox,"
        "ST_MakePoint(ST_XMin(bbox3d), ST_YMin(bbox3d), ST_ZMin(bbox3d))::bytea as bbox3d_min,"
        "ST_MakePoint(ST_XMax(bbox3d), ST_YMax(bbox3d), ST_ZMax(bbox3d))::bytea as bbox3d_max "
        "FROM detections JOIN snapshots USING (snapshot_id) JOIN runs USING (run_id) JOIN sessions USING (session_id) "
        "WHERE session_id=$1 and processed=false and camera_pos <-> pos < 200 order by snapshot_id desc",
        session)
    pbar = ProgressBar(max_value=len(prep_stmt)).start()
    i = 0
    sem = Semaphore(100)
    pool = ProcessPoolExecutor(100)
    results = []
    conn.close()
    lck = Lock()

    def on_done(snapshot_id, x):
        result = x.result()
        sem.release()
        with lck:
            nonlocal i
            nonlocal results
            pbar.update(i)
            i += 1
            #if result is None: return
            #results.append((snapshot_id, x.result()[0], x.result()[1]))
            upload([(snapshot_id, result[0], result[1])], Path(pixel_path))

    last_id = 0
    for snapshot_id, detections in groupby(prep_stmt, key=lambda x: x['snapshot_id']):
        sem.acquire()
        detections = list(detections)
        last_id = snapshot_id
        #on_done(snapshot_id, process_detections(base_data_dir, detections))
        result = pool.submit(process_detections, base_data_dir, detections)
        result.add_done_callback(partial(on_done, snapshot_id))

    pool.shutdown(wait=True)
    pbar.finish()
    conn = pg.open(CONFIG["Database"]["URI"])
    conn.query("UPDATE snapshots set processed=false where snapshot_id=$1", last_id)
    conn.close()
    # print(results)
    return results
class ProcessPoolOpInvoker(ModelOpInvoker):
    def __init__(self, model, func, n_jobs, persist_method):
        if isinstance(model, PersistedModel):
            key = model
        else:
            key = persist(model, method=persist_method)
        ctx = LKContext.INSTANCE
        _log.info('setting up ProcessPoolExecutor w/ %d workers', n_jobs)
        kid_tc = proc_count(level=1)
        self.executor = ProcessPoolExecutor(n_jobs, ctx, _initialize_mp_worker,
                                            (key, func, kid_tc, log_queue()))

    def map(self, *iterables):
        return self.executor.map(_mp_invoke_worker, *iterables)

    def shutdown(self):
        self.executor.shutdown()
class ConanFitService:
    @inject
    def __init__(self, default_params_factory: ConanParamsFactory) -> None:
        self._executor = ProcessPoolExecutor(max_workers=1)
        self._default_params_factory = default_params_factory

    def fit(self, data: np.ndarray, params: Optional[ConanFitParams] = None):
        params = params or self._default_params_factory.create()
        params_dict = {
            'baseline': params.baseline,
        }
        cfut = self._executor.submit(contact_angle_fit, data, **params_dict)
        fut = asyncio.wrap_future(cfut, loop=asyncio.get_event_loop())
        return fut

    def destroy(self) -> None:
        self._executor.shutdown()
def albumSpider():
    print("======= start crawling album info ===========")
    startTime = datetime.datetime.now()
    print(startTime.strftime('%Y-%m-%d %H:%M:%S'))
    # total number of artists
    artists_num = sql.get_all_artist_num()
    # number of batches
    batch = math.ceil(artists_num.get('num') / 1000.0)
    # build the process pool
    pool = ProcessPoolExecutor(3)
    for index in range(0, batch):
        pool.submit(saveAlbumBatch, index)
    pool.shutdown(wait=True)
    print("======= finished crawling album info ===========")
    endTime = datetime.datetime.now()
    print(endTime.strftime('%Y-%m-%d %H:%M:%S'))
    print("elapsed:", (endTime - startTime).seconds, "seconds")
def cat_Lable_Cnt_Fun(train_data, y, test_data, config): timer = Timer() cat_feature_list = [ c for c in train_data if c.startswith(CONSTANT.CATEGORY_PREFIX) ] if len(cat_feature_list) == 0: return None # train_data_length = len(train_data) train_data[LABEL] = y row_sp = int(np.ceil((len(train_data) + len(test_data)) / 1000000)) col_sp = int(np.ceil(len(cat_feature_list) / 20)) sp = row_sp * col_sp print( f' **** We should split it as {sp}, {col_sp}-{row_sp} sp to process! ****' ) cols_split = np.array_split(cat_feature_list, sp) data_list = [] for i, cols in enumerate(cols_split): if len(cols) >= 1: pool = ProcessPoolExecutor(4) result_list = pool.map(cat_Lable_Cnt_Fun_sub, [[ train_data[[col, LABEL]], test_data[[col]], col, config['pos_rate'], config[CONSTANT.TRAIN_LEN_OF_TRAIN_VAL] ] for col in cols]) pool.shutdown(wait=True) for i_data in result_list: if i_data is not None: data_list += i_data print(f'{i} split successful') # feature_data = pd.concat(data_list, axis=1, copy=False) # feature_data.columns = name_list # timer.check("label count map done") # del data_list # gc.collect() test_data.drop(cat_feature_list, axis=1, inplace=True) cat_feature_list += [LABEL] train_data.drop(cat_feature_list, axis=1, inplace=True) timer.check("drop") return data_list
class ProcessPoolParallelizer(Parallelizer):
    """A Parallelizer based on concurrent.futures.ProcessPoolExecutor."""

    def __init__(self, options):
        options.set_smart_defaults(num_tasks=default_num_tasks)
        if sys.version_info >= (3, 8, 0) and sys.platform != 'win32':
            ctx = get_context('fork')
            self.pool = ProcessPoolExecutor(options.num_tasks,
                                            mp_context=ctx,
                                            initializer=process_initializer)
        else:
            self.pool = ProcessPoolExecutor(options.num_tasks,
                                            initializer=process_initializer)
        self.process_func = partial(evaluate_step, options=options)

    def solve_circuits_parallel(self, tuples):
        return self.pool.map(self.process_func, tuples)

    def done(self):
        self.pool.shutdown()
def parse_page_data(futures): result = futures.result() data = result[0] next_page_url = result[1] if next_page_url: handler = page_pool.submit(down_load_page_data, next_page_url) handler.add_done_callback(parse_page_data) page = data['page'] html = data['data'] # 创建进程池(获取活动详情的页面源码) detail_pool = ProcessPoolExecutor(2) if page == 1: print('解析第一页数据,静态页面') html_element = etree.HTML(html) hot_active = html_element.xpath('//div[@class="hot_detail fn-clear"]') for hot_div in hot_active: # 活动详情的url地址 full_detail_url = 'http://date.jiayuan.com' + hot_div.xpath( './/h2[@class="hot_title"]/a/@href')[0] handler = detail_pool.submit(download_detail_data, full_detail_url) handler.add_done_callback(parse_detail_data) more_active = html_element.xpath( '//ul[@class="review_detail fn-clear t-activiUl"]/li') for more_li in more_active: # 活动详情的url地址 full_detail_url = 'http://date.jiayuan.com' + more_li.xpath( './/a[@class="review_link"]/@href')[0] else: print('解析第' + str(page) + '数据', '非静态页面') # 使用json.loads()将json字符串转换为python数据类型 json_obj = json.loads(html) if isinstance(data, list): # 是列表,说明得到的是正确的数据, print('正在解析数据') for sub_dict in json_obj: id = sub_dict['id'] # http://date.jiayuan.com/activityreviewdetail.php?id=11706 full_detail_url = 'http://date.jiayuan.com/activityreviewdetail.php?id=%s' % id handler = detail_pool.submit(download_detail_data, full_detail_url) handler.add_done_callback(parse_detail_data) detail_pool.shutdown()
def preprocess_transition_probs(sg): ''' Preprocessing of transition probabilities for guiding the random walks. ''' global sG sG = sg G = sG.G is_directed = sG.is_directed print("transition probs: ") alias_nodes = {} for node in tqdm(G.nodes()): unnormalized_probs = [ G[node][nbr]['weight'] / np.sqrt(sG.degree[nbr]) for nbr in sG.neighbors[node] ] # unnormalized_probs = [G[node][nbr]['weight'] for nbr in sG.neighbors[node]] norm_const = sum(unnormalized_probs) normalized_probs = [ float(u_prob) / norm_const for u_prob in unnormalized_probs ] alias_nodes[node] = alias_setup(normalized_probs) triads = {} # Parallel alias edges print("alias edges: ") edges = G.edges() threads_num = 100 pool = ProcessPoolExecutor(max_workers=threads_num) process_list = [] edges = np.array_split(edges, threads_num * 2) for e in edges: process_list.append(pool.submit(alias_some_edges, e)) alias_edges = {} for p in as_completed(process_list): alias_t = p.result() alias_edges.update(alias_t) pool.shutdown(wait=True) sG.alias_nodes = alias_nodes sG.alias_edges = alias_edges
def get_benchmark(paths, car_df, road_df, cross_df, process_num=4):
    """
    Plan routes directly and, assuming no congestion, compute the ideal travel time.
    Implemented with multiple processes.
    :param paths: planned paths for all cars, format: dict {carID: [edge path]}
    :param car_df:
    :param road_df:
    :return: car_time_cost: time cost per car {carID: time cost}
             all_time_cost: total time cost over all cars
    """
    car_time_cost = {}
    all_time_cost = 0
    carL = list(car_df['id'])
    carL_len = len(carL)
    # Split the data for multiprocessing
    N = int(carL_len / process_num)
    splice = [N * x for x in range(process_num)]
    splice.append(carL_len)
    # Start the worker processes
    print('get_benchmark: ')
    try:
        p = ProcessPoolExecutor(max_workers=process_num)
        obj_l = []
        for st, ed in zip(splice[:-1], splice[1:]):
            obj = p.submit(__get_time_cost, paths, carL[st:ed], car_df, road_df)
            obj_l.append(obj)
        p.shutdown(wait=True)
        # Merge the results from the worker processes
        # print([len(obj.result()) for obj in obj_l])
        for obj in obj_l:
            car_time_cost.update(obj.result()[0])
            all_time_cost += obj.result()[1]
    except:
        print("Multi-processing failed, using single processing now")
        car_time_cost, all_time_cost = __get_time_cost(paths, carL, car_df, road_df)
    return car_time_cost, all_time_cost
def main_1(snapshotList):
    voc = VOC_Generator(snapshotList[0].runguid, output_dir)
    voc.create_folders(voc)
    sem_tmp = Semaphore(2)
    pool_tmp = ProcessPoolExecutor(2)
    lck_tmp = Lock()

    def on_done_tmp(x):
        snapshotIDList = x.result()
        sem_tmp.release()
        with lck_tmp:
            voc.saveTrainTest(voc, snapshotIDList)

    sem_tmp.acquire()
    result_tmp = pool_tmp.submit(main_fn, snapshotList, voc)
    result_tmp.add_done_callback(partial(on_done_tmp))
    pool_tmp.shutdown(wait=True)
def test_concurrency(self):
    num_of_dependency_managers = 10
    executor = ProcessPoolExecutor(max_workers=num_of_dependency_managers)

    random_file_path = os.path.join(self.work_dir, "random_file")
    with open(random_file_path, "wb") as f:
        f.seek((1024 * 1024 * 1024) - 1)  # 1 GB
        f.write(b"\0")

    futures = [
        executor.submit(task, self.work_dir, self.state_path, random_file_path)
        for _ in range(num_of_dependency_managers)
    ]
    for future in futures:
        print(future.result())
        self.assertIsNone(future.exception())
    executor.shutdown()
def _predict_proc(self, jc_entry: JobComponentEntry, train_output_file: str):
    job_detail = jc_entry.job['job_detail']
    if job_detail.get('pred_data_args', job_detail.get('predict_data_args')) and \
            job_detail.get('pred_args', job_detail.get('predict_args')):
        try:
            ppool = ProcessPoolExecutor(1)
            print("{}: start prediction process".format(self.job_name))
            ppool.submit(predict_func, self.job_name, jc_entry.job,
                         jc_entry.pack_path, jc_entry.export_path,
                         train_output_file).result()
            print("{}: prediction process finished".format(self.job_name))
            ppool.shutdown(wait=True)
        except Exception as e:
            print("{}: WARNING: prediction failed: {}\n{}".format(
                self.job_name, e, traceback.format_exc()))
    else:
        print("{}: skip prediction step".format(self.job_name))
def make_arch_db():
    executor = ProcessPoolExecutor(max_workers=8)
    by = 10000
    m = 60000000
    #by = 2000
    #m = 10000
    e = executor.map(process_range, zip(range(0, m, by), range(by, m + by, by)))
    executor.shutdown()
    print('done calculating architectures')
    pfam_sets = merge(e)
    print(len(pfam_sets))
    gsave(pfam_sets, 'pfam_sets.pkl.gz')

    # mongodb
    db = MongoClient('wl-cmadmin', 27017).ArchDB_Pfam_071414.ArchDB_Pfam_071414
    db.insert(map(lambda item: {'_id': min(item[1]), 'pID': list(item[1]), 'Pfam': item[0]},
                  pfam_sets.items()))
    db.ensure_index('pID')
    db.ensure_index('Pfam')
class LBRYSessionManager(SessionManager):

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.query_executor = None

    async def start_other(self):
        args = dict(initializer=reader.initializer,
                    initargs=('claims.db', self.env.coin.NET))
        if self.env.max_query_workers is not None and self.env.max_query_workers == 0:
            self.query_executor = ThreadPoolExecutor(max_workers=1, **args)
        else:
            self.query_executor = ProcessPoolExecutor(
                max_workers=self.env.max_query_workers or max(os.cpu_count(), 4), **args
            )

    async def stop_other(self):
        self.query_executor.shutdown()
def handle_stream(self, stream, address): pool = ProcessPoolExecutor(max_workers=1) while True: try: # read protocol version protocol_ver = yield stream.read_bytes(MSG_LEN) protocol_ver = struct.unpack('>I', protocol_ver)[0] # Read message length (4 bytes) and unpack it into an integer raw_msg_length = yield stream.read_bytes(MSG_LEN) msg_length = struct.unpack('>I', raw_msg_length)[0] app_log.debug("Handle request (Protocol: v%d, Msg size: %d)", protocol_ver, msg_length) data = yield stream.read_bytes(msg_length) msg = msgpack.unpackb(data, object_hook=decode_np_array, use_list=False, encoding='utf-8') try: fut = pool.submit(handle_request, msg) response = yield fut except Exception: app_log.exception('Error in subprocess') response = msgpack.packb( { 'status': INTERNAL_SERVER_ERROR, }, default=encode_np_array) yield stream.write(struct.pack('>I', PROTOCOL_VER)) # Prefix each message with a 4-byte length (network byte order) yield stream.write(struct.pack('>I', len(response))) yield stream.write(response) except StreamClosedError: app_log.info("Lost client at host %s", address) break except Exception: app_log.exception('Error while handling client connection') pool.shutdown()
def parallel_build_hash(data, func, args, num, initial=None, compress=False, max_size=-1): import multiprocessing cpu_num = multiprocessing.cpu_count() data = np.array_split(data, cpu_num * 1) dict1 = deepcopy(initial) pool = ProcessPoolExecutor(max_workers=cpu_num) process_list = [] if func == 'build_hash': func = build_hash if func == 'build_hash2': func = build_hash2 if func == 'build_hash3': func = build_hash3 for datum in data: process_list.append(pool.submit(func, datum, compress, max_size)) for p in as_completed(process_list): a = p.result() if compress: dict1 = dict1.union(a) else: dict1.update(a) del a pool.shutdown(wait=True) # if args.data in ['schic','ramani']: # print (num[0]) # new_list_of_set = [set() for i in range(int(num[0]+1))] # for s in dict1: # try: # new_list_of_set[s[0]].add(s) # except: # print (s) # raise EOFError # dict1 = new_list_of_set return dict1
def best_matching_hungarian(all_cors, all_pids_info, all_pids_fff, track_vid_next_fid, weights, weights_fff, num, mag, pool_size=5): x1, y1, x2, y2 = [all_cors[:, col] for col in range(4)] all_grades_details = [] all_grades = [] box1_num = len(all_pids_info) box2_num = track_vid_next_fid['num_boxes'] cost_matrix = np.zeros((box1_num, box2_num)) qsize = box1_num * track_vid_next_fid['num_boxes'] pool = ProcessPoolExecutor(max_workers=pool_size) futures = [] for pid1 in range(box1_num): box1_pos = all_pids_info[pid1]['box_pos'] box1_region_ids = find_region_cors_last(box1_pos, all_cors) box1_score = all_pids_info[pid1]['box_score'] box1_pose = all_pids_info[pid1]['box_pose_pos'] box1_fff = all_pids_fff[pid1] for pid2 in range(1, track_vid_next_fid['num_boxes'] + 1): future = pool.submit(best_matching_hungarian_kernel, pid1, pid2, all_cors, track_vid_next_fid, weights, weights_fff, num, mag, box1_pos, box1_region_ids, box1_score, box1_pose, box1_fff) futures.append(future) pool.shutdown(True) for future in futures: pid1, pid2, grade = future.result() cost_matrix[pid1, pid2 - 1] = grade m = Munkres() indexes = m.compute((-np.array(cost_matrix)).tolist()) return indexes, cost_matrix
class ProcessPoolEvaluator(SubmitEvaluator):

    def __init__(self, processes=None):
        try:
            from concurrent.futures import ProcessPoolExecutor
            self.executor = ProcessPoolExecutor(processes)
            super(ProcessPoolEvaluator, self).__init__(self.executor.submit)
            LOGGER.log(logging.INFO, "Started process pool evaluator")
            if processes:
                LOGGER.log(logging.INFO,
                           "Using user-defined number of processes: %d", processes)
        except ImportError:
            # prevent error from showing in Eclipse if concurrent.futures not available
            raise

    def close(self):
        LOGGER.log(logging.DEBUG, "Closing process pool evaluator")
        self.executor.shutdown()
        LOGGER.log(logging.INFO, "Closed process pool evaluator")
def test_executor(self):
    m = aioprocessing.AioManager()
    q = m.AioQueue()
    p = ProcessPoolExecutor(max_workers=1)
    val = 4

    def submit():
        yield p.submit(queue_put, q, val)

    next(submit())

    @asyncio.coroutine
    def queue_get():
        out = yield from q.coro_get()
        self.assertEqual(out, val)
        yield from q.coro_put(5)

    self.loop.run_until_complete(queue_get())
    returned = q.get()
    self.assertEqual(returned, 5)
    p.shutdown()
def infer_all(db_name):
    db = pymongo.MongoClient('127.0.0.1', 27017, connect=False).get_database(db_name)
    executor = ProcessPoolExecutor(max_workers=10)
    futures = []
    for collection_name in db.collection_names():
        if not is_q_col(collection_name):
            continue
        tid = collection_name[:-2]
        q_collection = db[collection_name]
        a_collection = db[q_to_a(collection_name)]
        for q_doc in q_collection.find({}, {'qid': 1, 'topic': 1}):
            qid = q_doc['qid']
            aids = [a_doc['aid'] for a_doc in
                    a_collection.find({'qid': qid}, {'aid': 1})]
            futures.append(
                # pass the callable and its arguments to submit rather than
                # calling infer_question_task eagerly in the parent process
                executor.submit(infer_question_task, db_name, tid, qid, aids)
            )
    executor.shutdown()
def execute_parallel(self, executor=None, loop=None):
    if executor is None:
        executor = ProcessPoolExecutor()
        shut_executor = True
    else:
        shut_executor = False

    if loop is None:
        loop = asyncio.get_event_loop()

    deps = self.graph.dependency_resolver()
    next_specs = deps.send(None)
    task = loop.create_task(self.submit_next_specs(loop, executor,
                                                   next_specs, deps))
    loop.run_until_complete(task)

    if shut_executor:
        executor.shutdown()
def main(argv=None):
    usage = """REDCap Data Model Generator

    Usage:
        redcap dball <version> [--dir=DIR] [--db=DB] [--host=HOST] [--port=PORT] [--user=USER] [--pass=PASS]

    Options:
        -h --help       Show this screen.
        --dir=DIR       Name of the directory to output the files [default: .].
        --db=DB         Name of the REDCap database [default: redcap].
        --host=HOST     Host of the database server [default: localhost].
        --port=PORT     Port of the database server [default: 3306].
        --user=USER     Username to connect with.
        --pass=PASS     Password to connect with. If set to *, a prompt will be provided.
        --procs=PROCS   Number of processes to spawn [default: 24].
    """  # noqa

    from docopt import docopt

    args = docopt(usage, argv=argv, version='0.1')

    if args['--pass'] == '*':
        args['--pass'] = getpass('password: ')

    # The name of the connection helper was lost in extraction;
    # db_connect is a placeholder for whatever call the original used here.
    conn = db_connect(args['--db'], args['--host'], args['--port'],
                      args['--user'], args['--pass'])

    project_names = db_projects(conn)

    pool = ProcessPoolExecutor(max_workers=int(args['--procs']))

    for name in project_names:
        pool.submit(worker, name, args)

    pool.shutdown()
def infer_many(db_name, filename):
    """
    Infer answers for a set of questions. Reads a file where each line has the
    format topic,qid,... (whatever comes after the qid does not matter).
    """
    db = pymongo.MongoClient('127.0.0.1', 27017, connect=False).get_database(db_name)
    executor = ProcessPoolExecutor(max_workers=5)
    count = 0
    futures = []
    with open(filename) as f:
        for line in f:
            tid, qid, _ = line.split(',', maxsplit=2)
            a_collection = db[a_col(tid)]
            aids = [a_doc['aid'] for a_doc in
                    a_collection.find({'qid': qid}, {'aid': 1})]
            futures.append(
                executor.submit(infer_question_task, db_name, tid, qid, aids)
            )
            count += len(aids)
    print(count)
    executor.shutdown()
def Main(): global gSymFileManager, gOptions, gPool if not ReadConfigFile(): return 1 # In a perfect world, we could create a process per cpu core. # But then we'd have to deal with cache sharing gPool = Pool(1) gPool.submit(initializeSubprocess, gOptions) # Setup logging in the parent process. # Ensure this is called after the call to initializeSubprocess to # avoid duplicate messages in Unix systems. SetLoggingOptions(gOptions["Log"]) LogMessage("Starting server with the following options:\n" + str(gOptions)) app = Application([ url(r'/(debug)', DebugHandler), url(r'/(nodebug)', DebugHandler), url(r"(.*)", SymbolHandler)]) app.listen(gOptions['portNumber'], gOptions['hostname']) try: # select on Windows doesn't return on ctrl-c, add a periodic # callback to make ctrl-c responsive if sys.platform == 'win32': PeriodicCallback(lambda: None, 100).start() IOLoop.current().start() except KeyboardInterrupt: LogMessage("Received SIGINT, stopping...") gPool.shutdown() LogMessage("Server stopped - " + gOptions['hostname'] + ":" + str(gOptions['portNumber'])) return 0
def run_in_process(sync_fn, *args):
    pool = ProcessPoolExecutor(max_workers=1)
    result = yield pool.submit(sync_fn, *args)
    pool.shutdown()
    return result
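As written, `run_in_process` only works when wrapped as a Tornado coroutine, which resolves the yielded concurrent.futures.Future; a hedged sketch of the decorated form, where the `square` helper and the run_sync call are illustrative only:

from concurrent.futures import ProcessPoolExecutor
from tornado import gen, ioloop


@gen.coroutine
def run_in_process(sync_fn, *args):
    # Run sync_fn in a single-use worker process and wait for it without
    # blocking the IOLoop; Tornado resolves the yielded concurrent future.
    pool = ProcessPoolExecutor(max_workers=1)
    result = yield pool.submit(sync_fn, *args)
    pool.shutdown()
    return result


def square(x):
    return x * x

# ioloop.IOLoop.current().run_sync(lambda: run_in_process(square, 4))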
class BokehTornado(TornadoApplication): ''' A Tornado Application used to implement the Bokeh Server. The Server class is the main public interface, this class has Tornado implementation details. Args: applications (dict of str : bokeh.application.Application) : map from paths to Application instances The application is used to create documents for each session. extra_patterns (seq[tuple]) : tuples of (str, http or websocket handler) Use this argument to add additional endpoints to custom deployments of the Bokeh Server. prefix (str) : a URL prefix to use for all Bokeh server paths secret_key (str) : secret key for signing session IDs sign_sessions (boolean) : whether to sign session IDs generate_session_ids (boolean) : whether to generate a session ID when none is provided extra_websocket_origins (list) : hosts that can connect to the websocket keep_alive_milliseconds (int) : number of milliseconds between keep-alive pings Set to 0 to disable pings. Pings keep the websocket open. check_unused_sessions_milliseconds (int) : number of milliseconds between check for unused sessions unused_session_lifetime_milliseconds (int) : number of milliseconds for unused session lifetime stats_log_frequency_milliseconds (int) : number of milliseconds between logging stats use_index (boolean) : True to generate an index of the running apps in the RootHandler ''' def __init__(self, applications, prefix, extra_websocket_origins, extra_patterns=None, secret_key=settings.secret_key_bytes(), sign_sessions=settings.sign_sessions(), generate_session_ids=True, # heroku, nginx default to 60s timeout, so well less than that keep_alive_milliseconds=37000, # how often to check for unused sessions check_unused_sessions_milliseconds=17000, # how long unused sessions last unused_session_lifetime_milliseconds=15000, # how often to log stats stats_log_frequency_milliseconds=15000, use_index=True, redirect_root=True): self._prefix = prefix self.use_index = use_index if keep_alive_milliseconds < 0: # 0 means "disable" raise ValueError("keep_alive_milliseconds must be >= 0") if check_unused_sessions_milliseconds <= 0: raise ValueError("check_unused_sessions_milliseconds must be > 0") if unused_session_lifetime_milliseconds <= 0: raise ValueError("check_unused_sessions_milliseconds must be > 0") if stats_log_frequency_milliseconds <= 0: raise ValueError("stats_log_frequency_milliseconds must be > 0") self._websocket_origins = set(extra_websocket_origins) self._secret_key = secret_key self._sign_sessions = sign_sessions self._generate_session_ids = generate_session_ids log.debug("These host origins can connect to the websocket: %r", list(self._websocket_origins)) # Wrap applications in ApplicationContext self._applications = dict() for k,v in applications.items(): self._applications[k] = ApplicationContext(v) extra_patterns = extra_patterns or [] all_patterns = [] for key, app in applications.items(): app_patterns = [] for p in per_app_patterns: if key == "/": route = p[0] else: route = key + p[0] route = self._prefix + route app_patterns.append((route, p[1], { "application_context" : self._applications[key] })) websocket_path = None for r in app_patterns: if r[0].endswith("/ws"): websocket_path = r[0] if not websocket_path: raise RuntimeError("Couldn't find websocket path") for r in app_patterns: r[2]["bokeh_websocket_path"] = websocket_path all_patterns.extend(app_patterns) # add a per-app static path if requested by the application if app.static_path is not None: if key == "/": route = "/static/(.*)" else: route = key + 
"/static/(.*)" route = self._prefix + route all_patterns.append((route, StaticFileHandler, { "path" : app.static_path })) for p in extra_patterns + toplevel_patterns: if p[1] == RootHandler: if self.use_index: data = {"applications": self._applications, "prefix": self._prefix, "use_redirect": redirect_root} prefixed_pat = (self._prefix + p[0],) + p[1:] + (data,) all_patterns.append(prefixed_pat) else: prefixed_pat = (self._prefix + p[0],) + p[1:] all_patterns.append(prefixed_pat) log.debug("Patterns are:") for line in pformat(all_patterns, width=60).split("\n"): log.debug(" " + line) super(BokehTornado, self).__init__(all_patterns) def initialize(self, io_loop, keep_alive_milliseconds=37000, # how often to check for unused sessions check_unused_sessions_milliseconds=17000, # how long unused sessions last unused_session_lifetime_milliseconds=15000, # how often to log stats stats_log_frequency_milliseconds=15000, **kw): self._loop = io_loop for app_context in self._applications.values(): app_context._loop = self._loop self._clients = set() self._executor = ProcessPoolExecutor(max_workers=4) self._stats_job = PeriodicCallback(self.log_stats, stats_log_frequency_milliseconds, io_loop=self._loop) self._unused_session_linger_milliseconds = unused_session_lifetime_milliseconds self._cleanup_job = PeriodicCallback(self.cleanup_sessions, check_unused_sessions_milliseconds, io_loop=self._loop) if keep_alive_milliseconds > 0: self._ping_job = PeriodicCallback(self.keep_alive, keep_alive_milliseconds, io_loop=self._loop) else: self._ping_job = None @property def app_paths(self): return set(self._applications) @property def io_loop(self): return self._loop @property def websocket_origins(self): return self._websocket_origins @property def secret_key(self): return self._secret_key @property def sign_sessions(self): return self._sign_sessions @property def generate_session_ids(self): return self._generate_session_ids def resources(self, absolute_url=None): if absolute_url: return Resources(mode="server", root_url=absolute_url + self._prefix, path_versioner=StaticHandler.append_version) return Resources(mode="server", root_url=self._prefix, path_versioner=StaticHandler.append_version) def start(self): ''' Start the Bokeh Server application. ''' self._stats_job.start() self._cleanup_job.start() if self._ping_job is not None: self._ping_job.start() for context in self._applications.values(): context.run_load_hook() def stop(self, wait=True): ''' Stop the Bokeh Server application. 
Args: wait (boolean): whether to wait for orderly cleanup (default: True) Returns: None ''' # TODO we should probably close all connections and shut # down all sessions here for context in self._applications.values(): context.run_unload_hook() self._stats_job.stop() self._cleanup_job.stop() if self._ping_job is not None: self._ping_job.stop() self._executor.shutdown(wait=wait) self._clients.clear() @property def executor(self): return self._executor def new_connection(self, protocol, socket, application_context, session): connection = ServerConnection(protocol, socket, application_context, session) self._clients.add(connection) return connection def client_lost(self, connection): self._clients.discard(connection) connection.detach_session() def get_session(self, app_path, session_id): if app_path not in self._applications: raise ValueError("Application %s does not exist on this server" % app_path) return self._applications[app_path].get_session(session_id) def get_sessions(self, app_path): if app_path not in self._applications: raise ValueError("Application %s does not exist on this server" % app_path) return list(self._applications[app_path].sessions) @gen.coroutine def cleanup_sessions(self): for app in self._applications.values(): yield app.cleanup_sessions(self._unused_session_linger_milliseconds) raise gen.Return(None) def log_stats(self): if log.getEffectiveLevel() > logging.DEBUG: # avoid the work below if we aren't going to log anything return log.debug("[pid %d] %d clients connected", os.getpid(), len(self._clients)) for app_path, app in self._applications.items(): sessions = list(app.sessions) unused_count = 0 for s in sessions: if s.connection_count == 0: unused_count += 1 log.debug("[pid %d] %s has %d sessions with %d unused", os.getpid(), app_path, len(sessions), unused_count) def keep_alive(self): for c in self._clients: c.send_ping() @gen.coroutine def run_in_background(self, _func, *args, **kwargs): """ Run a synchronous function in the background without disrupting the main thread. Useful for long-running jobs. """ res = yield self._executor.submit(_func, *args, **kwargs) raise gen.Return(res)
class DataRouter(object): DEFAULT_PROJECT_NAME = "default" def __init__(self, config, component_builder): self._training_processes = config['max_training_processes'] if config['max_training_processes'] > 0 else 1 self.config = config self.responses = self._create_query_logger(config) self.model_dir = config['path'] self.token = config['token'] self.emulator = self._create_emulator() self.component_builder = component_builder if component_builder else ComponentBuilder(use_cache=True) self.project_store = self._create_project_store() self.pool = ProcessPool(self._training_processes) def __del__(self): """Terminates workers pool processes""" self.pool.shutdown() def _create_query_logger(self, config): """Creates a logger that will persist incoming queries and their results.""" response_log_dir = config['response_log'] # Ensures different log files for different processes in multi worker mode if response_log_dir: # We need to generate a unique file name, even in multiprocess environments timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S') log_file_name = "rasa_nlu_log-{}-{}.log".format(timestamp, os.getpid()) response_logfile = os.path.join(response_log_dir, log_file_name) # Instantiate a standard python logger, which we are going to use to log requests utils.create_dir_for_file(response_logfile) query_logger = Logger(observer=jsonFileLogObserver(io.open(response_logfile, 'a', encoding='utf8')), namespace='query-logger') # Prevents queries getting logged with parent logger --> might log them to stdout logger.info("Logging requests to '{}'.".format(response_logfile)) return query_logger else: # If the user didn't provide a logging directory, we wont log! logger.info("Logging of requests is disabled. (No 'request_log' directory configured)") return None def _create_project_store(self): projects = [] if os.path.isdir(self.config['path']): projects = os.listdir(self.config['path']) project_store = {} for project in projects: project_store[project] = Project(self.config, self.component_builder, project) if not project_store: project_store[self.DEFAULT_PROJECT_NAME] = Project() return project_store def _create_emulator(self): """Sets which NLU webservice to emulate among those supported by Rasa""" mode = self.config['emulate'] if mode is None: from rasa_nlu.emulators import NoEmulator return NoEmulator() elif mode.lower() == 'wit': from rasa_nlu.emulators.wit import WitEmulator return WitEmulator() elif mode.lower() == 'luis': from rasa_nlu.emulators.luis import LUISEmulator return LUISEmulator() elif mode.lower() == 'api': from rasa_nlu.emulators.api import ApiEmulator return ApiEmulator() else: raise ValueError("unknown mode : {0}".format(mode)) def extract(self, data): return self.emulator.normalise_request_json(data) def parse(self, data): project = data.get("project") or self.DEFAULT_PROJECT_NAME model = data.get("model") if project not in self.project_store: projects = os.listdir(self.config['path']) if project not in projects: raise InvalidProjectError("No project found with name '{}'.".format(project)) else: try: self.project_store[project] = Project(self.config, self.component_builder, project) except Exception as e: raise InvalidProjectError("Unable to load project '{}'. 
Error: {}".format(project, e)) response, used_model = self.project_store[project].parse(data['text'], data.get('time', None), model) if self.responses: self.responses.info(user_input=response, project=project, model=used_model) return self.format_response(response) def format_response(self, data): return self.emulator.normalise_response_json(data) def get_status(self): # This will only count the trainings started from this process, if run in multi worker mode, there might # be other trainings run in different processes we don't know about. return { "available_projects": {name: project.as_dict() for name, project in self.project_store.items()} } def start_train_process(self, data, config_values): # type: (Text, Dict[Text, Any]) -> Deferred """Start a model training.""" if PY3: f = tempfile.NamedTemporaryFile("w+", suffix="_training_data", delete=False, encoding="utf-8") f.write(data) else: f = tempfile.NamedTemporaryFile("w+", suffix="_training_data", delete=False) f.write(data.encode("utf-8")) f.close() # TODO: fix config handling _config = self.config.as_dict() for key, val in config_values.items(): _config[key] = val _config["data"] = f.name train_config = RasaNLUConfig(cmdline_args=_config) project = _config.get("project") if not project: raise InvalidProjectError("Missing project name to train") elif project in self.project_store: if self.project_store[project].status == 1: raise AlreadyTrainingError else: self.project_store[project].status = 1 elif project not in self.project_store: self.project_store[project] = Project(self.config, self.component_builder, project) self.project_store[project].status = 1 def training_callback(model_path): model_dir = os.path.basename(os.path.normpath(model_path)) self.project_store[project].update(model_dir) return model_dir def training_errback(failure): target_project = self.project_store.get(failure.value.failed_target_project) if target_project: target_project.status = 0 return failure logger.debug("New training queued") result = self.pool.submit(do_train_in_worker, train_config) result = deferred_from_future(result) result.addCallback(training_callback) result.addErrback(training_errback) return result
def main(vcf, covariates, formula, min_qual, min_genotype_qual, min_samples, weighted=False, as_vcf=False, exclude_nan=False, groups=None): #if weighted == "FALSE": weighted = False #else: # weight_fn = {'log10': np.log10, 'log': np.log, 'GQ': np.array}[weighted] if covariates.endswith('.csv'): covariate_df = pd.read_csv(covariates, index_col=0) else: covariate_df = pd.read_table(covariates, index_col=0) covariate_df.index = [str(x) for x in covariate_df.index] gmatrix = {} if groups == 'covariance': assert op.isfile(vcf), ('need to iterate over vcf 2x') cov = get_covariance(_get_genotypes(vcf, min_qual, min_genotype_qual, min_samples, as_vcf)) groups = pd.DataFrame(cov, index=covariate_df.index, columns=covariate_df.index) print(groups) # NOTE: currently using GLS and a covariance matrix but we assume # a binary dependent variable so estimates are off. po = ProcessPoolExecutor(1) for i, (samples, genos, quals, variant) in enumerate( _get_genotypes(vcf, min_qual, min_genotype_qual, min_samples, as_vcf)): if i == 0 and not samples is None: # make sure we have covariates for all samples in the vcf assert not set(samples).difference(covariate_df.index),\ set(samples).difference(covariate_df.index) covariate_df = covariate_df.ix[samples,:] covariate_df['genotype'] = genos if samples is None: if exclude_nan: continue res = {'OR': np.nan, 'pvalue': np.nan, 'z': np.nan, 'OR_CI': (np.nan, np.nan), 'xtab': 'NA'} else: xtab_future = po.submit(xtab, formula, covariate_df) try: res = vcfassoc(formula, covariate_df, groups) gmatrix['{CHROM}:{POS}'.format(**variant)] = genos except np.linalg.linalg.LinAlgError: res = {'OR': np.nan, 'pvalue': np.nan, 'z': np.nan, 'OR_CI': (np.nan, np.nan)} except statsmodels.tools.sm_exceptions.PerfectSeparationError: print("WARNING: perfect separation, too few samples(?)", ": setting to -9: {CHROM}:{POS}".format(**variant), file=sys.stderr) res = {} res['z'] = res['OR'] = np.nan res['pvalue'] = -9.0 # blech. res['OR_CI'] = np.nan, np.nan gmatrix['{CHROM}:{POS}'.format(**variant)] = genos except IndexError: continue res['xtab'] = xtab_future.result() #res['xtab'] = xtab(formula, covariate_df) print_result(res, variant, as_vcf, i) l1_regr(pd.DataFrame(gmatrix), covariate_df, formula) po.shutdown()
class BokehTornado(TornadoApplication): ''' A Tornado Application used to implement the Bokeh Server. The Server class is the main public interface, this class has Tornado implementation details. Args: applications (dict of str : bokeh.application.Application) : map from paths to Application instances The application is used to create documents for each session. extra_patterns (seq[tuple]) : tuples of (str, http or websocket handler) Use this argument to add additional endpoints to custom deployments of the Bokeh Server. prefix (str) : a URL prefix to use for all Bokeh server paths hosts (list) : hosts that are valid values for the Host header secret_key (str) : secret key for signing session IDs sign_sessions (boolean) : whether to sign session IDs generate_session_ids (boolean) : whether to generate a session ID when none is provided extra_websocket_origins (list) : hosts that can connect to the websocket These are in addition to ``hosts``. keep_alive_milliseconds (int) : number of milliseconds between keep-alive pings Set to 0 to disable pings. Pings keep the websocket open. develop (boolean) : True for develop mode ''' def __init__(self, applications, prefix, hosts, extra_websocket_origins, io_loop=None, extra_patterns=None, secret_key=settings.secret_key_bytes(), sign_sessions=settings.sign_sessions(), generate_session_ids=True, # heroku, nginx default to 60s timeout, so well less than that keep_alive_milliseconds=37000, # how often to check for unused sessions check_unused_sessions_milliseconds=17000, # how long unused sessions last unused_session_lifetime_milliseconds=60*30*1000, # how often to log stats stats_log_frequency_milliseconds=15000, develop=False): self._prefix = prefix if io_loop is None: io_loop = IOLoop.current() self._loop = io_loop if keep_alive_milliseconds < 0: # 0 means "disable" raise ValueError("keep_alive_milliseconds must be >= 0") self._hosts = set(hosts) self._websocket_origins = self._hosts | set(extra_websocket_origins) self._resources = {} self._develop = develop self._secret_key = secret_key self._sign_sessions = sign_sessions self._generate_session_ids = generate_session_ids log.debug("Allowed Host headers: %r", list(self._hosts)) log.debug("These host origins can connect to the websocket: %r", list(self._websocket_origins)) # Wrap applications in ApplicationContext self._applications = dict() for k,v in applications.items(): self._applications[k] = ApplicationContext(v, self._develop, self._loop) extra_patterns = extra_patterns or [] all_patterns = [] for key in applications: app_patterns = [] for p in per_app_patterns: if key == "/": route = p[0] else: route = key + p[0] route = self._prefix + route app_patterns.append((route, p[1], { "application_context" : self._applications[key] })) websocket_path = None for r in app_patterns: if r[0].endswith("/ws"): websocket_path = r[0] if not websocket_path: raise RuntimeError("Couldn't find websocket path") for r in app_patterns: r[2]["bokeh_websocket_path"] = websocket_path all_patterns.extend(app_patterns) for p in extra_patterns + toplevel_patterns: prefixed_pat = (self._prefix+p[0],) + p[1:] all_patterns.append(prefixed_pat) for pat in all_patterns: _whitelist(pat[1]) log.debug("Patterns are: %r", all_patterns) super(BokehTornado, self).__init__(all_patterns) self._clients = set() self._executor = ProcessPoolExecutor(max_workers=4) self._loop.add_callback(self._start_async) self._stats_job = PeriodicCallback(self.log_stats, stats_log_frequency_milliseconds, io_loop=self._loop) 
self._unused_session_linger_seconds = unused_session_lifetime_milliseconds self._cleanup_job = PeriodicCallback(self.cleanup_sessions, check_unused_sessions_milliseconds, io_loop=self._loop) if keep_alive_milliseconds > 0: self._ping_job = PeriodicCallback(self.keep_alive, keep_alive_milliseconds, io_loop=self._loop) else: self._ping_job = None @property def io_loop(self): return self._loop @property def websocket_origins(self): return self._websocket_origins @property def secret_key(self): return self._secret_key @property def sign_sessions(self): return self._sign_sessions @property def generate_session_ids(self): return self._generate_session_ids def root_url_for_request(self, request): return request.protocol + "://" + request.host + self._prefix + "/" def websocket_url_for_request(self, request, websocket_path): # websocket_path comes from the handler, and already has any # prefix included, no need to add here protocol = "ws" if request.protocol == "https": protocol = "wss" return protocol + "://" + request.host + websocket_path def resources(self, request): root_url = self.root_url_for_request(request) if root_url not in self._resources: self._resources[root_url] = Resources(mode="server", root_url=root_url, path_versioner=StaticHandler.append_version) return self._resources[root_url] def start(self, start_loop=True): ''' Start the Bokeh Server application main loop. Args: start_loop (boolean): False to not actually start event loop, used in tests Returns: None Notes: Keyboard interrupts or sigterm will cause the server to shut down. ''' self._stats_job.start() self._cleanup_job.start() if self._ping_job is not None: self._ping_job.start() for context in self._applications.values(): context.run_load_hook() if start_loop: try: self._loop.start() except KeyboardInterrupt: print("\nInterrupted, shutting down") def stop(self): ''' Stop the Bokeh Server application. Returns: None ''' # TODO we should probably close all connections and shut # down all sessions either here or in unlisten() ... but # it isn't that important since in real life it's rare to # do a clean shutdown (vs. a kill-by-signal) anyhow. 
for context in self._applications.values(): context.run_unload_hook() self._stats_job.stop() self._cleanup_job.stop() if self._ping_job is not None: self._ping_job.stop() self._loop.stop() @property def executor(self): return self._executor def new_connection(self, protocol, socket, application_context, session): connection = ServerConnection(protocol, socket, application_context, session) self._clients.add(connection) return connection def client_lost(self, connection): self._clients.discard(connection) connection.detach_session() def get_session(self, app_path, session_id): if app_path not in self._applications: raise ValueError("Application %s does not exist on this server" % app_path) return self._applications[app_path].get_session(session_id) def get_sessions(self, app_path): if app_path not in self._applications: raise ValueError("Application %s does not exist on this server" % app_path) return list(self._applications[app_path].sessions) @gen.coroutine def cleanup_sessions(self): for app in self._applications.values(): yield app.cleanup_sessions(self._unused_session_linger_seconds) raise gen.Return(None) def log_stats(self): if log.getEffectiveLevel() > logging.DEBUG: # avoid the work below if we aren't going to log anything return log.debug("[pid %d] %d clients connected", os.getpid(), len(self._clients)) for app_path, app in self._applications.items(): sessions = list(app.sessions) unused_count = 0 for s in sessions: if s.connection_count == 0: unused_count += 1 log.debug("[pid %d] %s has %d sessions with %d unused", os.getpid(), app_path, len(sessions), unused_count) def keep_alive(self): for c in self._clients: c.send_ping() @gen.coroutine def run_in_background(self, _func, *args, **kwargs): """ Run a synchronous function in the background without disrupting the main thread. Useful for long-running jobs. """ res = yield self._executor.submit(_func, *args, **kwargs) raise gen.Return(res) @gen.coroutine def _start_async(self): try: atexit.register(self._atexit) signal.signal(signal.SIGTERM, self._sigterm) except Exception: self.exit(1) _atexit_ran = False def _atexit(self): if self._atexit_ran: return self._atexit_ran = True self._stats_job.stop() IOLoop.clear_current() loop = IOLoop() loop.make_current() loop.run_sync(self._cleanup) def _sigterm(self, signum, frame): print("Received SIGTERM, shutting down") self.stop() self._atexit() @gen.coroutine def _cleanup(self): log.debug("Shutdown: cleaning up") self._executor.shutdown(wait=False) self._clients.clear()
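# --- Hedged usage sketch (not part of Bokeh) ----------------------------------
# The ``run_in_background`` coroutine above simply yields the future returned
# by ``ProcessPoolExecutor.submit``; Tornado resolves it without blocking the
# IOLoop. A minimal standalone version of that pattern, with illustrative
# names, might look like this:
from concurrent.futures import ProcessPoolExecutor

from tornado import gen
from tornado.ioloop import IOLoop


def cpu_bound(n):
    # runs in a worker process, so the IOLoop stays responsive
    return sum(i * i for i in range(n))


@gen.coroutine
def run_in_background(executor, func, *args, **kwargs):
    result = yield executor.submit(func, *args, **kwargs)
    raise gen.Return(result)


if __name__ == "__main__":
    executor = ProcessPoolExecutor(max_workers=2)
    total = IOLoop.current().run_sync(
        lambda: run_in_background(executor, cpu_bound, 10000))
    print(total)
    executor.shutdown()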
class RandomOptimizer(paths.PathOptimizer): """Base class for running any random path finder that benefits from repeated calling, possibly in a parallel fashion. Custom random optimizers should subclass this, and the ``setup`` method should be implemented with the following signature:: def setup(self, inputs, output, size_dict): # custom preparation here ... return trial_fn, trial_args Where ``trial_fn`` itself should have the signature:: def trial_fn(r, *trial_args): # custom computation of path here return ssa_path, cost, size Where ``r`` is the run number and could for example be used to seed a random number generator. See ``RandomGreedy`` for an example. Parameters ---------- max_repeats : int, optional The maximum number of repeat trials to have. max_time : float, optional The maximum amount of time to run the algorithm for. minimize : {'flops', 'size'}, optional Whether to favour paths that minimize the total estimated flop-count or the size of the largest intermediate created. parallel : {bool, int, or executor-pool like}, optional Whether to parallelize the random trials, by default ``False``. If ``True``, use a ``concurrent.futures.ProcessPoolExecutor`` with the same number of processes as cores. If an integer is specified, use that many processes instead. Finally, you can supply a custom executor-pool which should have an API matching that of the python 3 standard library module ``concurrent.futures``. Namely, a ``submit`` method that returns ``Future`` objects, themselves with ``result`` and ``cancel`` methods. pre_dispatch : int, optional If running in parallel, how many jobs to pre-dispatch so as to avoid submitting all jobs at once. Should also be more than twice the number of workers to avoid under-subscription. Default: 128. Attributes ---------- path : list[tuple[int]] The best path found so far. costs : list[int] The list of each trial's costs found so far. sizes : list[int] The list of each trial's largest intermediate size so far. See Also -------- RandomGreedy """ def __init__(self, max_repeats=32, max_time=None, minimize='flops', parallel=False, pre_dispatch=128): if minimize not in ('flops', 'size'): raise ValueError("`minimize` should be one of {'flops', 'size'}.") self.max_repeats = max_repeats self.max_time = max_time self.minimize = minimize self.better = paths.get_better_fn(minimize) self.parallel = parallel self.pre_dispatch = pre_dispatch self.costs = [] self.sizes = [] self.best = {'flops': float('inf'), 'size': float('inf')} self._repeats_start = 0 @property def path(self): """The best path found so far. """ return paths.ssa_to_linear(self.best['ssa_path']) @property def parallel(self): return self._parallel @parallel.setter def parallel(self, parallel): # shutdown any previous executor if we are managing it if getattr(self, '_managing_executor', False): self._executor.shutdown() self._parallel = parallel self._managing_executor = False if parallel is False: self._executor = None return if parallel is True: from concurrent.futures import ProcessPoolExecutor self._executor = ProcessPoolExecutor() self._managing_executor = True return if isinstance(parallel, numbers.Number): from concurrent.futures import ProcessPoolExecutor self._executor = ProcessPoolExecutor(parallel) self._managing_executor = True return # assume a pool-executor has been supplied self._executor = parallel def _gen_results_parallel(self, repeats, trial_fn, args): """Lazily generate results from an executor without submitting all jobs at once. 
""" self._futures = deque() # the idea here is to submit at least ``pre_dispatch`` jobs *before* we # yield any results, then do both in tandem, before draining the queue for r in repeats: if len(self._futures) < self.pre_dispatch: self._futures.append(self._executor.submit(trial_fn, r, *args)) continue yield self._futures.popleft().result() while self._futures: yield self._futures.popleft().result() def _cancel_futures(self): if self._executor is not None: for f in self._futures: f.cancel() def setup(self, inputs, output, size_dict): raise NotImplementedError def __call__(self, inputs, output, size_dict, memory_limit): # start a timer? if self.max_time is not None: t0 = time.time() trial_fn, trial_args = self.setup(inputs, output, size_dict) r_start = self._repeats_start + len(self.costs) r_stop = r_start + self.max_repeats repeats = range(r_start, r_stop) # create the trials lazily if self._executor is not None: trials = self._gen_results_parallel(repeats, trial_fn, trial_args) else: trials = (trial_fn(r, *trial_args) for r in repeats) # assess the trials for ssa_path, cost, size in trials: # keep track of all costs and sizes self.costs.append(cost) self.sizes.append(size) # check if we have found a new best found_new_best = self.better(cost, size, self.best['flops'], self.best['size']) if found_new_best: self.best['flops'] = cost self.best['size'] = size self.best['ssa_path'] = ssa_path # check if we have run out of time if (self.max_time is not None) and (time.time() > t0 + self.max_time): break self._cancel_futures() return self.path def __del__(self): # if we created the parallel pool-executor, shut it down if getattr(self, '_managing_executor', False): self._executor.shutdown()
class MultiProcPlugin(DistributedPluginBase): """ Execute workflow with multiprocessing, not sending more jobs at once than the system can support. The plugin_args input to run can be used to control the multiprocessing execution and defining the maximum amount of memory and threads that should be used. When those parameters are not specified, the number of threads and memory of the system is used. System consuming nodes should be tagged:: memory_consuming_node.mem_gb = 8 thread_consuming_node.n_procs = 16 The default number of threads and memory are set at node creation, and are 1 and 0.25GB respectively. Currently supported options are: - non_daemon: boolean flag to execute as non-daemon processes - n_procs: maximum number of threads to be executed in parallel - memory_gb: maximum memory (in GB) that can be used at once. - raise_insufficient: raise error if the requested resources for a node over the maximum `n_procs` and/or `memory_gb` (default is ``True``). - scheduler: sort jobs topologically (``'tsort'``, default value) or prioritize jobs by, first, memory consumption and, second, number of threads (``'mem_thread'`` option). - mp_context: name of multiprocessing context to use """ def __init__(self, plugin_args=None): # Init variables and instance attributes super(MultiProcPlugin, self).__init__(plugin_args=plugin_args) self._taskresult = {} self._task_obj = {} self._taskid = 0 # Cache current working directory and make sure we # change to it when workers are set up self._cwd = os.getcwd() # Read in options or set defaults. self.processors = self.plugin_args.get('n_procs', mp.cpu_count()) self.memory_gb = self.plugin_args.get( 'memory_gb', # Allocate 90% of system memory get_system_total_memory_gb() * 0.9) self.raise_insufficient = self.plugin_args.get('raise_insufficient', True) # Instantiate different thread pools for non-daemon processes logger.debug('[MultiProc] Starting (n_procs=%d, ' 'mem_gb=%0.2f, cwd=%s)', self.processors, self.memory_gb, self._cwd) try: mp_context = mp.context.get_context( self.plugin_args.get('mp_context')) self.pool = ProcessPoolExecutor(max_workers=self.processors, initializer=os.chdir, initargs=(self._cwd,), mp_context=mp_context) except (AttributeError, TypeError): # Python < 3.7 does not support initialization or contexts self.pool = ProcessPoolExecutor(max_workers=self.processors) self._stats = None def _async_callback(self, args): # Make sure runtime is not left at a dubious working directory os.chdir(self._cwd) result = args.result() self._taskresult[result['taskid']] = result def _get_result(self, taskid): return self._taskresult.get(taskid) def _clear_task(self, taskid): del self._task_obj[taskid] def _submit_job(self, node, updatehash=False): self._taskid += 1 # Don't allow streaming outputs if getattr(node.interface, 'terminal_output', '') == 'stream': node.interface.terminal_output = 'allatonce' result_future = self.pool.submit(run_node, node, updatehash, self._taskid) result_future.add_done_callback(self._async_callback) self._task_obj[self._taskid] = result_future logger.debug('[MultiProc] Submitted task %s (taskid=%d).', node.fullname, self._taskid) return self._taskid def _prerun_check(self, graph): """Check if any node exeeds the available resources""" tasks_mem_gb = [] tasks_num_th = [] for node in graph.nodes(): tasks_mem_gb.append(node.mem_gb) tasks_num_th.append(node.n_procs) if np.any(np.array(tasks_mem_gb) > self.memory_gb): logger.warning( 'Some nodes exceed the total amount of memory available ' '(%0.2fGB).', self.memory_gb) if 
self.raise_insufficient: raise RuntimeError('Insufficient resources available for job') if np.any(np.array(tasks_num_th) > self.processors): logger.warning( 'Some nodes demand for more threads than available (%d).', self.processors) if self.raise_insufficient: raise RuntimeError('Insufficient resources available for job') def _postrun_check(self): self.pool.shutdown() def _check_resources(self, running_tasks): """ Make sure there are resources available """ free_memory_gb = self.memory_gb free_processors = self.processors for _, jobid in running_tasks: free_memory_gb -= min(self.procs[jobid].mem_gb, free_memory_gb) free_processors -= min(self.procs[jobid].n_procs, free_processors) return free_memory_gb, free_processors def _send_procs_to_workers(self, updatehash=False, graph=None): """ Sends jobs to workers when system resources are available. """ # Check to see if a job is available (jobs with all dependencies run) # See https://github.com/nipy/nipype/pull/2200#discussion_r141605722 # See also https://github.com/nipy/nipype/issues/2372 jobids = np.flatnonzero(~self.proc_done & (self.depidx.sum(axis=0) == 0).__array__()) # Check available resources by summing all threads and memory used free_memory_gb, free_processors = self._check_resources( self.pending_tasks) stats = (len(self.pending_tasks), len(jobids), free_memory_gb, self.memory_gb, free_processors, self.processors) if self._stats != stats: tasks_list_msg = '' if logger.level <= INFO: running_tasks = [ ' * %s' % self.procs[jobid].fullname for _, jobid in self.pending_tasks ] if running_tasks: tasks_list_msg = '\nCurrently running:\n' tasks_list_msg += '\n'.join(running_tasks) tasks_list_msg = indent(tasks_list_msg, ' ' * 21) logger.info( '[MultiProc] Running %d tasks, and %d jobs ready. Free ' 'memory (GB): %0.2f/%0.2f, Free processors: %d/%d.%s', len(self.pending_tasks), len(jobids), free_memory_gb, self.memory_gb, free_processors, self.processors, tasks_list_msg) self._stats = stats if free_memory_gb < 0.01 or free_processors == 0: logger.debug('No resources available') return if len(jobids) + len(self.pending_tasks) == 0: logger.debug('No tasks are being run, and no jobs can ' 'be submitted to the queue. Potential deadlock') return jobids = self._sort_jobs( jobids, scheduler=self.plugin_args.get('scheduler')) # Run garbage collector before potentially submitting jobs gc.collect() # Submit jobs for jobid in jobids: # First expand mapnodes if isinstance(self.procs[jobid], MapNode): try: num_subnodes = self.procs[jobid].num_subnodes() except Exception: traceback = format_exception(*sys.exc_info()) self._clean_queue( jobid, graph, result={ 'result': None, 'traceback': traceback }) self.proc_pending[jobid] = False continue if num_subnodes > 1: submit = self._submit_mapnode(jobid) if not submit: continue # Check requirements of this job next_job_gb = min(self.procs[jobid].mem_gb, self.memory_gb) next_job_th = min(self.procs[jobid].n_procs, self.processors) # If node does not fit, skip at this moment if next_job_th > free_processors or next_job_gb > free_memory_gb: logger.debug('Cannot allocate job %d (%0.2fGB, %d threads).', jobid, next_job_gb, next_job_th) continue free_memory_gb -= next_job_gb free_processors -= next_job_th logger.debug('Allocating %s ID=%d (%0.2fGB, %d threads). 
Free: ' '%0.2fGB, %d threads.', self.procs[jobid].fullname, jobid, next_job_gb, next_job_th, free_memory_gb, free_processors) # change job status in appropriate queues self.proc_done[jobid] = True self.proc_pending[jobid] = True # If cached and up-to-date just retrieve it, don't run if self._local_hash_check(jobid, graph): continue # updatehash and run_without_submitting are also run locally if updatehash or self.procs[jobid].run_without_submitting: logger.debug('Running node %s on master thread', self.procs[jobid]) try: self.procs[jobid].run(updatehash=updatehash) except Exception: traceback = format_exception(*sys.exc_info()) self._clean_queue( jobid, graph, result={ 'result': None, 'traceback': traceback }) # Release resources self._task_finished_cb(jobid) self._remove_node_dirs() free_memory_gb += next_job_gb free_processors += next_job_th # Display stats next loop self._stats = None # Clean up any debris from running node in main process gc.collect() continue # Task should be submitted to workers # Send job to task manager and add to pending tasks if self._status_callback: self._status_callback(self.procs[jobid], 'start') tid = self._submit_job( deepcopy(self.procs[jobid]), updatehash=updatehash) if tid is None: self.proc_done[jobid] = False self.proc_pending[jobid] = False else: self.pending_tasks.insert(0, (tid, jobid)) # Display stats next loop self._stats = None def _sort_jobs(self, jobids, scheduler='tsort'): if scheduler == 'mem_thread': return sorted( jobids, key=lambda item: (self.procs[item].mem_gb, self.procs[item].n_procs) ) return jobids
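# --- Hedged usage sketch -------------------------------------------------------
# The options documented for this plugin are normally passed through a
# workflow's ``plugin_args``; ``wf`` below is a hypothetical, already-built
# nipype Workflow.
wf.run(plugin='MultiProc',
       plugin_args={'n_procs': 8,            # at most 8 concurrent processes
                    'memory_gb': 16,         # budget 16 GB across running nodes
                    'raise_insufficient': False,
                    'scheduler': 'mem_thread'})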
def _parallel_execute(model_params): nb_cell = model_params['nb_cell'] nb_time_step = model_params['nb_time_step'] progress_desc = model_params['progress_desc'] node_hierarchy = model_params['node_hierarchy'] li_cell_up = model_params['li_cell_up'] pool = ProcessPoolExecutor(max_workers=model_params['nworkers']) with tqdm(total=nb_cell, ascii=True, desc=progress_desc, unit=' cell') as pbar: ## Loop on cell hierarchy for lvl in range(len(node_hierarchy.keys())): futures = [] for cell in node_hierarchy[lvl]: if cell == model_params['cell_external_flow']: external_flow_flag = True else: external_flow_flag = False if len(li_cell_up[cell]) > 0: soil_upstream_inflow = \ model_params['dset_Q_down'][1:, li_cell_up[cell]] channel_upstream_inflow = \ model_params['dset_Qc_out'][1:, li_cell_up[cell]] else: soil_upstream_inflow = [np.array([]) for i in range(nb_time_step)] channel_upstream_inflow = [np.array([]) for i in range(nb_time_step)] ts_params = { 'cell': cell, 'nb_time_step': model_params['nb_time_step'], 'Vs_t0': model_params['Vs_t0'][cell], 'Vo_t0': model_params['Vo_t0'][cell], 'Vc_t0': model_params['Vc_t0'][cell], 'psi_b': model_params['psi_b'][cell], 'lamda': model_params['lamda'][cell], 'external_flow_flag': external_flow_flag, 'rainfall_forcing': model_params['rainfall_forcing'][:, cell], 'ETr_forcing': model_params['ETr_forcing'][:, cell], 'ET0_forcing': model_params['ET0_forcing'][:, cell], 'soil_upstream_inflow': soil_upstream_inflow, 'channel_upstream_inflow': channel_upstream_inflow, 'eff_theta': model_params['eff_theta'][cell], 'X': model_params['X'], 'W': model_params['W'][cell], 'Dt': model_params['Dt'], 'Xc': model_params['Xc'][cell], 'Kc': model_params['Kc'][cell], 'Ks': model_params['Ks'][cell], 'b_s': model_params['b_s'][cell], 'b_o': model_params['b_o'][cell], 'b_c': model_params['b_c'][cell], 'alpha_s': model_params['alpha_s'], 'alpha_o': model_params['alpha_o'], 'alpha_c': model_params['alpha_c'], 'solve_s': model_params['solve_s'], 'solve_o': model_params['solve_o'], 'solve_c': model_params['solve_c'], 'Vsm': model_params['Vsm'][cell], 'channel_flag': model_params['channel_flag'][cell], 'external_flow_records': model_params['external_flow_records'] } f = pool.submit(_solve_cell_timeseries, ts_params) f.add_done_callback(functools.partial(_cell_clean_up, cell, pbar, model_params)) futures.append(f) wait(futures) pool.shutdown()
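# --- Hedged sketch of the callback pattern used above --------------------------
# ``functools.partial`` pre-binds the per-cell arguments so the done-callback
# only receives the finished future; names here are illustrative, not the
# model's real helpers.
import functools
from concurrent.futures import ProcessPoolExecutor, wait


def _on_cell_done(cell, pbar, future):
    result = future.result()   # re-raises any exception from the worker
    pbar.update(1)             # advance the progress bar for this cell


def run_cells(cells, pbar, solve_cell):
    pool = ProcessPoolExecutor(max_workers=4)
    futures = []
    for cell in cells:
        f = pool.submit(solve_cell, cell)
        f.add_done_callback(functools.partial(_on_cell_done, cell, pbar))
        futures.append(f)
    wait(futures)
    pool.shutdown()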
class DataRouter(object): def __init__(self, project_dir=None, max_training_processes=1, response_log=None, emulation_mode=None, remote_storage=None, component_builder=None, model_server=None, wait_time_between_pulls=None): self._training_processes = max(max_training_processes, 1) self._current_training_processes = 0 self.responses = self._create_query_logger(response_log) self.project_dir = config.make_path_absolute(project_dir) self.emulator = self._create_emulator(emulation_mode) self.remote_storage = remote_storage self.model_server = model_server self.wait_time_between_pulls = wait_time_between_pulls if component_builder: self.component_builder = component_builder else: self.component_builder = ComponentBuilder(use_cache=True) self.project_store = self._create_project_store(project_dir) # tensorflow sessions are not fork-safe, # and training processes have to be spawned instead of forked. See # https://github.com/tensorflow/tensorflow/issues/5448#issuecomment # -258934405 multiprocessing.set_start_method('spawn', force=True) self.pool = ProcessPool(self._training_processes) def __del__(self): """Terminates workers pool processes""" self.pool.shutdown() @staticmethod def _create_query_logger(response_log): """Create a logger that will persist incoming query results.""" # Ensures different log files for different # processes in multi worker mode if response_log: # We need to generate a unique file name, # even in multiprocess environments timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S') log_file_name = "rasa_nlu_log-{}-{}.log".format(timestamp, os.getpid()) response_logfile = os.path.join(response_log, log_file_name) # Instantiate a standard python logger, # which we are going to use to log requests utils.create_dir_for_file(response_logfile) out_file = io.open(response_logfile, 'a', encoding='utf8') # noinspection PyTypeChecker query_logger = Logger( observer=jsonFileLogObserver(out_file, recordSeparator=''), namespace='query-logger') # Prevents queries getting logged with parent logger # --> might log them to stdout logger.info("Logging requests to '{}'.".format(response_logfile)) return query_logger else: # If the user didn't provide a logging directory, we wont log! logger.info("Logging of requests is disabled. 
" "(No 'request_log' directory configured)") return None def _collect_projects(self, project_dir: Text) -> List[Text]: if project_dir and os.path.isdir(project_dir): projects = os.listdir(project_dir) else: projects = [] projects.extend(self._list_projects_in_cloud()) return projects def _create_project_store(self, project_dir: Text) -> Dict[Text, Any]: default_project = RasaNLUModelConfig.DEFAULT_PROJECT_NAME projects = self._collect_projects(project_dir) project_store = {} if self.model_server is not None: project_store[default_project] = load_from_server( self.component_builder, default_project, self.project_dir, self.remote_storage, self.model_server, self.wait_time_between_pulls ) else: for project in projects: project_store[project] = Project(self.component_builder, project, self.project_dir, self.remote_storage) if not project_store: project_store[default_project] = Project( project=default_project, project_dir=self.project_dir, remote_storage=self.remote_storage ) return project_store def _pre_load(self, projects: List[Text]) -> None: logger.debug("loading %s", projects) for project in self.project_store: if project in projects: self.project_store[project].load_model() def _list_projects_in_cloud(self) -> List[Text]: # noinspection PyBroadException try: from rasa_nlu.persistor import get_persistor p = get_persistor(self.remote_storage) if p is not None: return p.list_projects() else: return [] except Exception: logger.exception("Failed to list projects. Make sure you have " "correctly configured your cloud storage " "settings.") return [] @staticmethod def _create_emulator(mode: Optional[Text]) -> NoEmulator: """Create emulator for specified mode. If no emulator is specified, we will use the Rasa NLU format.""" if mode is None: return NoEmulator() elif mode.lower() == 'wit': from rasa_nlu.emulators.wit import WitEmulator return WitEmulator() elif mode.lower() == 'luis': from rasa_nlu.emulators.luis import LUISEmulator return LUISEmulator() elif mode.lower() == 'dialogflow': from rasa_nlu.emulators.dialogflow import DialogflowEmulator return DialogflowEmulator() else: raise ValueError("unknown mode : {0}".format(mode)) @staticmethod def _tf_in_pipeline(model_config: RasaNLUModelConfig) -> bool: from rasa_nlu.classifiers.embedding_intent_classifier import \ EmbeddingIntentClassifier return any(EmbeddingIntentClassifier.name in c.values() for c in model_config.pipeline) def extract(self, data: Dict[Text, Any]) -> Dict[Text, Any]: return self.emulator.normalise_request_json(data) def parse(self, data: Dict[Text, Any]) -> Dict[Text, Any]: project = data.get("project", RasaNLUModelConfig.DEFAULT_PROJECT_NAME) model = data.get("model") if project not in self.project_store: projects = self._list_projects(self.project_dir) cloud_provided_projects = self._list_projects_in_cloud() projects.extend(cloud_provided_projects) if project not in projects: raise InvalidProjectError( "No project found with name '{}'.".format(project)) else: try: self.project_store[project] = Project( self.component_builder, project, self.project_dir, self.remote_storage) except Exception as e: raise InvalidProjectError( "Unable to load project '{}'. 
" "Error: {}".format(project, e)) time = data.get('time') response = self.project_store[project].parse(data['text'], time, model) if self.responses: self.responses.info('', user_input=response, project=project, model=response.get('model')) return self.format_response(response) @staticmethod def _list_projects(path: Text) -> List[Text]: """List the projects in the path, ignoring hidden directories.""" return [os.path.basename(fn) for fn in utils.list_subdirectories(path)] def format_response(self, data: Dict[Text, Any]) -> Dict[Text, Any]: return self.emulator.normalise_response_json(data) def get_status(self) -> Dict[Text, Any]: # This will only count the trainings started from this # process, if run in multi worker mode, there might # be other trainings run in different processes we don't know about. return { "max_training_processes": self._training_processes, "current_training_processes": self._current_training_processes, "available_projects": { name: project.as_dict() for name, project in self.project_store.items() } } def start_train_process(self, data_file: Text, project: Text, train_config: RasaNLUModelConfig, model_name: Optional[Text] = None ) -> Deferred: """Start a model training.""" if not project: raise InvalidProjectError("Missing project name to train") if self._training_processes <= self._current_training_processes: raise MaxTrainingError if project in self.project_store: self.project_store[project].status = STATUS_TRAINING elif project not in self.project_store: self.project_store[project] = Project( self.component_builder, project, self.project_dir, self.remote_storage) self.project_store[project].status = STATUS_TRAINING def training_callback(model_path): model_dir = os.path.basename(os.path.normpath(model_path)) self.project_store[project].update(model_dir) self._current_training_processes -= 1 self.project_store[project].current_training_processes -= 1 if (self.project_store[project].status == STATUS_TRAINING and self.project_store[project].current_training_processes == 0): self.project_store[project].status = STATUS_READY return model_path def training_errback(failure): logger.warning(failure) self._current_training_processes -= 1 self.project_store[project].current_training_processes -= 1 self.project_store[project].status = STATUS_FAILED self.project_store[project].error_message = str(failure) return failure logger.debug("New training queued") self._current_training_processes += 1 self.project_store[project].current_training_processes += 1 result = self.pool.submit(do_train_in_worker, train_config, data_file, path=self.project_dir, project=project, fixed_model_name=model_name, storage=self.remote_storage) result = deferred_from_future(result) result.addCallback(training_callback) result.addErrback(training_errback) return result # noinspection PyProtectedMember def evaluate(self, data: Text, project: Optional[Text] = None, model: Optional[Text] = None) -> Dict[Text, Any]: """Perform a model evaluation.""" project = project or RasaNLUModelConfig.DEFAULT_PROJECT_NAME model = model or None file_name = utils.create_temporary_file(data, "_training_data") if project not in self.project_store: raise InvalidProjectError("Project {} could not " "be found".format(project)) model_name = self.project_store[project]._dynamic_load_model(model) self.project_store[project]._loader_lock.acquire() try: if not self.project_store[project]._models.get(model_name): interpreter = self.project_store[project]. 
\ _interpreter_for_model(model_name) self.project_store[project]._models[model_name] = interpreter finally: self.project_store[project]._loader_lock.release() return run_evaluation( data_path=file_name, model=self.project_store[project]._models[model_name], errors_filename=None ) def unload_model(self, project: Optional[Text], model: Text) -> Dict[Text, Any]: """Unload a model from server memory.""" if project is None: raise InvalidProjectError("No project specified") elif project not in self.project_store: raise InvalidProjectError("Project {} could not " "be found".format(project)) try: unloaded_model = self.project_store[project].unload(model) return unloaded_model except KeyError: raise InvalidProjectError("Failed to unload model {} " "for project {}.".format(model, project))
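# --- Hedged sketch: bridging executor futures to Twisted ------------------------
# ``start_train_process`` above wraps the ``ProcessPoolExecutor`` future in a
# Twisted Deferred via ``deferred_from_future``. One plausible minimal bridge
# (not rasa's actual implementation) is:
from twisted.internet import defer, reactor


def deferred_from_future(future):
    d = defer.Deferred()

    def _done(f):
        try:
            result = f.result()
        except Exception as e:
            # hop back onto the reactor thread before firing the Deferred
            reactor.callFromThread(d.errback, e)
        else:
            reactor.callFromThread(d.callback, result)

    future.add_done_callback(_done)
    return d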
            'Start': start,
            'WinSize': win,
            'GroupName': gname,
            'Subtype': sub,
        }
        if isinstance(benj_res, str):
            if (benj_res == 'Already Processed') or benj_res.startswith('Too few unique sequences'):
                continue
            print(benj_res, prot, start, win)
        else:
            benj_res.update(tdict)
            benj_writer.writerow(benj_res)
    if multi:
        pool.shutdown()

# <codecell>

#with open('allgp120.fasta', 'w') as handle:
tres = []
for key, row in wanted_data[['gp120-seq-align', 'Tropism']].dropna().iterrows():
    oname = key + '-' + row['Tropism']
    tres.append((oname, ''.join(row['gp120-seq-align'])))

# <codecell>
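# --- Hedged sketch of the optional-parallelism idiom above ----------------------
# The fragment guards ``pool.shutdown()`` with ``if multi:``, i.e. the pool is
# only created when parallel execution was requested. A standalone version of
# that idiom, with illustrative names, might be:
from concurrent.futures import ProcessPoolExecutor


def run_windows(jobs, analyze, multi=True):
    pool = ProcessPoolExecutor() if multi else None
    try:
        if pool is not None:
            futures = [pool.submit(analyze, job) for job in jobs]
            return [f.result() for f in futures]
        return [analyze(job) for job in jobs]
    finally:
        if pool is not None:
            pool.shutdown()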
class DataRouter(object): def __init__(self, project_dir=None, max_training_processes=1, response_log=None, emulation_mode=None, remote_storage=None, component_builder=None): self._training_processes = max(max_training_processes, 1) self._current_training_processes = 0 self.responses = self._create_query_logger(response_log) self.project_dir = config.make_path_absolute(project_dir) self.emulator = self._create_emulator(emulation_mode) self.remote_storage = remote_storage if component_builder: self.component_builder = component_builder else: self.component_builder = ComponentBuilder(use_cache=True) self.project_store = self._create_project_store(project_dir) self.pool = ProcessPool(self._training_processes) def __del__(self): """Terminates workers pool processes""" self.pool.shutdown() @staticmethod def _create_query_logger(response_log): """Create a logger that will persist incoming query results.""" # Ensures different log files for different # processes in multi worker mode if response_log: # We need to generate a unique file name, # even in multiprocess environments timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S') log_file_name = "rasa_nlu_log-{}-{}.log".format(timestamp, os.getpid()) response_logfile = os.path.join(response_log, log_file_name) # Instantiate a standard python logger, # which we are going to use to log requests utils.create_dir_for_file(response_logfile) out_file = io.open(response_logfile, 'a', encoding='utf8') query_logger = Logger( observer=jsonFileLogObserver(out_file, recordSeparator=''), namespace='query-logger') # Prevents queries getting logged with parent logger # --> might log them to stdout logger.info("Logging requests to '{}'.".format(response_logfile)) return query_logger else: # If the user didn't provide a logging directory, we wont log! logger.info("Logging of requests is disabled. " "(No 'request_log' directory configured)") return None def _collect_projects(self, project_dir): if project_dir and os.path.isdir(project_dir): projects = os.listdir(project_dir) else: projects = [] projects.extend(self._list_projects_in_cloud()) return projects def _create_project_store(self, project_dir): projects = self._collect_projects(project_dir) project_store = {} for project in projects: project_store[project] = Project(self.component_builder, project, self.project_dir, self.remote_storage) if not project_store: default_model = RasaNLUModelConfig.DEFAULT_PROJECT_NAME project_store[default_model] = Project( project=RasaNLUModelConfig.DEFAULT_PROJECT_NAME, project_dir=self.project_dir, remote_storage=self.remote_storage) return project_store def _pre_load(self, projects): logger.debug("loading %s", projects) for project in self.project_store: if project in projects: self.project_store[project].load_model() def _list_projects_in_cloud(self): try: from rasa_nlu.persistor import get_persistor p = get_persistor(self.remote_storage) if p is not None: return p.list_projects() else: return [] except Exception: logger.exception("Failed to list projects. Make sure you have " "correctly configured your cloud storage " "settings.") return [] @staticmethod def _create_emulator(mode): """Create emulator for specified mode. 
If no emulator is specified, we will use the Rasa NLU format.""" if mode is None: from rasa_nlu.emulators import NoEmulator return NoEmulator() elif mode.lower() == 'wit': from rasa_nlu.emulators.wit import WitEmulator return WitEmulator() elif mode.lower() == 'luis': from rasa_nlu.emulators.luis import LUISEmulator return LUISEmulator() elif mode.lower() == 'dialogflow': from rasa_nlu.emulators.dialogflow import DialogflowEmulator return DialogflowEmulator() else: raise ValueError("unknown mode : {0}".format(mode)) def extract(self, data): return self.emulator.normalise_request_json(data) def parse(self, data): project = data.get("project", RasaNLUModelConfig.DEFAULT_PROJECT_NAME) model = data.get("model") if project not in self.project_store: projects = self._list_projects(self.project_dir) cloud_provided_projects = self._list_projects_in_cloud() projects.extend(cloud_provided_projects) if project not in projects: raise InvalidProjectError( "No project found with name '{}'.".format(project)) else: try: self.project_store[project] = Project( self.component_builder, project, self.project_dir, self.remote_storage) except Exception as e: raise InvalidProjectError( "Unable to load project '{}'. " "Error: {}".format(project, e)) time = data.get('time') response = self.project_store[project].parse(data['text'], time, model) if self.responses: self.responses.info('', user_input=response, project=project, model=response.get('model')) return self.format_response(response) @staticmethod def _list_projects(path): """List the projects in the path, ignoring hidden directories.""" return [os.path.basename(fn) for fn in utils.list_subdirectories(path)] def parse_training_examples(self, examples, project, model): # type: (Optional[List[Message]], Text, Text) -> List[Dict[Text, Text]] """Parses a list of training examples to the project interpreter""" predictions = [] for ex in examples: logger.debug("Going to parse: {}".format(ex.as_dict())) response = self.project_store[project].parse(ex.text, None, model) logger.debug("Received response: {}".format(response)) predictions.append(response) return predictions def format_response(self, data): return self.emulator.normalise_response_json(data) def get_status(self): # This will only count the trainings started from this # process, if run in multi worker mode, there might # be other trainings run in different processes we don't know about. return { "max_training_processes": self._training_processes, "current_training_processes": self._current_training_processes, "available_projects": { name: project.as_dict() for name, project in self.project_store.items() } } def start_train_process(self, data_file, # type: Text project, # type: Text train_config, # type: RasaNLUModelConfig model_name=None # type: Optional[Text] ): # type: (...) 
-> Deferred """Start a model training.""" if not project: raise InvalidProjectError("Missing project name to train") if project in self.project_store: if self._training_processes <= self._current_training_processes: raise MaxTrainingError else: self.project_store[project].status = 1 elif project not in self.project_store: self.project_store[project] = Project( self.component_builder, project, self.project_dir, self.remote_storage) self.project_store[project].status = 1 def training_callback(model_path): model_dir = os.path.basename(os.path.normpath(model_path)) self.project_store[project].update(model_dir) self._current_training_processes -= 1 self.project_store[project].current_training_processes -= 1 if (self.project_store[project].status == 1 and self.project_store[project].current_training_processes == 0): self.project_store[project].status = 0 return model_dir def training_errback(failure): logger.warn(failure) target_project = self.project_store.get( failure.value.failed_target_project) self._current_training_processes -= 1 self.project_store[project].current_training_processes -= 1 if (target_project and self.project_store[project].current_training_processes == 0): target_project.status = 0 return failure logger.debug("New training queued") self._current_training_processes += 1 self.project_store[project].current_training_processes += 1 result = self.pool.submit(do_train_in_worker, train_config, data_file, path=self.project_dir, project=project, fixed_model_name=model_name, storage=self.remote_storage) result = deferred_from_future(result) result.addCallback(training_callback) result.addErrback(training_errback) return result def evaluate(self, data, project=None, model=None): # type: (Text, Optional[Text], Optional[Text]) -> Dict[Text, Any] """Perform a model evaluation.""" project = project or RasaNLUModelConfig.DEFAULT_PROJECT_NAME model = model or None file_name = utils.create_temporary_file(data, "_training_data") test_data = load_data(file_name) if project not in self.project_store: raise InvalidProjectError("Project {} could not " "be found".format(project)) preds_json = self.parse_training_examples(test_data.intent_examples, project, model) predictions = [ {"text": e.text, "intent": e.data.get("intent"), "predicted": p.get("intent", {}).get("name"), "confidence": p.get("intent", {}).get("confidence")} for e, p in zip(test_data.intent_examples, preds_json) ] y_true = [e.data.get("intent") for e in test_data.intent_examples] y_true = clean_intent_labels(y_true) y_pred = [p.get("intent", {}).get("name") for p in preds_json] y_pred = clean_intent_labels(y_pred) report, precision, f1, accuracy = get_evaluation_metrics(y_true, y_pred) return { "intent_evaluation": { "report": report, "predictions": predictions, "precision": precision, "f1_score": f1, "accuracy": accuracy} } def unload_model(self, project, model): # type: (Text, Text) -> Dict[Text] """Unload a model from server memory.""" if project is None: raise InvalidProjectError("No project specified") elif project not in self.project_store: raise InvalidProjectError("Project {} could not " "be found".format(project)) try: unloaded_model = self.project_store[project].unload(model) return unloaded_model except KeyError: raise InvalidProjectError("Failed to unload model {} " "for project {}.".format(model, project))
def compute_heatmap(self, reader, plot_type, time_mode, time_interval, cache_size=-1, num_of_pixel_of_time_dim=-1, num_of_threads=os.cpu_count(), **kwargs): """ calculate the data for plotting heatmap :param reader: reader for data :param plot_type: types of data, see heatmap (function) for details :param time_mode: real time (r) or virtual time (v) :param time_interval: the window size in computation :param cache_size: size of cache :param num_of_pixel_of_time_dim: as an alternative to time_interval, useful when you don't know the trace time span :param num_of_threads: number of threads/processes to use for computation, default: all :param kwargs: cache_params, :return: a two-dimension list, the first dimension is x, the second dimension is y, the value is the heat value """ bp = get_breakpoints(reader, time_mode, time_interval, num_of_pixel_of_time_dim) ppe = ProcessPoolExecutor(max_workers=num_of_threads) futures_dict = {} progress = 0 xydict = np.zeros((len(bp)-1, len(bp)-1)) if plot_type in [ "avg_rd_st_et", "rd_distribution", "rd_distribution_CDF", "future_rd_distribution", "dist_distribution", "rt_distribution" ]: pass elif plot_type == "hr_st_et": ema_coef = kwargs.get("ema_coef", DEF_EMA_HISTORY_WEIGHT) enable_ihr = kwargs.get("interval_hit_ratio", False) or kwargs.get("enable_ihr", False) if kwargs.get("algorithm", "LRU").lower() == "lru": #TODO: replace CLRUProfiler with PyLRUProfiler rd = LRUProfiler(reader).get_reuse_distance() last_access_dist = get_last_access_dist(reader) for i in range(len(bp) - 1): futures_dict[ppe.submit(cal_hr_list_LRU, rd, last_access_dist, cache_size, bp, i, enable_ihr=enable_ihr, ema_coef=ema_coef)] = i else: reader_params = reader.get_params() reader_params["open_c_reader"] = False cache_class = cache_name_to_class(kwargs.get("algorithm")) cache_params = kwargs.get("cache_params", {}) for i in range(len(bp) - 1): futures_dict[ppe.submit(cal_hr_list_general, reader.__class__, reader_params, cache_class, cache_size, bp, i, cache_params=cache_params)] = i elif plot_type == "hr_st_size": raise RuntimeError("Not Implemented") elif plot_type == "KL_st_et": rd = LRUProfiler(reader).get_reuse_distance() for i in range(len(bp) - 1): futures_dict[ppe.submit(cal_KL, rd, bp, i)] = i else: ppe.shutdown() raise RuntimeError("{} is not a valid heatmap type".format(plot_type)) last_progress_print_time = time.time() for future in as_completed(futures_dict): result = future.result() xydict[-len(result):, futures_dict[future]] = np.array(result) # print("{} {}".format(xydict[futures_dict[future]], np.array(result))) progress += 1 if time.time() - last_progress_print_time > 20: INFO("{:.2f}%".format(progress / len(futures_dict) * 100), end="\r") last_progress_print_time = time.time() ppe.shutdown() return xydict
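# --- Hedged sketch of the futures-to-index bookkeeping used above ---------------
# Each submitted future is mapped back to its column index so results can be
# written into the right slot as they complete, regardless of completion order.
# Names are illustrative.
import numpy as np
from concurrent.futures import ProcessPoolExecutor, as_completed


def fill_columns(compute_column, n_rows, n_cols, workers=4):
    grid = np.zeros((n_rows, n_cols))
    with ProcessPoolExecutor(max_workers=workers) as ppe:
        futures = {ppe.submit(compute_column, i): i for i in range(n_cols)}
        for fut in as_completed(futures):
            col = futures[fut]
            result = fut.result()
            grid[-len(result):, col] = np.array(result)
    return grid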
class BokehTornado(TornadoApplication): ''' A Tornado Application used to implement the Bokeh Server. The Server class is the main public interface, this class has Tornado implementation details. Args: applications (dict of str : bokeh.application.Application) : map from paths to Application instances The application is used to create documents for each session. extra_patterns (seq[tuple]) : tuples of (str, http or websocket handler) Use this argmument to add additional endpoints to custom deployments of the Bokeh Server. ''' def __init__(self, applications, io_loop=None, extra_patterns=None): if io_loop is None: io_loop = IOLoop.current() self._loop = io_loop self._resources = {} # Wrap applications in ApplicationContext self._applications = dict() for k,v in applications.items(): self._applications[k] = ApplicationContext(v, self._loop) extra_patterns = extra_patterns or [] relative_patterns = [] for key in applications: for p in per_app_patterns: if key == "/": route = p[0] else: route = key + p[0] relative_patterns.append((route, p[1], { "application_context" : self._applications[key] })) websocket_path = None for r in relative_patterns: if r[0].endswith("/ws"): websocket_path = r[0] if not websocket_path: raise RuntimeError("Couldn't find websocket path") for r in relative_patterns: r[2]["bokeh_websocket_path"] = websocket_path all_patterns = extra_patterns + relative_patterns + toplevel_patterns log.debug("Patterns are: %r", all_patterns) super(BokehTornado, self).__init__(all_patterns, **settings) self._clients = set() self._executor = ProcessPoolExecutor(max_workers=4) self._loop.add_callback(self._start_async) self._stats_job = PeriodicCallback(self.log_stats, 15.0 * 1000, io_loop=self._loop) self._stats_job.start() self._unused_session_linger_seconds = 60*30 self._cleanup_job = PeriodicCallback(self.cleanup_sessions, 17.0 * 1000, io_loop=self._loop) self._cleanup_job.start() @property def io_loop(self): return self._loop def root_url_for_request(self, request): # If we add a "whole server prefix," we'd put that on here too return request.protocol + "://" + request.host + "/" def websocket_url_for_request(self, request, websocket_path): protocol = "ws" if request.protocol == "https": protocol = "wss" return protocol + "://" + request.host + websocket_path def resources(self, request): root_url = self.root_url_for_request(request) if root_url not in self._resources: self._resources[root_url] = Resources(mode="server", root_url=root_url) return self._resources[root_url] def start(self): ''' Start the Bokeh Server application main loop. Args: Returns: None Notes: Keyboard interrupts or sigterm will cause the server to shut down. ''' try: self._loop.start() except KeyboardInterrupt: print("\nInterrupted, shutting down") def stop(self): ''' Stop the Bokeh Server application. 
Returns: None ''' self._loop.stop() @property def executor(self): return self._executor def new_connection(self, protocol, socket, application_context, session): connection = ServerConnection(protocol, socket, application_context, session) self._clients.add(connection) return connection def client_lost(self, connection): self._clients.discard(connection) connection.detach_session() def get_session(self, app_path, session_id): if app_path not in self._applications: raise ValueError("Application %s does not exist on this server" % app_path) return self._applications[app_path].get_session(session_id) def cleanup_sessions(self): for app in self._applications.values(): app.cleanup_sessions(self._unused_session_linger_seconds) def log_stats(self): log.debug("[pid %d] %d clients connected", os.getpid(), len(self._clients)) @gen.coroutine def run_in_background(self, _func, *args, **kwargs): """ Run a synchronous function in the background without disrupting the main thread. Useful for long-running jobs. """ res = yield self._executor.submit(_func, *args, **kwargs) raise gen.Return(res) @gen.coroutine def _start_async(self): try: atexit.register(self._atexit) signal.signal(signal.SIGTERM, self._sigterm) except Exception: self.exit(1) _atexit_ran = False def _atexit(self): if self._atexit_ran: return self._atexit_ran = True self._stats_job.stop() IOLoop.clear_current() loop = IOLoop() loop.make_current() loop.run_sync(self._cleanup) def _sigterm(self, signum, frame): print("Received SIGTERM, shutting down") self.stop() self._atexit() @gen.coroutine def _cleanup(self): log.debug("Shutdown: cleaning up") self._executor.shutdown(wait=False) self._clients.clear()
class AMRProcessor: def __init__(self, max_workers=None, verbose=False): self.verbose = verbose self.pool = ProcessPoolExecutor(max_workers=max_workers) if max_workers is None or max_workers > 1 else None def shutdown(self, wait=True): if self.pool: self.pool.shutdown(wait=wait) def sentences(self, gold_lines, silver_lines, verbose=False, seed=None, loop=None): def extract_amr_pairs(gold_lines, silver_lines): while True: gold_amr = AMR.read(gold_lines) silver_amr = AMR.read(silver_lines) if gold_amr is None and silver_amr is None: break elif gold_amr is None or silver_amr is None: raise Exception('mismatched AMR count') yield gold_amr, silver_amr AMRPair.seed = seed self.total_match_num = 0 self.total_test_num = 0 self.total_gold_num = 0 self.skipped = 0 self.good = 0 nr = 0 def process_sentence(sentence): nonlocal nr sentence = Dict(sentence) sentence.gold = Dict(sentence.gold) sentence.silver = Dict(sentence.silver) nr += 1 if not sentence: if verbose: print('Skipping sentence:', gold_amr.text) self.skipped += 1 return if verbose: print(sentence.gold.text) gold_triple_num = len(sentence.gold.instances) + len(sentence.gold.attributes) + len(sentence.gold.relations) test_triple_num = len(sentence.silver.instances) + len(sentence.silver.attributes) + len(sentence.silver.relations) gold_triple_num += 1 if sentence.gold.top else 0 test_triple_num += 1 if sentence.silver.top else 0 # if each AMR pair should have a score, compute and output it here sentence.precision, sentence.recall, sentence.best_f_score = smatch.compute_f(sentence.best_match_num, test_triple_num, gold_triple_num) # sentence.precision = precision # sentence.recall = recall # sentence.best_f_score = best_f_score self.total_match_num += sentence.best_match_num self.total_test_num += test_triple_num self.total_gold_num += gold_triple_num if verbose: print() print("Precision: %.4f" % sentence.precision) print("Recall: %.4f" % sentence.recall) print("Smatch score: %.4f" % sentence.best_f_score) print() else: print('.', end='', flush=True) self.good += 1 sentence.nr = nr return sentence if loop is None: loop = asyncio.get_event_loop() class AMap: def __init__(self, func, futures): self.func = func self.futures = iter(futures) async def __aiter__(self): return self async def __anext__(self): try: future = next(self.futures) except StopIteration: raise StopAsyncIteration return self.func(await future) results = list(loop.run_in_executor(self.pool, AMRPair.make, amr_pair) for amr_pair in extract_amr_pairs(gold_lines, silver_lines)) return AMap(process_sentence, results) # for result in results: # sentence = await result # sentence = process_sentence(sentence) # for sentence in (self.pool.map if self.pool else map)(AMRPair.make, extract_amr_pairs(gold_lines, silver_lines)): # sentence = process_sentence(sentence) # yield sentence async def __call__(self, gold_lines, silver_lines, verbose=False, seed=None): sentences = [] async for sentence in self.sentences(gold_lines, silver_lines, verbose=verbose, seed=seed): sentences.append(sentence) # sentences = list(self.sentences(gold_lines, silver_lines, verbose=verbose, seed=seed)) precision, recall, best_f_score = smatch.compute_f(self.total_match_num, self.total_test_num, self.total_gold_num) if verbose: print("Total:") print() print("Precision: %.4f" % precision) print("Recall: %.4f" % recall) print("Smatch score: %.4f" % best_f_score) return Dict(sentences=sentences, precision=precision, recall=recall, best_f_score=best_f_score, skipped=self.skipped, good=self.good)
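# --- Hedged sketch of the asyncio + ProcessPoolExecutor pattern above -----------
# ``sentences`` schedules each AMR pair on the pool via ``loop.run_in_executor``
# and consumes the results asynchronously. A minimal standalone version, with
# illustrative names and a placeholder worker, might be:
import asyncio
from concurrent.futures import ProcessPoolExecutor


def score_pair(pair):
    # placeholder for the expensive, picklable per-pair computation
    return sum(pair)


async def score_all(pairs, pool):
    loop = asyncio.get_running_loop()
    futures = [loop.run_in_executor(pool, score_pair, p) for p in pairs]
    return [await f for f in futures]


if __name__ == "__main__":
    with ProcessPoolExecutor(max_workers=2) as pool:
        print(asyncio.run(score_all([(1, 2), (3, 4)], pool)))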