def _itergroundings(self, simplify=False, unsatfailure=False):
    global global_bpll_grounding
    global_bpll_grounding = self
    if self.multicore:
        pool = Pool(maxtasksperchild=1)
        try:
            for gndresult in pool.imap(with_tracing(create_formula_groundings), self.formulas):
                for fidx, stat in gndresult:
                    for (varidx, validx, val) in stat:
                        self._varidx2fidx[varidx].add(fidx)
                        self._addstat(fidx, varidx, validx, val)
                    checkmem()
                yield None
        except CtrlCException as e:
            pool.terminate()
            raise e
        pool.close()
        pool.join()
    else:
        for gndresult in imap(create_formula_groundings, self.formulas):
            for fidx, stat in gndresult:
                for (varidx, validx, val) in stat:
                    self._varidx2fidx[varidx].add(fidx)
                    self._addstat(fidx, varidx, validx, val)
            yield None
def _itergroundings(self, simplify=True, unsatfailure=True):
    # generate all groundings
    if not self.formulas:
        return
    global global_fastConjGrounding
    global_fastConjGrounding = self
    batches = list(rndbatches(self.formulas, 20))
    batchsizes = [len(b) for b in batches]
    if self.verbose:
        bar = ProgressBar(width=100, steps=sum(batchsizes), color='green')
        i = 0
    if self.multicore:
        pool = Pool()
        try:
            for gfs in pool.imap(with_tracing(create_formula_groundings), batches):
                if self.verbose:
                    bar.inc(batchsizes[i])
                    bar.label(str(cumsum(batchsizes, i + 1)))
                    i += 1
                for gf in gfs:
                    yield gf
        except Exception as e:
            logger.error('Error in child process. Terminating pool...')
            pool.close()
            raise e
        finally:
            pool.terminate()
            pool.join()
    else:
        for gfs in imap(create_formula_groundings, batches):
            if self.verbose:
                bar.inc(batchsizes[i])
                bar.label(str(cumsum(batchsizes, i + 1)))
                i += 1
            for gf in gfs:
                yield gf
def run():
    setup_logger()
    logger.info('Started')
    queue = multiprocessing.Queue(maxsize=EVENT_QUEUE_MAX_SIZE)
    pool = Pool(processes=WORKERS, initializer=worker, initargs=(queue,))
    event_handler = EventHandler(queue)
    observer = init_observer()
    try:
        delete_all_files(FRAMES_PATH)
        observer.schedule(event_handler, path=FRAMES_PATH, recursive=True)
        signal.signal(signal.SIGINT, signal_handler)
        observer.start()
        while True:
            pool._maintain_pool()  # restart workers if needed
            time.sleep(1)
            now = datetime.datetime.now()
            if now - event_handler.last_event > datetime.timedelta(minutes=1):
                logger.warning("No events received in the last minute.")
                # Sometimes watchdog stops receiving events.
                # We exit, so the process can be restarted.
                break
    except KeyboardInterrupt as err:
        logger.warning("Keyboard interruption")
    except Exception as err:
        logger.exception(err)
    finally:
        observer.stop()
        observer.join()
        pool.terminate()
    logger.warning("Bye")
def work(host, port, processes, threads, times):
    pool = Pool(processes, lambda: signal.signal(signal.SIGINT, signal.SIG_IGN))
    p = Process(target=progress)
    p.daemon = True
    start = time.time()
    try:
        for chunk in divide(times, processes):
            pool.apply_async(thread, (host, port, threads, chunk))
        p.start()
        pool.close()
        pool.join()
        p.terminate()
        p.join()
    except KeyboardInterrupt:
        pool.terminate()
        p.terminate()
        p.join()
        pool.join()
    return time.time() - start
def run(self, test_name=None, db_adapter=None):
    if db_adapter is None:
        db_adapter = DEFAULT_DATABASE_ADAPTER
    if test_name is None:
        test_name = '_'.join([db_adapter, datetime.datetime.now().strftime("%Y-%m-%d %H:%M")])
    print ''.join(['Running "', test_name, '" test'])
    print 'Prepare database'
    adapter = adapter_factory(db_adapter)
    adapter.prepare_db()
    test_id = adapter.create_new_test(test_name)
    print ''
    print 'Create user documents'
    pool = Pool(processes=10)
    params = [{'user_id': i, 'docs_per_user': DOCS_PER_USER, 'db_adapter': db_adapter}
              for i in range(1, USERS_COUNT + 1)]
    start = time.time()
    try:
        pool.map(create_users, params)
        print 'Full time:', time.time() - start
    finally:
        pool.terminate()
        del pool
    print 'OK! Users were created!'
    print ''
    for i in range(1, MAX_PROCESSES + 1):
        print 'Run test with %d processes' % i
        pool = Pool(processes=i)
        params = [{'user_id': j, 'db_adapter': db_adapter} for j in range(1, USERS_COUNT + 1)]
        start = time.time()
        try:
            res = pool.map(update_users, params)
            full_time = time.time() - start
        finally:
            pool.terminate()
            del pool
        print 'Test is finished! Save results'
        print ''
        adapter.save_results(test_id, res, i)
        print 'Full time:', full_time
        print ''
    print 'Finish!'
def create_initial_partial_vocabs(all_files, path_to_dump: str):
    partial_vocabs_queue = []
    files_total = len(all_files)
    current_file = 0
    chunk_generator = create_chunk_generator(len(all_files), N_CHUNKS)
    params = [(file, path_to_dump, chunk) for file, chunk in zip(all_files, chunk_generator)]
    pool = Pool()
    partial_vocab_it = pool.imap_unordered(create_and_dump_partial_vocab, params)
    for partial_vocab in partial_vocab_it:
        partial_vocabs_queue.append(partial_vocab)
        current_file += 1
        logger.info(f"To partial vocabs added {current_file} out of {files_total}")
    pool.terminate()
    return partial_vocabs_queue
def postprocd(self, func, nthreads=1, pool=None):
    """
    Post-process some values into this chain.

    Args:
        func : a function which accepts all the keys in the chain and returns a
            dictionary of new keys to add. `func` must accept *all* keys in the
            chain; if there are ones you don't need, capture them with **_ in
            its call signature, e.g. to add in a parameter 'b' which is 'a'
            squared, use postprocd(lambda a,**_: {'b':a**2})
        nthreads : the number of threads to use
        pool : any worker pool which has a pool.map function.
            default: multiprocessing.Pool(nthreads)

    Returns:
        A new chain with the new values post-processed in. Does not alter the
        original chain. If for some rows in the chain `func` did not return all
        the keys, these will be filled in with `nan`.

    Note:
        This repeatedly calls `func` on rows in the chain, so it's very
        inefficient if you already have a vectorized version of your
        post-processing function. `postprocd` is mostly useful for slow
        non-vectorized post-processing functions, allowing convenient use of
        the `nthreads` option to this function.

        For the default implementation of `pool`, `func` must be picklable,
        meaning it must be a module-level function.
    """
    if pool is not None:
        _pool = pool
    elif nthreads != 1:
        _pool = Pool(nthreads)
    else:
        _pool = None

    mp = map if _pool is None else _pool.map
    try:
        dat = mp(partial(_postprocd_helper, func), self.iterrows())
    finally:
        if pool is None and _pool is not None:
            _pool.terminate()

    c = self.copy()
    allkeys = set(chain(*[d.keys() for d in dat]))
    c.update({k: array([d.get(k, nan) for d in dat]) for k in allkeys})
    return c
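# A hedged usage sketch for `postprocd` above (not from the source): `add_b` is a
# hypothetical module-level post-processor, which is what the default Pool needs
# in order to pickle it; `c` stands for a chain object from this module.
def add_b(a, **_):
    # derive a new key 'b' from the existing key 'a', ignoring all other keys
    return {'b': a ** 2}

# new_chain = c.postprocd(add_b, nthreads=4)  # run over 4 worker processes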
def run(config_uri, app_name=None, username=None, types=(), batch_size=500, processes=None):
    # multiprocessing.get_context is Python 3 only.
    from multiprocessing import get_context
    from multiprocessing.pool import Pool

    # Loading app will have configured from config file. Reconfigure here:
    logging.getLogger('snovault').setLevel(logging.DEBUG)

    testapp = internal_app(config_uri, app_name, username)
    connection = testapp.app.registry[CONNECTION]
    uuids = [str(uuid) for uuid in connection.__iter__(*types)]
    transaction.abort()
    logger.info('Total items: %d' % len(uuids))

    pool = Pool(
        processes=processes,
        initializer=initializer,
        initargs=(config_uri, app_name, username),
        context=get_context('forkserver'),
    )

    all_results = []
    try:
        for result in pool.imap_unordered(worker, batched(uuids, batch_size), chunksize=1):
            results = result['results']
            errors = sum(error for item_type, path, update, error in results)
            updated = sum(update for item_type, path, update, error in results)
            logger.info('Batch: Updated %d of %d (errors %d)' % (updated, len(results), errors))
            all_results.extend(results)
    finally:
        pool.terminate()
        pool.join()

    def result_item_type(result):
        # Ensure we always return a string
        return result[0] or ''

    for item_type, results in itertools.groupby(
            sorted(all_results, key=result_item_type), key=result_item_type):
        results = list(results)
        errors = sum(error for item_type, path, update, error in results)
        updated = sum(update for item_type, path, update, error in results)
        logger.info('Collection %s: Updated %d of %d (errors %d)' %
                    (item_type, updated, len(results), errors))
def multiprocess_all_chromosomes(func, cls, *args, **kwargs):
    '''
    Convenience method for splitting up queries based on tag id.
    '''
    processes = current_settings.ALLOWED_PROCESSES
    set_chromosome_lists(cls, use_table=kwargs.get('use_table', None))
    p = Pool(processes)
    try:
        for chr_list in current_settings.CHR_LISTS:
            p.apply_async(func, args=[cls, chr_list, ] + list(args))
        p.close()
        p.join()
    except Exception as e:
        print('Terminating pool.')
        p.terminate()
        raise e
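# A hedged, self-contained variant of the apply_async pattern above (not from the
# source): keeping the AsyncResult objects and calling .get() surfaces exceptions
# raised inside the workers, which are otherwise silently dropped. `_square` is
# an illustrative worker function.
from multiprocessing.pool import Pool

def _square(n):
    return n * n

if __name__ == '__main__':
    p = Pool(4)
    try:
        async_results = [p.apply_async(_square, (n,)) for n in range(8)]
        p.close()
        p.join()
        values = [r.get() for r in async_results]  # re-raises worker exceptions here
        print(values)
    except Exception:
        p.terminate()
        raise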
def stress_the_melon(processes=10, times=500, url=URL):
    """
    :param processes: amount of processes running at the same time
    :param times: how many requests you want to make (sum)
    :param url: the url you want to get
    """
    print('Stress test working with ' + str(processes) + ' threads and ' +
          str(times) + ' requests\nURL: ' + URL)
    pool = Pool(processes=int(processes))
    for i in range(0, int(times)):
        result = pool.apply_async(do_get_request, [url, ])
        if result.get() != 200:
            print('OK, the Melon just died :(')
            pool.terminate()
def apply_forces(self, inds):
    """
    Uses multithreading to apply loads to the unit-stress tensors in the
    individuals provided.

    inputs:
        inds: tensor_ind objects representing a series of designs.
    outputs:
        app: A nxm array of stress tensors, where n is the number of
            individuals in inds, and m is the number of elements that data was
            requested from in each individual's apply_force method.
    """
    pool = Pool(8)
    args_to_pool = [[x, self.sto_force_x, self.sto_force_y] for x in inds]
    app = pool.starmap(self.call_apply, args_to_pool)
    pool.terminate()
    return app
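# Minimal self-contained sketch of the starmap/terminate pattern used in
# `apply_forces` above; the names here (`combine`) are illustrative only, and the
# try/finally is an assumption about how one might guard the terminate call.
from multiprocessing.pool import Pool

def combine(x, fx, fy):
    # stand-in for call_apply: fold two load components into one result
    return x + fx + fy

if __name__ == '__main__':
    pool = Pool(8)
    try:
        results = pool.starmap(combine, [(1, 2, 3), (4, 5, 6)])
    finally:
        pool.terminate()
    print(results)  # [6, 15]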
def crawl_companies_files(phantomjs_path, workers_num=10,
                          include_companies=None, from_date=None):
    companies_files = []
    pool = Pool(processes=workers_num)
    try:
        # Obtain the ccvm codes of all the listed companies
        ccvm_codes = [r.ccvm for r in BovespaCompany.objects.only(["ccvm"]).all()]
        ccvm_codes = sorted(ccvm_codes)

        _logger.debug("Processing the files of {0} companies from {1}".format(
            len(ccvm_codes),
            "{0:%Y-%m-%d}".format(from_date) if from_date else "THE BEGINNING"))

        func_params = []
        for ccvm_code in ccvm_codes:
            if include_companies and ccvm_code not in include_companies:
                continue
            for doc_type in DOC_TYPES:
                func_params.append([ccvm_code, phantomjs_path, doc_type, from_date])

        # call_results = pool.starmap(obtain_company_files, func_params)
        pool.starmap(obtain_company_files, func_params)

        # Merge all the responses into one only list
        # companies_files += list(
        #     itertools.chain.from_iterable(call_results))
    except TimeoutError:
        print("Timeout error")
        traceback.print_exc()
        raise
    finally:
        pool.close()
        pool.join()
        pool.terminate()
def show_logbook():
    """
    Show information about any jobs currently running

    Uses many threads to poll the job board because the latency can be high
    but the processing power required is low

    :return None:
    """
    print("Not connected to jobboard")
    pool = Pool(processes=SHOW_POLLERS)
    try:
        for _ in range(SHOW_POLLERS):
            pool.apply(query_and_print)
            sleep(0.1)
    except KeyboardInterrupt:
        pool.terminate()
    except Exception:
        pool.terminate()
    finally:
        pool.close()
        pool.join()
def join(self):
    try:
        while True:
            total_tasks = len(self.results)
            done_tasks = 0
            for result in self.results:
                if result.ready():
                    done_tasks += 1
            if done_tasks == total_tasks:
                self.progress('[%d task(s) completed, %d process(es)]',
                              done_tasks, self._processes)
                break
            else:
                self.progress('[%d task(s) completed, %d remaining, %d process(es)]',
                              done_tasks, total_tasks - done_tasks, self._processes)
            time.sleep(0.001)
    except KeyboardInterrupt:
        NativePool.terminate(self)
    return NativePool.join(self)
def _run_pool(uuids, args):
    from multiprocessing import get_context
    from multiprocessing.pool import Pool
    transaction.abort()
    pool = Pool(
        processes=args.processes,
        initializer=_pool_initializer,
        initargs=(args.config_uri, args.app_name, args.username),
        context=get_context('forkserver'),
        maxtasksperchild=args.maxtasksperchild,
    )
    est_loops = int(len(uuids) / args.batchsize)
    all_results = []
    try:
        pool_gen = pool.imap_unordered(
            _pool_worker,
            _pool_batch_results(uuids, args.batchsize),
            chunksize=args.chunksize,
        )
        for loop, result in enumerate(pool_gen, 1):
            results = result['results']
            error_msgs = [
                error_msg
                for _, _, _, _, error_msg in results
                if error_msg
            ]
            updated_cnt = sum(update for _, _, update, _, _ in results)
            log_msg = "{} of ~{} Batch: Updated {} of {} (errors {})".format(
                loop,
                est_loops,
                updated_cnt,
                len(results),
                len(error_msgs),
            )
            BATCH_UPGRADE_LOG.info(log_msg)
            for error_msg in error_msgs:
                BATCH_UPGRADE_LOG.error("\t%s", error_msg)
            all_results.extend(results)
    finally:
        pool.terminate()
        pool.join()
    return all_results
def image_urls(self):
    """
    Iterates over json obj, gets image links
    Creates pool of workers, creates new workers
    """
    json_obj = self.jsonify()
    for post in json_obj["posts"]:
        if "ext" in post:
            self.total_count.value += 1
    try:
        self.thread_name = self.args.name
    except (KeyError, NameError):
        self.thread_name = json_obj["posts"][0]["sub"].replace(" ", "_")
    else:
        self.thread_name = str(json_obj["posts"][0]["no"])
    for post in json_obj["posts"]:
        if "ext" in post:
            filename = post["tim"] + post["ext"]
            image_url = "https://8ch.net/{board}/src/{file}".format(board=self.board, file=filename)
            self.downloads.append((image_url, filename))
            self.download_image(image_url, filename)
            with self.counter.get_lock():
                self.counter.value += 1
                update_progress(self.counter.value, self.total_count.value)
    pool = Pool(self.workers)
    pool_map = pool.map_async(self.download_image, self.downloads)
    try:
        pool_map.get(0xFFFF)
    except KeyboardInterrupt:
        print("Aborting")
        pool.terminate()
        pool.join()
    else:
        pool.close()
        pool.join()
def run_check(check_list, pid):
    queues = queue.Queue()
    pool = Pool(10)  # create a pool with 10 workers
    for fn in check_list:
        if mongo.db.tasks.find_one({"id": pid}) == None:
            pool.terminate()
            pool.close()
            return False
        print(fn)
        pool.apply_async(waf_check, (fn, queues))
    pool.close()  # close the pool so no new tasks are accepted
    pool.join()   # block the main process until the workers exit
    checkd_list = list(queues.queue)
    return checkd_list
def poolHandle(zip, nid):
    if DEBUG_LEVEL == 0:
        p = Pool(80)
        for sub in zip.namelist():
            fobj = getSubFobj(zip, sub)
            if fobj != None:
                p.apply_async(handleSub, args=(fobj, nid))
        p.close()
        p.join()
    elif DEBUG_LEVEL == 1:
        p = billiard.Pool()
        _finalizers.append(Finalize(p, p.terminate))
        try:
            p.map_async(handleSub, [(getSubFobj(zip, sub), nid) for sub in zip.namelist()])
            p.close()
            p.join()
        finally:
            p.terminate()
    else:
        for sub in zip.namelist():
            fobj = getSubFobj(zip, sub)
            if fobj != None:
                handleSub(fobj, nid)
    zip.close()
def download_files(cache_folder, files_per_ccvm_and_doc_type, doc_types,
                   workers_num=10, force_download=False, include_companies=None):
    pool = Pool(processes=workers_num)
    try:
        func_params = []
        for key, files in files_per_ccvm_and_doc_type.items():
            ccvm, doc_type = key.split("_")

            # We process only the informed companies, if there is any informed
            if include_companies and ccvm not in include_companies:
                continue

            for (fiscal_date, protocol, version, doc_type,
                 delivery_type, delivery_date) in files:
                filename = "CCVM_{0}_{1:%Y%m%d}_{2}.{3}".format(
                    ccvm, fiscal_date, version.replace(".", ""), doc_type)
                func_params.append([
                    cache_folder, ccvm, fiscal_date, version,
                    doc_type, protocol, force_download])

        _logger.debug("Downloading {} files...".format(len(func_params)))

        call_results = pool.starmap(download_file, func_params)

        generate_dataset(call_results)
    except TimeoutError:
        _logger.exception("Timeout error")
        raise
    finally:
        pool.close()
        pool.join()
        pool.terminate()
def handle(self, *args, **options):
    global crawler_clazz, crawler

    workers_num = options.get("workers_num", 1)

    results = []
    pool = Pool(processes=workers_num)
    try:
        crawler_params = crawler.crawl_params(**options)

        _logger.info("Starting crawling with {0} params.".format(len(crawler_params)))

        func_params = []
        for crawler_param in crawler_params:
            func_params.append([crawler_param, options])

        _logger.info("Starting a Pool of %d processes" % workers_num)

        # crawl(*func_params[0])
        # call_results = pool.starmap(crawl, func_params)
        pool.starmap(crawl, func_params)

        # Merge all the responses into one only list
        # if call_results:
        #     results += list(
        #         itertools.chain.from_iterable(call_results))
        # _logger.info("Crawler results ({0}): {1}".
        #              format(len(results), results))

        _logger.info("Crawler successfully finished!")
    except TimeoutError:
        _logger.error("Timeout error")
    finally:
        pool.close()
        pool.join()
        pool.terminate()
def main():
    global sqs_conn, sqs_queue
    args = parse_args()

    start_time = datetime.datetime.utcnow()
    first_start_time = start_time
    print "first start: %s" % first_start_time

    with open(args.get('config'), 'r') as f:
        config = json.load(f)

    sqs_config = config.get('sqs')
    sqs_conn = boto.sqs.connect_to_region(**sqs_config)

    queue_name = 'baas20sr_usea_baas20sr_usea_index_all_dead'
    sqs_queue = sqs_conn.get_queue(queue_name)
    last_size = sqs_queue.count()
    print 'Last Size: ' + str(last_size)

    pool = Pool(10)

    keep_going = True
    while keep_going:
        sqs_messages = sqs_queue.get_messages(
            num_messages=10,
            visibility_timeout=10,
            wait_time_seconds=10)
        if len(sqs_messages) > 0:
            pool.map(check_exists, sqs_messages)
        else:
            print 'DONE!'
            pool.terminate()
            keep_going = False
def _itergroundings(self, simplify=True, unsatfailure=True):
    # generate all groundings
    if not self.formulas:
        return
    global global_fastConjGrounding
    global_fastConjGrounding = self
    batches = list(rndbatches(self.formulas, 20))
    batchsizes = [len(b) for b in batches]
    if self.verbose:
        bar = ProgressBar(steps=sum(batchsizes), color='green')
        i = 0
    if self.multicore:
        pool = Pool()
        try:
            for gfs in pool.imap(with_tracing(create_formula_groundings), batches):
                if self.verbose:
                    bar.inc(batchsizes[i])
                    bar.label(str(cumsum(batchsizes, i + 1)))
                    i += 1
                for gf in gfs:
                    yield gf
        except Exception as e:
            logger.error('Error in child process. Terminating pool...')
            pool.close()
            raise e
        finally:
            pool.terminate()
            pool.join()
    else:
        for gfs in map(create_formula_groundings, batches):
            if self.verbose:
                bar.inc(batchsizes[i])
                bar.label(str(cumsum(batchsizes, i + 1)))
                i += 1
            for gf in gfs:
                yield gf
def image_urls(self):
    """
    Iterates over json obj, gets image links
    Creates pool of workers, creates new workers
    """
    json_obj = self.jsonify()
    for post in json_obj['posts']:
        if 'ext' in post:
            self.total_count.value += 1
    self.thread_name = json_obj['posts'][0]['semantic_url']
    for post in json_obj['posts']:
        if 'ext' in post:
            filename = str(post['tim']) + post['ext']
            image_url = 'https://i.4cdn.org/{board}/{file}'.format(
                board=self.board, file=filename)
            self.filename.append(filename)
            self.downloads.append(image_url)
            self.download_image(image_url, filename)
            with self.counter.get_lock():
                self.counter.value += 1
                update_progress(self.counter.value, self.total_count.value)
    manager = Manager()
    pool_data = manager.list(self.downloads)
    partial_data = partial(self.download_image, pool_data)
    pool = Pool(self.workers)
    pool_map = pool.map_async(partial_data, self.filename)
    try:
        pool.close()
        pool.join()
    except KeyboardInterrupt:
        print("Aborting")
        pool.terminate()
        pool.join()
def main(args):
    thread_count = args.n or 1
    p = Pool(thread_count)
    start = time.time()
    if args.f == 'ping':
        args_ips = args.ip.split('-')
        if args_ips[1]:
            ips = findips(args_ips[0], args_ips[1])
        else:
            ips = args_ips
        for ip in ips:
            p.apply_async(ping, args=(ip, ))  # asynchronous process pool
    elif args.f == 'tcp':
        ip = args.ip
        port_dict = {'ip': ip, 'port': {}}
        for port in range(1, 10001):
            result = p.apply_async(tcp, args=(ip, port))  # asynchronous process pool
            presult = result.get()
            if presult:
                port_dict['port'][port] = 'Open'
                print(port_dict)
        json_str = json.dumps(port_dict)
        path = os.path.abspath(os.path.dirname(__file__))
        filename = args.w or f'{path}/result.json'
        with open(filename, 'w') as f:
            f.write(json_str)
    p.close()
    p.join()
    p.terminate()
    end = time.time()
    if args.v:
        print('Time : ', end - start)
def competition():
    p = Pool(processes=4)

    total = 0
    scores = {}

    mycmd = 'examples.Greedy'
    competitors = ['examples.Greedy']
    levels = ['map00']

    pairs = itertools.product([mycmd], competitors)
    games = list(itertools.product(levels, pairs))
    print "Running against %i commanders on %i levels, for a total of %i games.\n" % (len(competitors), len(levels), len(games))

    try:
        for level, results in p.map(run, games):
            for (_, bot), score in results.items():
                scores.setdefault(bot, [0, 0, 0, 0, 0])
                scores[bot][0] += score[0]                   # Flags captured.
                scores[bot][1] += score[1]                   # Flags conceded.
                scores[bot][2] += int(score[0] > score[1])   # Win.
                scores[bot][3] += int(score[0] == score[1])  # Draw.
                scores[bot][4] += int(score[1] > score[0])   # Loss.
                total += 1
    except KeyboardInterrupt:
        print "\nTerminating competition due to keyboard interrupt."
        p.terminate()
        p.join()
    else:
        print "\n"
        for r, s in sorted(scores.items(), key=lambda i: i[1][2]*30 + i[1][3]*10 + i[1][0] - i[1][1], reverse=True):
            nick = r.replace('Commander', '')
            if nick in mycmd:
                continue
            print "{}\n\tCaptured {} flags and conceded {}.\n\tWon {}, drew {} and lost {}.\n".format(nick.upper(), *s)
        print '\n\nAll matches played against {}; best opponent at top of list.\n'.format(mycmd)
def _run_with_multiprocessing(
    process, total_tiles, zoom_levels, multi, quiet, debug
):
    LOGGER.debug("run with multiprocessing")
    num_processed = 0
    LOGGER.info("run process using %s workers", multi)
    f = partial(_process_worker, process)
    with tqdm.tqdm(
        total=total_tiles, unit="tiles", disable=(quiet or debug)
    ) as pbar:
        for zoom in zoom_levels:
            process_tiles = process.get_process_tiles(zoom)
            pool = Pool(multi)
            try:
                for tile, output in pool.imap_unordered(
                    f,
                    process_tiles,
                    # set chunksize to between 1 and MAX_CHUNKSIZE
                    chunksize=min([
                        max([total_tiles // multi, 1]), MAX_CHUNKSIZE
                    ])
                ):
                    pbar.update()
                    num_processed += 1
            except KeyboardInterrupt:
                LOGGER.info("Caught KeyboardInterrupt, terminating workers")
                pool.terminate()
                break
            except Exception:
                pool.terminate()
                raise
            finally:
                pool.close()
                pool.join()
                process_tiles = None
    LOGGER.info("%s tile(s) iterated", (str(num_processed)))
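# Minimal self-contained sketch of the imap_unordered/terminate pattern used in
# `_run_with_multiprocessing` above; `work_item` is an illustrative stand-in and
# not part of the original code base.
from multiprocessing.pool import Pool

def work_item(n):
    return n * n

if __name__ == '__main__':
    pool = Pool(4)
    try:
        for result in pool.imap_unordered(work_item, range(100), chunksize=8):
            pass  # results arrive in completion order, not submission order
    except KeyboardInterrupt:
        pool.terminate()  # stop workers immediately on Ctrl-C
    finally:
        pool.close()  # no-op if the pool was already terminated
        pool.join()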
class SinglePool(Concurrency):
    name = 'Concurrency: SinglePool'

    def __init__(self, **kwargs):
        self.pool = None
        super().__init__(**kwargs)

    def __initialize(self, solver, **kwargs):
        if self.pool is not None:
            kwargs['output'].debug(2, 2, 'Pool already inited')
        else:
            self.pool = Pool(
                processes=self.processes,
                initializer=initializer,
                initargs=(kwargs['instance'], solver)
            )
            kwargs['output'].debug(2, 2, 'Init pool with %d processes' % self.processes)

    def __solve(self, tasks, **kwargs):
        output = kwargs['output']
        res_list, results = [], []
        for task in tasks:
            res = self.pool.apply_async(solve, (task,))
            res_list.append(res)

        while len(res_list) > 0:
            res_list[0].wait()

            i = 0
            while i < len(res_list):
                if res_list[i].ready():
                    res = res_list.pop(i)
                    try:
                        results.append(res.get())
                    except Exception as e:
                        output.debug(0, 1, 'Pool solving was completed unsuccessfully: %s', e)
                else:
                    i += 1

            output.debug(2, 3, 'Already solved %d tasks' % len(results))

        if not self.keep:
            self.terminate()

        return [result.set_value(self.measure.get(result)) for result in results]

    def single(self, task: Task, **kwargs) -> Result:
        cnf = kwargs['instance'].cnf().to_str(task.get())
        report = self.propagator.solve(cnf)
        result = task.resolve(report.status, report.time, {}, report.solution)
        return result.set_value(self.measure.get(result))

    def propagate(self, tasks: List[Task], **kwargs) -> List[Result]:
        self.__initialize(self.propagator, **kwargs)
        return self.__solve(tasks, **kwargs)

    def solve(self, tasks: List[Task], **kwargs) -> List[Result]:
        self.__initialize(self.solver, **kwargs)
        return self.__solve(tasks, **kwargs)

    def terminate(self):
        self.pool.terminate()
        self.pool = None
def __init__(self, game_board, depth, root):
    self.parent = []
    self.expectimax_val = []
    self.move = []

    # If root node
    if root:
        # Use parallel execution
        self.game_board = game_board
        usePool = 0
        if usePool:
            # Calculate the tree using a multiple process
            pool = Pool(processes=4)
            validMove = checkValidMoves(game_board)
            try:
                # Evaluaate the branches of the tree
                if validMove[0]:
                    # self.right = MoveNode(right_move_return(game_board), depth)
                    right_game_board = right_move_return(game_board)
                    right_depth = depth
                    right_result = pool.apply_async(MoveNode, (right_game_board, right_depth))
                else:
                    self.left = []
                if validMove[1]:
                    # self.left = MoveNode(left_move_return(game_board), depth)
                    left_game_board = left_move_return(game_board)
                    left_depth = depth
                    left_result = pool.apply_async(MoveNode, (left_game_board, left_depth))
                else:
                    self.right = []
                if validMove[2]:
                    # self.up = MoveNode(up_move_return(game_board), depth)
                    up_game_board = up_move_return(game_board)
                    up_depth = depth
                    up_result = pool.apply_async(MoveNode, (up_game_board, up_depth))
                else:
                    self.up = []
                if validMove[3]:
                    # self.down = MoveNode(down_move_return(game_board), depth)
                    down_game_board = down_move_return(game_board)
                    down_depth = depth
                    down_result = pool.apply_async(MoveNode, (down_game_board, down_depth))
                else:
                    self.down = []

                # Get the results from parallel pool
                if validMove[0]:
                    self.right = right_result.get()
                if validMove[1]:
                    self.left = left_result.get()
                if validMove[2]:
                    self.up = up_result.get()
                if validMove[3]:
                    self.down = down_result.get()
            # Close the pool on exception
            except:
                pool.close()
                pool.terminate()
                pool.join()
            # Close the pool
            pool.close()
        # Not using the parallel pool
        else:
            validMove = checkValidMoves(game_board)
            if validMove[0]:
                self.right = MoveNode(right_move_return(game_board), depth)
            else:
                self.left = []
            if validMove[1]:
                self.left = MoveNode(left_move_return(game_board), depth)
            else:
                self.right = []
            if validMove[2]:
                self.up = MoveNode(up_move_return(game_board), depth)
            else:
                self.up = []
            if validMove[3]:
                self.down = MoveNode(down_move_return(game_board), depth)
            else:
                self.down = []
    # Build branches not from the root node
    else:
        # self.left = MoveNode(left_move_return(game_board), depth)
        # self.right = MoveNode(right_move_return(game_board), depth)
        # self.up = MoveNode(up_move_return(game_board), depth)
        # self.down = MoveNode(down_move_return(game_board), depth)
        validMove = checkValidMoves(game_board)
        if validMove[0]:
            self.right = MoveNode(right_move_return(game_board), depth)
        else:
            self.left = []
        if validMove[1]:
            self.left = MoveNode(left_move_return(game_board), depth)
        else:
            self.right = []
        if validMove[2]:
            self.up = MoveNode(up_move_return(game_board), depth)
        else:
            self.up = []
        if validMove[3]:
            self.down = MoveNode(down_move_return(game_board), depth)
        else:
            self.down = []
levels = ['map00', 'map01', 'map10', 'map11', 'map20', 'map30']

pairs = itertools.product([mycmd], competitors)
games = list(itertools.product(levels, pairs))
print "Running against %i commanders on %i levels, for a total of %i games.\n" % (len(competitors), len(levels), len(games))

try:
    for level, results in p.map(run, games):
        for (_, bot), score in results.items():
            scores.setdefault(bot, [0, 0, 0, 0, 0])
            scores[bot][0] += score[0]                   # Flags captured.
            scores[bot][1] += score[1]                   # Flags conceded.
            scores[bot][2] += int(score[0] > score[1])   # Win.
            scores[bot][3] += int(score[0] == score[1])  # Draw.
            scores[bot][4] += int(score[1] > score[0])   # Loss.
            total += 1
except KeyboardInterrupt:
    print "\nTerminating competition due to keyboard interrupt."
    p.terminate()
    p.join()
else:
    print "\n"
    for r, s in sorted(scores.items(), key=lambda i: i[1][2]*30 + i[1][3]*10 + i[1][0] - i[1][1], reverse=True):
        nick = r.replace('Commander', '')
        if nick in mycmd:
            continue
        print "{}\n\tCaptured {} flags and conceded {}.\n\tWon {}, drew {} and lost {}.\n".format(nick.upper(), *s)
    print '\n\nAll matches played against {}; best opponent at top of list.\n'.format(mycmd)
def train_loop(manager: Manager, log_queue: Queue):
    pool = Pool(processes=hp.NUM_POOL_WORKERS,
                initializer=pool_worker_init,
                initargs=(log_queue, ),
                maxtasksperchild=hp.MAX_GAMES_PER_POOL_WORKER)

    history_queue_file = "%s/history-queue.h5" % hp.OUTPUT_DIR
    if os.path.exists(history_queue_file):
        logging.info("Loading history queue from file: %s", history_queue_file)
        history_queue = manager.list(
            np.rec.array(
                util.read_from_hdf5_file(history_queue_file, "history_queue")[:]))
    else:
        history_queue = manager.list()

    self_play_model_file = hp.SELF_PLAY_MODEL_FILE
    trained_model_file = hp.TRAIN_MODEL_FILE

    num_clients_per_predict_worker = hp.NUM_POOL_WORKERS // hp.NUM_PREDICT_WORKERS + 1
    self_play_predict_workers, self_play_model_wstate = spawn_predict_proxies(
        hp.NUM_PREDICT_WORKERS, num_clients_per_predict_worker,
        self_play_model_file, manager, history_queue, log_queue)
    train_workers, train_model_wstate = spawn_predict_proxies(
        1, hp.NUM_POOL_WORKERS, self_play_model_file, manager, history_queue,
        log_queue)
    assert len(train_workers) == 1
    train_worker = train_workers[0]

    try:
        for iter_index in range(hp.START_ITER, hp.START_ITER + hp.NUM_ITER):
            # self-play
            logging.info("Iter %d: Starting self-play", iter_index)
            self_play_results = pool.starmap(
                self_play_worker,
                zip(repeat(iter_index), range(hp.NUM_GAMES),
                    repeat(history_queue), repeat(self_play_model_wstate)))
            logging.info("Iter %d, self-play results: %s", iter_index,
                         self_play_results)

            # train NN
            logging.info("Iter %d: Starting network train", iter_index)
            train_worker.control_pipe.send((ControlActions.TRAIN, iter_index))
            act, result = train_worker.control_pipe.recv()
            assert act == ControlActions.TRAIN_COMPLETED
            logging.info("Iter %d: Ended network train", iter_index)

            # eval
            logging.info("Iter %d: Starting evaluation", iter_index)
            eval_results = pool.starmap(
                multi_player_worker,
                zip(repeat(iter_index), range(hp.NUM_EVAL_GAMES),
                    repeat(train_model_wstate), repeat(self_play_model_wstate)))
            logging.info("Iter %d: Evaluation end: results: %s", iter_index,
                         eval_results)

            outcomes = np.array([outcome for _, outcome in eval_results])
            trained_model_win_ratio = np.sum(
                outcomes == hp.OUTCOME_WIN_PLAYER_1) / len(outcomes)
            logging.info(
                "Iter %d evaluation: trained_model_win percent : %.2f%%",
                iter_index, trained_model_win_ratio * 100)

            if trained_model_win_ratio > hp.MIN_MODEL_REPLACEMENT_WIN_RATIO:
                stop_and_join_workers(*self_play_predict_workers)
                del self_play_model_wstate
                del self_play_predict_workers

                self_play_model_file = "%s/model-best-%00d-%.0f.h5" % (
                    hp.OUTPUT_DIR, iter_index, trained_model_win_ratio * 100)
                save_worker_model(train_worker, self_play_model_file)

                self_play_predict_workers, self_play_model_wstate = spawn_predict_proxies(
                    hp.NUM_PREDICT_WORKERS, num_clients_per_predict_worker,
                    self_play_model_file, manager, history_queue, log_queue)

            trained_model_file = "%s/model-train-%00d-%.0f.h5" % (
                hp.OUTPUT_DIR, iter_index, trained_model_win_ratio * 100)
            save_worker_model(train_worker, trained_model_file)

            util.save_to_hdf5_file({"history_queue": list(history_queue)},
                                   history_queue_file, compression='gzip')
            gc.collect()
        # end iter loop
    except Exception as e:
        if e is KeyboardInterrupt or e is SystemExit:
            logging.info("Terminated by user.")
        else:
            logging.error("Error: %s", e)
        pool.terminate()
        pool.join()
        terminate_workers(train_worker, *self_play_predict_workers)
        raise e
    else:
        stop_and_join_workers(train_worker, *self_play_predict_workers)
        logging.info('Done successfully.')
    finally:
        pool.close()
class _LocalRunner(_Runner):
    def __init__(self, max_tries=3):
        super(_LocalRunner, self).__init__(max_tries)
        self.pool = None

    @classmethod
    def is_local(cls):
        return True

    @classmethod
    def max_tasks(cls):
        return cpu_count()

    def run(self, job):
        if isinstance(job, SplittableJob):
            job._split_to_tasks()
            jobs = job.tasks
        elif isinstance(job, collections.Callable):
            jobs = [job]
        else:
            jobs = job

        # for i,j in enumerate(jobs):
        #     jw = JobWrapper(j)
        #     jw()

        try:
            self.pool = Pool()
            results = [self.pool.apply_async(JobWrapper(j)) for j in jobs]
            for try_n in xrange(self.max_tries):
                for i, result in enumerate(results):
                    if result is not None:
                        result.wait()
                        if result.successful() and jobs[i].test_success():
                            results[i] = None
                        else:
                            try:
                                jobs[i].prepare_retry()
                            except TypeError:
                                pass
                            results[i] = self.pool.apply_async(JobWrapper(jobs[i]))
                # Stop loop if all results were OK
                if all(r is None for r in results):
                    break
            self.pool.close()
            self.pool.join()
            self.pool = None
            if not all(r is None for r in results):
                raise JobFailedException
        except (KeyboardInterrupt, SystemExit):
            if self.pool is not None:
                self.pool.terminate()
                self.pool.join()
                self.pool = None
            raise

        if isinstance(job, SplittableJob):
            job._merge_tasks()
class LLTInf(object): """Obtains a decision tree that classifies the given labeled traces. traces : a Traces object The set of labeled traces to use as training set depth : integer Maximum depth to be reached optimize_impurity : function. Optional, defaults to optimize_inf_gain A function that obtains the best parameters for a test in a given node according to some impurity measure. The should have the following prototype: optimize_impurity(traces, primitive, rho, disp) : (primitive, impurity) where traces is a Traces object, primitive is a depth 2 STL formula, rho is a list with the robustness degree of each trace up until this node in the tree and disp is a boolean that switches output display. The impurity returned should be so that the best impurity is the minimum one. stop_condition : list of functions. Optional, defaults to [perfect_stop] list of stopping conditions. Each stopping condition is a function from a dictionary to boolean. The dictionary contains all the information passed recursively during the construction of the decision tree (see arguments of lltinf_). disp : a boolean Switches displaying of debuggin output Returns a DTree object. TODO: Fix comments """ def __init__( self, depth=1, primitive_factory=llt.make_llt_primitives, optimize_impurity=impurity.ext_inf_gain, stop_condition=None, redo_after_failed=1, optimizer_args=None, times=None, fallback_impurity=impurity.inf_gain, log=False, ): self.depth = depth self.primitive_factory = primitive_factory self.optimize_impurity = optimize_impurity self.fallback_impurity = fallback_impurity if stop_condition is None: self.stop_condition = [perfect_stop] else: self.stop_condition = stop_condition if optimizer_args is None: optimizer_args = {} self.optimizer_args = optimizer_args self.times = times self.interpolate = times is not None if self.interpolate and len(self.times) > 1: self.tinter = self.times[1] - self.times[0] else: self.tinter = None self.tree = None self.redo_after_failed = redo_after_failed self._partial_add = 0 self.log = log if "workers" not in self.optimizer_args: self.pool = Pool(initializer=_pool_initializer) def pool_map(func, iterable): try: return self.pool.map_async(func, iterable).get(timeout=120) except KeyboardInterrupt: self.pool.terminate() self.pool.join() raise KeyboardInterrupt() self.pool_map = pool_map self.optimizer_args["workers"] = self.pool_map def __del__(self): if hasattr(self, "pool"): self.pool.terminate() self.pool.join() def __exit__(self): if hasattr(self, "pool"): self.pool.terminate() self.pool.join() def fit(self, traces, disp=False): np.seterr(all="ignore") self.tree = self._lltinf(traces, None, self.depth, disp=disp) return self def fit_partial(self, traces, disp=False): if self.tree is None: return self.fit(traces, disp=disp) else: preds = self.predict(traces.signals) failed = set() for i in range(len(preds)): leaf = self.tree.add_signal(traces.signals[i], traces.labels[i], self.interpolate, self.tinter) if preds[i] != traces.labels[i]: failed.add(leaf) # logger.debug("Failed set: {}".format(failed)) self._partial_add += len(failed) if self._partial_add // self.redo_after_failed > 0: # logger.debug("Redoing tree") self._partial_add = 0 return self.fit(self.tree.traces, disp=disp) else: for leaf in failed: # TODO don't redo whole node, only leaf tree = self._lltinf( leaf.traces, leaf.robustness, self.depth - leaf.level(), disp=disp, ) old_tree = leaf.copy() leaf.set_tree(tree) # FIXME only for perfect_stop preds = self.predict(traces.signals) if not np.array_equal(preds, 
traces.labels): self._partial_add = 0 return self.fit(self.tree.traces, disp=disp) return self def predict(self, signals): if self.tree is not None: return np.array([ self.tree.classify(s, self.interpolate, self.tinter) for s in signals ]) else: raise ValueError("Model not fit") def get_formula(self): if self.tree is not None: return self.tree.get_formula() else: raise ValueError("Model not fit") def _debug(self, *args): if self.log: logger.debug(*args) def _lltinf(self, traces, rho, depth, disp=False, override_impurity=None): """Recursive call for the decision tree construction. See lltinf for information on similar arguments. rho : list of numerics List of robustness values for each trace up until the current node depth : integer Maximum depth to be reached. Decrements for each recursive call """ # Stopping condition if any( [stop(self, traces, rho, depth) for stop in self.stop_condition]): return None # Find primitive using impurity measure self._debug( f"Creating primitives at depth {depth} over {len(traces)} traces") primitives = self.primitive_factory(traces.signals, traces.labels) if override_impurity is None: impurity = self.optimize_impurity else: impurity = override_impurity self._debug( f"Finding best primitive at depth {depth} over {len(traces)} traces" ) primitive, impurity = _find_best_primitive( traces, primitives, rho, impurity, disp, self.optimizer_args, times=self.times, interpolate=self.interpolate, tinter=self.tinter, ) if disp: print("Best: {} ({})".format(primitive, impurity)) self._debug(f"Best primitive found: {primitive} (imp: {impurity})") # Classify using best primitive and split into groups prim_rho = [ primitive.score(model) for model in traces.models(self.interpolate, self.tinter) ] if rho is None: rho = [np.inf for i in traces.labels] tree = DTree(primitive, traces, rho) def split(prim_rho): sat, unsat = [], [] for i, rho in enumerate(prim_rho): if rho >= 0: sat.append(i) else: unsat.append(i) return sat, unsat # [prim_rho, rho, signals, label] # sat_, unsat_ = split_groups( # list(zip(prim_rho, rho, *traces.as_list())), lambda x: x[0] >= 0 # ) sat_, unsat_ = split(prim_rho) self._debug(f"Split: {len(sat_)}/{len(unsat_)}") # pure_wrong = all([t[3] <= 0 for t in sat_]) or all([t[3] >= 0 for t in unsat_]) # pure_right = all([t[3] >= 0 for t in sat_]) or all([t[3] <= 0 for t in unsat_]) sat_right = len([i for i in sat_ if traces.labels[i] >= 0]) sat_wrong = len(sat_) - sat_right unsat_right = len([i for i in unsat_ if traces.labels[i] <= 0]) unsat_wrong = len(unsat_) - unsat_right # Switch sat and unsat if labels are wrong. 
No need to negate prim rho since # we use it in absolute value later if sat_right * unsat_right == 0 or (sat_wrong * unsat_wrong != 0 and sat_right < unsat_wrong): self._debug(f"Inverting primitive") sat_, unsat_ = unsat_, sat_ tree.primitive.negate() # No further classification possible if len(sat_) == 0 or len(unsat_) == 0: self._debug("No further classification possible") if override_impurity is None: self._debug("Attempting to classify using impurity fallback") return self._lltinf( traces, rho, depth, disp=disp, override_impurity=self.fallback_impurity, ) else: return None # Redo data structures sat_traces, unsat_traces = [ traces.subset(traces, idxs) for idxs in [sat_, unsat_] ] sat_rho, unsat_rho = [ np.amin( [np.abs([prim_rho[i] for i in idxs]), [rho[i] for i in idxs]], 0) for idxs in [sat_, unsat_] ] # sat, unsat = [ # (Traces(*group[2:]), np.amin([np.abs(group[0]), group[1]], 0)) # for group in [list(zip(*sat_)), list(zip(*unsat_))] # ] # Recursively build the tree tree.left = self._lltinf(sat_traces, sat_rho, depth - 1, disp=disp) tree.right = self._lltinf(unsat_traces, unsat_rho, depth - 1, disp=disp) return tree
def run(self, model, epoch=600, batchsize=16, learning_rate=0.0001, early_rejection=False, valid_interval=10, tag='', save_result=True, checkpoint='', pretrain=False, skip_train=False, validate_train=True, validate_valid=True, logdir='/data/public/rw/kaggle-data-science-bowl/logs/', **kwargs): self.set_network(model, batchsize) ds_train, ds_valid, ds_valid_full, ds_test = self.network.get_input_flow( ) self.network.build() print(HyperParams.get().__dict__) net_output = self.network.get_output() net_loss = self.network.get_loss() global_step = tf.Variable(0, trainable=False) learning_rate_v, train_op = self.network.get_optimize_op( global_step=global_step, learning_rate=learning_rate) best_loss_val = 999999 best_miou_val = 0.0 name = '%s_%s_lr=%.8f_epoch=%d_bs=%d' % ( tag if tag else datetime.datetime.now().strftime("%y%m%dT%H%M%f"), model, learning_rate, epoch, batchsize, ) model_path = os.path.join(KaggleSubmission.BASEPATH, name, 'model') best_ckpt_saver = BestCheckpointSaver(save_dir=model_path, num_to_keep=100, maximize=True) saver = tf.train.Saver() m_epoch = 0 # initialize session self.init_session() # tensorboard tf.summary.scalar('loss', net_loss, collections=['train', 'valid']) s_train = tf.summary.merge_all('train') s_valid = tf.summary.merge_all('valid') train_writer = tf.summary.FileWriter(logdir + name + '/train', self.sess.graph) valid_writer = tf.summary.FileWriter(logdir + name + '/valid', self.sess.graph) logger.info('initialization+') if not checkpoint: self.sess.run(tf.global_variables_initializer()) if pretrain: global_vars = tf.global_variables() from tensorflow.python import pywrap_tensorflow reader = pywrap_tensorflow.NewCheckpointReader( self.network.get_pretrain_path()) var_to_shape_map = reader.get_variable_to_shape_map() saved_vars = list(var_to_shape_map.keys()) var_list = [ x for x in global_vars if x.name.replace(':0', '') in saved_vars ] var_list = [x for x in var_list if 'logit' not in x.name] logger.info('pretrained weights(%d) loaded : %s' % (len(var_list), self.network.get_pretrain_path())) pretrain_loader = tf.train.Saver(var_list) pretrain_loader.restore(self.sess, self.network.get_pretrain_path()) elif checkpoint == 'best': path = get_best_checkpoint(model_path) saver.restore(self.sess, path) logger.info('restored from best checkpoint, %s' % path) elif checkpoint == 'latest': path = tf.train.latest_checkpoint(model_path) saver.restore(self.sess, path) logger.info('restored from latest checkpoint, %s' % path) else: saver.restore(self.sess, checkpoint) logger.info('restored from checkpoint, %s' % checkpoint) step = self.sess.run(global_step) start_e = (batchsize * step) // len(CellImageDataManagerTrain.LIST) logger.info('training started+') if epoch > 0 and not skip_train: try: losses = [] for e in range(start_e, epoch): loss_val_avg = [] train_cnt = 0 for dp_train in ds_train.get_data(): _, loss_val, summary_train = self.sess.run( [train_op, net_loss, s_train], feed_dict=self.network.get_feeddict( dp_train, True)) loss_val_avg.append(loss_val) train_cnt += 1 step, lr = self.sess.run([global_step, learning_rate_v]) loss_val_avg = sum(loss_val_avg) / len(loss_val_avg) logger.info( 'training %d epoch %d step, lr=%.8f loss=%.4f train_iter=%d' % (e + 1, step, lr, loss_val_avg, train_cnt)) losses.append(loss_val) train_writer.add_summary(summary_train, global_step=step) if early_rejection and len(losses) > 100 and losses[ len(losses) - 100] * 1.05 < loss_val_avg: logger.info('not improved, stop at %d' % e) break # early rejection if early_rejection and ((e == 
50 and loss_val > 0.5) or (e == 200 and loss_val > 0.2)): logger.info('not improved training loss, stop at %d' % e) break m_epoch = e avg = 10.0 if loss_val < 0.20 and (e + 1) % valid_interval == 0: avg = [] for _ in range(5): ds_valid.reset_state() ds_valid_d = ds_valid.get_data() for dp_valid in ds_valid_d: loss_val, summary_valid = self.sess.run( [net_loss, s_valid], feed_dict=self.network.get_feeddict( dp_valid, False)) avg.append(loss_val) ds_valid_d.close() avg = sum(avg) / len(avg) logger.info('validation loss=%.4f' % (avg)) if best_loss_val > avg: best_loss_val = avg valid_writer.add_summary(summary_valid, global_step=step) if avg < 0.16 and e >= 100 and (e + 1) % valid_interval == 0: cnt_tps = np.array((len(thr_list)), dtype=np.int32), cnt_fps = np.array((len(thr_list)), dtype=np.int32) cnt_fns = np.array((len(thr_list)), dtype=np.int32) pool_args = [] ds_valid_full.reset_state() ds_valid_full_d = ds_valid_full.get_data() for idx, dp_valid in tqdm( enumerate(ds_valid_full_d), desc='validate using the iou metric', total=len(CellImageDataManagerValid.LIST)): image = dp_valid[0] inference_result = self.network.inference( self.sess, image, cutoff_instance_max=0.9) instances, scores = inference_result[ 'instances'], inference_result['scores'] pool_args.append( (thr_list, instances, dp_valid[2])) ds_valid_full_d.close() pool = Pool(processes=8) cnt_results = pool.map(do_get_multiple_metric, pool_args) pool.close() pool.join() pool.terminate() for cnt_result in cnt_results: cnt_tps = cnt_tps + cnt_result[0] cnt_fps = cnt_fps + cnt_result[1] cnt_fns = cnt_fns + cnt_result[2] ious = np.divide(cnt_tps, cnt_tps + cnt_fps + cnt_fns) mIou = np.mean(ious) logger.info('validation metric: %.5f' % mIou) if best_miou_val < mIou: best_miou_val = mIou best_ckpt_saver.handle( mIou, self.sess, global_step) # save & keep best model # early rejection by mIou if early_rejection and e > 50 and best_miou_val < 0.15: break if early_rejection and e > 100 and best_miou_val < 0.25: break except KeyboardInterrupt: logger.info('interrupted. stop training, start to validate.') try: chk_path = get_best_checkpoint(model_path, select_maximum_value=True) if chk_path: logger.info( 'training is done. Start to evaluate the best model. %s' % chk_path) saver.restore(self.sess, chk_path) except Exception as e: logger.warning('error while loading the best model:' + str(e)) # show sample in train set : show_train > 0 kaggle_submit = KaggleSubmission(name) if validate_train in [True, 'True', 'true']: logger.info('Start to test on training set.... (may take a while)') train_metrics = [] for single_id in tqdm(CellImageDataManagerTrain.LIST[:20], desc='training set test'): result = self.single_id(None, None, single_id, set_type='train', show=False, verbose=False) image = result['image'] labels = result['labels'] instances = result['instances'] score = result['score'] score_desc = result['score_desc'] img_vis = Network.visualize(image, labels, instances, None) kaggle_submit.save_train_image(single_id, img_vis, score=score, score_desc=score_desc) train_metrics.append(score) logger.info('trainset validation ends. score=%.4f' % np.mean(train_metrics)) # show sample in valid set : show_valid > 0 if validate_valid in [True, 'True', 'true']: logger.info( 'Start to test on validation set.... 
(may take a while)') valid_metrics = [] for single_id in tqdm(CellImageDataManagerValid.LIST, desc='validation set test'): result = self.single_id(None, None, single_id, set_type='train', show=False, verbose=False) image = result['image'] labels = result['labels'] instances = result['instances'] score = result['score'] score_desc = result['score_desc'] img_vis = Network.visualize(image, labels, instances, None) kaggle_submit.save_valid_image(single_id, img_vis, score=score, score_desc=score_desc) kaggle_submit.valid_instances[single_id] = ( instances, result['instance_scores']) valid_metrics.append(score) logger.info('validation ends. score=%.4f' % np.mean(valid_metrics)) # show sample in test set logger.info('saving...') if save_result: for i, single_id in tqdm( enumerate(CellImageDataManagerTest.LIST), total=len(CellImageDataManagerTest.LIST)): # TODO try: result = self.single_id(None, None, single_id, 'test', False, False) except Exception as e: logger.warning('single_id=%s err=%s' % (single_id, str(e))) continue image = result['image'] instances = result['instances'] img_h, img_w = image.shape[:2] img_vis = Network.visualize(image, None, instances, None) # save to submit instances = Network.resize_instances(instances, (img_h, img_w)) kaggle_submit.save_image(single_id, img_vis) kaggle_submit.test_instances[single_id] = ( instances, result['instance_scores']) kaggle_submit.add_result(single_id, instances) # for single_id in tqdm(CellImageDataManagerTest.LIST[1120:], desc='test set evaluation'): # result = self.single_id(None, None, single_id, set_type='test', show=False, verbose=False) # temporal saving if i % 500 == 0: kaggle_submit.save() kaggle_submit.save() logger.info( 'done. epoch=%d best_loss_val=%.4f best_mIOU=%.4f name= %s' % (m_epoch, best_loss_val, best_miou_val, name)) return best_miou_val, name
def batch(frames, diameter, output=None, meta=None, processes=1,
          after_locate=None, **kwargs):
    """Locate Gaussian-like blobs of some approximate size in a set of images.

    Preprocess the image by performing a band pass and a threshold.
    Locate all peaks of brightness, characterize the neighborhoods of the peaks
    and take only those with given total brightness ("mass"). Finally,
    refine the positions of each peak.

    Parameters
    ----------
    frames : list (or iterable) of images
        The frames to process.
    diameter : odd integer or tuple of odd integers
        This may be a single number or a tuple giving the feature's
        extent in each dimension, useful when the dimensions do not have
        equal resolution (e.g. confocal microscopy). The tuple order is the
        same as the image shape, conventionally (z, y, x) or (y, x). The
        number(s) must be odd integers. When in doubt, round up.
    output : {None, trackpy.PandasHDFStore, SomeCustomClass}
        If None, return all results as one big DataFrame. Otherwise, pass
        results from each frame, one at a time, to the put() method
        of whatever class is specified here.
    meta : filepath or file object, optional
        If specified, information relevant to reproducing this batch is saved
        as a YAML file, a plain-text machine- and human-readable format.
        By default, this is None, and no file is saved.
    processes : integer or "auto", optional
        The number of processes to use in parallel. If <= 1, multiprocessing is
        disabled. If "auto", the number returned by `os.cpu_count()` is used.
    after_locate : function, optional
        Specify a custom function to apply to the detected features in each
        processed frame. It must accept the following arguments:

        - ``frame_no``: an integer specifying the number of the current frame.
        - ``features``: a DataFrame containing the detected features.

        Furthermore it must return a DataFrame like ``features``.
    **kwargs :
        Keyword arguments that are passed to the wrapped `trackpy.locate`.
        Refer to its docstring for further details.

    Returns
    -------
    DataFrame([x, y, mass, size, ecc, signal])
        where mass means total integrated brightness of the blob, size means
        the radius of gyration of its Gaussian-like profile, and ecc is its
        eccentricity (0 is circular).

    See Also
    --------
    locate : performs location on a single image

    Notes
    -----
    This is a convenience function that wraps `trackpy.locate` (see its
    docstring for further details) and allows batch processing of multiple
    frames, optionally in parallel by using multiprocessing.
    """
    if "raw_image" in kwargs:
        raise KeyError("the argument `raw_image` musn't be in `kwargs`, it is "
                       "provided internally by `frames`")
    # Add required keyword argument
    kwargs["diameter"] = diameter

    if meta:
        # Gather meta information and save as YAML in current directory.
        try:
            source = frames.filename
        except AttributeError:
            source = None
        meta_info = dict(
            timestamp=pd.datetime.utcnow().strftime('%Y-%m-%d-%H%M%S'),
            trackpy_version=trackpy.__version__,
            source=source,
            **kwargs
        )
        if isinstance(meta, six.string_types):
            with open(meta, 'w') as file_obj:
                record_meta(meta_info, file_obj)
        else:
            # Interpret meta to be a file handle.
            record_meta(meta_info, meta)

    # Prepare wrapped function for mapping to `frames`
    curried_locate = partial(locate, **kwargs)

    # Handle & validate argument `processes`
    if processes == "auto":
        processes = None  # Is replaced with `os.cpu_count` in Pool
    elif not isinstance(processes, six.integer_types):
        raise TypeError("`processes` must either be an integer or 'auto', "
                        "was type {}".format(type(processes)))

    if processes is None or processes > 1:
        # Use multiprocessing
        pool = Pool(processes=processes)
        map_func = pool.imap
    else:
        pool = None
        map_func = map

    if after_locate is None:
        def after_locate(frame_no, features):
            return features

    try:
        all_features = []
        for i, features in enumerate(map_func(curried_locate, frames)):
            image = frames[i]
            if hasattr(image, 'frame_no') and image.frame_no is not None:
                frame_no = image.frame_no
                # If this works, locate created a 'frame' column.
            else:
                frame_no = i
                features['frame'] = i  # just counting iterations
            features = after_locate(frame_no, features)

            logger.info("Frame %d: %d features", frame_no, len(features))
            if len(features) > 0:
                # Store if features were found
                if output is None:
                    all_features.append(features)
                else:
                    output.put(features)
    finally:
        if pool:
            # Ensure correct termination of Pool
            pool.terminate()

    if output is None:
        if len(all_features) > 0:
            return pandas_concat(all_features).reset_index(drop=True)
        else:  # return empty DataFrame
            warnings.warn("No maxima found in any frame.")
            return pd.DataFrame(columns=list(features.columns) + ['frame'])
    else:
        return output
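# Hedged usage sketch for `batch` above; the file pattern and parameter values
# are illustrative only and assume an iterable of frames such as one opened
# with pims.
# import pims
# frames = pims.open('images/*.png')
# features = batch(frames, diameter=11, processes='auto')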
def MP_sed_fit(spec_or_photo, igals, sim='lgal', noise='none', method='ifsps',
               model='emulator', nthreads=1, nwalkers=100, burnin=100,
               niter=1000, maxiter=200000, overwrite=False, postprocess=False,
               justplot=False):
    ''' multiprocessing wrapper for fit_spectra and fit_photometry. This does
    *not* parallelize the MCMC sampling of individual fits but rather runs
    multiple fits simultaneously.

    :param spec_or_photo:
        fit spectra or photometry

    :param igals:
        array/list of spectral_challenge galaxy indices

    :param noise:
        If 'none', fit noiseless spectra.
        If 'bgs1'...'bgs8', fit BGS-like spectra. (default: 'none')

    :param dust:
        If True, fit the spectra w/ dust using a model with dust.
        If False, fit the spectra w/o dust using a model without dust.
        (default: False)

    :param nthreads:
        Number of threads. If nthreads == 1, just runs fit_spectra
    '''
    args = igals  # galaxy indices

    kwargs = {
        'sim': sim,
        'noise': noise,
        'method': method,
        'model': model,
        'nwalkers': nwalkers,
        'burnin': burnin,
        'niter': niter,
        'maxiter': maxiter,
        'opt_maxiter': 1000,
        'overwrite': overwrite,
        'postprocess': postprocess,
        'justplot': justplot
    }
    if spec_or_photo == 'spec':
        fit_func = fit_spectra
    elif spec_or_photo == 'photo':
        fit_func = fit_photometry
    elif spec_or_photo == 'specphoto':
        fit_func = fit_spectrophotometry

    if nthreads > 1:
        pool = Pool(processes=nthreads)
        pool.map(partial(fit_func, **kwargs), args)
        pool.close()
        pool.terminate()
        pool.join()
    else:
        # single thread, loop over
        for igal in args:
            fit_func(igal, **kwargs)
    return None
class TaskManager(WithLogger):
    """A Task manager."""

    def __init__(
        self,
        nb_workers: int = 1,
        is_lazy_pool_start: bool = True,
        logger: Optional[logging.Logger] = None,
    ):
        """
        Initialize the task manager.

        :param nb_workers: the number of worker processes.
        :param is_lazy_pool_start: option to postpone pool creation till the first enqueue_task called.
        """
        WithLogger.__init__(self, logger)
        self._nb_workers = nb_workers
        self._is_lazy_pool_start = is_lazy_pool_start
        self._pool = None  # type: Optional[Pool]
        self._stopped = True
        self._lock = threading.Lock()

        self._task_enqueued_counter = 0
        self._results_by_task_id = {}  # type: Dict[int, Any]

    @property
    def is_started(self) -> bool:
        """
        Get started status of TaskManager.

        :return: bool
        """
        return not self._stopped

    @property
    def nb_workers(self) -> int:
        """
        Get the number of workers.

        :return: int
        """
        return self._nb_workers

    def enqueue_task(
        self, func: Callable, args: Sequence = (), kwds: Optional[Dict[str, Any]] = None
    ) -> int:
        """
        Enqueue a task with the executor.

        :param func: the callable instance to be enqueued
        :param args: the positional arguments to be passed to the function.
        :param kwds: the keyword arguments to be passed to the function.
        :return: the task id to get the result.
        :raises ValueError: if the task manager is not running.
        """
        with self._lock:
            if self._stopped:
                raise ValueError("Task manager not running.")

            if not self._pool and self._is_lazy_pool_start:
                self._start_pool()

            self._pool = cast(Pool, self._pool)
            task_id = self._task_enqueued_counter
            self._task_enqueued_counter += 1
            async_result = self._pool.apply_async(
                func, args=args, kwds=kwds if kwds is not None else {}
            )
            self._results_by_task_id[task_id] = async_result
            return task_id

    def get_task_result(self, task_id: int) -> AsyncResult:
        """
        Get the result from a task.

        :return: async result for task_id
        """
        task_result = self._results_by_task_id.get(
            task_id, None
        )  # type: Optional[AsyncResult]
        if task_result is None:
            raise ValueError("Task id {} not present.".format(task_id))
        return task_result

    def start(self) -> None:
        """
        Start the task manager.

        :return: None
        """
        with self._lock:
            if self._stopped is False:
                self.logger.debug("Task manager already running.")
            else:
                self.logger.debug("Start the task manager.")
                self._stopped = False
                if not self._is_lazy_pool_start:
                    self._start_pool()

    def stop(self) -> None:
        """
        Stop the task manager.

        :return: None
        """
        with self._lock:
            if self._stopped is True:
                self.logger.debug("Task manager already stopped.")
            else:
                self.logger.debug("Stop the task manager.")
                self._stopped = True
                self._stop_pool()

    def _start_pool(self) -> None:
        """
        Start internal task pool.

        Only one pool will be created.

        :return: None
        """
        if self._pool:
            self.logger.debug("Pool was already started!")
            return
        self._pool = Pool(self._nb_workers, initializer=init_worker)

    def _stop_pool(self) -> None:
        """
        Stop internal task pool.

        :return: None
        """
        if not self._pool:
            self.logger.debug("Pool is not started!")
            return
        self._pool = cast(Pool, self._pool)
        self._pool.terminate()
        self._pool.join()
        self._pool = None
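# Hedged usage sketch for TaskManager above; `add` is an illustrative,
# module-level (and therefore picklable) task function, not from the source.
def add(x, y):
    return x + y

# manager = TaskManager(nb_workers=2, is_lazy_pool_start=True)
# manager.start()                                   # marks the manager as running
# task_id = manager.enqueue_task(add, args=(1, 2))  # lazily starts the pool
# print(manager.get_task_result(task_id).get())     # -> 3
# manager.stop()                                    # terminates and joins the pool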
def run(cls, reset=False, num_threads=1): # -------------------- MCMC ---------------------------- cls.ndim = cls.mean_model.ndim + cls.gp_model.ndim nwalkers = cls.ndim * 4 nsteps = cls.settings['num_steps'] # By default we define the initial params from the priors set_params = True # Check backend status and iterations and compare to config. Reset if flag is true if cls.backend is not None: # If we have a backend file to fetch the sampler from filename = Path(cls.backend.filename) if filename.is_file(): # If we want to reset it, just clear the sampler and get init sample from prior if reset: log.info("Resetting the backend sampler") # backend.reset(nwalkers, ndim) # TODO: This line is not working as intended filename.unlink( ) # TODO: Remove the file. Hack because reset is not working # Else, init the backend and check if we have more iterations in the sampler than the desired ones else: # If we have, stop the code if cls.backend.iteration >= nsteps: log.warn( "Skipping run. Backend number of iterations greater than settings" ) return # Otherwise, calculate the remaining steps and continue from there else: nsteps = nsteps - cls.backend.iteration set_params = False # If the initial params are not taken from the backend init them from the prior init_params = None if set_params: # Sample priors to get initial values for all walkers if cls.gp_model is not None and cls.mean_model is not None: init_gp_params = cls.gp_model.sample_prior(num=nwalkers) init_mean_params = cls.mean_model.sample_prior(num=nwalkers) init_params = np.hstack([init_gp_params, init_mean_params]) elif cls.mean_model is not None: init_params = cls.mean_model.sample_prior(num=nwalkers) elif cls.gp_model is not None: init_params = cls.gp_model.sample_prior(num=nwalkers) # Single or Multiprocessing always uses pool as the init_worker can handle the system interrupts pool = Pool(num_threads, cls.init_worker) sampler = emcee.EnsembleSampler(nwalkers, cls.ndim, cls.lnlike_func, pool=None, backend=cls.backend) # Run mcmc log.info( f"Running MCMC on {num_threads} processes for {nsteps} iterations") try: sampler.run_mcmc(init_params, nsteps, progress=True) except KeyboardInterrupt: log.warn(f"Emcee was stopped by user input") pool.terminate() pool.join() sys.exit() # TODO: get_chain is the new method but it has different dims cls.chain = sampler.chain.copy() cls.log_prob = sampler.get_log_prob().copy() # Save data # TODO: Need to update posterior name and then update filenames if cls.settings['save']: log.info(f"Saving chain and log_prob") with open(cls.datadir / "chain.pk", "wb") as f: pickle.dump(sampler.chain, f, protocol=-1) with open(cls.datadir / "posterior.pk", "wb") as f: pickle.dump(sampler.get_log_prob(), f, protocol=-1)
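Note that run() above constructs a Pool but passes pool=None to the sampler, so the worker processes are not used for the likelihood evaluations. A minimal sketch of wiring a pool into emcee, assuming emcee 3 and a picklable, module-level log-probability; log_prob is an illustrative stand-in for cls.lnlike_func.

import numpy as np
import emcee
from multiprocessing import Pool

def log_prob(theta):
    # Toy Gaussian log-probability standing in for the real likelihood.
    return -0.5 * np.sum(theta ** 2)

if __name__ == '__main__':
    ndim, nwalkers, nsteps = 3, 12, 200
    p0 = np.random.randn(nwalkers, ndim)
    with Pool(processes=4) as pool:
        sampler = emcee.EnsembleSampler(nwalkers, ndim, log_prob, pool=pool)
        sampler.run_mcmc(p0, nsteps, progress=False)
    chain = sampler.get_chain()    # shape (nsteps, nwalkers, ndim)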
def benchmark_ensembles(print_to_file=False): # load all possible models and datasets datasets = __load_datasets() models = Classifier.all_models() voting_systems = VotingSystem.all_voting_systems() ensemble_types = Ensemble.all_ensemble_types() # output format if print_to_file == True: output_string_format = "{0},{1},{8},{2},{3},{4},{5},{6},{7}" else: output_string_format = "{0: <15}\t{1: <28}\t{8: <15}{2: <13}\t{3: <18}\t{4: <28}\t{5: <22}\t{6: <15}\t{7: <22}" # print output header print(output_string_format .format('score','dataset','dataset_size','number_of_classes','model','ensemble','ensemble_size','voting_system', 'feature_count')) ensamble_sizes = [1, 2, 4, 6, 8, 12, 15, 20, 25, 30, 40, 50, 60, 70, 100][::-1] process_jobs_args = [] l = Lock() for classifiers_in_ensamble in ensamble_sizes: for dataset in datasets: # load mock_classifier X, Y = dataset.load() # dataset info dataset_size = X.shape[0] classes_count = np.unique(Y).size # Split dataset into kfold datasets kfold_labels = __kfold_labels(Y) # Check if every class is in every set after kfold for train_index, test_index in kfold_labels: Y_train, Y_test = Y[train_index], Y[test_index] assert np.unique(Y_train).size == classes_count # evaluate models for model in models: # score enembles based on current model for voting_system in voting_systems: for ensemble_type in ensemble_types: feature_labels = None if ensemble_type == RandomSubspace: feature_labels = __load_feature_labels(dataset.path()) # create ensemble ensemble = ensemble_type(voting_system, type(model), classifiers_in_ensamble) job_args = ensemble, X, Y, kfold_labels, dataset, model, output_string_format, classifiers_in_ensamble, voting_system.name(), feature_labels process_jobs_args.append(job_args) pool = Pool(initializer=__init_proc, initargs=(l, ), processes=None) pool.map(__model_score_job, process_jobs_args) pool.terminate() pool = None
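The call above shares a Lock with the workers through initializer/initargs. A minimal standalone sketch of that pattern; the function names are illustrative, not part of the benchmark code.

from multiprocessing import Pool, Lock

_lock = None

def _init_proc(lock):
    # Stash the shared lock in a module-level global inside each worker.
    global _lock
    _lock = lock

def _score_job(i):
    with _lock:                 # serialize output across workers
        print('scored job', i)
    return i * i

if __name__ == '__main__':
    lock = Lock()
    pool = Pool(processes=4, initializer=_init_proc, initargs=(lock,))
    results = pool.map(_score_job, range(10))
    pool.terminate()            # mirrors the snippet above; close() + join() is the graceful form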
class Executor(QueueListener):
    '''smartbus JSON RPC request executor

    Executes RPC requests using a process pool.

    :param queue_maxsize: maximum size of the task queue

        Defaults to 0, meaning unlimited.

    :param pool_processes: maximum number of processes in the executor pool

        Defaults to None, meaning the number of CPU cores is used as the maximum.

    :param pool_maxtasksperchild: maximum number of tasks a pool worker may execute

        Defaults to None, meaning unlimited. When exceeded, the child process is
        restarted. Only effective for the process pool model.

    After a smartbus request is received, data must be put on this queue in the
    format ``client, pack_info, txt``: the smartbus client instance that received
    the data, the packet's extra information, and the data text.

    Once data is received, an instance of this class parses it as JSON-RPC,
    executes the JSON RPC request, and finally returns the result through the
    smartbus client. The returned data is a string conforming to the JSON RPC
    standard.
    '''

    def __init__(self, queue_maxsize=0, pool_processes=None, pool_maxtasksperchild=None):
        self._pool_kdargs = dict(
            processes=pool_processes,
            initializer=_subproc_init,
            initargs=(
                globalvars.prog_args,
                globalvars.main_logging_queue,
                logging.root.level
            ),
            maxtasksperchild=pool_maxtasksperchild
        )
        self._pool = None
        if PY3K:
            super().__init__(queue.Queue(queue_maxsize))
        else:
            super(Executor, self).__init__(queue.Queue(queue_maxsize))
        if PY3K:
            self._logger = logging.getLogger(self.__class__.__qualname__)
        else:
            self._logger = logging.getLogger(self.__class__.__name__)

    def put(self, client, pack_info, txt):
        self.queue.put((client, pack_info, txt, time.time()))

    def start(self):
        self._logger.info('start() >>>. pool arguments: %s', self._pool_kdargs)
        self._pool = Pool(**self._pool_kdargs)
        if PY3K:
            super().start()
        else:
            super(Executor, self).start()
        self._logger.info('start() <<<')

    def stop(self):
        self._logger.info('stop() >>>')
        super().stop()
        self._logger.debug('pool.terminate() ...')
        self._pool.terminate()
        self._logger.debug('pool.join() ...')
        self._pool.join()
        self._pool = None
        self._logger.info('stop() <<<')

    def handle(self, record):
        if globalvars.prog_args.verbose:
            self._logger.debug('handle(record=%s)', record)
        request = None
        try:
            client, pack_info, txt, begin_time = record
            try:
                request, _, _ = jsonrpc.parse(txt)
            except Exception as e:
                if globalvars.prog_args.verbose:
                    self._logger.error(
                        'JSONRPC parse error: %s %s', type(e), e)
            if request:
                _id = request.get('id')
                _method = request['method']
                _args = request['args']
                _kwargs = request['kwargs']

                def _callback(result):
                    try:
                        if isinstance(result, Exception):
                            # If the result is an exception, return an error
                            # response and re-raise the exception
                            error = result
                            if _id:  # an RPC ID means an error response must be sent back
                                if isinstance(error, jsonrpc.Error):
                                    # handle jsonrpc.Error exceptions
                                    response = error.to_dict()
                                    response['id'] = _id
                                else:
                                    # handle any other exception
                                    response = {
                                        'jsonrpc': jsonrpc.jsonrpc_version,
                                        'id': _id,
                                        'error': {
                                            'code': -32500,
                                            'message': '{} {}'.format(type(error), error),
                                            'data': None,
                                        }
                                    }
                                data = json.dumps(response)
                                client.sendNotify(pack_info.srcUnitId, pack_info.srcUnitClientId, None, _id, 0, settings.SMARTBUS_NOTIFY_TTL, data)
                            raise error  # re-raise the exception
                        if globalvars.prog_args.verbose:
                            self._logger.debug(
                                'call back:\n result=%s %s\n duration=%s\n request=%s',
                                type(result), result, time.time() - begin_time, record
                            )
                        if _id:  # an RPC ID means the result must be sent back
                            response = {
                                'jsonrpc': jsonrpc.jsonrpc_version,
                                'id': _id,
                                'result': result,
                            }
                            data = json.dumps(response)
                            client.sendNotify(pack_info.srcUnitId, pack_info.srcUnitClientId, None, _id, 0, settings.SMARTBUS_NOTIFY_TTL, data)
                    except Exception as e:
                        if globalvars.prog_args.verbose:
                            self._logger.exception(
                                'error occurred in handle._callback():\n request=%s', record)
                        else:
                            self._logger.error(
                                'error occurred in handle._callback():\n error: %s %s', type(e), e)
                    pass
                # end of _callback

                def _error_callback(error):
                    try:
                        # Python3.4 issue20980: In multiprocessing.pool,
                        # ExceptionWithTraceback should derive from Exception
                        if not isinstance(error, Exception):
                            error = error.exc
                        if globalvars.prog_args.verbose:
                            self._logger.exception(
                                'error callback:\n duration=%s\n request=%s:\n %s %s',
                                time.time() - begin_time, record, type(error), error
                            )
                        else:
                            self._logger.error(
                                'error callback:\n %s %s\n duration=%s\n request=%s\n %s %s',
                                type(error), error, time.time() - begin_time, record, type(error), error
                            )
                        if _id:  # an RPC ID means an error response must be sent back
                            if isinstance(error, jsonrpc.Error):
                                # JSONRPC exceptions
                                response = error.to_dict()
                                response['id'] = _id
                            else:
                                # other exceptions
                                response = {
                                    'jsonrpc': jsonrpc.jsonrpc_version,
                                    'id': _id,
                                    'error': {
                                        'code': -32500,
                                        'message': '{} {}'.format(type(error), error),
                                        'data': None,
                                    }
                                }
                            data = json.dumps(response)
                            client.sendNotify(pack_info.srcUnitId, pack_info.srcUnitClientId, None, _id, 0, settings.SMARTBUS_NOTIFY_TTL, data)
                    except Exception as e:
                        if globalvars.prog_args.verbose:
                            self._logger.exception(
                                'error occurred in handle._error_callback():\n request=%s', record)
                        else:
                            self._logger.error(
                                'error occurred in handle._error_callback():\n error=%s', e)
                    pass
                # end of _error_callback

                if globalvars.prog_args.verbose:
                    self._logger.debug('pool.apply_async(%s, %s, %s)',
                                       _method, _args, _kwargs)
                if sys.version_info[0] < 3:
                    self._pool.apply_async(
                        func=partial(_poolfunc, _method),
                        args=(_args, _kwargs),
                        callback=_callback
                    )
                else:
                    self._pool.apply_async(
                        func=partial(_poolfunc, _method),
                        args=(_args, _kwargs),
                        callback=_callback,
                        error_callback=_error_callback
                    )
        except Exception as e:
            if globalvars.prog_args.verbose:
                self._logger.exception(
                    'error occurred in handle():\n request=%s', record)
            else:
                self._logger.error(
                    'error occurred in handle():\n error: %s %s', type(e), e)
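The Executor above relies on apply_async callbacks; error_callback exists only on Python 3, which is why the class branches on sys.version_info. A minimal standalone sketch of the callback/error_callback pattern; the function names are illustrative.

from multiprocessing import Pool

def rpc_call(x):
    if x < 0:
        raise ValueError('negative input')
    return x * 2

def on_result(value):
    print('callback got', value)

def on_error(exc):
    print('error callback got', type(exc).__name__, exc)

if __name__ == '__main__':
    pool = Pool(processes=2)
    pool.apply_async(rpc_call, (21,), callback=on_result, error_callback=on_error)
    pool.apply_async(rpc_call, (-1,), callback=on_result, error_callback=on_error)
    pool.close()
    pool.join()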
fmcmc = os.path.join('/global/cscratch1/sd/chahah/provabgs/raga/', sample.replace('.fits', '.%i.hdf5' % igal)) # run MCMC zeus_chain = desi_mcmc.run( wave_obs=w_obs, flux_obs=f_obs, flux_ivar_obs=i_obs, bands='desi', # g, r, z photo_obs=photo_flux_i, photo_ivar_obs=photo_ivar_i, zred=zred_i, vdisp=0., sampler='zeus', nwalkers=30, burnin=0, opt_maxiter=2000, niter=niter, progress=True, debug=True, writeout=fmcmc, overwrite=True) return None pool = Pool(processes=n_cpu) pool.map(partial(run_mcmc), np.arange(i0, i1 + 1)) pool.close() pool.terminate() pool.join()
class TaskPool(object): """Process Pool for processing tasks in parallel. :param limit: see :attr:`limit` attribute. :param logger: see :attr:`logger` attribute. .. attribute:: limit The number of processes that can run simultaneously. .. attribute:: logger The logger used for debugging. """ def __init__(self, limit, logger=None): self.limit = limit self.logger = logger or multiprocessing.get_logger() self._pool = None self._processes = None def start(self): """Run the task pool. Will pre-fork all workers so they're ready to accept tasks. """ self._processes = {} self._pool = Pool(processes=self.limit) def stop(self): """Terminate the pool.""" self._pool.terminate() self._processes = {} self._pool = None def apply_async(self, target, args=None, kwargs=None, callbacks=None, errbacks=None, on_ack=None, meta=None): """Equivalent of the :func:``apply`` built-in function. All ``callbacks`` and ``errbacks`` should complete immediately since otherwise the thread which handles the result will get blocked. """ args = args or [] kwargs = kwargs or {} callbacks = callbacks or [] errbacks = errbacks or [] meta = meta or {} tid = gen_unique_id() on_return = curry(self.on_return, tid, callbacks, errbacks, on_ack, meta) result = self._pool.apply_async(target, args, kwargs, callback=on_return) self._processes[tid] = [result, callbacks, errbacks, meta] return result def on_return(self, tid, callbacks, errbacks, on_ack, meta, ret_value): """What to do when the process returns.""" # Acknowledge the task as being processed. if on_ack: on_ack() try: del(self._processes[tid]) except KeyError: pass else: self.on_ready(callbacks, errbacks, meta, ret_value) def full(self): """Is the pool full? :returns: ``True`` if the maximum number of concurrent processes has been reached. """ return len(self._processes.values()) >= self.limit def get_worker_pids(self): """Returns the process id's of all the pool workers.""" return [process.pid for process in self._pool._pool] def on_ready(self, callbacks, errbacks, meta, ret_value): """What to do when a worker task is ready and its return value has been collected.""" if isinstance(ret_value, ExceptionInfo): if isinstance(ret_value.exception, ( SystemExit, KeyboardInterrupt)): raise ret_value.exception for errback in errbacks: errback(ret_value, meta) else: for callback in callbacks: callback(ret_value, meta)
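A hypothetical usage sketch of TaskPool, assuming it and its helpers (gen_unique_id, curry) are importable; the callback signature (ret_value, meta) follows on_ready() above.

def work(x):
    return x + 1

def on_done(ret_value, meta):
    # Matches the (ret_value, meta) signature used by on_ready() above.
    print('task finished with', ret_value, meta)

if __name__ == '__main__':
    pool = TaskPool(limit=2)
    pool.start()
    result = pool.apply_async(work, args=[41], callbacks=[on_done])
    result.wait()      # the underlying multiprocessing AsyncResult
    pool.stop()        # terminates the internal pool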
def main(force_reanalyze=False, include_hidden=False, dry_run=False, gain_type='auto', jobs=default_job_count(), quiet=False, verbose=False, *music_directories ): """Add replaygain tags to your music files.""" if quiet: logging.basicConfig(level=logging.WARN) elif verbose: logging.basicConfig(level=logging.DEBUG) else: logging.basicConfig(level=logging.INFO) # Some pesky functions used below will catch KeyboardInterrupts # inappropriately, so install an alternate handler that bypasses # KeyboardInterrupt instead. def signal_handler(sig, frame): print "Canceled." os.kill(os.getpid(), signal.SIGTERM) original_handler = signal.signal(signal.SIGINT, signal_handler) track_class = RGTrack if dry_run: logging.warn('This script is running in "dry run" mode, so no files will actually be modified.') track_class = RGTrackDryRun if len(music_directories) == 0: logging.error("You did not specify any music directories or files. Exiting.") sys.exit(1) logging.info("Searching for music files in the following directories:\n%s", "\n".join(music_directories),) tracks = [ track_class(f) for f in get_all_music_files(music_directories, ignore_hidden=(not include_hidden)) ] # Filter out tracks for which we can't get the length for t in tracks[:]: try: len(t) except Exception: logging.error("Track %s appears to be invalid. Skipping.", t.filename) tracks.remove(t) if len(tracks) == 0: logging.error("Failed to find any tracks in the directories you specified. Exiting.") sys.exit(1) track_sets = RGTrackSet.MakeTrackSets(tracks) # Remove the earlier bypass of KeyboardInterrupt signal.signal(signal.SIGINT, original_handler) logging.info("Beginning analysis") handler = TrackSetHandler(force=force_reanalyze, gain_type=gain_type) # For display purposes, calculate how much granularity is required # to show visible progress at each update total_length = sum(len(ts) for ts in track_sets) min_step = min(len(ts) for ts in track_sets) places_past_decimal = max(0,int(math.ceil(-math.log10(min_step * 100.0 / total_length)))) update_string = '%.' + str(places_past_decimal) + 'f%% done' import gst pool = None try: if jobs == 1: # Sequential handled_track_sets = imap(handler, track_sets) else: # Parallel pool = Pool(jobs) handled_track_sets = pool.imap_unordered(handler,track_sets) processed_length = 0 percent_done = 0 for ts in handled_track_sets: processed_length = processed_length + len(ts) percent_done = 100.0 * processed_length / total_length logging.info(update_string, percent_done) logging.info("Analysis complete.") except KeyboardInterrupt: if pool is not None: logging.debug("Terminating process pool") pool.terminate() pool = None raise finally: if pool is not None: logging.debug("Closing transcode process pool") pool.close() if dry_run: logging.warn('This script ran in "dry run" mode, so no files were actually modified.') pass
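A minimal sketch of the imap_unordered-with-progress pattern used above, assuming a picklable handler; `analyze` is an illustrative stand-in for TrackSetHandler.

from multiprocessing import Pool

def analyze(track_set):
    # Stand-in for the real per-track-set analysis.
    return track_set

if __name__ == '__main__':
    track_sets = list(range(20))
    pool = Pool(4)
    try:
        done = 0
        for _ in pool.imap_unordered(analyze, track_sets):
            done += 1
            print('%.0f%% done' % (100.0 * done / len(track_sets)))
        pool.close()
        pool.join()
    except KeyboardInterrupt:
        pool.terminate()       # abandon remaining work on Ctrl-C
        pool.join()
        raise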
def main(): num_args = 2 args = sys.argv[1:] if len(args) != num_args: print( "Usage: python3 launch_clients.py <client-scenario.json> <client_keys_to_run>\n" "Example: python3 launch_clients.py scenario1.json clients1,clients2,clients3" ) exit(1) client_scenario_filename = args[0] groups = args[1].split(",") with open(f"{CLI_SCENARIOS_PATH}/{client_scenario_filename}", 'r') as cli_scenario_fp: cli_scenario = json.load(cli_scenario_fp) print(f"Launching scenario: {client_scenario_filename}") setup_client_node_dirs(cli_scenario, groups) print("Finished setting up dirs") ingress = get_ingress() print(f"Got ingress {ingress}") env_vars = { "AUTHENTICATION_URL": ingress, "BATTLES_URL": ingress, "GYM_URL": ingress, "LOCATION_URL": ingress, "MICROTRANSACTIONS_URL": ingress, "NOTIFICATIONS_URL": ingress, "STORE_URL": ingress, "TRADES_URL": ingress, "TRAINERS_URL": ingress, "INGRESS_URL": ingress, "NOVAPOKEMON": NOVAPOKEMON_DIR, "LOCATION_TAGS": f'{CLI_DIR}/location_tags.json', "DELAYS_CONFIG": f'{CLI_DIR}/delays_config.json', "CLIENT_DELAYS": f'{CLI_DIR}/client_delays.json', "CELLS_TO_REGION": f'{CLI_DIR}/cells_to_region.json', "REGIONS_TO_AREA": f'{CLI_DIR}/regions_to_area.json', "CONFIGS": f'{CLI_DIR}/configs.json', "LAT": f'{CLI_DIR}/lats.txt', "LOCATIONS": f'{CLI_DIR}/locations.json' } print(f'Created env vars!') pool = Pool(processes=os.cpu_count()) print("Will launch clients...") async_waits = [] for cli_job_name, cli_job in cli_scenario.items(): if cli_job_name not in groups: print(f"{cli_job_name} not in list") continue t = time.localtime() current_time = time.strftime("%H:%M:%S", t) print(f'Launching {cli_job_name} at {current_time}...', flush=True) async_waits.append( pool.apply_async(launch_cli_job, (cli_job_name, cli_job, env_vars))) for w in async_waits: cli_job_name = w.get() print(f'Finished multiclient for {cli_job_name}') pool.terminate() pool.close()
class ProcessPoolStrategy(ParallelStrategy, _PoolRunnableStrategy, _Resultable): _Processors_Pool: Pool = None _Processors_List: List[Union[ApplyResult, AsyncResult]] = None def __init__(self, pool_size: int): super().__init__(pool_size=pool_size) def initialization(self, queue_tasks: Optional[Union[_BaseQueueTask, _BaseList]] = None, features: Optional[Union[_BaseFeatureAdapterFactory, _BaseList]] = None, *args, **kwargs) -> None: super(ProcessPoolStrategy, self).initialization(queue_tasks=queue_tasks, features=features, *args, **kwargs) # Activate multiprocessing.managers.BaseManager server activate_manager_server() # Initialize and build the Processes Pool. __pool_initializer: Callable = kwargs.get("pool_initializer", None) __pool_initargs: IterableType = kwargs.get("pool_initargs", None) self._Processors_Pool = Pool(processes=self.pool_size, initializer=__pool_initializer, initargs=__pool_initargs) def apply(self, tasks_size: int, function: Callable, args: Tuple = (), kwargs: Dict = {}) -> None: self.reset_result() __process_running_result = None try: __process_running_result = [ self._Processors_Pool.apply(func=function, args=args, kwds=kwargs) for _ in range(tasks_size) ] __exception = None __process_run_successful = True except Exception as e: __exception = e __process_run_successful = False # Save Running result state and Running result value as dict self._result_saving(successful=__process_run_successful, result=__process_running_result, exception=None) def async_apply(self, tasks_size: int, function: Callable, args: Tuple = (), kwargs: Dict = {}, callback: Callable = None, error_callback: Callable = None) -> None: self.reset_result() self._Processors_List = [ self._Processors_Pool.apply_async(func=function, args=args, kwds=kwargs, callback=callback, error_callback=error_callback) for _ in range(tasks_size) ] for process in self._Processors_List: _process_running_result = None _process_run_successful = None _exception = None try: _process_running_result = process.get() _process_run_successful = process.successful() except Exception as e: _exception = e _process_run_successful = False # Save Running result state and Running result value as dict self._result_saving(successful=_process_run_successful, result=_process_running_result, exception=_exception) def apply_with_iter(self, functions_iter: List[Callable], args_iter: List[Tuple] = None, kwargs_iter: List[Dict] = None) -> None: self.reset_result() __process_running_result = None if args_iter is None: args_iter = [() for _ in functions_iter] if kwargs_iter is None: kwargs_iter = [{} for _ in functions_iter] try: __process_running_result = [ self._Processors_Pool.apply(func=_func, args=_args, kwds=_kwargs) for _func, _args, _kwargs in zip(functions_iter, args_iter, kwargs_iter) ] __exception = None __process_run_successful = True except Exception as e: __exception = e __process_run_successful = False # Save Running result state and Running result value as dict self._result_saving(successful=__process_run_successful, result=__process_running_result, exception=None) def async_apply_with_iter( self, functions_iter: List[Callable], args_iter: List[Tuple] = None, kwargs_iter: List[Dict] = None, callback_iter: List[Callable] = None, error_callback_iter: List[Callable] = None) -> None: self.reset_result() if args_iter is None: args_iter = [() for _ in functions_iter] if kwargs_iter is None: kwargs_iter = [{} for _ in functions_iter] if callback_iter is None: callback_iter = [None for _ in functions_iter] if error_callback_iter is None: 
error_callback_iter = [None for _ in functions_iter] self._Processors_List = [ self._Processors_Pool.apply_async(func=_func, args=_args, kwds=_kwargs, callback=_callback, error_callback=_error_callback) for _func, _args, _kwargs, _callback, _error_callback in zip( functions_iter, args_iter, kwargs_iter, callback_iter, error_callback_iter) ] for process in self._Processors_List: _process_running_result = None _process_run_successful = None _exception = None try: _process_running_result = process.get() _process_run_successful = process.successful() except Exception as e: _exception = e _process_run_successful = False # Save Running result state and Running result value as dict self._result_saving(successful=_process_run_successful, result=_process_running_result, exception=_exception) def map(self, function: Callable, args_iter: IterableType = (), chunksize: int = None) -> None: self.reset_result() _process_running_result = None try: _process_running_result = self._Processors_Pool.map( func=function, iterable=args_iter, chunksize=chunksize) _exception = None _process_run_successful = True except Exception as e: _exception = e _process_run_successful = False # Save Running result state and Running result value as dict for __result in (_process_running_result or []): self._result_saving(successful=_process_run_successful, result=__result, exception=_exception) def async_map(self, function: Callable, args_iter: IterableType = (), chunksize: int = None, callback: Callable = None, error_callback: Callable = None) -> None: self.reset_result() _process_running_result = None _exception = None _map_result = self._Processors_Pool.map_async( func=function, iterable=args_iter, chunksize=chunksize, callback=callback, error_callback=error_callback) try: _process_running_result = _map_result.get() _process_run_successful = _map_result.successful() except Exception as e: _exception = e _process_run_successful = False # Save Running result state and Running result value as dict for __result in (_process_running_result or []): self._result_saving(successful=_process_run_successful, result=__result, exception=_exception) def map_by_args(self, function: Callable, args_iter: IterableType[IterableType] = (), chunksize: int = None) -> None: self.reset_result() _process_running_result = None try: _process_running_result = self._Processors_Pool.starmap( func=function, iterable=args_iter, chunksize=chunksize) _exception = None _process_run_successful = True except Exception as e: _exception = e _process_run_successful = False # Save Running result state and Running result value as dict for __result in (_process_running_result or []): self._result_saving(successful=_process_run_successful, result=__result, exception=_exception) def async_map_by_args(self, function: Callable, args_iter: IterableType[IterableType] = (), chunksize: int = None, callback: Callable = None, error_callback: Callable = None) -> None: self.reset_result() _map_result = self._Processors_Pool.starmap_async( func=function, iterable=args_iter, chunksize=chunksize, callback=callback, error_callback=error_callback) _process_running_result = _map_result.get() _process_run_successful = _map_result.successful() # Save Running result state and Running result value as dict for __result in (_process_running_result or []): self._result_saving(successful=_process_run_successful, result=__result, exception=None) def imap(self, function: Callable, args_iter: IterableType = (), chunksize: int = 1) -> None: self.reset_result() _process_running_result = None try: 
imap_running_result = self._Processors_Pool.imap( func=function, iterable=args_iter, chunksize=chunksize) _process_running_result = [ result for result in imap_running_result ] _exception = None _process_run_successful = True except Exception as e: _exception = e _process_run_successful = False # Save Running result state and Running result value as dict for __result in (_process_running_result or []): self._result_saving(successful=_process_run_successful, result=__result, exception=_exception) def imap_unordered(self, function: Callable, args_iter: IterableType = (), chunksize: int = 1) -> None: self.reset_result() _process_running_result = None try: imap_running_result = self._Processors_Pool.imap_unordered( func=function, iterable=args_iter, chunksize=chunksize) _process_running_result = [ result for result in imap_running_result ] _exception = None _process_run_successful = True except Exception as e: _exception = e _process_run_successful = False # Save Running result state and Running result value as dict for __result in (_process_running_result or []): self._result_saving(successful=_process_run_successful, result=__result, exception=_exception) def _result_saving(self, successful: bool, result: List, exception: Exception) -> None: _process_result = { "successful": successful, "result": result, "exception": exception } self._Processors_Running_Result.append(_process_result) def close(self) -> None: self._Processors_Pool.close() self._Processors_Pool.join() def terminal(self) -> None: self._Processors_Pool.terminate() def get_result(self) -> List[_ProcessPoolResult]: return self.result() def _saving_process(self) -> List[_ProcessPoolResult]: _pool_results = [] for __result in self._Processors_Running_Result: _pool_result = _ProcessPoolResult() _pool_result.is_successful = __result["successful"] _pool_result.data = __result["result"] _pool_results.append(_pool_result) return _pool_results
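The map_by_args / async_map_by_args methods above delegate to Pool.starmap, while map delegates to Pool.map. For reference, a minimal standalone sketch of that distinction, independent of this class.

from multiprocessing import Pool

def add(a, b):
    return a + b

def square(x):
    return x * x

if __name__ == '__main__':
    with Pool(processes=2) as pool:
        print(pool.map(square, [1, 2, 3]))           # one argument per task -> [1, 4, 9]
        print(pool.starmap(add, [(1, 2), (3, 4)]))   # argument tuples are unpacked -> [3, 7]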
def main(lib_name, in_file=sys.stdin, out_file=sys.stdout, cached_only=False, just=None, num_workers=None, read_cache=True, write_cache=True): def cb_ready0(result, progress): """Informer about the progress of the process.""" name = result[0] url = result[1] output = result[2] cached = result[3] if output: textout = "%s" % (name) elif url != None: textout = "Failed to parse output for %s in %s.\n" % (name, url) else: textout = None if cached_only else "Could not locate %s documentation.\n" % (name) progress[0] += 1 percent = float(progress[0]) / float(progress[1]) * 100.0 if textout == None: return if not out_file.isatty(): max_cols = 75 sys.stderr.write("\r%s" % (' ' * max_cols)) #progbar = "\r[%.2d%%] " % (int(percent)) progbar = "\r[%d/%d] " % (progress[0], progress[1]) max_cols = max_cols - len(progbar) if len(textout) > max_cols: if textout.endswith('\n'): textout = textout[:max_cols-4] + '...\n' else: textout = textout[:max_cols-3] + '...' else: #progbar = "[%.2d%%] " % int(percent) progbar = "[%d/%d] " % (progress[0], progress[1]) textout = textout + '\n' if not textout.endswith('\n') else textout sys.stderr.write("%s%s" % (progbar, textout)) sys.stderr.flush() def write_csv(f): """Process and write the csv data into the file f.""" progress = [0, len(funcs)] jobs = [pool.apply_async(msdn_get_doc, (name, msdn_cache, cached_only), callback=lambda r: cb_ready0(r, progress)) for name in funcs] f.write("Name,Prototype,Bad Return,Errors,SDK Client,SDK Server,SDK Phone,URL\n") for job in jobs: name, url, output, cached = job.get() html = output and output.html or None prototype = output and output.prototype or "" errors = output and " ".join(output.errors) or "" bad_ret = output and output.bad_ret or "" sdkclient = (output and output.sdks[0]) and output.sdks[0] or "" sdkserver = (output and output.sdks[1]) and output.sdks[1] or "" sdkphone = (output and output.sdks[2]) and output.sdks[2] or "" f.write('%s,"%s","%s","%s","%s","%s","%s","%s"\n' % (name, prototype, bad_ret, errors, sdkclient, sdkserver, sdkphone, url)) #f.write('%s,"%s","%s","%s","%s","%s","%s","%s"\n' % (name, prototype, url, sdkclient, sdkserver, sdkphone, bad_ret, errors)) if html != None and not cached and write_cache: msdn_cache.write(lib_name, name, html) if num_workers == None: # IO bound operations, so 10 times the cpu count isn't a problem. num_workers = min(40, 10 * multiprocessing.cpu_count()) pool = Pool(processes=num_workers, initializer=_init_worker) msdn_cache = read_cache and MsdnCache.open(MSDN_CACHE_PATH) or MsdnCache() funcs = find_wide_functions(read_def(in_file)) if just != None: funcs = [fc for fc in funcs if remove_wide(fc) in just] try: write_csv(out_file) except KeyboardInterrupt: pool.terminate() pool.join() # because of our progress bar sys.stderr.write("\n")
try: print('press CTRL-c to stop generating samples') it = pool.imap_unordered(f, cycle(sim_list)) while 1: sim, result = it.next(timeout=SIMULATION_TIMEOUT) completed.append(sim) sys.stdout.write('.') for p, wins in result.items(): container[p].append( (sim, wins) ) except KeyboardInterrupt: pool.close() print('stopping all simulations...') finally: pool.terminate() pool.join() c = dict(Counter(completed).most_common()) for idx, sim in enumerate([s.__name__ for s in sim_list]): print('Test: {0}, Iterations {1}, Heuristic: {2}'.format(idx, c[sim], sim)) for pair in pairs: print('') print(pair) f, p = f_oneway(*[[i[1] for i in container[pair] if i[0] == sim] for sim in [s.__name__ for s in sim_list]]) print('F-stat: {0} at sig {1}: {2}'.format(str(round(f, 3)).ljust(7), str(round(p, 3)).ljust(7), ['NULL','REJECT'][p <= .05])) if p <= .05:
def run(name):
    print("Child process %s started, PID: %d" % (name, os.getpid()))
    start = time()
    sleep(random.choice([1, 2, 3, 4]))
    end = time()
    print("Child process %s finished, PID: %d. Took %.2f seconds" % (name, os.getpid(), end - start))


if __name__ == "__main__":
    print("Parent process started")
    # Create a pool of processes; its size is the number of tasks that can run
    # at the same time. By default it equals the number of CPU cores.
    p = Pool(4)
    for i in range(10):
        # Create a task and hand it to the pool to manage
        p.apply_async(run, args=(i, ))
    # When using a process pool, close() must be called before join(),
    # and no new tasks may be submitted to the pool after close()
    p.close()
    # join() on the pool waits for every child process in the pool to finish
    # before the parent process ends
    p.join()
    print("Parent process finished.")
    p.terminate()

    # close(): when using a process pool, close() must be called before join(),
    #          and no new tasks may be submitted after close().
    # join():  waits for all child processes in the pool to finish before the
    #          parent process exits.
    # terminate(): once this line runs, the pool is stopped immediately,
    #              whether or not its tasks have finished.
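For reference, a minimal sketch contrasting the graceful close()/join() shutdown described in the comments above with an abrupt terminate().

from multiprocessing import Pool
import time

def slow(x):
    time.sleep(0.2)
    return x

if __name__ == '__main__':
    # Graceful shutdown: close() stops new submissions, join() waits for all tasks.
    pool = Pool(2)
    async_res = pool.map_async(slow, range(4))
    pool.close()
    pool.join()
    print(async_res.get())     # [0, 1, 2, 3]

    # Abrupt shutdown: terminate() stops the worker processes without waiting,
    # so pending results are discarded; join() then just reaps the workers.
    pool = Pool(2)
    pool.map_async(slow, range(4))
    time.sleep(0.1)            # let the tasks start before tearing the pool down
    pool.terminate()
    pool.join()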
class DepthFrameCompressor(object): """ Asynchronous compression pipeline for depth frames. *kinect* is a :py:class:`streamkinect2.mock.MockKinect`-like object. Depth frames emitted by :py:meth:`on_depth_frame` will be compressed with frame-drop if the compressor becomes overloaded. If *io_loop* is provided, it specifies the :py:class:`tornado.ioloop.IOLoop` which is used to co-ordinate the worker process. If not provided, the global instance is used. .. py:attribute:: kinect Kinect object associated with this compressor. """ on_compressed_frame = Signal() """Signal emitted when a new compressed frame is available. Receivers take a single keyword argument, *compressed_frame*, which is a Python buffer-like object containing the compressed frame data. The signal is emitted on the IOLoop thread.""" # The maximum number of frames we can be waiting for before we start # dropping them. _MAX_IN_FLIGHT = cpu_count() + 1 def __init__(self, kinect, io_loop=None): # Public attributes self.kinect = kinect # Private attributes self._io_loop = io_loop or tornado.ioloop.IOLoop.instance() self._pool = Pool() # worker process pool self._n_in_flight = 0 # How many frames are we waiting for? self._n_dropped = 0 # Wire ourselves up for depth frame events kinect.on_depth_frame.connect(self._on_depth_frame, sender=kinect) def __del__(self): # As a courtesy, terminate the worker pool to avoid having a sea of # dangling processes. self._pool.terminate() def _on_compressed_frame(self, compressed_frame): # Record arrival of frame self._n_in_flight -= 1 # Send signal try: self._io_loop.add_callback( self.on_compressed_frame.send, self, compressed_frame=compressed_frame ) except Exception as e: # HACK: Since multiprocessing *might* call this handler after the # io loop has shut down (which will raise an Exception) and because # there's no documented way to determine if the io loop is still # alive ahead of time, we swallow exceptions here. This should # happen rarely when one is rapidly starting and stopping IOLoops # (such as in the test-suite!) so log it as a warning. log.warn('DepthFrameCompressor swallowed {0} exception'.format(e)) def _on_depth_frame(self, kinect, depth_frame): # If we aren't waiting on too many frames, submit if self._n_in_flight < DepthFrameCompressor._MAX_IN_FLIGHT: self._pool.apply_async(_compress_depth_frame, args=(depth_frame,), callback=self._on_compressed_frame) self._n_in_flight += 1 else: # Only log every 10 dropped frames to avoid being too spammy self._n_dropped += 1 if self._n_dropped % 10 == 0: log.warn('Dropped {0} depth frames'.format(self._n_dropped))
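A hedged, standalone sketch of the bounded-in-flight / frame-drop pattern the class above implements, with the Kinect, signals, and tornado specifics stripped out; Compressor and compress are illustrative names, not part of the code above.

import time
from multiprocessing import Pool, cpu_count

MAX_IN_FLIGHT = cpu_count() + 1

def compress(frame):
    # Stand-in for the real depth-frame compression routine.
    return bytes(frame)

class Compressor(object):
    def __init__(self):
        self.pool = Pool()
        self.in_flight = 0
        self.dropped = 0

    def submit(self, frame):
        # Drop frames instead of queueing unboundedly when the workers fall behind.
        if self.in_flight >= MAX_IN_FLIGHT:
            self.dropped += 1
            return
        self.in_flight += 1
        self.pool.apply_async(compress, (frame,), callback=self._done)

    def _done(self, compressed_frame):
        # Runs on the pool's result-handler thread in the parent process.
        self.in_flight -= 1

    def stop(self):
        self.pool.terminate()

if __name__ == '__main__':
    c = Compressor()
    for i in range(50):
        c.submit([i % 256] * 16)
    time.sleep(0.5)
    print('dropped', c.dropped, 'frames')
    c.stop()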
class ProcessExecutor(object): """ Create promises which will deliver in a separate process. """ def __init__(self, processes=None): self._processes = processes self._pool = None def __enter__(self): return self def __exit__(self, exc_type, _exc_val, _exc_tb): """ Using the managed interface forces blocking delivery at the end of the managed segment. """ self.deliver() return (exc_type is None) def _promise(self): """ override to use a different promise mechanism """ return promise(blocking=True) def _get_pool(self): """ override to provide a different pool implementation """ if not self._pool: self._pool = Pool(processes=self._processes) return self._pool def future(self, work, *args, **kwds): """ Promise to deliver on the results of work in the future. Parameters ---------- work : `callable` This is the work which will be performed to deliver on the future. *args : `optional positional parameters` arguments to the `work` function **kwds : `optional named parameters` keyword arguments to the `work` function Returns ------- value : `promise` a promise acting as a placeholder for the result of evaluating `work(*args, **kwds)`. Note that calling `deliver` on this promise will potentially block until the underlying result is available. """ promised, setter, seterr = self._promise() def callback(value): # value is collected as the result of the _perform_work # function at the top of this module success, result = value if success: setter(result) else: seterr(*result) # queue up the work in our pool pool = self._get_pool() pool.apply_async(_perform_work, [work, args], kwds, callback) return promised def terminate(self): """ Breaks all the remaining undelivered promises, halts execution of any parallel work being performed. Any promise which had not managed to be delivered will never be delivered after calling `terminate`. Attempting to call `deliver` on them will result in a deadlock. """ # TODO: is there a way for us to cause all undelivered # promises to raise an exception of some sort when this # happens? That would be better than deadlocking while waiting # for delivery. if self._pool is not None: self._pool.terminate() self._pool = None def deliver(self): """ Deliver on all underlying promises. Blocks until complete. """ if self._pool is not None: self._pool.close() self._pool.join() self._pool = None
def raster2pyramid( input_file, output_dir, options ): """ Creates a tile pyramid out of an input raster dataset. """ pyramid_type = options["pyramid_type"] scale_method = options["scale_method"] output_format = options["output_format"] resampling = options["resampling"] zoom = options["zoom"] bounds = options["bounds"] overwrite = options["overwrite"] # Prepare process parameters minzoom, maxzoom = _get_zoom(zoom, input_file, pyramid_type) process_file = os.path.join( os.path.dirname(os.path.realpath(__file__)), "tilify.py" ) with rasterio.open(input_file, "r") as input_raster: output_bands = input_raster.count input_dtype = input_raster.dtypes[0] output_dtype = input_raster.dtypes[0] nodataval = input_raster.nodatavals[0] if not nodataval: nodataval = 0 if output_format == "PNG": if output_bands > 3: output_bands = 3 output_dtype = 'uint8' scales_minmax = () if scale_method == "dtype_scale": for index in range(1, output_bands+1): scales_minmax += (DTYPE_RANGES[input_dtype], ) elif scale_method == "minmax_scale": for index in range(1, output_bands+1): band = input_raster.read(index) scales_minmax += ((band.min(), band.max()), ) elif scale_method == "crop": for index in range(1, output_bands+1): scales_minmax += ((0, 255), ) if input_dtype == "uint8": scale_method = None scales_minmax = () for index in range(1, output_bands+1): scales_minmax += ((None, None), ) # Create configuration config = {} config.update( process_file=process_file, output={ "path": output_dir, "format": output_format, "type": pyramid_type, "bands": output_bands, "dtype": output_dtype }, scale_method=scale_method, scales_minmax=scales_minmax, input_files={"raster": input_file}, config_dir=os.getcwd(), process_minzoom=minzoom, process_maxzoom=maxzoom, nodataval=nodataval, resampling=resampling, bounds=bounds, pixelbuffer=5, baselevel={"zoom": maxzoom, "resampling": resampling} ) LOGGER.info("preparing process ...") try: mapchete = Mapchete( MapcheteConfig( config, zoom=zoom, bounds=bounds ) ) except PyCompileError as error: print error return except: raise # Prepare output directory and logging if not os.path.exists(output_dir): os.makedirs(output_dir) logging.config.dictConfig(get_log_config(mapchete)) for zoom in reversed(range(minzoom, maxzoom+1)): # Determine work tiles and run work_tiles = mapchete.get_work_tiles(zoom) func = partial(_worker, mapchete=mapchete, overwrite=overwrite ) pool = Pool() try: pool.map_async(func, work_tiles) pool.close() except KeyboardInterrupt: LOGGER.info( "Caught KeyboardInterrupt, terminating workers" ) pool.terminate() break except: raise finally: pool.close() pool.join()
try: for c in count(): # Infinite range # Create a new pool each batch and run 'reps' timesteps in each world pool = Pool(cores_used) for res in tqdm( pool.imap_unordered(drive_wrapper, enumerate(worlds)), position=0, desc=f"Batch {c}", total=n_worlds, ): pass pool.close() pool.join() # Back up data and map files for i, conf in enumerate(configs): call([ "cp", conf["output"]["data"], "{}/{}.d".format(local_backup, i) ]) call([ "cp", conf["output"]["map"], "{}/{}.m".format(local_backup, i) ]) if flag.flag: break except KeyboardInterrupt: pool.terminate() # Kill all processes
def pyorbit_emcee(config_in, input_datasets=None, return_output=None): try: import emcee except: print("ERROR: emcee not installed, this will not work") quit() os.environ["OMP_NUM_THREADS"] = "1" optimize_dir_output = './' + config_in['output'] + '/optimize/' pyde_dir_output = './' + config_in['output'] + '/pyde/' emcee_dir_output = './' + config_in['output'] + '/emcee/' reloaded_optimize = False reloaded_pyde = False reloaded_emcee_multirun = False reloaded_emcee = False try: mc, population, starting_point, theta_dict = pyde_load_from_cpickle( pyde_dir_output, prefix='') reloaded_pyde = True except: pass try: mc, starting_point, population, _, _, sampler_chain, _, _, theta_dict, _ = \ emcee_load_from_cpickle(emcee_dir_output, prefix='MR') reloaded_emcee_multirun = True except: pass try: mc, starting_point, population, _, _, sampler_chain, sampler_lnprobability, _, theta_dict, _ = \ emcee_load_from_cpickle(emcee_dir_output) reloaded_emcee = True except: pass try: starting_point, previous_boundaries, theta_dict = starting_point_load_from_cpickle( optimize_dir_output) reloaded_optimize = True except: pass print() print('reloaded_optimize: ', reloaded_pyde) print('reloaded_pyde: ', reloaded_pyde) print('reloaded_emcee_multirun: ', reloaded_emcee_multirun) print('reloaded_emcee: ', reloaded_emcee) if reloaded_emcee: """ There's no need to do anything""" flatchain = emcee_flatchain(sampler_chain, mc.emcee_parameters['nburn'], mc.emcee_parameters['thin']) mc.model_setup() mc.initialize_logchi2() results_analysis.print_integrated_ACF(sampler_chain, theta_dict, mc.emcee_parameters['thin']) results_analysis.results_resumen(mc, flatchain) if return_output: return mc, sampler_chain, sampler_lnprobability else: return reloaded_mc = reloaded_pyde or reloaded_emcee_multirun if reloaded_mc: previous_boundaries = mc.bounds mc = ModelContainerEmcee() pars_input(config_in, mc, input_datasets) if mc.pyde_parameters['shutdown_jitter'] or mc.emcee_parameters[ 'shutdown_jitter']: for dataset_name, dataset in mc.dataset_dict.items(): dataset.shutdown_jitter() # keep track of which version has been used to perform emcee computations mc.emcee_parameters['version'] = emcee.__version__[0] mc.model_setup() mc.create_variables_bounds() mc.initialize_logchi2() results_analysis.results_resumen(mc, None, skip_theta=True) mc.pyde_dir_output = pyde_dir_output mc.emcee_dir_output = emcee_dir_output mc.emcee_parameters['nwalkers'] = mc.ndim * \ mc.emcee_parameters['npop_mult'] if mc.emcee_parameters['nwalkers'] % 2 == 1: mc.emcee_parameters['nwalkers'] += 1 if not os.path.exists(mc.emcee_dir_output): os.makedirs(mc.emcee_dir_output) print() print('emcee version: ', emcee.__version__) if mc.emcee_parameters['version'] == '2': print('WARNING: upgrading to version 3 is strongly advised') print() print('Include priors: ', mc.include_priors) print() print('Reference Time Tref: ', mc.Tref) print() print('Dimensions = ', mc.ndim) print('Nwalkers = ', mc.emcee_parameters['nwalkers']) if not getattr(mc, 'use_threading_pool', False): mc.use_threading_pool = False print() print('Using threading pool:', mc.use_threading_pool) print() print('*************************************************************') print() if reloaded_mc: theta_dict_legacy = theta_dict.copy() population_legacy = population.copy() theta_dict = results_analysis.get_theta_dictionary(mc) population = np.zeros([mc.emcee_parameters['nwalkers'], mc.ndim], dtype=np.double) for theta_name, theta_i in theta_dict.items(): population[:, theta_i] = population_legacy[:, 
theta_dict_legacy[ theta_name]] mc.bounds[theta_i] = previous_boundaries[ theta_dict_legacy[theta_name]] starting_point = np.median(population, axis=0) # print(starting_point) # print(population) print('Using previous population as starting point. ') sys.stdout.flush() print() else: if mc.starting_point_flag or reloaded_optimize: if reloaded_optimize: print( 'Using the output from a previous optimize run as starting point' ) theta_dict_legacy = theta_dict.copy() starting_point_legacy = starting_point.copy() theta_dict = results_analysis.get_theta_dictionary(mc) for theta_name, theta_i in theta_dict.items(): starting_point[theta_i] = starting_point_legacy[ theta_dict_legacy[theta_name]] else: print('Using user-defined starting point from YAML file') mc.create_starting_point() starting_point = mc.starting_point population = np.zeros([mc.emcee_parameters['nwalkers'], mc.ndim], dtype=np.double) for ii in range(0, mc.emcee_parameters['nwalkers']): population[ii, :] = np.random.normal(starting_point, 0.0000001) print( 'to create a synthetic population extremely close to the starting values.' ) sys.stdout.flush() else: try: from pyde.de import DiffEvol except ImportError: print( 'ERROR! PyDE is not installed, run first with optimize instead of emcee' ) quit() if not os.path.exists(mc.pyde_dir_output): os.makedirs(mc.pyde_dir_output) print('PyDE running') sys.stdout.flush() de = DiffEvol(mc, mc.bounds, mc.emcee_parameters['nwalkers'], maximize=True) de.optimize(int(mc.pyde_parameters['ngen'])) population = de.population starting_point = np.median(population, axis=0) theta_dict = results_analysis.get_theta_dictionary(mc) """ bounds redefinition and fix for PyDE anomalous results """ if mc.recenter_bounds_flag: pyde_save_to_pickle(mc, population, starting_point, theta_dict, prefix='orig') mc.recenter_bounds(starting_point) population = mc.fix_population(starting_point, population) starting_point = np.median(population, axis=0) print('Boundaries redefined after PyDE output') pyde_save_to_pickle(mc, population, starting_point, theta_dict) print('PyDE completed') sys.stdout.flush() results_analysis.results_resumen(mc, starting_point, compute_lnprob=True, is_starting_point=True) if mc.use_threading_pool: if mc.emcee_parameters['version'] == '2': threads_pool = emcee.interruptible_pool.InterruptiblePool( mc.emcee_parameters['nwalkers']) else: from multiprocessing.pool import Pool as InterruptiblePool threads_pool = InterruptiblePool(mc.emcee_parameters['nwalkers']) if mc.emcee_parameters['multirun'] and not reloaded_emcee_multirun: for ii in range(0, mc.emcee_parameters['multirun_iter']): print('emcee exploratory run #', ii, ' of ', mc.emcee_parameters['multirun_iter']) # sampler = emcee.EnsembleSampler(mc.emcee_parameters['nwalkers'], mc.ndim, mc, # threads=mc.emcee_parameters['nwalkers']) if mc.use_threading_pool: sampler = emcee.EnsembleSampler( mc.emcee_parameters['nwalkers'], mc.ndim, mc, pool=threads_pool) else: sampler = emcee.EnsembleSampler( mc.emcee_parameters['nwalkers'], mc.ndim, mc) population, prob, state = sampler.run_mcmc( population, mc.emcee_parameters['multirun']) flatchain = emcee_flatchain(sampler.chain, mc.emcee_parameters['nburn'], mc.emcee_parameters['thin']) results_analysis.results_resumen(mc, flatchain) max_ind = np.argmax(prob) starting_point = population[max_ind, :] population = np.asarray([ starting_point + 1e-4 * np.random.randn(mc.ndim) for i in range(mc.emcee_parameters['nwalkers']) ]) sampler.reset() theta_dict = results_analysis.get_theta_dictionary(mc) 
emcee_save_to_cpickle(mc, starting_point, population, prob, state, sampler, theta_dict, prefix='MR_' + repr(ii)) emcee_save_to_cpickle(mc, starting_point, population, prob, state, sampler, theta_dict, prefix='MR') flatchain = emcee_flatchain(sampler.chain, mc.emcee_parameters['nburn'], mc.emcee_parameters['thin']) results_analysis.print_integrated_ACF(sampler.chain, theta_dict, mc.emcee_parameters['thin']) results_analysis.results_resumen(mc, flatchain) print('emcee exploratory runs completed') sys.stdout.flush() print() print('Running emcee') state = None if mc.use_threading_pool: sampler = emcee.EnsembleSampler(mc.emcee_parameters['nwalkers'], mc.ndim, mc, pool=threads_pool) else: sampler = emcee.EnsembleSampler(mc.emcee_parameters['nwalkers'], mc.ndim, mc) if mc.emcee_parameters['nsave'] > 0: print() print(' Saving temporary steps') niter = int(mc.emcee_parameters['nsteps'] / mc.emcee_parameters['nsave']) sampled = 0 for i in range(0, niter): population, prob, state = sampler.run_mcmc( population, mc.emcee_parameters['nsave'], thin=mc.emcee_parameters['thin'], rstate0=state) sampled += mc.emcee_parameters['nsave'] theta_dict = results_analysis.get_theta_dictionary(mc) emcee_save_to_cpickle(mc, starting_point, population, prob, state, sampler, theta_dict, samples=sampled) flatchain = emcee_flatchain(sampler.chain, mc.emcee_parameters['nburn'], mc.emcee_parameters['thin']) results_analysis.print_integrated_ACF(sampler.chain, theta_dict, mc.emcee_parameters['thin']) results_analysis.results_resumen(mc, flatchain) print() print(sampled, ' steps completed, average lnprob:, ', np.median(prob)) sys.stdout.flush() else: population, prob, state = sampler.run_mcmc( population, mc.emcee_parameters['nsteps'], thin=mc.emcee_parameters['thin']) theta_dict = results_analysis.get_theta_dictionary(mc) emcee_save_to_cpickle(mc, starting_point, population, prob, state, sampler, theta_dict) flatchain = emcee_flatchain(sampler.chain, mc.emcee_parameters['nburn'], mc.emcee_parameters['thin']) results_analysis.print_integrated_ACF(sampler.chain, theta_dict, mc.emcee_parameters['thin']) results_analysis.results_resumen(mc, flatchain) print() print('emcee completed') if mc.use_threading_pool: # close the pool of threads threads_pool.close() threads_pool.terminate() threads_pool.join() """ A dummy file is created to let the cpulimit script to proceed with the next step""" emcee_create_dummy_file(mc) if return_output: return mc, sampler.chain, sampler.lnprobability
def main() -> None: """Run BarcSeek""" parser = arguments.set_args() # type: argparse.ArgumentParser if not sys.argv[1:]: sys.exit(parser.print_help()) args = vars(parser.parse_args()) # type: Dict[str, Any] # Make an output directory # if os.path.exists(args['outdirectory']): # args['outdirectory'] = args['outdirectory'] + time.strftime('_%Y-%m-%d_%H:%M') os.makedirs(args['outdirectory'], exist_ok=True) # Make a prefix for project-level output files output_prefix = os.path.join(args['outdirectory'], sys.argv[0]) # type: str # Setup the logger # Formatting values log_format = '%(asctime)s %(levelname)s:\t%(message)s' # type: str date_format = '%Y-%m-%d %H:%M:%S' # type: str # Formatters stripped_formatter = utilities.StrippedFormatter( fmt=log_format, datefmt=date_format) # utilities.StrippedFormatter colored_formater = utilities.ColoredFormatter( fmt=log_format, datefmt=date_format) # type: utilities.ColoredFormatter # Open /dev/null (or whatever it is on Windows) to send basic stream information to devnull = open(os.devnull, 'w') # Configure the logger verbosity = _set_verbosity(level=args['verbosity']) # type: int logging.basicConfig( stream=devnull, level=verbosity, ) # If we're being verbose, capture other warnings (mainly matplotlib and numpy) # Otherwise, ignore them if verbosity == logging.DEBUG: logging.captureWarnings(True) else: warnings.filterwarnings('ignore') # Setup a FileHandler for the log file # Use a StrippedFormatter to remove extra ANSI color codes logname = output_prefix + '.log' logfile = logging.FileHandler(filename=logname, mode='w') # type: Logging.FileHandler logfile.setFormatter(stripped_formatter) logging.getLogger().addHandler(logfile) # Setup the console handler # Use a ColoredFormatter because colors are cool console = logging.StreamHandler() # type: logging.StreamHandler console.setFormatter(colored_formater) logging.getLogger().addHandler(console) # Begin the program logging.info("Welcome to %s!", os.path.basename(sys.argv[0])) program_start = time.time() # type: float # Read in the barcodes barcodes_dict = barcodes.read_barcodes( barcodes_file=args['barcodes']) # type: Dict[str, str] if barcodes.barcode_check(barcode_dict=barcodes_dict): raise ValueError( logging.error("Cannot have ambiguous or duplicate barcodes")) # Read in the sample sheet and match barcode sequences to each sample sample_sheet = utilities.load_sample_sheet( sheet_file=args['sample_sheet'] ) # type: Dict[str, Tuple[str, Optional[str]]] sample_barcodes = utilities.match_barcodes( sample_sheet=sample_sheet, barcodes_dictionary=barcodes_dict ) # type: Dict[str, Tuple[str, Optional[str]]] print(sample_barcodes) raise SystemExit # Create the multiprocessing pool # Tell the pool to ignore SIGINT (^C) # by turning INTERUPT signals into IGNORED signals sigint_handler = signal.signal(signal.SIGINT, signal.SIG_IGN) # type: function # Setup our multiprocessing pool # Allow the user to specify the number of jobs to run at once # If not specified, let multiprocessing figure it out if args['num_cores']: pool = Pool(processes=args['num_cores']) else: pool = Pool() # Re-enable the capturing of SIGINT, catch with KeyboardInterrupt # or ExitPool, depending on how the exit was initiated # Note: SystemExits are swallowed by Pool, no way to change that signal.signal(signal.SIGINT, sigint_handler) if getattr(pool, '_processes') > 1: try: # Use map_async and get with a large timeout # to allow for KeyboardInterrupts to be caught # and handled with the try/except pass except KeyboardInterrupt: 
            pool.terminate()
            pool.join()
            raise SystemExit('\nkilled')
        else:
            pool.join()
    # Otherwise there is only one worker process, so skip pool.map() entirely
    else:
        # Clean up the pool and fall back to the standard map
        pool.close()
        pool.terminate()
        pool.join()
    # End the program
    logging.debug("Entire program took %s seconds to run",
                  round(time.time() - program_start, 3))
    devnull.close()
    try:
        logfile.close()
    except NameError:
        pass
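The try block above is still a placeholder (`pass`). A minimal sketch of the pattern its comments describe: workers ignore SIGINT, and the parent waits on map_async().get() with a large timeout so KeyboardInterrupt can be caught and the pool terminated. The worker and data names here are invented for illustration.

import signal
from multiprocessing import Pool

def _ignore_sigint():
    # Workers ignore SIGINT so only the parent process handles Ctrl-C.
    signal.signal(signal.SIGINT, signal.SIG_IGN)

def demultiplex(chunk):
    # Hypothetical worker standing in for the real partitioning job.
    return len(chunk)

if __name__ == '__main__':
    chunks = [['a', 'b'], ['c'], ['d', 'e', 'f']]
    pool = Pool(processes=2, initializer=_ignore_sigint)
    try:
        # A large timeout keeps the parent in an interruptible wait.
        results = pool.map_async(demultiplex, chunks).get(timeout=3600)
        pool.close()
        pool.join()
    except KeyboardInterrupt:
        pool.terminate()
        pool.join()
        raise SystemExit('\nkilled')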