class _Worker(object):
    def __init__(self, protocol=None):
        self.protocol = protocol
        self.pool = ProcessPoolExecutor(max_workers=1)
        self.pool.submit(id, 42).result()  # start the worker process

    def run(self, func, *args, **kwargs):
        """Synchronous remote function call"""
        input_payload = dumps((func, args, kwargs), protocol=self.protocol)
        result_payload = self.pool.submit(
            call_func, input_payload, self.protocol).result()
        result = loads(result_payload)

        if isinstance(result, BaseException):
            raise result
        return result

    def memsize(self):
        workers_pids = [p.pid if hasattr(p, "pid") else p
                        for p in list(self.pool._processes)]
        num_workers = len(workers_pids)
        if num_workers == 0:
            return 0
        elif num_workers > 1:
            raise RuntimeError("Unexpected number of workers: %d"
                               % num_workers)
        return psutil.Process(workers_pids[0]).memory_info().rss

    def close(self):
        self.pool.shutdown(wait=True)
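# _Worker.run submits a module-level call_func helper that is not shown above.
# A minimal sketch of what such a helper might look like (an assumption, not
# the original implementation), reusing the same dumps/loads used in run():
def call_func(payload, protocol):
    # Unpickle (func, args, kwargs), execute, and pickle the outcome.
    # Exceptions are returned as values so the parent can re-raise them,
    # which is exactly what _Worker.run checks for.
    func, args, kwargs = loads(payload)
    try:
        result = func(*args, **kwargs)
    except BaseException as e:
        result = e
    return dumps(result, protocol=protocol)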
def main():
    """Makes banner requests with a thread or process pool executor."""
    arg_parser = ArgumentParser()
    arg_parser.add_argument("--ip", help="IP address", required=True)
    arg_parser.add_argument("--pool", help="Executor pool type",
                            choices=("thread", "process"), required=True)
    arg_parser.add_argument(
        "--workers", help="Number of executor workers", type=int,
        choices=range(1, 9), required=True
    )
    args = arg_parser.parse_args()

    ip = args.ip
    pool = args.pool
    workers = args.workers

    if pool == "process":
        executor = ProcessPoolExecutor(max_workers=workers)
    elif pool == "thread":
        executor = ThreadPoolExecutor(max_workers=workers)

    for i in range(1, 256):
        for port in get_ports():
            executor.submit(banner_request, "{0}.{1}".format(ip, i), port)

    print("[!] Finished spawning banner requests")
def splice_gmaps(threadpool, tilefolder, tempfiles, name): processpool = ProcessPoolExecutor() caption = "Rendering Zoom Layers {}".format(name) loadingbar = Bar(caption=caption) loadingbar.set_progress(0, caption) pygame.display.update() side = 1600 zoom_levels = 4 factor = 2 ** (zoom_levels - 1) masterside = side * factor plates = generate_plate_coords(factor, tempfiles) master_surface = pygame.Surface((masterside, masterside)) done = 0 total = len(tempfiles) + len(plates) * sum((4 ** x for x in range(zoom_levels))) fraction = 100 / total def render_base_to_master(task): imgdata, size, location = task.result() tempsurf = pygame.image.frombuffer(imgdata, size, "RGB") master_surface.blit(tempsurf, location) tasks = [] for masterpos, pieces in plates.items(): master_surface.fill((132, 170, 248)) for x, y in pieces: task = processpool.submit(unpack, tempfiles, x, y, ((x % factor) * side, (y % factor) * side)) tasks.append(threadpool.submit(render_base_to_master, task)) tasks.append(task) current_area = masterside for task in tasks: task.result() done += 0.5 loadingbar.set_progress(done * fraction, caption + " %4d of %4d" % (done, total)) for z in range(zoom_levels): tasks = [] pieces = masterside // current_area x_off = masterpos[0] * pieces y_off = masterpos[1] * pieces for xp in range(pieces): for yp in range(pieces): temp = pygame.Surface.subsurface(master_surface, (xp * current_area, yp * current_area, current_area, current_area)) filename = "screen_{}_{}_{}.png".format(z + 1, x_off + xp, y_off + yp) data = pygame.image.tostring(temp, "RGB") tasks.append(processpool.submit(render_plate, data, tilefolder, temp.get_size(), side, filename)) for task in tasks: task.result() done += 1 loadingbar.set_progress(done * fraction, caption + " %4d of %4d" % (done, total)) current_area //= 2 processpool.shutdown()
def main():
    parser = argparse.ArgumentParser()
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--filter", action="store_true",
                       help="act as a filter")
    group.add_argument("--transform", metavar="MAPPING",
                       type=argparse.FileType("r"),
                       help="transform all files given in the mapping file")
    parser.add_argument("--srcprefix", metavar="PREFIX", default="",
                        help="when transforming data files prepend this PREFIX to source paths")
    parser.add_argument("--dstprefix", metavar="PREFIX", default="",
                        help="when transforming data files prepend this PREFIX to destination paths")
    args = parser.parse_args()

    if args.filter:
        check_stream(sys.stdin, sys.stdout)
    else:
        exe = Executor()
        res = []
        for lineno, line in enumerate(args.transform):
            line = line.split('#', 1)[0]  # comment
            line = line.rstrip()          # trailing space or newline
            match = re.match(r'^(\S+):\s*(\S+)$', line)
            if not match:
                raise ValueError("syntax error on line %d" % (lineno + 1))
            destination, source = match.groups()
            source = os.path.join(args.srcprefix, source)
            destination = os.path.join(args.dstprefix, destination)
            res.append(exe.submit(transform, source, destination))
        while res:
            res.pop(0).result()  # propagate exceptions
class ThreadPool(object):
    '''Thread/process pool wrapper.'''

    def __init__(self, thread_num=1, process_num=1, q_size=2000, daemon=True):
        self.thread_pool = _ThreadPoolExecutor(thread_num, daemon)
        self.process_pool = ProcessPoolExecutor(process_num)
        self.result_queue = Queue(q_size)

    def wait(self, threads=[]):
        thread_wait(threads)

    def add_thread(self, target, args=()):
        result = self.thread_pool.submit(target, *args)
        return result

    def add_process(self, target, args=()):
        result = self.process_pool.submit(target, *args)
        return result

    def thread_map(self, target, args=[]):
        return [self.thread_pool.submit(target, arg) for arg in args]

    def process_map(self, target, args=[]):
        return self.process_pool.map(target, args)

    def map(self, target, args=[]):
        return self.process_map(target, args)
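# A minimal usage sketch for the wrapper above; the worker function and the
# argument lists are hypothetical, and _ThreadPoolExecutor/thread_wait are
# assumed to come from the surrounding module:
def _square(x):
    return x * x

if __name__ == '__main__':
    pool = ThreadPool(thread_num=2, process_num=2)
    futures = pool.thread_map(_square, [1, 2, 3])
    print([f.result() for f in futures])   # -> [1, 4, 9]
    print(list(pool.map(abs, [-3, -4])))   # -> [3, 4]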
def build_from_path(hparams, input_dirs, mel_dir, linear_dir, wav_dir,
                    n_jobs=12, tqdm=lambda x: x):
    """
    Preprocesses the speech dataset from a given input path to given output directories

    Args:
        - hparams: hyper parameters
        - input_dirs: input directories that contain the files to preprocess
        - mel_dir: output directory of the preprocessed speech mel-spectrogram dataset
        - linear_dir: output directory of the preprocessed speech linear-spectrogram dataset
        - wav_dir: output directory of the preprocessed speech audio dataset
        - n_jobs: Optional, number of worker processes to parallelize across
        - tqdm: Optional, provides a nice progress bar

    Returns:
        - A list of tuples describing the train examples. This should be written to train.txt
    """
    # We use ProcessPoolExecutor to parallelize across processes; this is just
    # an optimization and it can be omitted.
    executor = ProcessPoolExecutor(max_workers=n_jobs)
    futures = []
    index = 1
    for input_dir in input_dirs:
        with open(os.path.join(input_dir, 'metadata.csv'), encoding='utf-8') as f:
            for line in f:
                parts = line.strip().split('|')
                wav_path = os.path.join(input_dir, 'wavs', '{}.wav'.format(parts[0]))
                text = parts[2]
                futures.append(executor.submit(partial(_process_utterance, mel_dir, linear_dir,
                                                       wav_dir, index, wav_path, text, hparams)))
                index += 1

    # Fetch each result once and drop utterances that were skipped (None).
    results = [future.result() for future in tqdm(futures)]
    return [r for r in results if r is not None]
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
    '''Preprocesses the LJ Speech dataset from a given input path into a given output directory.

    Args:
        in_dir: The directory where you have downloaded the LJ Speech dataset
        out_dir: The directory to write the output into
        num_workers: Optional number of worker processes to parallelize across
        tqdm: You can optionally pass tqdm to get a nice progress bar

    Returns:
        A list of tuples describing the training examples. This should be written to train.txt
    '''
    # We use ProcessPoolExecutor to parallelize across processes. This is just an optimization
    # and you can omit it and just call _process_utterance on each input if you want.
    executor = ProcessPoolExecutor(max_workers=num_workers)
    futures = []
    index = 1
    with open(os.path.join(in_dir, 'metadata.csv'), encoding='utf-8') as f:
        for line in f:
            parts = line.strip().split('|')
            wav_path = os.path.join(in_dir, 'wavs', '%s.wav' % parts[0])
            text = parts[2]
            futures.append(executor.submit(partial(_process_utterance, out_dir, index, wav_path, text)))
            index += 1
    return [future.result() for future in tqdm(futures)]
def __call__(self, workflow, input_artifact_filepaths, parameter_references,
             output_artifact_filepaths):
    input_artifact_abs_filepaths = \
        {k: os.path.abspath(v) for k, v in input_artifact_filepaths.items()}
    output_artifact_abs_filepaths = \
        {k: os.path.abspath(v) for k, v in output_artifact_filepaths.items()}

    job = workflow.to_script(input_artifact_abs_filepaths,
                             parameter_references,
                             output_artifact_abs_filepaths)

    temp_dir = tempfile.mkdtemp()
    pool = ProcessPoolExecutor(max_workers=1)
    py_filename = os.path.join(temp_dir, 'job.py')
    with open(py_filename, 'w') as py_file:
        py_file.write(job.code)

    # TODO: handle subprocess exceptions
    future = pool.submit(subprocess.run,
                         [self._python_executable, py_filename],
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # TODO: handle callback exceptions
    # TODO: make sure that temp_dir is cleaned up even if there is an
    #       exception in pool.submit or the callback
    future.add_done_callback(lambda _: shutil.rmtree(temp_dir))
    return future
def on_message(self, message):
    print(len(message))
    result = yield tornado.gen.Task(self.process_message, message)
    return
    # The code below is never reached because of the return above; it shows
    # the alternative of handing the message off to a separate process.
    pool = ProcessPoolExecutor()
    fut = pool.submit(call_process, message)
    ret = yield fut
    pool.shutdown()
def _run(self, instance_id: str, service_id: str, plan_id: str,
         accepts_incomplete: bool, func: Callable, *func_args) -> Any:
    # The _match_synchronicity call must come first because it may raise an exception
    sync = self._match_synchronicity(service_id, plan_id, accepts_incomplete)
    executor = ProcessPoolExecutor(max_workers=1)
    future = executor.submit(func, *func_args)
    if sync:
        return future.result(timeout=59)
    else:
        self.async_ops[instance_id] = future
        raise ProvisioningAsynchronously
def probe(moduleName, args, torCtrl):
    logger.info("Running module '%s'." % moduleName)
    module = __import__("modules.%s" % moduleName, fromlist=[moduleName])

    # Obtain the list of exit relays to scan.
    if args.exit:
        exitRelays = [args.exit]
    else:
        hosts = [(socket.gethostbyname(host), port)
                 for (host, port) in module.targets]
        exitRelays = exitselector.getExits(args.consensus,
                                           countryCode=args.country,
                                           hosts=hosts)

    count = len(exitRelays)
    if count < 1:
        raise error.ExitSelectionError("Exit selection yielded %d exits "
                                       "but need at least one." % count)
    logger.info("About to probe %d exit relays." % count)

    # Create circuit pool and set up stream attacher.
    circuitPool = circuitpool.new(torCtrl, list(exitRelays))
    eventHandler = streamattacher.new(circuitPool, torCtrl)
    torCtrl.add_event_listener(eventHandler.newEvent, EventType.STREAM)

    circuits = torCtrl.get_circuits()
    logger.debug("Open circuits:")
    for circuit in circuits:
        logger.debug(circuit)

    executor = ProcessPoolExecutor(max_workers=const.CIRCUIT_POOL_SIZE)
    logger.debug("Beginning to populate process pool with %d jobs." % count)

    # Invoke a module instance for every exit relay.
    for _ in xrange(count, 0, -1):
        cmd = command.new(None)
        executor.submit(module.probe, cmd, count)
        count -= 1

    logger.info("Submitted jobs. Terminating main scanner.")
def post(self):
    file = self.request.files['file'][0]
    hark.client.login()
    hark.client.createSession(default_hark_config)

    log.info("Uploading asynchronously")
    pool = ProcessPoolExecutor(max_workers=2)
    future = pool.submit(async_upload, file)
    yield future
    pool.shutdown()

    log.info("Rendering visualization page")
    self.render('visualize.html')
def generate_stocks(freq=pd.Timedelta(seconds=60), directory=None):
    from concurrent.futures import ProcessPoolExecutor, wait
    e = ProcessPoolExecutor()
    if os.path.exists(os.path.join('data', 'daily')):
        glob_path = os.path.join('data', 'daily', '*')
    else:
        glob_path = os.path.join(daily_dir, '*')
    filenames = sorted(glob(glob_path))

    futures = [e.submit(generate_stock, fn, directory=directory, freq=freq)
               for fn in filenames]
    wait(futures)
def build_from_path(in_dir, out_dir, num_workers=1):
    executor = ProcessPoolExecutor(max_workers=num_workers)
    futures = []
    index = 1
    with open(os.path.join(in_dir, 'metadata.csv'), encoding='utf-8') as f:
        for line in f:
            parts = line.strip().split('|')
            wav_path = os.path.join(in_dir, 'wavs', '%s.wav' % parts[0])
            text = parts[2]
            futures.append(executor.submit(
                partial(_process_utterance, out_dir, index, wav_path, text)))
            index += 1
    return [future.result() for future in futures]
def main(argv=None): usage = """REDCap Data Model Generator Usage: redcap dball <version> [--dir=DIR] [--db=DB] [--host=HOST] [--port=PORT] [--user=USER] [--pass=PASS] Options: -h --help Show this screen. --dir=DIR Name of the directory to output the files [default: .]. --db=DB Name of the REDCap database [default: redcap]. --host=HOST Host of the database server [default: localhost]. --port=PORT Port of the database server [default: 3306]. --user=USER Username to connect with. --pass=PASS Password to connect with. If set to *, a prompt will be provided. --procs=PROCS Number of processes to spawn [default: 24]. """ # noqa from docopt import docopt args = docopt(usage, argv=argv, version='0.1') if args['--pass'] == '*': args['--pass'] = getpass('password: '******'--db'], args['--host'], args['--port'], args['--user'], args['--pass']) project_names = db_projects(conn) pool = ProcessPoolExecutor(max_workers=int(args['--procs'])) for name in project_names: pool.submit(worker, name, args) pool.shutdown()
class ConcurrentDownloader(BaseDownloader, ConcurrentMixin):
    """Concurrent ProcessPoolExecutor downloader

    :param pool_size: size of ProcessPoolExecutor
    :param timeout: request timeout in seconds
    """
    def __init__(self, worker_class, worker_kwargs=None, pool_size=5,
                 middlewares=None):

        # configure executor
        self.pool_size = pool_size
        self.executor = ProcessPoolExecutor(max_workers=self.pool_size)

        # prepare worker params
        self.worker_params = {
            'worker_class': worker_class,
            'worker_kwargs': worker_kwargs or {},
        }

        # ctrl-c support for python2.x
        # trap sigint
        signal.signal(signal.SIGINT, lambda s, f: s)

        super(ConcurrentDownloader, self).__init__(
            middlewares=middlewares
        )

    def get(self, requests):
        for request in requests:
            # delegate request processing to the executor
            future = self.executor.submit(
                _run_download_worker, self.worker_params, request,
            )

            # build Planned object
            done_future = Planned()

            # when the executor finishes the request - fire done_future
            future.add_done_callback(
                partial(self._done, request, done_future)
            )

            yield done_future

    def get_workers_count(self):
        return self.pool_size

    def stop(self):
        self.executor.shutdown()
def precompute_to_stream(self, stream, logger): """ File format: int64: nnz in total padding to 128 bytes double[ni]: x_squared double[(lmax + 1) * ni]: Lambda_0 double[(lmax + 1) * ni]: Lambda_1 ushort[(lmax + 1)**2]: i_stops Format of i_stops is m-major ordering, but with, additionally, even coefficents all coming before the odd ones. """ from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor executor = ProcessPoolExecutor(max_workers=8) start_pos = stream.tell() for i in range(2 * (self.lmax + 1)): write_int64(stream, 0) write_array(stream, self.x_squared) futures = [] for m in range(self.lmax + 1): for odd in [0, 1]: futures.append(executor.submit(precompute_single, self.thetas, self.lmax, self.epsilon_legendre, m, odd)) nnz_total = 0 Lambda_1_list = [] i_stops_list = [] nnz_list = [] it = iter(futures) for m in range(self.lmax + 1): for odd in [0, 1]: Lambda_0, Lambda_1, i_stops, nnz = it.next().result() logger.info('Got %s m=%d' % (['even', 'odd'][odd], m)) write_array(stream, Lambda_0) Lambda_1_list.append(Lambda_1) i_stops_list.append(i_stops) nnz_list.append(nnz) nnz_total += nnz for arr in Lambda_1_list: write_array(stream, arr) for arr in i_stops_list: write_array(stream, arr) end_pos = stream.tell() stream.seek(start_pos) for nnz in nnz_list: write_int64(stream, nnz) stream.seek(end_pos) return nnz_total
class TaskManager:
    def __init__(self, process_num, max_task_in_queue=100):
        """
        :param process_num: max process number
        :param max_task_in_queue: max_process_number + pending task number
        """
        self.process_num = process_num
        self.max_task_in_queue = max_task_in_queue
        self.pool = ProcessPoolExecutor(max_workers=process_num)
        self.task_map = {}
        self.lock = multiprocessing.Lock()

    def exec_command(self, command):
        """
        simple process
        :param command: command
        :return: True if the command was submitted, otherwise False
        """
        self.lock.acquire()
        if len(self.task_map) < self.max_task_in_queue:
            self.task_map[command.timestamp] = self.pool.submit(command)
            self.task_map[command.timestamp].add_done_callback(
                functools.partial(self.task_done, command))
            self.lock.release()
            return True
        else:
            self.lock.release()
            return False

    def task_done(self, command, future_obj):
        """
        do not change this function
        :param command: command obj
        :param future_obj: command result
        :return:
        """
        self.lock.acquire()
        # print("pop:" + str(command.timestamp))
        self.task_map.pop(command.timestamp)
        self.lock.release()

    # def shutdown(self):
    #     self.pool.shutdown()

    def is_all_done(self):
        self.lock.acquire()
        for key in self.task_map:
            if self.task_map[key].running():
                self.lock.release()
                return False
        self.lock.release()
        return True
def Main(): global gSymFileManager, gOptions, gPool if not ReadConfigFile(): return 1 # In a perfect world, we could create a process per cpu core. # But then we'd have to deal with cache sharing gPool = Pool(1) gPool.submit(initializeSubprocess, gOptions) # Setup logging in the parent process. # Ensure this is called after the call to initializeSubprocess to # avoid duplicate messages in Unix systems. SetLoggingOptions(gOptions["Log"]) LogMessage("Starting server with the following options:\n" + str(gOptions)) app = Application([ url(r'/(debug)', DebugHandler), url(r'/(nodebug)', DebugHandler), url(r"(.*)", SymbolHandler)]) app.listen(gOptions['portNumber'], gOptions['hostname']) try: # select on Windows doesn't return on ctrl-c, add a periodic # callback to make ctrl-c responsive if sys.platform == 'win32': PeriodicCallback(lambda: None, 100).start() IOLoop.current().start() except KeyboardInterrupt: LogMessage("Received SIGINT, stopping...") gPool.shutdown() LogMessage("Server stopped - " + gOptions['hostname'] + ":" + str(gOptions['portNumber'])) return 0
def spark_submit(exec_string, log_file, driver_path):
    """
    Asynchronously run the pyspark/sparktk submitted script while writing the
    logs to the log_file for the app

    :param exec_string: the command that is going to be run
    :param log_file: the file containing command(script) logs while running
    :param driver_path: the path to the main sparktk/pyspark script within the uploads folder
    :return: None
    """
    print "Entering spark_submit"
    mark_submitted(driver_path)
    pool = Pool(max_workers=1)
    cmd_string = "%s >>%s 2>&1" % (exec_string, log_file)
    print "CMD string is %s" % (cmd_string)
    future = pool.submit(subprocess.call, cmd_string, shell=True)
    future.driver_path = driver_path
    future.add_done_callback(mark_completed)
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
    executor = ProcessPoolExecutor(max_workers=num_workers)
    futures = []
    index = 1
    for book in books:
        with open(os.path.join(in_dir, book, 'sentence_index.txt')) as f:
            for line in f:
                parts = line.strip().split('\t')
                if line[0] != '#' and len(parts) == 8 and float(parts[3]) > _min_confidence:
                    wav_path = os.path.join(in_dir, book, 'wav', '%s.wav' % parts[0])
                    labels_path = os.path.join(in_dir, book, 'lab', '%s.lab' % parts[0])
                    text = parts[5]
                    task = partial(_process_utterance, out_dir, index, wav_path, labels_path, text)
                    futures.append(executor.submit(task))
                    index += 1
    results = [future.result() for future in tqdm(futures)]
    return [r for r in results if r is not None]
def __iter__(self): p = ProcessPoolExecutor(self.workers) cacheDir = os.path.join('cache', 'normalizedDocs') cacheDict = {} futures = OrderedDict() if os.path.exists(cacheDir): keySet = set(os.listdir(cacheDir)) logging.debug('Read keySet from cache directory') else: keySet = set() self.mkDir(cacheDir) logging.debug('Cache is empty. Begin with empty keySet') for doc in self.docGenerator: if 'lang' not in doc or 'filename' not in doc \ or doc['lang'] != self.lang or 'plaintext' not in doc: logging.debug('Omitting document. Important parameters missing') continue text = doc['plaintext'] filename = doc['filename'] if not self.normalizeText: yield TaggedDocument(words=text.split(), tags=[filename]) else: if filename in keySet and os.path.exists(os.path.join(cacheDir, filename)): # the file has already been normalized, let's # read the cache with open(os.path.join(cacheDir, filename)) as fh: logging.debug('Yielded from Cache') yield LabeledSentence(words=json.load(fh).split(), tags=[filename]) else: futures[filename] = p.submit(normalize, text, lang=doc['lang']) if self.normalizeText: for k, v in futures.items(): v = v.result() keySet.add(k) with open(os.path.join(cacheDir, k), 'w') as fh: json.dump(v, fh) logging.debug('Yielded from Calculation') yield LabeledSentence(words=v.split(), tags=[k])
def infer_all(db_name):
    db = pymongo.MongoClient('127.0.0.1', 27017, connect=False).get_database(db_name)
    executor = ProcessPoolExecutor(max_workers=10)
    futures = []
    for collection_name in db.collection_names():
        if not is_q_col(collection_name):
            continue
        tid = collection_name[:-2]
        q_collection = db[collection_name]
        a_collection = db[q_to_a(collection_name)]
        for q_doc in q_collection.find({}, {'qid': 1, 'topic': 1}):
            qid = q_doc['qid']
            aids = [a_doc['aid'] for a_doc in
                    a_collection.find({'qid': qid}, {'aid': 1})]
            # Pass the callable and its arguments to submit; calling
            # infer_question_task(...) directly would run it in this process.
            futures.append(
                executor.submit(infer_question_task, db_name, tid, qid, aids)
            )
    executor.shutdown()
def compute_many(hashes, n_cpus=1, *args, **kwargs):
    if n_cpus != 1:
        pool = ProcessPoolExecutor(max_workers=n_cpus)
        futures = []
        for h in hashes:
            futures.append(pool.submit(compute_single, *args, hash=h, **kwargs))

        # Wait for the futures to complete; give a progress bar
        with tqdm(total=len(futures), desc='Computing on %d cores' % n_cpus) as pbar:
            while len(futures):
                _done_is = []
                for f_i, f in enumerate(futures):
                    if f.done():
                        f.result()  # Raises exception on error
                        _done_is.append(f_i)
                        pbar.update(1)
                futures = [f for f_i, f in enumerate(futures)
                           if f_i not in _done_is]
                time.sleep(0.1)
    else:
        for h in tqdm(hashes, desc='Computing on one core'):
            compute_single(h, *args, **kwargs)
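# The polling loop above can also be expressed with
# concurrent.futures.as_completed, which yields futures as they finish and
# avoids the sleep/re-scan; a minimal sketch assuming the same compute_single:
from concurrent.futures import ProcessPoolExecutor, as_completed
from tqdm import tqdm

def compute_many_as_completed(hashes, n_cpus=2, *args, **kwargs):
    with ProcessPoolExecutor(max_workers=n_cpus) as pool:
        futures = [pool.submit(compute_single, *args, hash=h, **kwargs)
                   for h in hashes]
        for f in tqdm(as_completed(futures), total=len(futures),
                      desc='Computing on %d cores' % n_cpus):
            f.result()  # re-raise any worker exception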
def infer_many(db_name, filename):
    """
    Infer answers for a set of questions read from a file.
    Each line has the format: topic,qid,... (anything after qid is ignored).
    """
    db = pymongo.MongoClient('127.0.0.1', 27017, connect=False).get_database(db_name)
    executor = ProcessPoolExecutor(max_workers=5)
    count = 0
    futures = []
    with open(filename) as f:
        for line in f:
            tid, qid, _ = line.split(',', maxsplit=2)
            a_collection = db[a_col(tid)]
            aids = [a_doc['aid'] for a_doc in
                    a_collection.find({'qid': qid}, {'aid': 1})]
            futures.append(
                executor.submit(infer_question_task, db_name, tid, qid, aids)
            )
            count += len(aids)
    print(count)
    executor.shutdown()
class ConcurrentCrawler(BaseCrawler, ConcurrentMixin):
    """Concurrent ProcessPoolExecutor crawler

    :param pool_size: pool size of ProcessPoolExecutor
    :param timeout: request timeout in seconds
    """
    def __init__(self, worker_class, worker_kwargs=None, pool_size=5):

        # configure executor
        self.pool_size = pool_size
        self.executor = ProcessPoolExecutor(max_workers=self.pool_size)

        # prepare worker params
        self.worker_params = {
            'worker_class': worker_class,
            'worker_kwargs': worker_kwargs or {},
        }

        # inherit ENTRY_REQUESTS from worker_class
        self.ENTRY_REQUESTS = getattr(worker_class, 'ENTRY_REQUESTS', None)

    def process(self, response):
        # delegate response processing to the executor
        future = self.executor.submit(
            _run_crawler_worker, self.worker_params, response,
        )

        # build Planned object
        done_future = Planned()

        # when executor finish response processing - fire done_future
        future.add_done_callback(
            partial(self._done, response, done_future)
        )

        return done_future
hp.processed_corpus_path, file_name), move_path) print("Move Done.") if not os.path.exists("output"): os.mkdir("output") executor = ProcessPoolExecutor(max_workers=cpu_count()) futures = list() # futures.append(executor.submit( # partial(prepare_txt, save_name_list[ind], list_P))) for one_speaker_path in os.listdir(hp.vctk_wav_path): # logger = align_wav.align_wavs(os.path.join( # hp.vctk_wav_path, one_speaker_path), "words_dict.txt", "output") futures.append(executor.submit(partial(align_wav.align_wavs, os.path.join( hp.vctk_wav_path, one_speaker_path), "words_dict.txt", "output"))) # print(logger.read()) for future in futures: future.result() # print(logger.read()) # Cut Wav if not os.path.exists(hp.new_wav_path): os.mkdir(hp.new_wav_path) for ind, textgrid_name in enumerate(os.listdir(hp.output_file_name)): if textgrid_name[0] == "p": new_wav_folder = os.path.join(hp.new_wav_path, textgrid_name[0:4]) if not os.path.exists(new_wav_folder): os.mkdir(new_wav_folder)
def run_in_process(sync_fn, *args):
    pool = ProcessPoolExecutor(max_workers=1)
    result = yield pool.submit(sync_fn, *args)
    pool.shutdown()
    return result
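# run_in_process is written in Tornado's yield-based coroutine style: yielding
# the executor future suspends the coroutine until the worker process finishes.
# A minimal, self-contained sketch of the same pattern (names here are
# illustrative, not from the snippet above):
from concurrent.futures import ProcessPoolExecutor
from tornado import gen, ioloop

def cpu_bound(x):
    return x * x

@gen.coroutine
def demo():
    pool = ProcessPoolExecutor(max_workers=1)
    result = yield pool.submit(cpu_bound, 7)  # IOLoop resolves the future
    pool.shutdown()
    raise gen.Return(result)

if __name__ == '__main__':
    print(ioloop.IOLoop.current().run_sync(demo))  # -> 49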
def _run_tests(all_tests, log_name_base, extra_args): global stop, executor, futures, system_compiler xmlname = log_name_base + '.xml' junit_root = ET.Element('testsuites') conf_time = 0 build_time = 0 test_time = 0 passing_tests = 0 failing_tests = 0 skipped_tests = 0 commands = (compile_commands, clean_commands, install_commands, uninstall_commands) try: # This fails in some CI environments for unknown reasons. num_workers = multiprocessing.cpu_count() except Exception as e: print('Could not determine number of CPUs due to the following reason:' + str(e)) print('Defaulting to using only one process') num_workers = 1 # Due to Ninja deficiency, almost 50% of build time # is spent waiting. Do something useful instead. # # Remove this once the following issue has been resolved: # https://github.com/mesonbuild/meson/pull/2082 num_workers *= 2 executor = ProcessPoolExecutor(max_workers=num_workers) for name, test_cases, skipped in all_tests: current_suite = ET.SubElement(junit_root, 'testsuite', {'name': name, 'tests': str(len(test_cases))}) print() if skipped: print(bold('Not running %s tests.' % name)) else: print(bold('Running %s tests.' % name)) print() futures = [] for t in test_cases: # Jenkins screws us over by automatically sorting test cases by name # and getting it wrong by not doing logical number sorting. (testnum, testbase) = os.path.split(t)[-1].split(' ', 1) testname = '%.3d %s' % (int(testnum), testbase) should_fail = False if name.startswith('failing'): should_fail = name.split('failing-')[1] result = executor.submit(run_test, skipped, t, extra_args, system_compiler, backend, backend_flags, commands, should_fail) futures.append((testname, t, result)) for (testname, t, result) in futures: sys.stdout.flush() result = result.result() if (result is None) or (('MESON_SKIP_TEST' in result.stdo) and (skippable(name, t))): print(yellow('Skipping:'), t) current_test = ET.SubElement(current_suite, 'testcase', {'name': testname, 'classname': name}) ET.SubElement(current_test, 'skipped', {}) skipped_tests += 1 else: without_install = "" if len(install_commands) > 0 else " (without install)" if result.msg != '': print(red('Failed test{} during {}: {!r}'.format(without_install, result.step.name, t))) print('Reason:', result.msg) failing_tests += 1 if result.step == BuildStep.configure and result.mlog != no_meson_log_msg: # For configure failures, instead of printing stdout, # print the meson log if available since it's a superset # of stdout and often has very useful information. failing_logs.append(result.mlog) else: failing_logs.append(result.stdo) failing_logs.append(result.stde) else: print('Succeeded test%s: %s' % (without_install, t)) passing_tests += 1 conf_time += result.conftime build_time += result.buildtime test_time += result.testtime total_time = conf_time + build_time + test_time log_text_file(logfile, t, result.stdo, result.stde) current_test = ET.SubElement(current_suite, 'testcase', {'name': testname, 'classname': name, 'time': '%.3f' % total_time}) if result.msg != '': ET.SubElement(current_test, 'failure', {'message': result.msg}) stdoel = ET.SubElement(current_test, 'system-out') stdoel.text = result.stdo stdeel = ET.SubElement(current_test, 'system-err') stdeel.text = result.stde print("\nTotal configuration time: %.2fs" % conf_time) print("Total build time: %.2fs" % build_time) print("Total test time: %.2fs" % test_time) ET.ElementTree(element=junit_root).write(xmlname, xml_declaration=True, encoding='UTF-8') return passing_tests, failing_tests, skipped_tests
def main(): global timeout_sent args = parse_arguments() random.seed(args.seed + args.local_rank) np.random.seed(args.seed + args.local_rank) torch.manual_seed(args.seed + args.local_rank) torch.cuda.manual_seed(args.seed + args.local_rank) worker_init = WorkerInitObj(args.seed + args.local_rank) device, args = setup_training(args) # Prepare optimizer ( model, optimizer, lr_scheduler, checkpoint, global_step, criterion, ) = prepare_model_and_optimizer(args, device) raw_train_start = None most_recent_ckpts_paths = [] average_loss = 0.0 # averaged loss every args.log_freq steps epoch = 0 training_steps = 0 test_losses = [] pool = ProcessPoolExecutor(1) # Note: We loop infinitely over epochs, termination is handled via iteration count while True: thread = None restored_data_loader = None if (not args.resume_from_checkpoint or epoch > 0 or (args.phase2 and global_step < 1) or args.init_checkpoint): files = [ os.path.join(args.input_dir, f) for f in os.listdir(args.input_dir) if os.path.isfile(os.path.join(args.input_dir, f)) and "training" in f ] files.sort() num_files = len(files) random.Random(args.seed + epoch).shuffle(files) f_start_id = 0 else: f_start_id = checkpoint["files"][0] files = checkpoint["files"][1:] args.resume_from_checkpoint = False num_files = len(files) # may not exist in all checkpoints epoch = checkpoint.get("epoch", 0) restored_dataloader = checkpoint.get("data_loader", None) shared_file_list = {} if smp.is_initialized(): dpsize = smp.dp_size() dprank = smp.dp_rank() elif torch.distributed.is_initialized(): dpsize = get_world_size() dprank = get_rank() else: dpsize = 1 dprank = 0 dparallel = dpsize > 1 if dparallel and dpsize > num_files: remainder = dpsize % num_files data_file = files[(f_start_id * dpsize + dprank + remainder * f_start_id) % num_files] else: data_file = files[(f_start_id * dpsize + dprank) % num_files] previous_file = data_file if restored_data_loader is None: train_data = pretraining_dataset(data_file, args.max_predictions_per_seq) train_sampler = RandomSampler(train_data) train_dataloader = DataLoader( train_data, sampler=train_sampler, batch_size=args.train_batch_size * args.n_gpu, num_workers=4, worker_init_fn=worker_init, pin_memory=True, drop_last=True, ) # shared_file_list["0"] = (train_dataloader, data_file) else: train_dataloader = restored_data_loader restored_data_loader = None overflow_buf = None if args.allreduce_post_accumulation: overflow_buf = torch.cuda.IntTensor([0]) for f_id in range(f_start_id + 1, len(files)): if get_world_size() > num_files: data_file = files[(f_id * get_world_size() + get_rank() + remainder * f_id) % num_files] else: data_file = files[(f_id * get_world_size() + get_rank()) % num_files] previous_file = data_file dataset_future = pool.submit( create_pretraining_dataset, data_file, args.max_predictions_per_seq, shared_file_list, args, worker_init, ) train_iter = (tqdm(train_dataloader, desc="Iteration", disable=args.disable_progress_bar) if is_main_process() else train_dataloader) if raw_train_start is None: raw_train_start = time.time() for step, batch in enumerate(train_iter): training_steps += 1 batch = [t.to(device) for t in batch] input_ids, segment_ids, input_mask, masked_lm_labels, next_sentence_labels = batch if args.do_train: from smdistributed.modelparallel.test.torch.utils import dump_model, verify model.train() if args.smp > 0: loss_mbs = smp_step( args, device, input_ids, segment_ids, input_mask, masked_lm_labels, next_sentence_labels, model, optimizer, criterion, step, ) loss = loss_mbs.reduce_mean() 
if smp.rank() == 0: print("Loss:", loss.item()) else: loss = train_step( args, device, input_ids, segment_ids, input_mask, masked_lm_labels, next_sentence_labels, model, optimizer, criterion, step, ) divisor = 1 average_loss += loss.item() if training_steps % args.gradient_accumulation_steps == 0: lr_scheduler.step() # learning rate warmup global_step = take_optimizer_step( args, optimizer, model, overflow_buf, global_step) if global_step >= args.steps_this_run or timeout_sent: train_time_raw = time.time() - raw_train_start last_num_steps = (int( training_steps / args.gradient_accumulation_steps) % args.log_freq) last_num_steps = args.log_freq if last_num_steps == 0 else last_num_steps average_loss = torch.tensor( average_loss, dtype=torch.float32).cuda() average_loss = average_loss / (last_num_steps * divisor) if torch.distributed.is_initialized(): average_loss /= get_world_size() torch.distributed.all_reduce(average_loss) final_loss = loss.item() elif training_steps % ( args.log_freq * args.gradient_accumulation_steps) == 0: average_loss = 0 if (global_step >= args.steps_this_run or training_steps % (args.num_steps_per_checkpoint * args.gradient_accumulation_steps) == 0 or timeout_sent): if smp.dp_rank() == 0 and not args.skip_checkpoint: if args.resume_step < 0 or not args.phase2: output_save_file = os.path.join( args.output_dir, "ckpt_{}.pt".format(global_step)) else: output_save_file = os.path.join( args.output_dir, "ckpt_{}.pt".format(global_step + args.phase1_end_step), ) if args.do_train: save_dict = { "model": model.local_state_dict(), "optimizer": optimizer.local_state_dict(), "files": [f_id] + files, "epoch": epoch, "data_loader": None if global_step >= args.steps_this_run else train_dataloader, } if args.fp16: save_dict["master params"] = list( amp.master_params(optimizer)) # SMP: Checkpoint mp_rank specific state smp.save(save_dict, output_save_file, partial=True) most_recent_ckpts_paths.append( output_save_file) if len(most_recent_ckpts_paths) > 3 and ( args.smp == 0 or smp.dp_rank() == 0): ckpt_to_be_removed = most_recent_ckpts_paths.pop( 0) os.remove(ckpt_to_be_removed + f"_{smp.mp_rank()}") # Exiting the training due to hitting max steps, or being sent a # timeout from the cluster scheduler if global_step >= args.steps_this_run or timeout_sent: del train_dataloader # thread.join() if smp.dp_rank() == 0 and args.save_full: output_save_file = os.path.join( args.output_dir, "ckpt_{}.pt".format(global_step)) save_dict = { "model": model.local_state_dict(), "optimizer": optimizer.local_state_dict(), "files": [f_id] + files, "epoch": epoch, "data_loader": None if global_step >= args.steps_this_run else train_dataloader, } if args.fp16: save_dict["master params"] = list( amp.master_params(optimizer)) # SMP: Save a single checkpoint containing entire model parameters smp.save(save_dict, output_save_file, partial=False) smp.barrier() if smp.local_rank() == 0: print(f"Start syncing model checkpoints to s3") base_s3_path = os.path.dirname( os.path.dirname( os.getenv("SM_MODULE_DIR", ""))) curr_host = os.getenv("SM_CURRENT_HOST") full_s3_path = f"{base_s3_path}/checkpoints/{curr_host}/" sync_local_checkpoints_to_s3( local_path=args.output_dir, s3_path=full_s3_path) print( f"Finished syncing model checkpoints to s3" ) return args, final_loss, train_time_raw, global_step else: model.eval() with torch.no_grad(): loss = test_step( args, device, input_ids, segment_ids, input_mask, masked_lm_labels, next_sentence_labels, model, criterion, step, ) print(f"global_step {global_step} Test 
Loss:", loss) test_losses.append(loss) global_step += 1 if global_step >= args.steps_this_run: return sum(test_losses) / len(test_losses) del train_dataloader # thread.join() # Make sure pool has finished and switch train_dataloader # NOTE: Will block until complete train_dataloader, data_file = dataset_future.result(timeout=None) epoch += 1
def _run_tests(all_tests, log_name_base, extra_args): global stop, executor, futures, system_compiler xmlname = log_name_base + '.xml' junit_root = ET.Element('testsuites') conf_time = 0 build_time = 0 test_time = 0 passing_tests = 0 failing_tests = 0 skipped_tests = 0 commands = (compile_commands, clean_commands, install_commands, uninstall_commands) try: # This fails in some CI environments for unknown reasons. num_workers = multiprocessing.cpu_count() except Exception as e: print('Could not determine number of CPUs due to the following reason:' + str(e)) print('Defaulting to using only one process') num_workers = 1 # Due to Ninja deficiency, almost 50% of build time # is spent waiting. Do something useful instead. # # Remove this once the following issue has been resolved: # https://github.com/mesonbuild/meson/pull/2082 num_workers *= 2 executor = ProcessPoolExecutor(max_workers=num_workers) for name, test_cases, skipped in all_tests: current_suite = ET.SubElement(junit_root, 'testsuite', {'name': name, 'tests': str(len(test_cases))}) print() if skipped: print(bold('Not running %s tests.' % name)) else: print(bold('Running %s tests.' % name)) print() futures = [] for t in test_cases: # Jenkins screws us over by automatically sorting test cases by name # and getting it wrong by not doing logical number sorting. (testnum, testbase) = t.name.split(' ', 1) testname = '%.3d %s' % (int(testnum), testbase) should_fail = False if name.startswith('failing'): should_fail = name.split('failing-')[1] result = executor.submit(run_test, skipped, t.as_posix(), extra_args, system_compiler, backend, backend_flags, commands, should_fail) futures.append((testname, t, result)) for (testname, t, result) in futures: sys.stdout.flush() result = result.result() if (result is None) or (('MESON_SKIP_TEST' in result.stdo) and (skippable(name, t.as_posix()))): print(yellow('Skipping:'), t.as_posix()) current_test = ET.SubElement(current_suite, 'testcase', {'name': testname, 'classname': name}) ET.SubElement(current_test, 'skipped', {}) skipped_tests += 1 else: without_install = "" if len(install_commands) > 0 else " (without install)" if result.msg != '': print(red('Failed test{} during {}: {!r}'.format(without_install, result.step.name, t.as_posix()))) print('Reason:', result.msg) failing_tests += 1 if result.step == BuildStep.configure and result.mlog != no_meson_log_msg: # For configure failures, instead of printing stdout, # print the meson log if available since it's a superset # of stdout and often has very useful information. 
failing_logs.append(result.mlog) else: failing_logs.append(result.stdo) failing_logs.append(result.stde) else: print('Succeeded test%s: %s' % (without_install, t.as_posix())) passing_tests += 1 conf_time += result.conftime build_time += result.buildtime test_time += result.testtime total_time = conf_time + build_time + test_time log_text_file(logfile, t, result.stdo, result.stde) current_test = ET.SubElement(current_suite, 'testcase', {'name': testname, 'classname': name, 'time': '%.3f' % total_time}) if result.msg != '': ET.SubElement(current_test, 'failure', {'message': result.msg}) stdoel = ET.SubElement(current_test, 'system-out') stdoel.text = result.stdo stdeel = ET.SubElement(current_test, 'system-err') stdeel.text = result.stde print("\nTotal configuration time: %.2fs" % conf_time) print("Total build time: %.2fs" % build_time) print("Total test time: %.2fs" % test_time) ET.ElementTree(element=junit_root).write(xmlname, xml_declaration=True, encoding='UTF-8') return passing_tests, failing_tests, skipped_tests
year = args.year[0]
month = args.month[0]

sensitivity_file = Path("/home/simon/GMI_ERA5_V7_chansens18.txt")
output_path = Path(f"/gdata/simon/validation/gprof/{year}/{month:02}")
output_path.mkdir(exist_ok=True, parents=True)
log_path = Path("/home/simon/src/GPROF_2020_V1_4D_prf/log/")

# Find validation files.
path = Path(f"/gdata/simon/validation/preprocessor/{year}/{month:02}")
files = path.glob("*.pp")

def run_retrieval(f):
    stem = f.stem
    subprocess.run([
        "GPROF_2020_V1",
        str(f),
        str(output_path / (stem + ".BIN")),
        str(log_path / (stem + ".log")),
        "/qdata1/pbrown/gpm/ancillary/",
        "0"
    ])

pool = ProcessPoolExecutor(max_workers=42)
tasks = []
for f in files:
    tasks.append(pool.submit(run_retrieval, f))

for t in tqdm(tasks):
    t.result()
import os, time, random
from concurrent.futures import ProcessPoolExecutor

'''
submit: submit a task asynchronously
shutdown(wait=True): the equivalent of pool.close() + pool.join() for a
    multiprocessing pool
    wait=True  -> block until every task in the pool has finished and its
                  resources have been reclaimed
    wait=False -> return immediately, without waiting for the pool's tasks
    Either way, the program as a whole still waits for all tasks to complete.
    submit and map must be called before shutdown.
'''

def task(n):
    print(f'{os.getpid()} is running')
    time.sleep(random.randint(1, 3))
    return n ** 2

if __name__ == '__main__':
    executor = ProcessPoolExecutor(max_workers=3)
    futures = []
    for i in range(11):
        future = executor.submit(task, i)
        futures.append(future)
    executor.shutdown()
    print('----- result is -----')
    for future in futures:
        print(future.result())
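# The executor can also be used as a context manager, which calls
# shutdown(wait=True) automatically when the block exits; a minimal
# equivalent of the loop above (the square function is illustrative):
from concurrent.futures import ProcessPoolExecutor

def square(n):
    return n ** 2

if __name__ == '__main__':
    with ProcessPoolExecutor(max_workers=3) as executor:
        futures = [executor.submit(square, i) for i in range(11)]
    # leaving the with-block waits for every submitted task
    print([f.result() for f in futures])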
# def process_go(*namelist):
#     tasks = []
#     loop = asyncio.get_event_loop()
#     for name in namelist:
#         tasks.append(asyncio.ensure_future(hello(name)))
#     loop.run_until_complete(asyncio.wait(tasks))

if __name__ == '__main__':
    # executor = ThreadPoolExecutor(max_workers=3)
    executor = ProcessPoolExecutor(max_workers=3)
    f_list = []
    for url in URLS:
        # future = executor.submit(load_url, url)
        future = executor.submit(load_url, url)
        f_list.append(future)

    # With the default ALL_COMPLETED, wait() blocks until every task in the
    # pool has finished before the main thread continues.
    # With FIRST_COMPLETED, wait() returns as soon as any task finishes,
    # without waiting for the rest.
    # for i, future in enumerate(as_completed(f_list, timeout=2400)):
    #     data = future.result()
    #     print('data', data)

    # wait() returns a 2-tuple of sets: the first holds the finished futures,
    # the second the ones that have not completed yet.
    res_list = wait(f_list, return_when='FIRST_COMPLETED')
    # print('resss', res_list.result())
    # for each in res_list:
    #     each.result()
    print('res', type(res_list), res_list)
    print('00', res_list[0])
class Laikad: def __init__(self, valid_const=("GPS", "GLONASS"), auto_update=False, valid_ephem_types=(EphemerisType.ULTRA_RAPID_ORBIT, EphemerisType.NAV), save_ephemeris=False, last_known_position=None): self.astro_dog = AstroDog(valid_const=valid_const, auto_update=auto_update, valid_ephem_types=valid_ephem_types, clear_old_ephemeris=True) self.gnss_kf = GNSSKalman(GENERATED_DIR) self.orbit_fetch_executor = ProcessPoolExecutor() self.orbit_fetch_future: Optional[Future] = None self.last_fetch_orbits_t = None self.last_cached_t = None self.save_ephemeris = save_ephemeris self.load_cache() self.posfix_functions = {constellation: get_posfix_sympy_fun(constellation) for constellation in (ConstellationId.GPS, ConstellationId.GLONASS)} self.last_pos_fix = last_known_position if last_known_position is not None else [] self.last_pos_residual = [] self.last_pos_fix_t = None def load_cache(self): cache = Params().get(EPHEMERIS_CACHE) if not cache: return try: cache = json.loads(cache, object_hook=deserialize_hook) self.astro_dog.add_orbits(cache['orbits']) self.astro_dog.add_navs(cache['nav']) self.last_fetch_orbits_t = cache['last_fetch_orbits_t'] except json.decoder.JSONDecodeError: cloudlog.exception("Error parsing cache") def cache_ephemeris(self, t: GPSTime): if self.save_ephemeris and (self.last_cached_t is None or t - self.last_cached_t > SECS_IN_MIN): put_nonblocking(EPHEMERIS_CACHE, json.dumps( {'version': CACHE_VERSION, 'last_fetch_orbits_t': self.last_fetch_orbits_t, 'orbits': self.astro_dog.orbits, 'nav': self.astro_dog.nav}, cls=CacheSerializer)) self.last_cached_t = t def process_ublox_msg(self, ublox_msg, ublox_mono_time: int, block=False): if ublox_msg.which == 'measurementReport': t = ublox_mono_time * 1e-9 report = ublox_msg.measurementReport if report.gpsWeek > 0: latest_msg_t = GPSTime(report.gpsWeek, report.rcvTow) self.fetch_orbits(latest_msg_t + SECS_IN_MIN, block) new_meas = read_raw_ublox(report) processed_measurements = process_measurements(new_meas, self.astro_dog) if self.last_pos_fix_t is None or abs(self.last_pos_fix_t - t) >= 2: min_measurements = 5 if any(p.constellation_id == ConstellationId.GLONASS for p in processed_measurements) else 4 pos_fix, pos_fix_residual = calc_pos_fix_gauss_newton(processed_measurements, self.posfix_functions, min_measurements=min_measurements) if len(pos_fix) > 0: self.last_pos_fix = pos_fix[:3] self.last_pos_residual = pos_fix_residual self.last_pos_fix_t = t corrected_measurements = correct_measurements(processed_measurements, self.last_pos_fix, self.astro_dog) if self.last_pos_fix_t is not None else [] self.update_localizer(self.last_pos_fix, t, corrected_measurements) kf_valid = all(self.kf_valid(t)) ecef_pos = self.gnss_kf.x[GStates.ECEF_POS].tolist() ecef_vel = self.gnss_kf.x[GStates.ECEF_VELOCITY].tolist() pos_std = np.sqrt(abs(self.gnss_kf.P[GStates.ECEF_POS].diagonal())).tolist() vel_std = np.sqrt(abs(self.gnss_kf.P[GStates.ECEF_VELOCITY].diagonal())).tolist() meas_msgs = [create_measurement_msg(m) for m in corrected_measurements] dat = messaging.new_message("gnssMeasurements") measurement_msg = log.LiveLocationKalman.Measurement.new_message dat.gnssMeasurements = { "gpsWeek": report.gpsWeek, "gpsTimeOfWeek": report.rcvTow, "positionECEF": measurement_msg(value=ecef_pos, std=pos_std, valid=kf_valid), "velocityECEF": measurement_msg(value=ecef_vel, std=vel_std, valid=kf_valid), "positionFixECEF": measurement_msg(value=self.last_pos_fix, std=self.last_pos_residual, valid=self.last_pos_fix_t == t), "ubloxMonoTime": 
ublox_mono_time, "correctedMeasurements": meas_msgs } return dat elif ublox_msg.which == 'ephemeris': ephem = convert_ublox_ephem(ublox_msg.ephemeris) self.astro_dog.add_navs({ephem.prn: [ephem]}) self.cache_ephemeris(t=ephem.epoch) # elif ublox_msg.which == 'ionoData': # todo add this. Needed to better correct messages offline. First fix ublox_msg.cc to sent them. def update_localizer(self, est_pos, t: float, measurements: List[GNSSMeasurement]): # Check time and outputs are valid valid = self.kf_valid(t) if not all(valid): if not valid[0]: cloudlog.info("Init gnss kalman filter") elif not valid[1]: cloudlog.error("Time gap of over 10s detected, gnss kalman reset") elif not valid[2]: cloudlog.error("Gnss kalman filter state is nan") if len(est_pos) > 0: cloudlog.info(f"Reset kalman filter with {est_pos}") self.init_gnss_localizer(est_pos) else: cloudlog.info("Could not reset kalman filter") return if len(measurements) > 0: kf_add_observations(self.gnss_kf, t, measurements) else: # Ensure gnss filter is updated even with no new measurements self.gnss_kf.predict(t) def kf_valid(self, t: float): filter_time = self.gnss_kf.filter.filter_time return [filter_time is not None, filter_time is not None and abs(t - filter_time) < MAX_TIME_GAP, all(np.isfinite(self.gnss_kf.x[GStates.ECEF_POS]))] def init_gnss_localizer(self, est_pos): x_initial, p_initial_diag = np.copy(GNSSKalman.x_initial), np.copy(np.diagonal(GNSSKalman.P_initial)) x_initial[GStates.ECEF_POS] = est_pos p_initial_diag[GStates.ECEF_POS] = 1000 ** 2 self.gnss_kf.init_state(x_initial, covs_diag=p_initial_diag) def fetch_orbits(self, t: GPSTime, block): if t not in self.astro_dog.orbit_fetched_times and (self.last_fetch_orbits_t is None or t - self.last_fetch_orbits_t > SECS_IN_HR): astro_dog_vars = self.astro_dog.valid_const, self.astro_dog.auto_update, self.astro_dog.valid_ephem_types if self.orbit_fetch_future is None: self.orbit_fetch_future = self.orbit_fetch_executor.submit(get_orbit_data, t, *astro_dog_vars) if block: self.orbit_fetch_future.result() if self.orbit_fetch_future.done(): ret = self.orbit_fetch_future.result() self.last_fetch_orbits_t = t if ret: self.astro_dog.orbits, self.astro_dog.orbit_fetched_times = ret self.cache_ephemeris(t=t) self.orbit_fetch_future = None
fill = [fillAd, fillAe, fillCd, fillCe] H, A, B = matrizes.generate(b) matrix = matrizes.matrix() h = matrix[0:7, 5] iterations = 108 allOccupancies = [1, 5, 10, 20, 30, 40, 50, 60, 90] allMethods = ['FIR', 'MF', 'MP', 'OMP', 'LS-OMP', 'GD', 'SSF', 'PCD', 'TAS', 'GDi', 'SSFi', 'PCDi', 'TASi'] # occupancies utilized in some articles occupancies = [30, 60, 90] # main methods of all families tested methods = ['FIR', 'LS-OMP', 'TAS', 'PCDi'] const = collections.OrderedDict( {'methods': methods, 'iterations': iterations, 'b': b, 'e': e, 'h': h, 'H': H, 'A': A, 'B': B, 'fill': fill, 'matrix': matrix, 'window': window, 'totalSamples': totalSamples}) testSystem = TestSystem(const) m = Manager() lock = m.Lock() pool = ProcessPoolExecutor() futures = [pool.submit(test, const, occupancy, lock) for occupancy in occupancies] for future in futures: testSystem.addData(future.result()) # testSystem.graphViewer(methods, occupancies, 'ROC') testSystem.graphViewer(methods, occupancies, 'RMS')
class DataRouter(object): def __init__(self, project_dir=None, max_training_processes=1, response_log=None, emulation_mode=None, remote_storage=None, component_builder=None, model_server=None, wait_time_between_pulls=None): self._training_processes = max(max_training_processes, 1) self._current_training_processes = 0 self.responses = self._create_query_logger(response_log) self.project_dir = config.make_path_absolute(project_dir) self.emulator = self._create_emulator(emulation_mode) self.remote_storage = remote_storage self.model_server = model_server self.wait_time_between_pulls = wait_time_between_pulls if component_builder: self.component_builder = component_builder else: self.component_builder = ComponentBuilder(use_cache=True) self.project_store = self._create_project_store(project_dir) if six.PY3: # tensorflow sessions are not fork-safe, # and training processes have to be spawned instead of forked. # See https://github.com/tensorflow/tensorflow/issues/5448#issuecomment-258934405 multiprocessing.set_start_method('spawn', force=True) self.pool = ProcessPool(self._training_processes) def __del__(self): """Terminates workers pool processes""" self.pool.shutdown() @staticmethod def _create_query_logger(response_log): """Create a logger that will persist incoming query results.""" # Ensures different log files for different # processes in multi worker mode if response_log: # We need to generate a unique file name, # even in multiprocess environments timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S') log_file_name = "rasa_nlu_log-{}-{}.log".format(timestamp, os.getpid()) response_logfile = os.path.join(response_log, log_file_name) # Instantiate a standard python logger, # which we are going to use to log requests utils.create_dir_for_file(response_logfile) out_file = io.open(response_logfile, 'a', encoding='utf8') query_logger = Logger( observer=jsonFileLogObserver(out_file, recordSeparator=''), namespace='query-logger') # Prevents queries getting logged with parent logger # --> might log them to stdout logger.info("Logging requests to '{}'.".format(response_logfile)) return query_logger else: # If the user didn't provide a logging directory, we wont log! logger.info("Logging of requests is disabled. 
" "(No 'request_log' directory configured)") return None def _collect_projects(self, project_dir): if project_dir and os.path.isdir(project_dir): projects = os.listdir(project_dir) else: projects = [] projects.extend(self._list_projects_in_cloud()) return projects def _create_project_store(self, project_dir): default_project = RasaNLUModelConfig.DEFAULT_PROJECT_NAME projects = self._collect_projects(project_dir) project_store = {} if self.model_server is not None: project_store[default_project] = load_from_server( self.component_builder, default_project, self.project_dir, self.remote_storage, self.model_server, self.wait_time_between_pulls ) else: for project in projects: project_store[project] = Project(self.component_builder, project, self.project_dir, self.remote_storage) if not project_store: project_store[default_project] = Project( project=default_project, project_dir=self.project_dir, remote_storage=self.remote_storage ) return project_store def _pre_load(self, projects): logger.debug("loading %s", projects) for project in self.project_store: if project in projects: self.project_store[project].load_model() def _list_projects_in_cloud(self): try: from rasa_nlu.persistor import get_persistor p = get_persistor(self.remote_storage) if p is not None: return p.list_projects() else: return [] except Exception: logger.exception("Failed to list projects. Make sure you have " "correctly configured your cloud storage " "settings.") return [] @staticmethod def _create_emulator(mode): """Create emulator for specified mode. If no emulator is specified, we will use the Rasa NLU format.""" if mode is None: from rasa_nlu.emulators import NoEmulator return NoEmulator() elif mode.lower() == 'wit': from rasa_nlu.emulators.wit import WitEmulator return WitEmulator() elif mode.lower() == 'luis': from rasa_nlu.emulators.luis import LUISEmulator return LUISEmulator() elif mode.lower() == 'dialogflow': from rasa_nlu.emulators.dialogflow import DialogflowEmulator return DialogflowEmulator() else: raise ValueError("unknown mode : {0}".format(mode)) @staticmethod def _tf_in_pipeline(model_config): # type: (RasaNLUModelConfig) -> bool from rasa_nlu.classifiers.embedding_intent_classifier import \ EmbeddingIntentClassifier return EmbeddingIntentClassifier.name in model_config.component_names def extract(self, data): return self.emulator.normalise_request_json(data) def parse(self, data): project = data.get("project", RasaNLUModelConfig.DEFAULT_PROJECT_NAME) model = data.get("model") if project not in self.project_store: projects = self._list_projects(self.project_dir) cloud_provided_projects = self._list_projects_in_cloud() projects.extend(cloud_provided_projects) if project not in projects: raise InvalidProjectError( "No project found with name '{}'.".format(project)) else: try: self.project_store[project] = Project( self.component_builder, project, self.project_dir, self.remote_storage) except Exception as e: raise InvalidProjectError( "Unable to load project '{}'. 
" "Error: {}".format(project, e)) time = data.get('time') response = self.project_store[project].parse(data['text'], time, model) if self.responses: self.responses.info('', user_input=response, project=project, model=response.get('model')) return self.format_response(response) @staticmethod def _list_projects(path): """List the projects in the path, ignoring hidden directories.""" return [os.path.basename(fn) for fn in utils.list_subdirectories(path)] def parse_training_examples(self, examples, project, model): # type: (Optional[List[Message]], Text, Text) -> List[Dict[Text, Text]] """Parses a list of training examples to the project interpreter""" predictions = [] for ex in examples: logger.debug("Going to parse: {}".format(ex.as_dict())) response = self.project_store[project].parse(ex.text, None, model) logger.debug("Received response: {}".format(response)) predictions.append(response) return predictions def format_response(self, data): return self.emulator.normalise_response_json(data) def get_status(self): # This will only count the trainings started from this # process, if run in multi worker mode, there might # be other trainings run in different processes we don't know about. return { "max_training_processes": self._training_processes, "current_training_processes": self._current_training_processes, "available_projects": { name: project.as_dict() for name, project in self.project_store.items() } } def start_train_process(self, data_file, # type: Text project, # type: Text train_config, # type: RasaNLUModelConfig model_name=None # type: Optional[Text] ): # type: (...) -> Deferred """Start a model training.""" if not project: raise InvalidProjectError("Missing project name to train") if self._training_processes <= self._current_training_processes: raise MaxTrainingError if project in self.project_store: self.project_store[project].status = 1 elif project not in self.project_store: self.project_store[project] = Project( self.component_builder, project, self.project_dir, self.remote_storage) self.project_store[project].status = 1 def training_callback(model_path): model_dir = os.path.basename(os.path.normpath(model_path)) self.project_store[project].update(model_dir) self._current_training_processes -= 1 self.project_store[project].current_training_processes -= 1 if (self.project_store[project].status == 1 and self.project_store[project].current_training_processes == 0): self.project_store[project].status = 0 return model_dir def training_errback(failure): logger.warning(failure) target_project = self.project_store.get( failure.value.failed_target_project) self._current_training_processes -= 1 self.project_store[project].current_training_processes -= 1 if (target_project and self.project_store[project].current_training_processes == 0): target_project.status = 0 return failure logger.debug("New training queued") self._current_training_processes += 1 self.project_store[project].current_training_processes += 1 # tensorflow training is not executed in a separate thread on python 2, # as this may cause training to freeze if six.PY2 and self._tf_in_pipeline(train_config): try: logger.warning("Training a pipeline with a tensorflow " "component. 
This blocks the server during " "training.") model_path = do_train_in_worker( train_config, data_file, path=self.project_dir, project=project, fixed_model_name=model_name, storage=self.remote_storage) model_dir = os.path.basename(os.path.normpath(model_path)) training_callback(model_dir) return model_dir except TrainingException as e: logger.warning(e) target_project = self.project_store.get( e.failed_target_project) if target_project: target_project.status = 0 raise e else: result = self.pool.submit(do_train_in_worker, train_config, data_file, path=self.project_dir, project=project, fixed_model_name=model_name, storage=self.remote_storage) result = deferred_from_future(result) result.addCallback(training_callback) result.addErrback(training_errback) return result def evaluate(self, data, project=None, model=None): # type: (Text, Optional[Text], Optional[Text]) -> Dict[Text, Any] """Perform a model evaluation.""" project = project or RasaNLUModelConfig.DEFAULT_PROJECT_NAME model = model or None file_name = utils.create_temporary_file(data, "_training_data") test_data = load_data(file_name) if project not in self.project_store: raise InvalidProjectError("Project {} could not " "be found".format(project)) preds_json = self.parse_training_examples(test_data.intent_examples, project, model) predictions = [ {"text": e.text, "intent": e.data.get("intent"), "predicted": p.get("intent", {}).get("name"), "confidence": p.get("intent", {}).get("confidence")} for e, p in zip(test_data.intent_examples, preds_json) ] y_true = [e.data.get("intent") for e in test_data.intent_examples] y_true = clean_intent_labels(y_true) y_pred = [p.get("intent", {}).get("name") for p in preds_json] y_pred = clean_intent_labels(y_pred) report, precision, f1, accuracy = get_evaluation_metrics(y_true, y_pred) return { "intent_evaluation": { "report": report, "predictions": predictions, "precision": precision, "f1_score": f1, "accuracy": accuracy} } def unload_model(self, project, model): # type: (Text, Text) -> Dict[Text] """Unload a model from server memory.""" if project is None: raise InvalidProjectError("No project specified") elif project not in self.project_store: raise InvalidProjectError("Project {} could not " "be found".format(project)) try: unloaded_model = self.project_store[project].unload(model) return unloaded_model except KeyError: raise InvalidProjectError("Failed to unload model {} " "for project {}.".format(model, project))
def synthesize(hp, speaker_id='', num_sentences=0, ncores=1, topoutdir='', t2m_epoch=-1, ssrn_epoch=-1): ''' topoutdir: store samples under here; defaults to hp.sampledir t2m_epoch and ssrn_epoch: default -1 means use latest. Otherwise go to archived models. ''' assert hp.vocoder in ['griffin_lim', 'world'], 'Other vocoders than griffin_lim/world not yet supported' dataset = load_data(hp, mode="synthesis") #since mode != 'train' or 'validation', will load test_transcript rather than transcript fpaths, L = dataset['fpaths'], dataset['texts'] position_in_phone_data = duration_data = labels = None # default if hp.use_external_durations: duration_data = dataset['durations'] if num_sentences > 0: duration_data = duration_data[:num_sentences, :, :] if 'position_in_phone' in hp.history_type: ## TODO: combine + deduplicate with relevant code in train.py for making validation set def duration2position(duration, fractional=False): ### very roundabout -- need to deflate A matrix back to integers: duration = duration.sum(axis=0) #print(duration) # sys.exit('evs') positions = durations_to_position(duration, fractional=fractional) ###positions = end_pad_for_reduction_shape_sync(positions, hp) positions = positions[0::hp.r, :] #print(positions) return positions position_in_phone_data = [duration2position(dur, fractional=('fractional' in hp.history_type)) \ for dur in duration_data] position_in_phone_data = list2batch(position_in_phone_data, hp.max_T) # Ensure we aren't trying to generate more utterances than are actually in our test_transcript if num_sentences > 0: assert num_sentences <= len(fpaths) L = L[:num_sentences, :] fpaths = fpaths[:num_sentences] bases = [basename(fpath) for fpath in fpaths] if hp.merlin_label_dir: labels = [] for fpath in fpaths: label = np.load("{}/{}".format(hp.merlin_label_dir, basename(fpath)+".npy")) if hp.select_central: central_ind = get_labels_indices(hp.merlin_lab_dim) label = label[:,central_ind==1] labels.append(label) labels = list2batch(labels, hp.max_N) if speaker_id: speaker2ix = dict(zip(hp.speaker_list, range(len(hp.speaker_list)))) speaker_ix = speaker2ix[speaker_id] ## Speaker codes are held in (batch, 1) matrix -- tiling is done inside the graph: speaker_data = np.ones((len(L), 1)) * speaker_ix else: speaker_data = None if hp.turn_off_monotonic_for_synthesis: # if FIA mechanism is turn off text_lengths = get_text_lengths(L) hp.text_lengths = text_lengths + 1 # Load graph ## TODO: generalise to combine other types of models into a synthesis pipeline? g1 = Text2MelGraph(hp, mode="synthesize"); print("Graph 1 (t2m) loaded") if hp.norm == None : t2m_layer_norm = False hp.norm = 'layer' hp.lr = 0.001 hp.beta1 = 0.9 hp.beta2 = 0.999 hp.epsilon = 0.00000001 hp.decay_lr = True hp.batchsize = {'t2m': 32, 'ssrn': 8} else: t2m_layer_norm = True g2 = SSRNGraph(hp, mode="synthesize"); print("Graph 2 (ssrn) loaded") if t2m_layer_norm == False: hp.norm = None hp.lr = 0.0002 hp.beta1 = 0.5 hp.beta2 = 0.9 hp.epsilon = 0.000001 hp.decay_lr = False hp.batchsize = {'t2m': 16, 'ssrn': 8} with tf.Session() as sess: sess.run(tf.global_variables_initializer()) ### TODO: specify epoch from comm line? ### TODO: t2m and ssrn from separate configs? 
if t2m_epoch > -1: restore_archived_model_parameters(sess, hp, 't2m', t2m_epoch) else: t2m_epoch = restore_latest_model_parameters(sess, hp, 't2m') if ssrn_epoch > -1: restore_archived_model_parameters(sess, hp, 'ssrn', ssrn_epoch) else: ssrn_epoch = restore_latest_model_parameters(sess, hp, 'ssrn') # Pass input L through Text2Mel Graph t = start_clock('Text2Mel generating...') ### TODO: after futher efficiency testing, remove this fork if 1: ### efficient route -- only make K&V once ## 3.86, 3.70, 3.80 seconds (2 sentences) text_lengths = get_text_lengths(L) K, V = encode_text(hp, L, g1, sess, speaker_data=speaker_data, labels=labels) Y, lengths, alignments = synth_codedtext2mel(hp, K, V, text_lengths, g1, sess, \ speaker_data=speaker_data, duration_data=duration_data, \ position_in_phone_data=position_in_phone_data,\ labels=labels) else: ## 5.68, 5.43, 5.38 seconds (2 sentences) Y, lengths = synth_text2mel(hp, L, g1, sess, speaker_data=speaker_data, \ duration_data=duration_data, \ position_in_phone_data=position_in_phone_data, \ labels=labels) stop_clock(t) ### TODO: useful to test this? # print(Y[0,:,:]) # print (np.isnan(Y).any()) # print('nan1') # Then pass output Y of Text2Mel Graph through SSRN graph to get high res spectrogram Z. t = start_clock('Mel2Mag generating...') Z = synth_mel2mag(hp, Y, g2, sess) stop_clock(t) if (np.isnan(Z).any()): ### TODO: keep? Z = np.nan_to_num(Z) # Generate wav files if not topoutdir: topoutdir = hp.sampledir outdir = os.path.join(topoutdir, 't2m%s_ssrn%s'%(t2m_epoch, ssrn_epoch)) if speaker_id: outdir += '_speaker-%s'%(speaker_id) safe_makedir(outdir) # Plot trimmed attention alignment with filename print("Plot attention, will save to following dir: %s"%(outdir)) print("File | CDP | Ain") for i, mag in enumerate(Z): outfile = os.path.join(outdir, bases[i]) trimmed_alignment = alignments[i,:text_lengths[i],:lengths[i]] plot_alignment(hp, trimmed_alignment, utt_idx=i+1, t2m_epoch=t2m_epoch, dir=outdir, outfile=outfile) CDP = getCDP(trimmed_alignment) APin, APout = getAP(trimmed_alignment) print("%s | %.2f | %.2f"%( bases[i], CDP, APin)) print("Generating wav files, will save to following dir: %s"%(outdir)) assert hp.vocoder in ['griffin_lim', 'world'], 'Other vocoders than griffin_lim/world not yet supported' if ncores==1: for i, mag in tqdm(enumerate(Z)): outfile = os.path.join(outdir, bases[i] + '.wav') mag = mag[:lengths[i]*hp.r,:] ### trim to generated length synth_wave(hp, mag, outfile) else: executor = ProcessPoolExecutor(max_workers=ncores) futures = [] for i, mag in tqdm(enumerate(Z)): outfile = os.path.join(outdir, bases[i] + '.wav') mag = mag[:lengths[i]*hp.r,:] ### trim to generated length futures.append(executor.submit(synth_wave, hp, mag, outfile)) proc_list = [future.result() for future in tqdm(futures)]
resample_to_16k(origin_wavpath, target_wavpath, num_workers=num_workers)

# We only use the speakers listed below for this experiment.
speaker_used = ['262', '272']
speaker_used = ['p' + i for i in speaker_used]

# Next, extract the acoustic features (MCEPs, lf0) and compute the
# corresponding stats (means, stds).
# Make dirs to contain the MCEPs.
os.makedirs(mc_dir_train, exist_ok=True)
os.makedirs(mc_dir_test, exist_ok=True)

num_workers = len(speaker_used)  # cpu_count()
print("number of workers: ", num_workers)
executor = ProcessPoolExecutor(max_workers=num_workers)

work_dir = target_wavpath
# spk_folders = os.listdir(work_dir)
# print("processing {} speaker folders".format(len(spk_folders)))
# print(spk_folders)
futures = []
for spk in speaker_used:
    spk_path = os.path.join(work_dir, spk)
    futures.append(
        executor.submit(
            partial(get_spk_world_feats, spk_path, mc_dir_train,
                    mc_dir_test, sample_rate)))
result_list = [future.result() for future in tqdm(futures)]
print(result_list)
sys.exit(0)
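# Hedged aside (not from the original script above; names and paths are
# illustrative): the submit(partial(fn, ...)) idiom used above works, but
# submit() already forwards positional and keyword arguments to the callable,
# so functools.partial is optional here.
from concurrent.futures import ProcessPoolExecutor
from functools import partial


def extract_feats(spk_path, mc_dir_train, mc_dir_test, sample_rate):
    # stand-in for the real per-speaker feature extraction
    return spk_path, sample_rate


if __name__ == "__main__":
    with ProcessPoolExecutor(max_workers=2) as executor:
        f1 = executor.submit(partial(extract_feats, "wavs/p262", "mc/train", "mc/test", 16000))
        f2 = executor.submit(extract_feats, "wavs/p272", "mc/train", "mc/test", 16000)
        # both submissions are equivalent
        print(f1.result())
        print(f2.result())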
import os
import time
from concurrent.futures import ProcessPoolExecutor


def double(x):
    print(os.getpid())
    return x * 2


def double_all(values):
    # helper so a single picklable callable can be submitted to the pool
    return list(map(double, values))


if __name__ == "__main__":
    print(os.getpid())
    values = [1, 2, 3, 4, 3]
    t0 = time.time()
    results = list(map(double, values))
    t1 = time.time()
    print(t1 - t0)
    executor = ProcessPoolExecutor()
    # submit() needs a callable plus its arguments, not the already-computed result
    task = executor.submit(double_all, values)
    print("I am main program")
    t2 = time.time()
    print(t2 - t1)
    task = executor.submit(double_all, values)
    print(task.result())
    print(results)
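# A minimal, self-contained sketch (not part of the snippet above) of the other
# common pattern: executor.map() applies a function to every item of an iterable
# in worker processes and yields the results in input order, and the context
# manager form shuts the pool down automatically.
from concurrent.futures import ProcessPoolExecutor


def _double(x):
    return x * 2


if __name__ == "__main__":
    values = [1, 2, 3, 4, 3]
    with ProcessPoolExecutor() as executor:
        print(list(executor.map(_double, values)))  # [2, 4, 6, 8, 6]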
print('Making directories for MCEPs...')
os.makedirs(mc_dir_train, exist_ok=True)
os.makedirs(mc_dir_test, exist_ok=True)

num_workers = len(speaker_dirs)
print(f'Number of workers: {num_workers}')
executor = ProcessPoolExecutor(max_workers=num_workers)

futures = []
if perform_data_split == 'n':
    # current wavs we are working with (train)
    working_train_dir = target_wavpath_train
    for spk in tqdm(speaker_dirs):
        print(speaker_dirs)
        spk_dir = os.path.join(working_train_dir, spk)
        futures.append(executor.submit(partial(process_spk, spk_dir, mc_dir_train)))
    # current wavs we are working with (eval)
    working_eval_dir = target_wavpath_eval
    for spk in tqdm(speaker_dirs):
        spk_dir = os.path.join(working_eval_dir, spk)
        futures.append(executor.submit(partial(process_spk, spk_dir, mc_dir_test)))
else:
    # current wavs we are working with (all, for the data split)
    working_dir = target_wavpath
    for spk in tqdm(speaker_dirs):
        spk_dir = os.path.join(working_dir, spk)
        futures.append(executor.submit(partial(process_spk_with_split, spk_dir,
                                               mc_dir_train, mc_dir_test)))

result_list = [future.result() for future in tqdm(futures)]
print('Completed:')
from concurrent.futures import ProcessPoolExecutor import time def sleeper(id_): print(f'{id_} started') time.sleep(2) print(f'{id_} ended') pool = ProcessPoolExecutor(max_workers=5) futures = [] for i in range(1, 13, 1): f = pool.submit(sleeper, i) futures.append(f) for i in futures: i.result()
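# A hedged variant of the snippet above (illustrative only): the same fan-out, but
# results are consumed with as_completed() as the workers finish instead of in
# submission order, and the executor is used as a context manager so shutdown()
# happens automatically.
import time
from concurrent.futures import ProcessPoolExecutor, as_completed


def sleeper(id_):
    time.sleep(2)
    return id_


if __name__ == "__main__":
    with ProcessPoolExecutor(max_workers=5) as pool:
        futures = [pool.submit(sleeper, i) for i in range(1, 13)]
        for future in as_completed(futures):
            print(f'{future.result()} ended')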
def test_no_connection_sharing_among_processes(s3): executor = ProcessPoolExecutor() conn_id = executor.submit(_get_s3_id, s3).result() assert id(s3.connect()) != conn_id, \ "Processes should not share S3 connections."
threads *= 4
tpool = ProcessPoolExecutor(threads)

# show device ids
rs = client.query("show series")
dev_ids = list(map(lambda x: x[0].split('=')[1], rs.raw['series'][0]['values']))

import csv


def qry(dev_id: str):
    rs = client.query(
        f"select * from acc_data where dev_id='{dev_id}' and time >= '2019-03-26T02:00:00Z' AND time <= '2019-03-26T03:00:00Z'"
    )
    with open(f'{dev_id}_output.csv', 'w', newline='') as f:
        csv_writer = csv.writer(f)
        for line in rs.raw['series'][0]['values']:
            csv_writer.writerow(line)


start = time.time()
futures = []
for dev_id in dev_ids:  # avoid shadowing the builtin id()
    futures.append(tpool.submit(qry, dev_id))
print("program end")
# note: the futures are never waited on, so this measures submission time only
print("program processing time: ", time.time() - start)
def setup_routes(app): app.router.add_get('/data', get_all_data) app.router.add_get('/data/{mac}', get_data) if __name__ == '__main__': tags = { 'F4:A5:74:89:16:57': 'kitchen', 'CC:2C:6A:1E:59:3D': 'bedroom', 'BB:2C:6A:1E:59:3D': 'livingroom' } m = Manager() q = m.Queue() # Start background process executor = ProcessPoolExecutor(1) executor.submit(run_get_data_background, list(tags.keys()), q) loop = asyncio.get_event_loop() # Start data updater loop.create_task(data_update(q)) # Setup and start web application app = web.Application(loop=loop) setup_routes(app) web.run_app(app, host='0.0.0.0', port=5000)
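# A stripped-down sketch of the background-worker pattern above (names and timings
# are illustrative): a single-worker ProcessPoolExecutor runs a long-lived producer,
# and a multiprocessing.Manager queue (which is picklable, unlike a plain
# multiprocessing.Queue) carries its output back to the parent process.
import time
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import Manager


def producer(queue, n):
    for i in range(n):
        queue.put(i)        # push readings into the shared queue
        time.sleep(0.1)


if __name__ == "__main__":
    manager = Manager()
    queue = manager.Queue()
    executor = ProcessPoolExecutor(1)
    job = executor.submit(producer, queue, 5)
    while not job.done() or not queue.empty():
        if not queue.empty():
            print(queue.get())
        else:
            time.sleep(0.05)
    executor.shutdown()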
def run_in_process(sync_fn, *args):
    # Generator-style coroutine: intended to run under a scheduler that resolves
    # yielded futures (e.g. tornado.gen.coroutine), so the caller is not blocked
    # while the worker process executes sync_fn.
    pool = ProcessPoolExecutor(max_workers=1)
    result = yield pool.submit(sync_fn, *args)
    pool.shutdown()
    return result
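# A rough asyncio counterpart to the generator above (an assumption about its
# intent, since the surrounding coroutine machinery isn't shown): run_in_executor()
# wraps the pool's future in an awaitable, so the event loop keeps running while
# the worker process executes sync_fn.
import asyncio
from concurrent.futures import ProcessPoolExecutor


async def run_in_process_async(sync_fn, *args):
    loop = asyncio.get_running_loop()
    with ProcessPoolExecutor(max_workers=1) as pool:
        return await loop.run_in_executor(pool, sync_fn, *args)


# usage sketch: asyncio.run(run_in_process_async(some_picklable_fn, arg1, arg2))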
def build_from_path(in_dir, out_dir, filelist_names, num_workers=16, tqdm=lambda x: x): wav_paths = [] # for all speakers, count index and either add to train_list/eval_list/test_list # Create wav path list wav_paths = glob.glob(os.path.join(in_dir, 'wav_16000', '*', '*.wav')) books = glob.glob(os.path.join(in_dir, 'pron', '*.txt')) books.sort() texts2d = [[] for i in range(len(books))] for i in range(len(books)): with open(books[i], 'r', encoding='utf-8-sig') as f: lines = f.readlines() texts2d[i] = lines for i in range(len(texts2d)): for j in range(len(texts2d[i])): text = texts2d[i][j].strip() texts2d[i][j] = text path = os.path.join(in_dir, 'wav_22050') if not os.path.exists(path): os.makedirs(path) executor = ProcessPoolExecutor(max_workers=num_workers) futures = [] futures_val = [] futures_test = [] index = 1 for wav_path in wav_paths: wav_filename = os.path.basename(wav_path) lists = wav_filename.split('_') speaker = lists[0] book = int(lists[1][1:3]) - 1 sentence = int(lists[2][1:3]) - 1 try: text = texts2d[book][sentence] except: print('ERROR! OUT OF RANGE: {}'.format(wav_filename)) out_path = wav_path.replace('wav_16000', 'wav_22050') dir = os.path.dirname(out_path) if not os.path.exists(dir): os.makedirs(dir) if int(index) % 400 == 0: futures_val.append( executor.submit( partial(_process_utterance, wav_path, out_path, speaker, text))) elif int(index) % 400 == 1: futures_test.append( executor.submit( partial(_process_utterance, wav_path, out_path, speaker, text))) else: futures.append( executor.submit( partial(_process_utterance, wav_path, out_path, speaker, text))) index += 1 write_metadata([future.result() for future in tqdm(futures)], out_dir, filelist_names[0]) write_metadata([future.result() for future in tqdm(futures_val)], out_dir, filelist_names[1]) write_metadata([future.result() for future in tqdm(futures_test)], out_dir, filelist_names[2])
def process_file_list(self, run_args): """ Process a single image tile < 5000x5000 in size. """ for variable, value in run_args.items(): self.__setattr__(variable, value) assert self.mem_usage < 1.0 and self.mem_usage > 0.0 # * depend on the number of samples and their size, this may be less efficient patterning = lambda x: re.sub("([\[\]])", "[\\1]", x) file_path_list = glob.glob(patterning("%s/*" % self.input_dir)) file_path_list.sort() # ensure same order assert len(file_path_list) > 0, 'Not Detected Any Files From Path' rm_n_mkdir(self.output_dir + '/json/') rm_n_mkdir(self.output_dir + '/mat/') rm_n_mkdir(self.output_dir + '/overlay/') if self.save_qupath: rm_n_mkdir(self.output_dir + "/qupath/") def proc_callback(results): """Post processing callback. Output format is implicit assumption, taken from `_post_process_patches` """ img_name, pred_map, pred_inst, inst_info_dict, overlaid_img = results inst_type = [[k, v["type"]] for k, v in inst_info_dict.items()] inst_type = np.array(inst_type) mat_dict = { "inst_map": pred_inst, "inst_type": inst_type, } if self.nr_types is None: # matlab does not have None type array mat_dict.pop("inst_type", None) if self.save_raw_map: mat_dict["raw_map"] = pred_map save_path = "%s/mat/%s.mat" % (self.output_dir, img_name) sio.savemat(save_path, mat_dict) save_path = "%s/overlay/%s.png" % (self.output_dir, img_name) cv2.imwrite(save_path, cv2.cvtColor(overlaid_img, cv2.COLOR_RGB2BGR)) if self.save_qupath: nuc_val_list = list(inst_info_dict.values()) nuc_type_list = np.array([v["type"] for v in nuc_val_list]) nuc_coms_list = np.array([v["centroid"] for v in nuc_val_list]) save_path = "%s/qupath/%s.tsv" % (self.output_dir, img_name) convert_format.to_qupath(save_path, nuc_coms_list, nuc_type_list, self.type_info_dict) save_path = "%s/json/%s.json" % (self.output_dir, img_name) self.__save_json(save_path, inst_info_dict, None) return img_name def detach_items_of_uid(items_list, uid, nr_expected_items): item_counter = 0 detached_items_list = [] remained_items_list = [] while True: pinfo, pdata = items_list.pop(0) pinfo = np.squeeze(pinfo) if pinfo[-1] == uid: detached_items_list.append([pinfo, pdata]) item_counter += 1 else: remained_items_list.append([pinfo, pdata]) if item_counter == nr_expected_items: break # do this to ensure the ordering remained_items_list = remained_items_list + items_list return detached_items_list, remained_items_list proc_pool = None if self.nr_post_proc_workers > 0: proc_pool = ProcessPoolExecutor(self.nr_post_proc_workers) while len(file_path_list) > 0: hardware_stats = psutil.virtual_memory() available_ram = getattr(hardware_stats, "available") available_ram = int(available_ram * self.mem_usage) # available_ram >> 20 for MB, >> 30 for GB # TODO: this portion looks clunky but seems hard to detach into separate func # * caching N-files into memory such that their expected (total) memory usage # * does not exceed the designated percentage of currently available memory # * the expected memory is a factor w.r.t original input file size and # * must be manually provided file_idx = 0 use_path_list = [] cache_image_list = [] cache_patch_info_list = [] cache_image_info_list = [] while len(file_path_list) > 0: file_path = file_path_list.pop(0) img = cv2.imread(file_path) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) src_shape = img.shape img, patch_info, top_corner = _prepare_patching( img, self.patch_input_shape, self.patch_output_shape, True) self_idx = np.full(patch_info.shape[0], file_idx, dtype=np.int32) patch_info = 
np.concatenate([patch_info, self_idx[:, None]], axis=-1) # ? may be expensive op patch_info = np.split(patch_info, patch_info.shape[0], axis=0) patch_info = [np.squeeze(p) for p in patch_info] # * this factor=5 is only applicable for HoVerNet expected_usage = sys.getsizeof(img) * 5 available_ram -= expected_usage if available_ram < 0: break file_idx += 1 # if file_idx == 4: break use_path_list.append(file_path) cache_image_list.append(img) cache_patch_info_list.extend(patch_info) # TODO: refactor to explicit protocol cache_image_info_list.append( [src_shape, len(patch_info), top_corner]) # * apply neural net on cached data dataset = SerializeFileList(cache_image_list, cache_patch_info_list, self.patch_input_shape) dataloader = data.DataLoader( dataset, num_workers=self.nr_inference_workers, batch_size=self.batch_size, drop_last=False, ) pbar = tqdm.tqdm( desc="Process Patches", leave=True, total=int(len(cache_patch_info_list) / self.batch_size) + 1, ncols=80, ascii=True, position=0, ) accumulated_patch_output = [] for batch_idx, batch_data in enumerate(dataloader): sample_data_list, sample_info_list = batch_data sample_output_list = self.run_step(sample_data_list) sample_info_list = sample_info_list.numpy() curr_batch_size = sample_output_list.shape[0] sample_output_list = np.split(sample_output_list, curr_batch_size, axis=0) sample_info_list = np.split(sample_info_list, curr_batch_size, axis=0) sample_output_list = list( zip(sample_info_list, sample_output_list)) accumulated_patch_output.extend(sample_output_list) pbar.update() pbar.close() # * parallely assemble the processed cache data for each file if possible future_list = [] for file_idx, file_path in enumerate(use_path_list): image_info = cache_image_info_list[file_idx] file_ouput_data, accumulated_patch_output = detach_items_of_uid( accumulated_patch_output, file_idx, image_info[1]) # * detach this into func and multiproc dispatch it src_pos = image_info[ 2] # src top left corner within padded image src_image = cache_image_list[file_idx] src_image = src_image[src_pos[0]:src_pos[0] + image_info[0][0], src_pos[1]:src_pos[1] + image_info[0][1], ] base_name = pathlib.Path(file_path).stem file_info = { "src_shape": image_info[0], "src_image": src_image, "name": base_name, } post_proc_kwargs = { "nr_types": self.nr_types, "return_centroids": True, } # dynamicalize this overlay_kwargs = { "draw_dot": self.draw_dot, "type_colour": self.type_info_dict, "line_thickness": 2, } func_args = ( self.post_proc_func, post_proc_kwargs, file_ouput_data, file_info, overlay_kwargs, ) # dispatch for parallel post-processing if proc_pool is not None: proc_future = proc_pool.submit(_post_process_patches, *func_args) # ! manually poll future and call callback later as there is no guarantee # ! that the callback is called from main thread future_list.append(proc_future) else: proc_output = _post_process_patches(*func_args) proc_callback(proc_output) if proc_pool is not None: # loop over all to check state a.k.a polling for future in as_completed(future_list): # TODO: way to retrieve which file crashed ? # ! silent crash, cancel all and raise error if future.exception() is not None: log_info("Silent Crash") # ! cancel somehow leads to cascade error later # ! so just poll it then crash once all future # ! acquired for now # for future in future_list: # future.cancel() # break else: file_path = proc_callback(future.result()) log_info("Done Assembling %s" % file_path) return
def process_pool_executor_handler(executor: ProcessPoolExecutor, manager: DownloadProcess, file_maps: Dict[str, str], directory: str) -> None: done_queue = JoinableQueue() def update_hook(future: Future): temp = future.result() if temp: for failed_links in temp: done_queue.put(failed_links) while manager.done_retries != manager.max_retries: print( f"Starting download {manager.get_total_links() - manager.get_total_downloaded_links_count()} links left" ) available_cpus = [0, 1, 2, 3 ] if platform.system() == "Windows" else list( os.sched_getaffinity(os.getpid())) print( f"available cpu's {available_cpus}, initializing {4 * manager.get_process_num()}" f" threads with {manager.get_thread_num()} links per " f"process") if len(manager.error_links): download_links = manager.error_links.copy() manager.error_links = [] else: download_links = manager.get_download_links().copy() process_futures: List[Future] = [] start = 0 for temp_num in range(len(download_links)): end = start + manager.get_thread_num() if end > len(download_links): end = len(download_links) cpu_num = available_cpus[temp_num % len(available_cpus)] process_futures.append( executor.submit(start_threads, download_links[start:end], file_maps, manager.get_session(), directory, manager.http2, manager.debug, cpu_num)) process_futures[-1].add_done_callback(update_hook) start = end if end >= len(download_links): break wait(process_futures) while not done_queue.empty(): link = done_queue.get() manager.error_links.append(link) manager.set_total_downloaded_links_count(manager.get_total_links() - len(manager.error_links)) if manager.debug: print( f"Total downloaded links {manager.get_total_downloaded_links_count()}" ) print(f"Error links generated {len(manager.error_links)}") if len(manager.error_links): manager.set_thread_num( int( ceil((manager.get_total_links() - manager.get_total_downloaded_links_count()) / manager.get_process_num()))) print( f"{manager.get_total_links()} was expected but " f"{manager.get_total_downloaded_links_count()} was downloaded." ) manager.done_retries += 1 print(f"Trying retry {manager.done_retries}") else: break
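# A minimal sketch (illustrative names) of the callback-plus-wait pattern used
# above: add_done_callback() runs in the parent process once each future finishes,
# wait() blocks until the whole batch is done, and the collected items are read
# after the pool has shut down.
from concurrent.futures import ProcessPoolExecutor, wait


def work(chunk):
    # stand-in for a download batch; pretend the odd numbers "failed"
    return [x for x in chunk if x % 2]


def make_hook(failed):
    def hook(future):
        failed.extend(future.result())
    return hook


if __name__ == "__main__":
    failed = []
    with ProcessPoolExecutor() as executor:
        futures = [executor.submit(work, list(range(i, i + 5))) for i in range(0, 20, 5)]
        for f in futures:
            f.add_done_callback(make_hook(failed))
        wait(futures)
    print(failed)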
continue if funcs: await asyncio.wait(funcs) else: await asyncio.sleep(0.2) def run_get_datas_background(queue): def handle_new_data(new_data): current_time = datetime.now() sensor_mac = new_data[0] sensor_data = new_data[1] if sensor_mac not in all_data or all_data[sensor_mac]['data'] != sensor_data: update_data = {'mac': sensor_mac, 'data': sensor_data, 'timestamp': current_time.isoformat()} all_data[sensor_mac] = update_data queue.put(update_data) RuuviTagSensor.get_datas(handle_new_data) m = Manager() q = m.Queue() executor = ProcessPoolExecutor() executor.submit(run_get_datas_background, q) loop = asyncio.get_event_loop() loop.run_until_complete(handle_queue(q))
pool = ProcessPoolExecutor(8)
futures = []
params = []
for d in data:
    data_name = d[:-4]
    data_path = os.path.join(eval_path, data_name)
    fig_path = os.path.join(path, 'eval_result/result', data_name)
    for dir_name in sub_dirs:
        if not os.path.exists(os.path.join(fig_path, dir_name)):
            os.makedirs(os.path.join(fig_path, dir_name))
    params.append((data_path, data_name, fig_path))

for p in params:
    for bits in [3, 4, 5, 6, 7]:
        futures.append(
            pool.submit(run, p[0], p[1], True, p[2], bounded=False, x_axis=[bits]))

for f in as_completed(futures):
    print(f.result())
pool.shutdown()
for utterance_id, wav_file in enumerate(wav_file_list): wav_file_path = os.path.join(file_path, wav_file) wav = audio.load_wav(wav_file_path) mel_spec = audio.melspectrogram(wav) save_file_name = str(speaker_id) + "_" + str(utterance_id) + ".npy" np.save(os.path.join(out_dataset, save_file_name), mel_spec) if __name__ == "__main__": # # preprocess(0, "p225") # list_speaker = os.listdir(hp.origin_data) # # thrs = [threading.Thread(target=preprocess, args=[speaker_id, file_name]) # # for speaker_id, file_name in enumerate(list_speaker)] # # [thr.start() for thr in thrs] # # [thr.join() for thr in thrs] # executor = ProcessPoolExecutor(max_workers=cpu_count()) # futures = [executor.submit(partial(preprocess, speaker_id, file_name)) # for speaker_id, file_name in enumerate(list_speaker)] # [future.result() for future in futures] list_speaker = os.listdir(hp.origin_data) executor = ProcessPoolExecutor(max_workers=cpu_count()) futures = [ executor.submit(partial(preprocess_test, speaker_id, file_name)) for speaker_id, file_name in enumerate(list_speaker) ] [future.result() for future in futures]
end='')


def doing(thread2):
    print('resting for a while {}'.format(t := random.random()))
    time.sleep(t)
    # for i in range(10):
    #     task = thread2.submit(work, i, random.randint(1, 5))
    #     task_list.append(task)
    #     task.running()
    #     task.result()


if __name__ == '__main__':
    thread2 = ThreadPoolExecutor(max_workers=10)
    process1 = ProcessPoolExecutor(max_workers=10)
    pool = multiprocessing.Pool(30)
    task_list = []
    for j in range(100):
        # note: executors hold locks and cannot be pickled, so passing thread2 to
        # another process fails when the task is serialized; the error only
        # surfaces if task.result() / AsyncResult.get() is called
        pool.apply_async(doing, (thread2, ))
        task = process1.submit(doing, thread2)
        # task_list.append(task)
        # task.running()
        # task.result()
    # pool.close()
    # pool.join()
    print("\nAll threads are ready\t\n")
    process1.shutdown(wait=True)
    thread2.shutdown(wait=True)
    print("\t\nAll tasks finished")
class OptimizeWorker: def __init__(self, config: Config): self.config = config self.model = None # type: ChessModel self.loaded_filenames = set() self.loaded_data = deque( ) # this should just be a ring buffer i.e. queue of length 500,000 in AZ self.dataset = None self.optimizer = None self.executor = ProcessPoolExecutor( max_workers=config.trainer.cleaning_processes) def start(self): self.model = self.load_model() self.training() def training(self): self.compile_model() last_load_data_step = last_save_step = total_steps = self.config.trainer.start_total_steps self.load_play_data() while True: if self.dataset_size < self.config.trainer.min_data_size_to_learn: logger.info( f"dataset_size={self.dataset_size} is less than {self.config.trainer.min_data_size_to_learn}" ) sleep(60) self.load_play_data() continue #self.update_learning_rate(total_steps) steps = self.train_epoch(self.config.trainer.epoch_to_checkpoint) total_steps += steps #if last_save_step + self.config.trainer.save_model_steps < total_steps: self.save_current_model() last_save_step = total_steps # if last_load_data_step + self.config.trainer.load_data_steps < total_steps: # self.load_play_data() # last_load_data_step = total_steps def train_epoch(self, epochs): tc = self.config.trainer state_ary, policy_ary, value_ary = self.dataset self.model.model.fit(state_ary, [policy_ary, value_ary], batch_size=tc.batch_size, epochs=epochs, shuffle=True) steps = (state_ary.shape[0] // tc.batch_size) * epochs return steps def compile_model(self): from keras.optimizers import SGD, Adam self.optimizer = Adam() #SGD(lr=2e-1, momentum=0.9) # Adam better? losses = ['categorical_crossentropy', 'mean_squared_error'] # avoid overfit for supervised self.model.model.compile(optimizer=self.optimizer, loss=losses, loss_weights=self.config.trainer.loss_weights) def save_current_model(self): rc = self.config.resource model_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f") model_dir = os.path.join( rc.next_generation_model_dir, rc.next_generation_model_dirname_tmpl % model_id) os.makedirs(model_dir, exist_ok=True) config_path = os.path.join(model_dir, rc.next_generation_model_config_filename) weight_path = os.path.join(model_dir, rc.next_generation_model_weight_filename) self.model.save(config_path, weight_path) def load_play_data(self): filenames = get_game_data_filenames(self.config.resource) updated = False for filename in filenames: if filename in self.loaded_filenames: continue self.load_data_from_file(filename) updated = True # for filename in (self.loaded_filenames - set(filenames)): # self.unload_data_of_file(filename) # updated = True if updated: logger.debug("updating training dataset") self.dataset = self.collect_all_loaded_data() def collect_all_loaded_data(self): state_ary, policy_ary, value_ary = [], [], [] while self.loaded_data: s, p, v = self.loaded_data.popleft().result() state_ary.append(s) policy_ary.append(p) value_ary.append(v) state_ary = np.concatenate(state_ary) policy_ary = np.concatenate(policy_ary) value_ary = np.concatenate(value_ary) return state_ary, policy_ary, value_ary def load_model(self): from chess_zero.agent.model_chess import ChessModel model = ChessModel(self.config) rc = self.config.resource dirs = get_next_generation_model_dirs(rc) if not dirs: logger.debug("loading best model") if not load_best_model_weight(model): raise RuntimeError("Best model can not loaded!") else: latest_dir = dirs[-1] logger.debug("loading latest model") config_path = os.path.join( latest_dir, rc.next_generation_model_config_filename) 
weight_path = os.path.join( latest_dir, rc.next_generation_model_weight_filename) model.load(config_path, weight_path) return model def load_data_from_file(self, filename): # try: logger.debug(f"loading data from {filename}") data = read_game_data_from_file(filename) self.loaded_data.append( self.executor.submit(convert_to_cheating_data, data)) ### HEEEERE, use with SL self.loaded_filenames.add(filename) # except Exception as e: # logger.warning(str(e)) @property def dataset_size(self): if self.dataset is None: return 0 return len(self.dataset[0])
from concurrent.futures import ProcessPoolExecutor from time import sleep from validate_prime import is_prime, PRIMES executor = ProcessPoolExecutor(4) futures = [executor.submit(is_prime, p) for p in PRIMES[:6]] while not all(map(lambda f: f.done(), futures)): print('do sth else, waiting') sleep(1) print([f.result() for f in futures])
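# An alternative sketch to the polling loop above (assumes the same validate_prime
# module): concurrent.futures.wait() blocks until every future is done, so no
# sleep-and-poll loop is needed; as_completed() would stream the results instead.
from concurrent.futures import ProcessPoolExecutor, wait

from validate_prime import is_prime, PRIMES

if __name__ == "__main__":
    with ProcessPoolExecutor(4) as executor:
        futures = [executor.submit(is_prime, p) for p in PRIMES[:6]]
        wait(futures)  # blocks until all futures are done
        print([f.result() for f in futures])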
    w_lines_cl = []
    for line in files_cl:
        wav_path, text = line[0], line[1]
        mel = get_mel(wav_path)
        save_mel_path = str(Path(save_mel_dir).joinpath(Path(wav_path).name.replace(".wav", ".npy")))
        np.save(save_mel_path, mel)
        w_line = "{}|{}\n".format(Path(save_mel_path).name, text)
        w_lines_cl.append(w_line)
    return w_lines_cl


if __name__ == '__main__':
    """
    Note: the tengxun dataset is sampled at 16000 Hz.
    """
    meta_file = '/home/huangjiahong/tmp/tts/dataset/api/combine_dataset/tengxun_for_pytorch_tactron2/meta.txt'
    save_mel_dir = '/home/huangjiahong/tmp/tts/dataset/api/combine_dataset/tengxun_for_pytorch_tactron2/mels'
    save_meta_file = '/home/huangjiahong/tmp/tts/dataset/api/combine_dataset/tengxun_for_pytorch_tactron2/train.txt'

    filelines = files_to_list(meta_file)
    group_num = len(filelines) // 1000  # size of each group (yields roughly 1000 groups)
    lines_groups = [filelines[i:i + group_num] for i in range(0, len(filelines), group_num)]

    executor = ProcessPoolExecutor(max_workers=4)
    all_task = [executor.submit(partial(process_groups, files_cl, save_mel_dir))
                for files_cl in lines_groups]

    with open(save_meta_file, 'w', encoding='utf-8') as f:
        for task in tqdm(all_task):
            lines = task.result()
            for line in lines:
                f.write(line)
def main(): args = parse_arguments() random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) torch.cuda.manual_seed(args.seed) os.environ['PYTHONHASHSEED'] = str(args.seed) torch.cuda.manual_seed_all(args.seed) worker_init = WorkerInitObj(args.seed) torch.backends.cudnn.benchmark = False torch.backends.cudnn.deterministic = True torch.backends.cudnn.enabled = True device, args = setup_training(args) model, optimizer, criterion = prepare_model_and_optimizer(args, device) pool = ProcessPoolExecutor(1) train_iter = subsetDataloader(path=args.train_path, batch_size=args.batch_size, worker_init=worker_init) test_iter = subsetDataloader(path=args.val_path, batch_size=args.batch_size, worker_init=worker_init) print('-' * 50 + 'args' + '-' * 50) for k in list(vars(args).keys()): print('{0}: {1}'.format(k, vars(args)[k])) print('-' * 30) print(model) print('-' * 50 + 'args' + '-' * 50) global_step = 0 global_auc = 0 s_time_train = time.time() for epoch in range(args.epoch): dataset_future = pool.submit(subsetDataloader, args.train_path, args.batch_size, worker_init) for step, batch in enumerate(train_iter): model.train() labels = batch['label'].to(device).float() batch = { t: {k: v.to(device) for k, v in d.items()} for t, d in batch.items() if isinstance(d, dict) } optimizer.zero_grad() logits = model(batch) # print('logits', logits) # print('label', labels) loss = criterion(logits, labels) loss.backward() optimizer.step() # evaluate if global_step != 0 and global_step % args.eval_freq == 0: s_time_eval = time.time() model.eval() auc = evaluate(model, test_iter, device) e_time_eval = time.time() print('-' * 68) print('Epoch:[{0}] Step:[{1}] AUC:[{2}] time:[{3}s]'.format( epoch, global_step, format(auc, '.4f'), format(e_time_eval - s_time_eval, '.4f'))) if auc > global_auc: model_to_save = model.module if hasattr( model, 'module') else model output_save_file = os.path.join( args.output_dir, "{}_auc_{}_step_{}_ckpt.pt".format( args.model_name, format(auc, '.4f'), global_step)) if os.path.exists(output_save_file): os.system('rm -rf {}'.format(output_save_file)) torch.save( { 'model': model_to_save.state_dict(), 'name': args.model_name }, output_save_file) print('Epoch:[{0}] Step:[{1}] SavePath:[{2}]'.format( epoch, global_step, output_save_file)) global_auc = auc print('-' * 68) # log if global_step != 0 and global_step % args.log_freq == 0: e_time_train = time.time() print('Epoch:[{0}] Step:[{1}] Loss:[{2}] Lr:[{3}] time:[{4}s]'. format(epoch, global_step, format(loss.item(), '.4f'), format(optimizer.param_groups[0]['lr'], '.6'), format(e_time_train - s_time_train, '.4f'))) s_time_train = time.time() global_step += 1 del train_iter train_iter = dataset_future.result(timeout=None)
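# A compact sketch (illustrative names) of the prefetch pattern used in the training
# loop above: while the current epoch iterates over train_iter, the next dataset is
# built in a worker process and swapped in via future.result() at the epoch boundary.
from concurrent.futures import ProcessPoolExecutor


def build_dataset(path, batch_size):
    # stand-in for the real dataloader construction
    return [f"{path}:batch{i}" for i in range(batch_size)]


if __name__ == "__main__":
    pool = ProcessPoolExecutor(1)
    train_iter = build_dataset("train", 4)
    for epoch in range(3):
        future = pool.submit(build_dataset, "train", 4)  # prefetch next epoch's data
        for batch in train_iter:
            pass                                         # training step goes here
        train_iter = future.result()                     # swap in the prefetched data
    pool.shutdown()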
class DataRouter(object): def __init__(self, config, component_builder): self._training_processes = max(config['max_training_processes'], 1) self.config = config self.responses = self._create_query_logger(config) self.model_dir = config['path'] self.emulator = self._create_emulator() self.component_builder = component_builder if component_builder else ComponentBuilder( use_cache=True) self.project_store = self._create_project_store() self.pool = ProcessPool(self._training_processes) def __del__(self): """Terminates workers pool processes""" self.pool.shutdown() def _create_query_logger(self, config): """Creates a logger that will persist incoming queries and their results.""" response_log_dir = config['response_log'] # Ensures different log files for different processes in multi worker mode if response_log_dir: # We need to generate a unique file name, even in multiprocess environments timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S') log_file_name = "rasa_nlu_log-{}-{}.log".format( timestamp, os.getpid()) response_logfile = os.path.join(response_log_dir, log_file_name) # Instantiate a standard python logger, which we are going to use to log requests utils.create_dir_for_file(response_logfile) query_logger = Logger(observer=jsonFileLogObserver( io.open(response_logfile, 'a', encoding='utf8')), namespace='query-logger') # Prevents queries getting logged with parent logger --> might log them to stdout logger.info("Logging requests to '{}'.".format(response_logfile)) return query_logger else: # If the user didn't provide a logging directory, we wont log! logger.info( "Logging of requests is disabled. (No 'request_log' directory configured)" ) return None def _collect_projects(self): if os.path.isdir(self.config['path']): projects = os.listdir(self.config['path']) else: projects = [] projects.extend(self._list_projects_in_cloud()) return projects def _create_project_store(self): projects = self._collect_projects() project_store = {} for project in projects: project_store[project] = Project(self.config, self.component_builder, project) if not project_store: project_store[RasaNLUConfig.DEFAULT_PROJECT_NAME] = Project( self.config) return project_store def _list_projects_in_cloud(self): try: from rasa_nlu.persistor import get_persistor p = get_persistor(self.config) if p is not None: return p.list_projects() else: return [] except Exception: logger.exception("Failed to list projects.") return [] def _create_emulator(self): """Sets which NLU webservice to emulate among those supported by Rasa""" mode = self.config['emulate'] if mode is None: from rasa_nlu.emulators import NoEmulator return NoEmulator() elif mode.lower() == 'wit': from rasa_nlu.emulators.wit import WitEmulator return WitEmulator() elif mode.lower() == 'luis': from rasa_nlu.emulators.luis import LUISEmulator return LUISEmulator() elif mode.lower() == 'dialogflow': from rasa_nlu.emulators.dialogflow import DialogflowEmulator return DialogflowEmulator() else: raise ValueError("unknown mode : {0}".format(mode)) def extract(self, data): return self.emulator.normalise_request_json(data) def parse(self, data): project = data.get("project") or RasaNLUConfig.DEFAULT_PROJECT_NAME model = data.get("model") if project not in self.project_store: projects = self._list_projects(self.config['path']) cloud_provided_projects = self._list_projects_in_cloud() projects.extend(cloud_provided_projects) if project not in projects: raise InvalidProjectError( "No project found with name '{}'.".format(project)) else: try: 
self.project_store[project] = Project( self.config, self.component_builder, project) except Exception as e: raise InvalidProjectError( "Unable to load project '{}'. Error: {}".format( project, e)) time = data.get('time') response, used_model = self.project_store[project].parse( data['text'], time, model) if self.responses: self.responses.info('', user_input=response, project=project, model=used_model) return self.format_response(response) @staticmethod def _list_projects(path): """List the projects in the path, ignoring hidden directories.""" return [os.path.basename(fn) for fn in utils.list_subdirectories(path)] @staticmethod def create_temporary_file(data, suffix=""): """Creates a tempfile.NamedTemporaryFile object for data""" if PY3: f = tempfile.NamedTemporaryFile("w+", suffix=suffix, delete=False, encoding="utf-8") f.write(data) else: f = tempfile.NamedTemporaryFile("w+", suffix=suffix, delete=False) f.write(data.encode("utf-8")) f.close() return f def parse_training_examples(self, examples, project, model): # type: (Optional[List[Message]], Text, Text) -> List[Dict[Text, Text]] """Parses a list of training examples to the project interpreter""" predictions = [] for ex in examples: logger.debug("Going to parse: {}".format(ex.as_dict())) response, _ = self.project_store[project].parse( ex.text, None, model) logger.debug("Received response: {}".format(response)) predictions.append(response) return predictions def format_response(self, data): return self.emulator.normalise_response_json(data) def get_status(self): # This will only count the trainings started from this process, if run in multi worker mode, there might # be other trainings run in different processes we don't know about. return { "available_projects": { name: project.as_dict() for name, project in self.project_store.items() } } def start_train_process(self, data, config_values): # type: (Text, Dict[Text, Any]) -> Deferred """Start a model training.""" f = self.create_temporary_file(data, "_training_data") # TODO: fix config handling _config = self.config.as_dict() for key, val in config_values.items(): _config[key] = val _config["data"] = f.name train_config = RasaNLUConfig(cmdline_args=_config) project = _config.get("project") if not project: raise InvalidProjectError("Missing project name to train") elif project in self.project_store: if self.project_store[project].status == 1: raise AlreadyTrainingError else: self.project_store[project].status = 1 elif project not in self.project_store: self.project_store[project] = Project(self.config, self.component_builder, project) self.project_store[project].status = 1 def training_callback(model_path): model_dir = os.path.basename(os.path.normpath(model_path)) self.project_store[project].update(model_dir) return model_dir def training_errback(failure): target_project = self.project_store.get( failure.value.failed_target_project) if target_project: target_project.status = 0 return failure logger.debug("New training queued") result = self.pool.submit(do_train_in_worker, train_config) result = deferred_from_future(result) result.addCallback(training_callback) result.addErrback(training_errback) return result def evaluate(self, data, project=None, model=None): # type: (Text, Optional[Text], Optional[Text]) -> Dict[Text, Any] """Perform a model evaluation.""" project = project or RasaNLUConfig.DEFAULT_PROJECT_NAME model = model or None f = self.create_temporary_file(data, "_training_data") test_data = load_data(f.name) if project not in self.project_store: raise 
InvalidProjectError("Project {} could not " "be found".format(project)) preds_json = self.parse_training_examples(test_data.intent_examples, project, model) predictions = [{ "text": e.text, "intent": e.data.get("intent"), "predicted": p.get("intent", {}).get("name"), "confidence": p.get("intent", {}).get("confidence") } for e, p in zip(test_data.intent_examples, preds_json)] y_true = [e.data.get("intent") for e in test_data.intent_examples] y_true = clean_intent_labels(y_true) y_pred = [p.get("intent", {}).get("name") for p in preds_json] y_pred = clean_intent_labels(y_pred) report, precision, f1, accuracy = get_evaluation_metrics( y_true, y_pred) return { "intent_evaluation": { "report": report, "predictions": predictions, "precision": precision, "f1_score": f1, "accuracy": accuracy } }
class BokehTornado(TornadoApplication): ''' A Tornado Application used to implement the Bokeh Server. The Server class is the main public interface, this class has Tornado implementation details. Args: applications (dict of str : bokeh.application.Application) : map from paths to Application instances The application is used to create documents for each session. extra_patterns (seq[tuple]) : tuples of (str, http or websocket handler) Use this argmument to add additional endpoints to custom deployments of the Bokeh Server. ''' def __init__(self, applications, io_loop=None, extra_patterns=None): if io_loop is None: io_loop = IOLoop.current() self._loop = io_loop self._resources = {} # Wrap applications in ApplicationContext self._applications = dict() for k,v in applications.items(): self._applications[k] = ApplicationContext(v, self._loop) extra_patterns = extra_patterns or [] relative_patterns = [] for key in applications: for p in per_app_patterns: if key == "/": route = p[0] else: route = key + p[0] relative_patterns.append((route, p[1], { "application_context" : self._applications[key] })) websocket_path = None for r in relative_patterns: if r[0].endswith("/ws"): websocket_path = r[0] if not websocket_path: raise RuntimeError("Couldn't find websocket path") for r in relative_patterns: r[2]["bokeh_websocket_path"] = websocket_path all_patterns = extra_patterns + relative_patterns + toplevel_patterns log.debug("Patterns are: %r", all_patterns) super(BokehTornado, self).__init__(all_patterns, **settings) self._clients = set() self._executor = ProcessPoolExecutor(max_workers=4) self._loop.add_callback(self._start_async) self._stats_job = PeriodicCallback(self.log_stats, 15.0 * 1000, io_loop=self._loop) self._stats_job.start() self._unused_session_linger_seconds = 60*30 self._cleanup_job = PeriodicCallback(self.cleanup_sessions, 17.0 * 1000, io_loop=self._loop) self._cleanup_job.start() @property def io_loop(self): return self._loop def root_url_for_request(self, request): # If we add a "whole server prefix," we'd put that on here too return request.protocol + "://" + request.host + "/" def websocket_url_for_request(self, request, websocket_path): protocol = "ws" if request.protocol == "https": protocol = "wss" return protocol + "://" + request.host + websocket_path def resources(self, request): root_url = self.root_url_for_request(request) if root_url not in self._resources: self._resources[root_url] = Resources(mode="server", root_url=root_url) return self._resources[root_url] def start(self): ''' Start the Bokeh Server application main loop. Args: Returns: None Notes: Keyboard interrupts or sigterm will cause the server to shut down. ''' try: self._loop.start() except KeyboardInterrupt: print("\nInterrupted, shutting down") def stop(self): ''' Stop the Bokeh Server application. 
Returns: None ''' self._loop.stop() @property def executor(self): return self._executor def new_connection(self, protocol, socket, application_context, session): connection = ServerConnection(protocol, socket, application_context, session) self._clients.add(connection) return connection def client_lost(self, connection): self._clients.discard(connection) connection.detach_session() def get_session(self, app_path, session_id): if app_path not in self._applications: raise ValueError("Application %s does not exist on this server" % app_path) return self._applications[app_path].get_session(session_id) def cleanup_sessions(self): for app in self._applications.values(): app.cleanup_sessions(self._unused_session_linger_seconds) def log_stats(self): log.debug("[pid %d] %d clients connected", os.getpid(), len(self._clients)) @gen.coroutine def run_in_background(self, _func, *args, **kwargs): """ Run a synchronous function in the background without disrupting the main thread. Useful for long-running jobs. """ res = yield self._executor.submit(_func, *args, **kwargs) raise gen.Return(res) @gen.coroutine def _start_async(self): try: atexit.register(self._atexit) signal.signal(signal.SIGTERM, self._sigterm) except Exception: self.exit(1) _atexit_ran = False def _atexit(self): if self._atexit_ran: return self._atexit_ran = True self._stats_job.stop() IOLoop.clear_current() loop = IOLoop() loop.make_current() loop.run_sync(self._cleanup) def _sigterm(self, signum, frame): print("Received SIGTERM, shutting down") self.stop() self._atexit() @gen.coroutine def _cleanup(self): log.debug("Shutdown: cleaning up") self._executor.shutdown(wait=False) self._clients.clear()
def make_figures(path, limits, ncores, fudge_factor, scale): # look for pump-probe data file path = Path(path) pool = ProcessPoolExecutor(max_workers=ncores) #rcParams.update(params) try: ddfile = h5py.File(str(path/'pump-probe.h5'), 'r') absfile = h5py.File(str(path/'absorption.h5'), 'r') except FileNotFoundError as e: print('Datafiles not found in dir {!s}'.format(path)) return if scale < 0: scale = None # load ref ddref = np.array(ddfile['reference']).imag shape = (100, *ddref.shape) # calculate average pump-probe tmp = da.from_array(ddfile['00000/data'], chunks=shape) pts_used = tmp.shape[0] - 1 # assume we did Stark averaging rdataon = tmp[:pts_used].imag.mean(axis=0) rdataoff = tmp[pts_used].imag # last one is the field-off data dd = StarkData(*dask.compute(rdataon, rdataoff)) w3, w1 = np.array(ddfile['w3']), np.array(ddfile['w1']) # load evecs energies = np.array(ddfile['meta/one band energies']) nstates = energies.shape[0] evecs2 = np.array(ddfile['meta/ge eigenvectors'])**2 reorgs = np.array(ddfile['meta/reorganization energy matrix']) sbcouplingdiag = np.diag(ddfile['meta/sb coupling diagonal']) sbcouplingoffdiag = np.diag(ddfile['meta/sb coupling off-diagonal']) redfield = np.array(ddfile['meta/redfield relaxation matrix']) reorgs = np.diag(reorgs)[1:nstates+1] redfield = np.diag(redfield)[1:nstates+1] dephasingmat = np.array(ddfile['meta/lifetime dephasing matrix'])[:, ::2] + \ 1j*np.array(ddfile['meta/lifetime dephasing matrix'])[:, 1::2] mu2_trace = np.linalg.norm(np.array(ddfile['00000/meta/ge dipoles'])[..., 2:], axis=-1)**2 cfg = QcfpConfig.from_yaml(str(np.array(ddfile['cfg']))) if not cfg.include_complex_lifetimes: dephasingmat = dephasingmat.real imagdeph = dephasingmat[1:nstates+1,0].imag fixed_energies = energies - reorgs fixed_energies2 = energies - reorgs + imagdeph + fudge_factor # fudge_factor is the calibrated correction factor that comes from Stokes # shift, which moves the location of the monomer reorgs_trace = np.diagonal(ddfile['00000/meta/reorganization energy matrix'], axis1=1, axis2=2)[:, 1:nstates+1] evecs2_trace = np.array(ddfile['00000/meta/ge eigenvectors'])**2 energies_trace = np.array(ddfile['00000/meta/one band energies']) dephasingmat_trace = np.array(ddfile['00000/meta/lifetime dephasing ' 'matrix'])[:,:,::2] + \ 1j*np.array(ddfile['00000/meta/lifetime dephasing ' 'matrix'])[:,:,1::2] if not cfg.include_complex_lifetimes: dephasingmat_trace = dephasingmat_trace.real imagdeph_trace = dephasingmat_trace[:, 1:nstates+1,0].imag corr_energies = energies_trace - reorgs_trace + imagdeph_trace + fudge_factor # prepare folder for writing things figpath = (path / 'figures') figpath.mkdir(exist_ok=True) with (figpath / 'eigen-energies.info').open('w') as f: print('Eigen-energies:', energies, file=f) print('GE reorganization energies:', reorgs, file=f) print('Reorg\'ed energies:', fixed_energies, file=f) print('Dephasing: ', imagdeph, file=f) print('Reorg\'ed energies + deph + fudge:', fixed_energies2, file=f) print(file=f) for i in range(evecs2.shape[0]): print('Localization of eigenstate {:d}:'.format(i), file=f) print(evecs2[i, :], file=f) print(file=f) print('S-B diagonal couplings:', sbcouplingdiag, file=f) print('S-B off-diagonal couplings:', sbcouplingoffdiag, file=f) print(file=f) # make diagnostic plots to make sure rotational averaging matches analytic s = str(figpath / '2d-reference.png') pool.submit(plot_2d, w1=w1, w3=w3, signal=ddref, path=s, axlim=limits, scale=scale) #plot_2d(w1=w1, w3=w3, signal=ddref, path=s, axlim=limits) #s = str(figpath / 
'2d-reference-old.png') #pool.submit(plot_result, w1=w1, w3=w3, signal=ddref, path=s, # show=False) s = str(figpath / '2d-fieldon.png') pool.submit(plot_2d, w1=w1, w3=w3, signal=dd.fieldon, path=s, axlim=limits, scale=scale) s = str(figpath / '2d-fieldoff.png') pool.submit(plot_2d, w1=w1, w3=w3, signal=dd.fieldoff, path=s, axlim=limits, scale=scale) s = str(figpath / '2d-stark.png') pool.submit(plot_2d, w1=w1, w3=w3, signal=dd.fieldon-dd.fieldoff, path=s, axlim=limits, scale=scale) for i in range(nstates): s = str(figpath / '2d-evecs{:d}.png'.format(i)) pool.submit(plot_evecs, corr_energies, evecs2_trace, i, s, axlim=limits) dd_projection = -(ddref).sum(axis=1) ddess_projection = -(dd.fieldon - dd.fieldoff).sum(axis=1) # do the same for absorption absref = np.array(absfile['reference']) shape = (100, *absref.shape) tmp = da.from_array(absfile['00000/data'], chunks=shape) pts_used = tmp.shape[0] - 1 rdataon = tmp[:pts_used].mean(axis=0) rdataoff = tmp[pts_used] abs = StarkData(*dask.compute(rdataon, rdataoff)) w3 = np.array(absfile['w3']) eigenenergies = {'with dephasing': fixed_energies2/1e3, 'without dephasing': fixed_energies/1e3} # add the localization plot s = str(figpath / 'linear-localization.png') fig, (ax1, ax2) = subplots(2, 1, sharex=True) for i in range(0, nstates): weights_trace = mu2_trace*evecs2_trace[:, i, :] heights, bins = np.histogram(energies_trace.reshape(-1)/1e3, bins=80, weights=weights_trace.reshape(-1), density=False) widths = np.diff(bins) ax1.bar(bins[:-1], heights/heights.max(), widths, alpha=0.8, label='site {:d}'.format(i+1)) ax1.legend() ax2.plot(w3/1e3, abs.fieldoff, label='field off') ax2.plot(w3/1e3, abs.fieldon, label='field on') ax2.plot(w3/1e3, abs.fieldon - abs.fieldoff, label='stark') ax2.set_xlabel(r'$\omega_t$ ($\times 10^3\ \mathrm{cm}^{-1}$)') ax2.set_xlim(*limits) ax2.legend() fig.savefig(str(s)) s = str(figpath / 'linear-reference.png') pool.submit(plot_linear, w3=w3, signal=absref, path=s, axlim=limits, eigenenergies=eigenenergies, scale=scale) s = str(figpath / 'linear-fieldoff.png') pool.submit(plot_linear, w3=w3, signal=abs.fieldoff, path=s, axlim=limits, eigenenergies=eigenenergies, scale=scale) s = str(figpath / 'linear-fieldon.png') pool.submit(plot_linear, w3=w3, signal=abs.fieldon, path=s, axlim=limits, eigenenergies=eigenenergies, scale=scale) s = str(figpath / 'linear-stark.png') pool.submit(plot_linear, w3=w3, signal=abs.fieldon - abs.fieldoff, path=s, axlim=limits, eigenenergies=eigenenergies, scale=scale) s = str(figpath / 'linear-projections.png') ax, scale2 = plot_linear(w3=w3, signal=abs.fieldoff, path=s, axlim=limits) plot_linear(w3=w3, signal=dd_projection, path=s, ax=ax, axlim=limits, eigenenergies=eigenenergies, scale=scale) s = str(figpath / 'linear-stark-projections.png') ax, scale2 = plot_linear(w3=w3, signal=abs.fieldon - abs.fieldoff, path=s, axlim=limits, scale=scale) plot_linear(w3=w3, signal=ddess_projection, path=s, ax=ax, axlim=limits, eigenenergies=eigenenergies, scale=scale) print('submitted some figures') pool.shutdown(wait=True)