Example #1
class _Worker(object):
    def __init__(self, protocol=None):
        self.protocol = protocol
        self.pool = ProcessPoolExecutor(max_workers=1)
        self.pool.submit(id, 42).result()  # start the worker process

    def run(self, func, *args, **kwargs):
        """Synchronous remote function call"""

        input_payload = dumps((func, args, kwargs), protocol=self.protocol)
        result_payload = self.pool.submit(
            call_func, input_payload, self.protocol).result()
        result = loads(result_payload)

        if isinstance(result, BaseException):
            raise result
        return result

    def memsize(self):
        # pool._processes is a {pid: process} dict on recent Pythons and a list of
        # process objects on older ones; handle both when collecting worker pids.
        workers_pids = [p.pid if hasattr(p, "pid") else p
                        for p in list(self.pool._processes)]
        num_workers = len(workers_pids)
        if num_workers == 0:
            return 0
        elif num_workers > 1:
            raise RuntimeError("Unexpected number of workers: %d"
                               % num_workers)
        return psutil.Process(workers_pids[0]).memory_info().rss

    def close(self):
        self.pool.shutdown(wait=True)
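# _Worker.run above submits a module-level `call_func` helper that is not shown in
# this snippet. A minimal sketch of what it presumably does, under that assumption
# (not the original implementation): unpickle the payload, run the function, and
# pickle either the result or the raised exception so the parent can re-raise it.
from pickle import dumps, loads

def call_func(input_payload, protocol=None):
    func, args, kwargs = loads(input_payload)
    try:
        result = func(*args, **kwargs)
    except BaseException as exc:
        result = exc  # shipped back to the parent, which re-raises it
    return dumps(result, protocol=protocol)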
def main():
    """
    Makes banner requests with a thread or process pool executor, as selected by --pool.
    """
    arg_parser = ArgumentParser()
    arg_parser.add_argument("--ip", help="IP address", required=True)
    arg_parser.add_argument("--pool", help="Executor pool type", choices=("thread", "process"), required=True)
    arg_parser.add_argument(
        "--workers", help="Number of executor workers", type=int, choices=range(1, 9), required=True
    )
    args = arg_parser.parse_args()

    ip = args.ip
    pool = args.pool
    workers = args.workers

    if pool == "process":
        executor = ProcessPoolExecutor(max_workers=workers)
    elif pool == "thread":
        executor = ThreadPoolExecutor(max_workers=workers)

    for i in range(1, 256):
        for port in get_ports():
            executor.submit(banner_request, "{0}.{1}".format(ip, i), port)

    print("[!] Finished spawning banner requests")
Example #3
def splice_gmaps(threadpool, tilefolder, tempfiles, name):
    processpool = ProcessPoolExecutor()
    caption = "Rendering Zoom Layers {}".format(name)
    loadingbar = Bar(caption=caption)
    loadingbar.set_progress(0, caption)
    pygame.display.update()

    side = 1600
    zoom_levels = 4
    factor = 2 ** (zoom_levels - 1)
    masterside = side * factor
    plates = generate_plate_coords(factor, tempfiles)

    master_surface = pygame.Surface((masterside, masterside))

    done = 0
    total = len(tempfiles) + len(plates) * sum((4 ** x for x in range(zoom_levels)))
    fraction = 100 / total

    def render_base_to_master(task):
        imgdata, size, location = task.result()
        tempsurf = pygame.image.frombuffer(imgdata, size, "RGB")
        master_surface.blit(tempsurf, location)

    tasks = []
    for masterpos, pieces in plates.items():
        master_surface.fill((132, 170, 248))

        for x, y in pieces:
            task = processpool.submit(unpack, tempfiles, x, y, ((x % factor) * side, (y % factor) * side))
            tasks.append(threadpool.submit(render_base_to_master, task))
            tasks.append(task)
        current_area = masterside

        for task in tasks:
            task.result()
            done += 0.5
            loadingbar.set_progress(done * fraction, caption + " %4d of %4d" % (done, total))
        for z in range(zoom_levels):
            tasks = []
            pieces = masterside // current_area
            x_off = masterpos[0] * pieces
            y_off = masterpos[1] * pieces
            for xp in range(pieces):
                for yp in range(pieces):
                    temp = pygame.Surface.subsurface(master_surface,
                                                     (xp * current_area, yp * current_area, current_area, current_area))
                    filename = "screen_{}_{}_{}.png".format(z + 1, x_off + xp, y_off + yp)
                    data = pygame.image.tostring(temp, "RGB")
                    tasks.append(processpool.submit(render_plate, data, tilefolder, temp.get_size(), side, filename))

            for task in tasks:
                task.result()
                done += 1
                loadingbar.set_progress(done * fraction, caption + " %4d of %4d" % (done, total))
            current_area //= 2
    processpool.shutdown()
Example #4
def main():
	parser = argparse.ArgumentParser()
	group = parser.add_mutually_exclusive_group(required=True)
	group.add_argument("--filter", action="store_true", help="act as a filter")
	group.add_argument("--transform", metavar="MAPPING", type=argparse.FileType("r"), help="transform all files given in the mapping file")
	parser.add_argument("--srcprefix", metavar="PREFIX", default="", help="when transforming data files prepend this PREFIX to source paths")
	parser.add_argument("--dstprefix", metavar="PREFIX", default="", help="when transforming data files prepend this PREFIX to destination paths")
	args = parser.parse_args()
	if args.filter:
		check_stream(sys.stdin, sys.stdout)
	else:
		exe = Executor()
		res = []
		for lineno, line in enumerate(args.transform):
			line = line.split('#', 1)[0]  # comment
			line = line.rstrip()  # trailing space or newline
			match = re.match(r'^(\S+):\s*(\S+)$', line)
			if not match:
				raise ValueError("syntax error on line %d" % (lineno + 1))
			destination, source = match.groups()
			source = os.path.join(args.srcprefix, source)
			destination = os.path.join(args.dstprefix, destination)
			res.append(exe.submit(transform, source, destination))
		while res:
			res.pop(0).result()  # propagate exceptions
Example #5
class ThreadPool(object):
    '''Thread pool implementation'''

    def __init__(self, thread_num=1, process_num=1, q_size=2000, daemon=True):
        self.thread_pool = _ThreadPoolExecutor(thread_num, daemon)
        self.process_pool = ProcessPoolExecutor(process_num)
        self.result_queue = Queue(q_size)

    def wait(self, threads=[]):
        thread_wait(threads)

    def add_thread(self, target, args=()):
        result = self.thread_pool.submit(target, *args)
        return result

    def add_process(self, target, args=()):
        result = self.process_pool.submit(target, *args)
        return result

    def thread_map(self, target, args=[]):
        return [self.thread_pool.submit(target, arg) for arg in args]

    def process_map(self, target, args=[]):
        return self.process_pool.map(target, args)

    def map(self, target, args=[]):
        return self.process_map(target, args)
def build_from_path(hparams, input_dirs, mel_dir, linear_dir, wav_dir, n_jobs=12, tqdm=lambda x: x):
	"""
	Preprocesses the speech dataset from a given input path to the given output directories

	Args:
		- hparams: hyper parameters
		- input_dirs: list of input directories that contain the files to preprocess
		- mel_dir: output directory of the preprocessed speech mel-spectrogram dataset
		- linear_dir: output directory of the preprocessed speech linear-spectrogram dataset
		- wav_dir: output directory of the preprocessed speech audio dataset
		- n_jobs: Optional, number of worker processes to parallelize across
		- tqdm: Optional, provides a nice progress bar

	Returns:
		- A list of tuples describing the training examples. This should be written to train.txt
	"""

	# We use ProcessPoolExecutor to parallelize across processes; this is just an
	# optimization and it can be omitted
	executor = ProcessPoolExecutor(max_workers=n_jobs)
	futures = []
	index = 1
	for input_dir in input_dirs:
		with open(os.path.join(input_dir, 'metadata.csv'), encoding='utf-8') as f:
			for line in f:
				parts = line.strip().split('|')
				wav_path = os.path.join(input_dir, 'wavs', '{}.wav'.format(parts[0]))
				text = parts[2]
				futures.append(executor.submit(partial(_process_utterance, mel_dir, linear_dir, wav_dir, index, wav_path, text, hparams)))
				index += 1

	return [future.result() for future in tqdm(futures) if future.result() is not None]
Example #7
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
  '''Preprocesses the LJ Speech dataset from a given input path into a given output directory.

    Args:
      in_dir: The directory where you have downloaded the LJ Speech dataset
      out_dir: The directory to write the output into
      num_workers: Optional number of worker processes to parallelize across
      tqdm: You can optionally pass tqdm to get a nice progress bar

    Returns:
      A list of tuples describing the training examples. This should be written to train.txt
  '''

  # We use ProcessPoolExecutor to parallelize across processes. This is just an optimization and you
  # can omit it and just call _process_utterance on each input if you want.
  executor = ProcessPoolExecutor(max_workers=num_workers)
  futures = []
  index = 1
  with open(os.path.join(in_dir, 'metadata.csv'), encoding='utf-8') as f:
    for line in f:
      parts = line.strip().split('|')
      wav_path = os.path.join(in_dir, 'wavs', '%s.wav' % parts[0])
      text = parts[2]
      futures.append(executor.submit(partial(_process_utterance, out_dir, index, wav_path, text)))
      index += 1
  return [future.result() for future in tqdm(futures)]
Example #8
    def __call__(self, workflow, input_artifact_filepaths,
                 parameter_references, output_artifact_filepaths):
        input_artifact_abs_filepaths = \
            {k: os.path.abspath(v)
             for k, v in input_artifact_filepaths.items()}
        output_artifact_abs_filepaths = \
            {k: os.path.abspath(v)
             for k, v in output_artifact_filepaths.items()}
        job = workflow.to_script(input_artifact_abs_filepaths,
                                 parameter_references,
                                 output_artifact_abs_filepaths)
        temp_dir = tempfile.mkdtemp()
        pool = ProcessPoolExecutor(max_workers=1)
        py_filename = os.path.join(temp_dir, 'job.py')
        with open(py_filename, 'w') as py_file:
            py_file.write(job.code)
        # TODO: handle subproccess exceptions
        future = pool.submit(subprocess.run,
                             [self._python_executable, py_filename],
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
        # TODO: handle callback exceptions
        # TODO: make sure that tempdir is cleaned up even if there is an
        # exception in pool.submit or the callback
        future.add_done_callback(lambda _: shutil.rmtree(temp_dir))

        return future
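# The TODOs above note that temp_dir can leak if pool.submit or the done callback
# raises. A hedged sketch of one way to guard against that, reusing the same names;
# this is an illustration of intent, not part of the original class.
import shutil
import subprocess

def submit_with_cleanup(pool, cmd, temp_dir):
    """Submit cmd to the pool and make a best effort to remove temp_dir afterwards."""
    try:
        future = pool.submit(subprocess.run, cmd,
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except Exception:
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise
    future.add_done_callback(
        lambda _: shutil.rmtree(temp_dir, ignore_errors=True))
    return future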
Example #9
 def on_message(self, message):
     print len(message)
     result = yield tornado.gen.Task(self.process_message, message)
     return  # NOTE: early return; the pool-based path below is effectively disabled
     pool = ProcessPoolExecutor()
     fut = pool.submit(call_process, message)
     ret = yield fut
     pool.shutdown()
 def _run(self, instance_id: str, service_id: str, plan_id: str, accepts_incomplete: bool, func: Callable, *func_args) -> Any:
     # The _match_synchronicity call must come first because it may raise an exception
     sync = self._match_synchronicity(service_id, plan_id, accepts_incomplete)
     executor = ProcessPoolExecutor(max_workers=1)
     future = executor.submit(func, *func_args)
     if sync:
         return future.result(timeout=59)
     else:
         self.async_ops[instance_id] = future
         raise ProvisioningAsynchronously
Example #11
def probe( moduleName, args, torCtrl ):

    logger.info("Running module '%s'." % moduleName)
    module = __import__("modules.%s" % moduleName, fromlist=[moduleName])

    # Obtain the list of exit relays to scan.
    if args.exit:
        exitRelays = [args.exit]
    else:
        hosts = [(socket.gethostbyname(host), port) for
                 (host, port) in module.targets]
        exitRelays = exitselector.getExits(args.consensus,
                                           countryCode=args.country,
                                           hosts=hosts)

    count = len(exitRelays)
    if count < 1:
        raise error.ExitSelectionError("Exit selection yielded %d exits " \
                                       "but need at least one." % count)
    logger.info("About to probe %d exit relays." % count)

    # Create circuit pool and set up stream attacher.
    circuitPool = circuitpool.new(torCtrl, list(exitRelays))
    eventHandler = streamattacher.new(circuitPool, torCtrl)
    torCtrl.add_event_listener(eventHandler.newEvent, EventType.STREAM)

    circuits = torCtrl.get_circuits()
    logger.debug("Open circuits:")
    for circuit in circuits:
        logger.debug(circuit)

    executor = ProcessPoolExecutor(max_workers=const.CIRCUIT_POOL_SIZE)
    logger.debug("Beginning to populate process pool with %d jobs." % count)

    # Invoke a module instance for every exit relay.
    for _ in xrange(count, 0, -1):

        cmd = command.new(None)
        executor.submit(module.probe, cmd, count)
        count -= 1

    logger.info("Submitted jobs.  Terminating main scanner.")
 def post(self):
     file = self.request.files['file'][0]
     hark.client.login()
     hark.client.createSession(default_hark_config)
     log.info("Uploading asynchrounously")
     pool = ProcessPoolExecutor(max_workers=2)
     future = pool.submit(async_upload, file)
     yield future
     pool.shutdown()
     log.info("Rendering visualization page")
     self.render('visualize.html')
Example #13
def generate_stocks(freq=pd.Timedelta(seconds=60), directory=None):
    from concurrent.futures import ProcessPoolExecutor, wait
    e = ProcessPoolExecutor()
    if os.path.exists(os.path.join('data', 'daily')):
        glob_path = os.path.join('data', 'daily', '*')
    else:
        glob_path = os.path.join(daily_dir, '*')
    filenames = sorted(glob(glob_path))

    futures = [e.submit(generate_stock, fn, directory=directory, freq=freq)
                for fn in filenames]
    wait(futures)
Example #14
def build_from_path(in_dir, out_dir, num_workers=1):
    executor = ProcessPoolExecutor(max_workers=num_workers)
    futures = []
    index = 1
    with open(os.path.join(in_dir, 'metadata.csv'), encoding='utf-8') as f:
        for line in f:
            parts = line.strip().split('|')
            wav_path = os.path.join(in_dir, 'wavs', '%s.wav' % parts[0])
            text = parts[2]
            futures.append(executor.submit(
                partial(_process_utterance, out_dir, index, wav_path, text)))
            index += 1
    return [future.result() for future in futures]
def main(argv=None):
    usage = """REDCap Data Model Generator

    Usage:
        redcap dball <version> [--dir=DIR] [--db=DB] [--host=HOST] [--port=PORT] [--user=USER] [--pass=PASS]

    Options:
        -h --help       Show this screen.
        --dir=DIR       Name of the directory to output the files [default: .].
        --db=DB         Name of the REDCap database [default: redcap].
        --host=HOST     Host of the database server [default: localhost].
        --port=PORT     Port of the database server [default: 3306].
        --user=USER     Username to connect with.
        --pass=PASS     Password to connect with. If set to *, a prompt will be provided.
        --procs=PROCS   Number of processes to spawn [default: 24].

    """  # noqa

    from docopt import docopt

    args = docopt(usage, argv=argv, version='0.1')

    if args['--pass'] == '*':
        args['--pass'] = getpass('password: ')

    # The line opening the database connection was obscured in the source;
    # `db_connect` below is a placeholder name for whatever helper is used here.
    conn = db_connect(args['--db'],
                      args['--host'],
                      args['--port'],
                      args['--user'],
                      args['--pass'])

    project_names = db_projects(conn)

    pool = ProcessPoolExecutor(max_workers=int(args['--procs']))

    for name in project_names:
        pool.submit(worker, name, args)

    pool.shutdown()
Example #16
class ConcurrentDownloader(BaseDownloader, ConcurrentMixin):
    """Concurrent ProcessPoolExecutor downloader

    :param pool_size: size of ProcessPoolExecutor
    :param timeout: request timeout in seconds
    """
    def __init__(
            self, worker_class,
            worker_kwargs=None, pool_size=5, middlewares=None,):

        # configure executor
        self.pool_size = pool_size
        self.executor = ProcessPoolExecutor(max_workers=self.pool_size)

        # prepare worker params
        self.worker_params = {
            'worker_class': worker_class,
            'worker_kwargs': worker_kwargs or {},
        }

        # ctrl-c support for python2.x
        # trap sigint
        signal.signal(signal.SIGINT, lambda s, f: s)

        super(ConcurrentDownloader, self).__init__(
            middlewares=middlewares
        )

    def get(self, requests):

        for request in requests:
            # delegate request processing to the executor
            future = self.executor.submit(
                _run_download_worker, self.worker_params, request,
            )

            # build Planned object
            done_future = Planned()

            # when executor finish request - fire done_future
            future.add_done_callback(
                partial(self._done, request, done_future)
            )

            yield done_future

    def get_workers_count(self):
        return self.pool_size

    def stop(self):
        self.executor.shutdown()
Example #17
    def precompute_to_stream(self, stream, logger):
        """
        File format:
          int64: nnz in total
          padding to 128 bytes
          double[ni]: x_squared
          double[(lmax + 1) * ni]: Lambda_0
          double[(lmax + 1) * ni]: Lambda_1
          ushort[(lmax + 1)**2]: i_stops
          Format of i_stops is m-major ordering, but with, additionally, all even
          coefficients coming before the odd ones.
        """
        from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor

        executor = ProcessPoolExecutor(max_workers=8)

        start_pos = stream.tell()
        for i in range(2 * (self.lmax + 1)):
            write_int64(stream, 0)
        write_array(stream, self.x_squared)

        futures = []
        for m in range(self.lmax + 1):
            for odd in [0, 1]:
                futures.append(executor.submit(precompute_single, self.thetas, self.lmax,
                                               self.epsilon_legendre, m, odd))
        
        nnz_total = 0
        Lambda_1_list = []
        i_stops_list = []
        nnz_list = []
        it = iter(futures)
        for m in range(self.lmax + 1):
            for odd in [0, 1]:
                Lambda_0, Lambda_1, i_stops, nnz = next(it).result()
                logger.info('Got %s m=%d' % (['even', 'odd'][odd], m))
                write_array(stream, Lambda_0)
                Lambda_1_list.append(Lambda_1)
                i_stops_list.append(i_stops)
                nnz_list.append(nnz)
                nnz_total += nnz
        for arr in Lambda_1_list:
            write_array(stream, arr)
        for arr in i_stops_list:
            write_array(stream, arr)
        end_pos = stream.tell()
        stream.seek(start_pos)
        for nnz in nnz_list:
            write_int64(stream, nnz)
        stream.seek(end_pos)
        return nnz_total
Example #18
class TaskManager:

    def __init__(self, process_num, max_task_in_queue=100):
        """
        :param process_num: max process number
        :param max_task_in_queue: max_process_number + pending task number
        """
        self.process_num = process_num
        self.max_task_in_queue = max_task_in_queue
        self.pool = ProcessPoolExecutor(max_workers=process_num)
        self.task_map = {}
        self.lock = multiprocessing.Lock()

    def exec_command(self, command):
        """ simple process
        :param command: command
        :return: true or false, if sent success return True else return False
        """
        self.lock.acquire()
        if len(self.task_map) < self.max_task_in_queue:
            self.task_map[command.timestamp] = self.pool.submit(command)
            self.task_map[command.timestamp].add_done_callback(functools.partial(self.task_done, command))
            self.lock.release()
            return True
        else:
            self.lock.release()
            return False

    def task_done(self, command, future_obj):
        """
        do not change this function
        :param command: command obj
        :param future_obj: command result
        :return:
        """
        self.lock.acquire()
        # print("pop:" + str(command.timestamp))
        self.task_map.pop(command.timestamp)
        self.lock.release()

    #def shutdown(self):
    #    self.pool.shutdown()

    def is_all_done(self):
        self.lock.acquire()
        for key in self.task_map:
            if self.task_map[key].running():
                self.lock.release()
                return False
        self.lock.release()
        return True
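# A hypothetical usage sketch for TaskManager: exec_command expects a picklable
# callable carrying a `timestamp` attribute, which the class uses as the task-map
# key. EchoCommand below is an illustration only, not part of the original code.
import time

class EchoCommand:
    def __init__(self, message):
        self.message = message
        self.timestamp = time.time()  # used by TaskManager as the map key

    def __call__(self):
        return self.message.upper()

if __name__ == '__main__':
    manager = TaskManager(process_num=2)
    accepted = manager.exec_command(EchoCommand("hello"))
    print("accepted:", accepted, "all done yet:", manager.is_all_done())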
def Main():
  global gSymFileManager, gOptions, gPool

  if not ReadConfigFile():
    return 1

  # In a perfect world, we could create a process per cpu core.
  # But then we'd have to deal with cache sharing
  gPool = Pool(1)
  gPool.submit(initializeSubprocess, gOptions)

  # Setup logging in the parent process.
  # Ensure this is called after the call to initializeSubprocess to
  # avoid duplicate messages in Unix systems.
  SetLoggingOptions(gOptions["Log"])

  LogMessage("Starting server with the following options:\n" + str(gOptions))

  app = Application([
    url(r'/(debug)', DebugHandler),
    url(r'/(nodebug)', DebugHandler),
    url(r"(.*)", SymbolHandler)])

  app.listen(gOptions['portNumber'], gOptions['hostname'])

  try:
    # select on Windows doesn't return on ctrl-c, add a periodic
    # callback to make ctrl-c responsive
    if sys.platform == 'win32':
        PeriodicCallback(lambda: None, 100).start()
    IOLoop.current().start()
  except KeyboardInterrupt:
    LogMessage("Received SIGINT, stopping...")

  gPool.shutdown()
  LogMessage("Server stopped - " + gOptions['hostname'] + ":" + str(gOptions['portNumber']))
  return 0
def spark_submit(exec_string, log_file, driver_path):
    """
    asynchronously run the pyspark/sparktk submitted script while writing the logs to the log_file for the app
    :param exec_string: the command that is going to be run
    :param log_file: the file containing command(script) logs while running
    :param driver_path: the path to the main sparktk/pyspark script within the uploads folder
    :return: None
    """
    print "Entering spark_submit"
    mark_submitted(driver_path)
    pool = Pool(max_workers=1)
    cmd_string = "%s >>%s 2>&1" % (exec_string, log_file)
    print "CMD stting is %s" % (cmd_string)
    future = pool.submit(subprocess.call, cmd_string, shell=True)
    future.driver_path = driver_path
    future.add_done_callback(mark_completed)
Example #21
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
  executor = ProcessPoolExecutor(max_workers=num_workers)
  futures = []
  index = 1
  for book in books:
    with open(os.path.join(in_dir, book, 'sentence_index.txt')) as f:
      for line in f:
        parts = line.strip().split('\t')
        if line[0] != '#' and len(parts) == 8 and float(parts[3]) > _min_confidence:
          wav_path = os.path.join(in_dir, book, 'wav', '%s.wav' % parts[0])
          labels_path = os.path.join(in_dir, book, 'lab', '%s.lab' % parts[0])
          text = parts[5]
          task = partial(_process_utterance, out_dir, index, wav_path, labels_path, text)
          futures.append(executor.submit(task))
          index += 1
  results = [future.result() for future in tqdm(futures)]
  return [r for r in results if r is not None]
    def __iter__(self):
        p = ProcessPoolExecutor(self.workers)
        cacheDir = os.path.join('cache', 'normalizedDocs')
        cacheDict = {}
        futures = OrderedDict()
            
        if os.path.exists(cacheDir):
            keySet = set(os.listdir(cacheDir))
            logging.debug('Read keySet from cache directory')
        else:
            keySet = set()
            self.mkDir(cacheDir)
            logging.debug('Cache is empty. Begin with empty keySet')
                    
        for doc in self.docGenerator:
            if 'lang' not in doc or 'filename' not in doc \
            or doc['lang'] != self.lang or 'plaintext' not in doc:
                logging.debug('Omitting document. Important parameters missing')
                continue
                        
            text = doc['plaintext']
            filename = doc['filename']
                        
            if not self.normalizeText:
                yield TaggedDocument(words=text.split(), tags=[filename])
            else:
                if filename in keySet and os.path.exists(os.path.join(cacheDir, filename)):
                    # the file has already been normalized, let's
                    # read the cache
                    with open(os.path.join(cacheDir, filename)) as fh:
                        logging.debug('Yielded from Cache')
                        yield LabeledSentence(words=json.load(fh).split(), tags=[filename])
                else:
                    futures[filename] = p.submit(normalize, text, lang=doc['lang'])

            if self.normalizeText:
                for k, v in futures.items():
                    v = v.result()
                    keySet.add(k)
                    with open(os.path.join(cacheDir, k), 'w') as fh:
                        json.dump(v, fh)
                        logging.debug('Yielded from Calculation')
                        yield LabeledSentence(words=v.split(), tags=[k])
Example #23
def infer_all(db_name):
    db = pymongo.MongoClient('127.0.0.1', 27017, connect=False).get_database(db_name)
    executor = ProcessPoolExecutor(max_workers=10)

    futures = []
    for collection_name in db.collection_names():
        if not is_q_col(collection_name):
            continue
        tid = collection_name[:-2]
        q_collection = db[collection_name]
        a_collection = db[q_to_a(collection_name)]
        for q_doc in q_collection.find({}, {'qid':1, 'topic':1}):
            qid = q_doc['qid']
            aids = [a_doc['aid'] for a_doc in
                    a_collection.find({'qid': qid}, {'aid': 1})]
            futures.append(
                executor.submit(infer_question_task, db_name, tid, qid, aids)
            )

    executor.shutdown()
Example #24
def compute_many(hashes, n_cpus=1, *args, **kwargs):
    if n_cpus != 1:
        pool = ProcessPoolExecutor(max_workers=n_cpus)
        futures = []
        for h in hashes:
            futures.append(pool.submit(compute_single, *args, hash=h, **kwargs))

        # Wait for the futures to complete; show a progress bar
        with tqdm(total=len(futures), desc='Computing on %d cores' % n_cpus) as pbar:
            while len(futures):
                _done_is = []
                for f_i, f in enumerate(futures):
                    if f.done():
                        f.result()     # Raises exception on error
                        _done_is.append(f_i)
                        pbar.update(1)
                futures = [f for f_i, f in enumerate(futures) if f_i not in _done_is]
                time.sleep(0.1)
    else:
        for h in tqdm(hashes, desc='Computing on one core'):
            compute_single(h, *args, **kwargs)
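# The polling loop above can also be written with concurrent.futures.as_completed,
# which yields futures as they finish. A sketch of the same progress-bar pattern,
# assuming the same compute_single helper used above:
from concurrent.futures import ProcessPoolExecutor, as_completed
from tqdm import tqdm

def compute_many_as_completed(hashes, n_cpus=2, *args, **kwargs):
    with ProcessPoolExecutor(max_workers=n_cpus) as pool:
        futures = [pool.submit(compute_single, *args, hash=h, **kwargs) for h in hashes]
        for future in tqdm(as_completed(futures), total=len(futures),
                           desc='Computing on %d cores' % n_cpus):
            future.result()  # raises the worker's exception, if any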
Example #25
def infer_many(db_name, filename):
    """
    Infer answers for a set of questions. Reads a file where each line has the format
    topic,qid,... (whatever comes after does not matter)
    """
    db = pymongo.MongoClient('127.0.0.1', 27017, connect=False).get_database(db_name)
    executor = ProcessPoolExecutor(max_workers=5)

    count = 0
    futures = []
    with open(filename) as f:
        for line in f:
            tid, qid, _ = line.split(',', maxsplit=2)
            a_collection = db[a_col(tid)]
            aids = [a_doc['aid'] for a_doc in
                    a_collection.find({'qid': qid}, {'aid': 1})]
            futures.append(
                executor.submit(infer_question_task, db_name, tid, qid, aids)
            )
            count += len(aids)

    print(count)
    executor.shutdown()
Example #26
class ConcurrentCrawler(BaseCrawler, ConcurrentMixin):
    """Concurrent ProcessPoolExecutor crawler

    :param pool_size: pool size of ProcessPoolExecutor
    :param timeout: request timeout in seconds
    """

    def __init__(self, worker_class, worker_kwargs=None, pool_size=5):

        # configure executor
        self.pool_size = pool_size
        self.executor = ProcessPoolExecutor(max_workers=self.pool_size)

        # prepare worker params
        self.worker_params = {
            'worker_class': worker_class,
            'worker_kwargs': worker_kwargs or {},
        }

        # inherit ENTRY_REQUESTS from worker_class
        self.ENTRY_REQUESTS = getattr(worker_class, 'ENTRY_REQUESTS', None)

    def process(self, response):
        # delegate response processing to the executor
        future = self.executor.submit(
            _run_crawler_worker, self.worker_params, response,
        )

        # build Planned object
        done_future = Planned()

        # when executor finish response processing - fire done_future
        future.add_done_callback(
            partial(self._done, response, done_future)
        )

        return done_future
Example #27
            hp.processed_corpus_path, file_name), move_path)
    
    print("Move Done.")

    if not os.path.exists("output"):
        os.mkdir("output")

    executor = ProcessPoolExecutor(max_workers=cpu_count())
    futures = list()
    # futures.append(executor.submit(
    #         partial(prepare_txt, save_name_list[ind], list_P)))

    for one_speaker_path in os.listdir(hp.vctk_wav_path):
        # logger = align_wav.align_wavs(os.path.join(
        #     hp.vctk_wav_path, one_speaker_path), "words_dict.txt", "output")
        futures.append(executor.submit(partial(align_wav.align_wavs, os.path.join(
            hp.vctk_wav_path, one_speaker_path), "words_dict.txt", "output")))
        # print(logger.read())

    for future in futures:
        future.result()
        # print(logger.read())

    # Cut Wav
    if not os.path.exists(hp.new_wav_path):
        os.mkdir(hp.new_wav_path)

    for ind, textgrid_name in enumerate(os.listdir(hp.output_file_name)):
        if textgrid_name[0] == "p":
            new_wav_folder = os.path.join(hp.new_wav_path, textgrid_name[0:4])
            if not os.path.exists(new_wav_folder):
                os.mkdir(new_wav_folder)
Example #28
def run_in_process(sync_fn, *args):
    pool = ProcessPoolExecutor(max_workers=1)
    result = yield pool.submit(sync_fn, *args)
    pool.shutdown()
    return result
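# As written, run_in_process only works inside a coroutine runner that can resolve a
# yielded concurrent.futures future (Tornado does this). A hedged usage sketch under
# that assumption, with the missing decorator and a tiny driver added:
from concurrent.futures import ProcessPoolExecutor
from tornado import gen, ioloop

@gen.coroutine
def run_in_process(sync_fn, *args):
    pool = ProcessPoolExecutor(max_workers=1)
    result = yield pool.submit(sync_fn, *args)
    pool.shutdown()
    return result  # valid inside a generator on Python 3

def square(x):
    return x * x

if __name__ == '__main__':
    print(ioloop.IOLoop.current().run_sync(lambda: run_in_process(square, 7)))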
Example #29
def _run_tests(all_tests, log_name_base, extra_args):
    global stop, executor, futures, system_compiler
    xmlname = log_name_base + '.xml'
    junit_root = ET.Element('testsuites')
    conf_time = 0
    build_time = 0
    test_time = 0
    passing_tests = 0
    failing_tests = 0
    skipped_tests = 0
    commands = (compile_commands, clean_commands, install_commands, uninstall_commands)

    try:
        # This fails in some CI environments for unknown reasons.
        num_workers = multiprocessing.cpu_count()
    except Exception as e:
        print('Could not determine number of CPUs due to the following reason:' + str(e))
        print('Defaulting to using only one process')
        num_workers = 1
    # Due to Ninja deficiency, almost 50% of build time
    # is spent waiting. Do something useful instead.
    #
    # Remove this once the following issue has been resolved:
    # https://github.com/mesonbuild/meson/pull/2082
    num_workers *= 2
    executor = ProcessPoolExecutor(max_workers=num_workers)

    for name, test_cases, skipped in all_tests:
        current_suite = ET.SubElement(junit_root, 'testsuite', {'name': name, 'tests': str(len(test_cases))})
        print()
        if skipped:
            print(bold('Not running %s tests.' % name))
        else:
            print(bold('Running %s tests.' % name))
        print()
        futures = []
        for t in test_cases:
            # Jenkins screws us over by automatically sorting test cases by name
            # and getting it wrong by not doing logical number sorting.
            (testnum, testbase) = os.path.split(t)[-1].split(' ', 1)
            testname = '%.3d %s' % (int(testnum), testbase)
            should_fail = False
            if name.startswith('failing'):
                should_fail = name.split('failing-')[1]
            result = executor.submit(run_test, skipped, t, extra_args, system_compiler, backend, backend_flags, commands, should_fail)
            futures.append((testname, t, result))
        for (testname, t, result) in futures:
            sys.stdout.flush()
            result = result.result()
            if (result is None) or (('MESON_SKIP_TEST' in result.stdo) and (skippable(name, t))):
                print(yellow('Skipping:'), t)
                current_test = ET.SubElement(current_suite, 'testcase', {'name': testname,
                                                                         'classname': name})
                ET.SubElement(current_test, 'skipped', {})
                skipped_tests += 1
            else:
                without_install = "" if len(install_commands) > 0 else " (without install)"
                if result.msg != '':
                    print(red('Failed test{} during {}: {!r}'.format(without_install, result.step.name, t)))
                    print('Reason:', result.msg)
                    failing_tests += 1
                    if result.step == BuildStep.configure and result.mlog != no_meson_log_msg:
                        # For configure failures, instead of printing stdout,
                        # print the meson log if available since it's a superset
                        # of stdout and often has very useful information.
                        failing_logs.append(result.mlog)
                    else:
                        failing_logs.append(result.stdo)
                    failing_logs.append(result.stde)
                else:
                    print('Succeeded test%s: %s' % (without_install, t))
                    passing_tests += 1
                conf_time += result.conftime
                build_time += result.buildtime
                test_time += result.testtime
                total_time = conf_time + build_time + test_time
                log_text_file(logfile, t, result.stdo, result.stde)
                current_test = ET.SubElement(current_suite, 'testcase', {'name': testname,
                                                                         'classname': name,
                                                                         'time': '%.3f' % total_time})
                if result.msg != '':
                    ET.SubElement(current_test, 'failure', {'message': result.msg})
                stdoel = ET.SubElement(current_test, 'system-out')
                stdoel.text = result.stdo
                stdeel = ET.SubElement(current_test, 'system-err')
                stdeel.text = result.stde
    print("\nTotal configuration time: %.2fs" % conf_time)
    print("Total build time: %.2fs" % build_time)
    print("Total test time: %.2fs" % test_time)
    ET.ElementTree(element=junit_root).write(xmlname, xml_declaration=True, encoding='UTF-8')
    return passing_tests, failing_tests, skipped_tests
def main():
    global timeout_sent

    args = parse_arguments()

    random.seed(args.seed + args.local_rank)
    np.random.seed(args.seed + args.local_rank)
    torch.manual_seed(args.seed + args.local_rank)
    torch.cuda.manual_seed(args.seed + args.local_rank)
    worker_init = WorkerInitObj(args.seed + args.local_rank)

    device, args = setup_training(args)

    # Prepare optimizer
    (
        model,
        optimizer,
        lr_scheduler,
        checkpoint,
        global_step,
        criterion,
    ) = prepare_model_and_optimizer(args, device)

    raw_train_start = None
    most_recent_ckpts_paths = []
    average_loss = 0.0  # averaged loss every args.log_freq steps
    epoch = 0
    training_steps = 0
    test_losses = []

    pool = ProcessPoolExecutor(1)

    # Note: We loop infinitely over epochs, termination is handled via iteration count
    while True:
        thread = None
        restored_data_loader = None
        if (not args.resume_from_checkpoint or epoch > 0
                or (args.phase2 and global_step < 1) or args.init_checkpoint):
            files = [
                os.path.join(args.input_dir, f)
                for f in os.listdir(args.input_dir)
                if os.path.isfile(os.path.join(args.input_dir, f))
                and "training" in f
            ]
            files.sort()
            num_files = len(files)
            random.Random(args.seed + epoch).shuffle(files)
            f_start_id = 0
        else:
            f_start_id = checkpoint["files"][0]
            files = checkpoint["files"][1:]
            args.resume_from_checkpoint = False
            num_files = len(files)
            # may not exist in all checkpoints
            epoch = checkpoint.get("epoch", 0)
            restored_data_loader = checkpoint.get("data_loader", None)

        shared_file_list = {}

        if smp.is_initialized():
            dpsize = smp.dp_size()
            dprank = smp.dp_rank()
        elif torch.distributed.is_initialized():
            dpsize = get_world_size()
            dprank = get_rank()
        else:
            dpsize = 1
            dprank = 0
        dparallel = dpsize > 1
        if dparallel and dpsize > num_files:
            remainder = dpsize % num_files
            data_file = files[(f_start_id * dpsize + dprank +
                               remainder * f_start_id) % num_files]
        else:
            data_file = files[(f_start_id * dpsize + dprank) % num_files]

        previous_file = data_file

        if restored_data_loader is None:
            train_data = pretraining_dataset(data_file,
                                             args.max_predictions_per_seq)
            train_sampler = RandomSampler(train_data)
            train_dataloader = DataLoader(
                train_data,
                sampler=train_sampler,
                batch_size=args.train_batch_size * args.n_gpu,
                num_workers=4,
                worker_init_fn=worker_init,
                pin_memory=True,
                drop_last=True,
            )
            # shared_file_list["0"] = (train_dataloader, data_file)
        else:
            train_dataloader = restored_data_loader
            restored_data_loader = None

        overflow_buf = None
        if args.allreduce_post_accumulation:
            overflow_buf = torch.cuda.IntTensor([0])

        for f_id in range(f_start_id + 1, len(files)):
            if get_world_size() > num_files:
                data_file = files[(f_id * get_world_size() + get_rank() +
                                   remainder * f_id) % num_files]
            else:
                data_file = files[(f_id * get_world_size() + get_rank()) %
                                  num_files]

            previous_file = data_file

            dataset_future = pool.submit(
                create_pretraining_dataset,
                data_file,
                args.max_predictions_per_seq,
                shared_file_list,
                args,
                worker_init,
            )

            train_iter = (tqdm(train_dataloader,
                               desc="Iteration",
                               disable=args.disable_progress_bar)
                          if is_main_process() else train_dataloader)

            if raw_train_start is None:
                raw_train_start = time.time()

            for step, batch in enumerate(train_iter):
                training_steps += 1
                batch = [t.to(device) for t in batch]
                input_ids, segment_ids, input_mask, masked_lm_labels, next_sentence_labels = batch
                if args.do_train:
                    from smdistributed.modelparallel.test.torch.utils import dump_model, verify

                    model.train()
                    if args.smp > 0:
                        loss_mbs = smp_step(
                            args,
                            device,
                            input_ids,
                            segment_ids,
                            input_mask,
                            masked_lm_labels,
                            next_sentence_labels,
                            model,
                            optimizer,
                            criterion,
                            step,
                        )
                        loss = loss_mbs.reduce_mean()
                        if smp.rank() == 0:
                            print("Loss:", loss.item())
                    else:
                        loss = train_step(
                            args,
                            device,
                            input_ids,
                            segment_ids,
                            input_mask,
                            masked_lm_labels,
                            next_sentence_labels,
                            model,
                            optimizer,
                            criterion,
                            step,
                        )
                    divisor = 1
                    average_loss += loss.item()

                    if training_steps % args.gradient_accumulation_steps == 0:
                        lr_scheduler.step()  # learning rate warmup
                        global_step = take_optimizer_step(
                            args, optimizer, model, overflow_buf, global_step)

                    if global_step >= args.steps_this_run or timeout_sent:
                        train_time_raw = time.time() - raw_train_start
                        last_num_steps = (int(
                            training_steps / args.gradient_accumulation_steps)
                                          % args.log_freq)
                        last_num_steps = args.log_freq if last_num_steps == 0 else last_num_steps
                        average_loss = torch.tensor(
                            average_loss, dtype=torch.float32).cuda()
                        average_loss = average_loss / (last_num_steps *
                                                       divisor)
                        if torch.distributed.is_initialized():
                            average_loss /= get_world_size()
                            torch.distributed.all_reduce(average_loss)
                        final_loss = loss.item()
                    elif training_steps % (
                            args.log_freq *
                            args.gradient_accumulation_steps) == 0:
                        average_loss = 0

                    if (global_step >= args.steps_this_run or training_steps %
                        (args.num_steps_per_checkpoint *
                         args.gradient_accumulation_steps) == 0
                            or timeout_sent):
                        if smp.dp_rank() == 0 and not args.skip_checkpoint:
                            if args.resume_step < 0 or not args.phase2:
                                output_save_file = os.path.join(
                                    args.output_dir,
                                    "ckpt_{}.pt".format(global_step))
                            else:
                                output_save_file = os.path.join(
                                    args.output_dir,
                                    "ckpt_{}.pt".format(global_step +
                                                        args.phase1_end_step),
                                )
                            if args.do_train:
                                save_dict = {
                                    "model":
                                    model.local_state_dict(),
                                    "optimizer":
                                    optimizer.local_state_dict(),
                                    "files": [f_id] + files,
                                    "epoch":
                                    epoch,
                                    "data_loader":
                                    None if global_step >= args.steps_this_run
                                    else train_dataloader,
                                }
                                if args.fp16:
                                    save_dict["master params"] = list(
                                        amp.master_params(optimizer))
                                # SMP: Checkpoint mp_rank specific state
                                smp.save(save_dict,
                                         output_save_file,
                                         partial=True)

                                most_recent_ckpts_paths.append(
                                    output_save_file)
                                if len(most_recent_ckpts_paths) > 3 and (
                                        args.smp == 0 or smp.dp_rank() == 0):
                                    ckpt_to_be_removed = most_recent_ckpts_paths.pop(
                                        0)
                                    os.remove(ckpt_to_be_removed +
                                              f"_{smp.mp_rank()}")

                        # Exiting the training due to hitting max steps, or being sent a
                        # timeout from the cluster scheduler
                        if global_step >= args.steps_this_run or timeout_sent:
                            del train_dataloader
                            # thread.join()
                            if smp.dp_rank() == 0 and args.save_full:
                                output_save_file = os.path.join(
                                    args.output_dir,
                                    "ckpt_{}.pt".format(global_step))
                                save_dict = {
                                    "model":
                                    model.local_state_dict(),
                                    "optimizer":
                                    optimizer.local_state_dict(),
                                    "files": [f_id] + files,
                                    "epoch":
                                    epoch,
                                    "data_loader":
                                    None if global_step >= args.steps_this_run
                                    else train_dataloader,
                                }
                                if args.fp16:
                                    save_dict["master params"] = list(
                                        amp.master_params(optimizer))
                                # SMP: Save a single checkpoint containing entire model parameters
                                smp.save(save_dict,
                                         output_save_file,
                                         partial=False)
                            smp.barrier()
                            if smp.local_rank() == 0:
                                print(f"Start syncing model checkpoints to s3")
                                base_s3_path = os.path.dirname(
                                    os.path.dirname(
                                        os.getenv("SM_MODULE_DIR", "")))
                                curr_host = os.getenv("SM_CURRENT_HOST")
                                full_s3_path = f"{base_s3_path}/checkpoints/{curr_host}/"
                                sync_local_checkpoints_to_s3(
                                    local_path=args.output_dir,
                                    s3_path=full_s3_path)
                                print(
                                    f"Finished syncing model checkpoints to s3"
                                )
                            return args, final_loss, train_time_raw, global_step
                else:
                    model.eval()
                    with torch.no_grad():
                        loss = test_step(
                            args,
                            device,
                            input_ids,
                            segment_ids,
                            input_mask,
                            masked_lm_labels,
                            next_sentence_labels,
                            model,
                            criterion,
                            step,
                        )
                        print(f"global_step {global_step} Test Loss:", loss)
                        test_losses.append(loss)
                    global_step += 1
                    if global_step >= args.steps_this_run:
                        return sum(test_losses) / len(test_losses)

            del train_dataloader
            # thread.join()
            # Make sure pool has finished and switch train_dataloader
            # NOTE: Will block until complete
            train_dataloader, data_file = dataset_future.result(timeout=None)
        epoch += 1
def _run_tests(all_tests, log_name_base, extra_args):
    global stop, executor, futures, system_compiler
    xmlname = log_name_base + '.xml'
    junit_root = ET.Element('testsuites')
    conf_time = 0
    build_time = 0
    test_time = 0
    passing_tests = 0
    failing_tests = 0
    skipped_tests = 0
    commands = (compile_commands, clean_commands, install_commands, uninstall_commands)

    try:
        # This fails in some CI environments for unknown reasons.
        num_workers = multiprocessing.cpu_count()
    except Exception as e:
        print('Could not determine number of CPUs due to the following reason:' + str(e))
        print('Defaulting to using only one process')
        num_workers = 1
    # Due to Ninja deficiency, almost 50% of build time
    # is spent waiting. Do something useful instead.
    #
    # Remove this once the following issue has been resolved:
    # https://github.com/mesonbuild/meson/pull/2082
    num_workers *= 2
    executor = ProcessPoolExecutor(max_workers=num_workers)

    for name, test_cases, skipped in all_tests:
        current_suite = ET.SubElement(junit_root, 'testsuite', {'name': name, 'tests': str(len(test_cases))})
        print()
        if skipped:
            print(bold('Not running %s tests.' % name))
        else:
            print(bold('Running %s tests.' % name))
        print()
        futures = []
        for t in test_cases:
            # Jenkins screws us over by automatically sorting test cases by name
            # and getting it wrong by not doing logical number sorting.
            (testnum, testbase) = t.name.split(' ', 1)
            testname = '%.3d %s' % (int(testnum), testbase)
            should_fail = False
            if name.startswith('failing'):
                should_fail = name.split('failing-')[1]
            result = executor.submit(run_test, skipped, t.as_posix(), extra_args, system_compiler, backend, backend_flags, commands, should_fail)
            futures.append((testname, t, result))
        for (testname, t, result) in futures:
            sys.stdout.flush()
            result = result.result()
            if (result is None) or (('MESON_SKIP_TEST' in result.stdo) and (skippable(name, t.as_posix()))):
                print(yellow('Skipping:'), t.as_posix())
                current_test = ET.SubElement(current_suite, 'testcase', {'name': testname,
                                                                         'classname': name})
                ET.SubElement(current_test, 'skipped', {})
                skipped_tests += 1
            else:
                without_install = "" if len(install_commands) > 0 else " (without install)"
                if result.msg != '':
                    print(red('Failed test{} during {}: {!r}'.format(without_install, result.step.name, t.as_posix())))
                    print('Reason:', result.msg)
                    failing_tests += 1
                    if result.step == BuildStep.configure and result.mlog != no_meson_log_msg:
                        # For configure failures, instead of printing stdout,
                        # print the meson log if available since it's a superset
                        # of stdout and often has very useful information.
                        failing_logs.append(result.mlog)
                    else:
                        failing_logs.append(result.stdo)
                    failing_logs.append(result.stde)
                else:
                    print('Succeeded test%s: %s' % (without_install, t.as_posix()))
                    passing_tests += 1
                conf_time += result.conftime
                build_time += result.buildtime
                test_time += result.testtime
                total_time = conf_time + build_time + test_time
                log_text_file(logfile, t, result.stdo, result.stde)
                current_test = ET.SubElement(current_suite, 'testcase', {'name': testname,
                                                                         'classname': name,
                                                                         'time': '%.3f' % total_time})
                if result.msg != '':
                    ET.SubElement(current_test, 'failure', {'message': result.msg})
                stdoel = ET.SubElement(current_test, 'system-out')
                stdoel.text = result.stdo
                stdeel = ET.SubElement(current_test, 'system-err')
                stdeel.text = result.stde
    print("\nTotal configuration time: %.2fs" % conf_time)
    print("Total build time: %.2fs" % build_time)
    print("Total test time: %.2fs" % test_time)
    ET.ElementTree(element=junit_root).write(xmlname, xml_declaration=True, encoding='UTF-8')
    return passing_tests, failing_tests, skipped_tests
Example #32
year = args.year[0]
month = args.month[0]

sensitivity_file = Path("/home/simon/GMI_ERA5_V7_chansens18.txt")
output_path = Path(f"/gdata/simon/validation/gprof/{year}/{month:02}")
output_path.mkdir(exist_ok=True, parents=True)
log_path = Path("/home/simon/src/GPROF_2020_V1_4D_prf/log/")

# Find validation files.
path = Path(f"/gdata/simon/validation/preprocessor/{year}/{month:02}")
files = path.glob("*.pp")


def run_retrieval(f):
    stem = f.stem
    subprocess.run([
        "GPROF_2020_V1",
        str(f),
        str(output_path / (stem + ".BIN")),
        str(log_path / (stem + ".log")), "/qdata1/pbrown/gpm/ancillary/", "0"
    ])


pool = ProcessPoolExecutor(max_workers=42)
tasks = []
for f in files:
    tasks.append(pool.submit(run_retrieval, f))

for t in tqdm(tasks):
    t.result()
Example #33
import os, time, random
'''
submit: submit a task asynchronously
shutdown(wait=True): equivalent to the process pool's pool.close() + pool.join()
wait=True: wait until all tasks in the pool have finished and their resources are reclaimed before continuing
wait=False: return immediately, without waiting for the tasks in the pool to finish
Regardless of the value of wait, the whole program still waits until all tasks have finished
submit and map must be called before shutdown
'''


def task(n):
    print(f'{os.getpid()} is running')
    time.sleep(random.randint(1, 3))
    return n**2


if __name__ == '__main__':
    executor = ProcessPoolExecutor(max_workers=3)

    futures = []

    for i in range(11):
        future = executor.submit(task, i)
        futures.append(future)

    executor.shutdown()
    print('----- result is -----')
    for future in futures:
        print(future.result())
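# The shutdown(wait=True) semantics described in the comment above are exactly what
# the context-manager form provides; an equivalent sketch of the same example:
import os, time, random
from concurrent.futures import ProcessPoolExecutor

def task(n):
    print(f'{os.getpid()} is running')
    time.sleep(random.randint(1, 3))
    return n ** 2

if __name__ == '__main__':
    # leaving the `with` block calls shutdown(wait=True), i.e. close() + join()
    with ProcessPoolExecutor(max_workers=3) as executor:
        futures = [executor.submit(task, i) for i in range(11)]
    print('----- result is -----')
    for future in futures:
        print(future.result())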
Example #34
# def process_go(*namelist):
#     tasks=[]
#     loop=asyncio.get_event_loop()
#     for name in namelist:
#         tasks.append(asyncio.ensure_future(hello(name)))
#     loop.run_until_complete(asyncio.wait(tasks))

if __name__ == '__main__':
    # executor = ThreadPoolExecutor(max_workers=3)
    executor = ProcessPoolExecutor(max_workers=3)

    f_list = []
    for url in URLS:
        '''asyncio.ensure_future('''
        # future = executor.submit(load_url,url)
        future = executor.submit(load_url, url)
        f_list.append(future)
    # With the default ALL_COMPLETED, wait() blocks until every task in the pool has finished before the main thread continues;
    # with FIRST_COMPLETED it returns as soon as any task completes, without waiting for the rest.
    # for i, future in enumerate(as_completed(f_list, timeout=2400)):
    #
    #         data = future.result()
    #         print('data',data)

    # The return value is a tuple of two sets: the first holds the futures that are already done, the second the ones not yet done
    res_list = wait(f_list, return_when='FIRST_COMPLETED')
    # print('resss',res_list.result())
    # for each in res_list:
    #     each.result()
    print('res', type(res_list), res_list)
    print('00', res_list[0])
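# For reference, the pair that wait() returns can also be read by attribute name
# (done / not_done) instead of by index; a small self-contained sketch, with a
# stand-in function replacing the load_url used above:
from concurrent.futures import ProcessPoolExecutor, wait, FIRST_COMPLETED

def fetch_stub(url):
    return len(url)  # stand-in for the real load_url

if __name__ == '__main__':
    with ProcessPoolExecutor(max_workers=3) as executor:
        futures = [executor.submit(fetch_stub, u) for u in ('a', 'bb', 'ccc')]
        result = wait(futures, return_when=FIRST_COMPLETED)
        print('done:', len(result.done), 'not done:', len(result.not_done))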
Example #35
class Laikad:
  def __init__(self, valid_const=("GPS", "GLONASS"), auto_update=False, valid_ephem_types=(EphemerisType.ULTRA_RAPID_ORBIT, EphemerisType.NAV),
               save_ephemeris=False, last_known_position=None):
    self.astro_dog = AstroDog(valid_const=valid_const, auto_update=auto_update, valid_ephem_types=valid_ephem_types, clear_old_ephemeris=True)
    self.gnss_kf = GNSSKalman(GENERATED_DIR)
    self.orbit_fetch_executor = ProcessPoolExecutor()
    self.orbit_fetch_future: Optional[Future] = None
    self.last_fetch_orbits_t = None
    self.last_cached_t = None
    self.save_ephemeris = save_ephemeris
    self.load_cache()
    self.posfix_functions = {constellation: get_posfix_sympy_fun(constellation) for constellation in (ConstellationId.GPS, ConstellationId.GLONASS)}
    self.last_pos_fix = last_known_position if last_known_position is not None else []
    self.last_pos_residual = []
    self.last_pos_fix_t = None

  def load_cache(self):
    cache = Params().get(EPHEMERIS_CACHE)
    if not cache:
      return
    try:
      cache = json.loads(cache, object_hook=deserialize_hook)
      self.astro_dog.add_orbits(cache['orbits'])
      self.astro_dog.add_navs(cache['nav'])
      self.last_fetch_orbits_t = cache['last_fetch_orbits_t']
    except json.decoder.JSONDecodeError:
      cloudlog.exception("Error parsing cache")

  def cache_ephemeris(self, t: GPSTime):
    if self.save_ephemeris and (self.last_cached_t is None or t - self.last_cached_t > SECS_IN_MIN):
      put_nonblocking(EPHEMERIS_CACHE, json.dumps(
        {'version': CACHE_VERSION, 'last_fetch_orbits_t': self.last_fetch_orbits_t, 'orbits': self.astro_dog.orbits, 'nav': self.astro_dog.nav},
        cls=CacheSerializer))
      self.last_cached_t = t

  def process_ublox_msg(self, ublox_msg, ublox_mono_time: int, block=False):
    if ublox_msg.which == 'measurementReport':
      t = ublox_mono_time * 1e-9
      report = ublox_msg.measurementReport
      if report.gpsWeek > 0:
        latest_msg_t = GPSTime(report.gpsWeek, report.rcvTow)
        self.fetch_orbits(latest_msg_t + SECS_IN_MIN, block)

      new_meas = read_raw_ublox(report)
      processed_measurements = process_measurements(new_meas, self.astro_dog)

      if self.last_pos_fix_t is None or abs(self.last_pos_fix_t - t) >= 2:
        min_measurements = 5 if any(p.constellation_id == ConstellationId.GLONASS for p in processed_measurements) else 4
        pos_fix, pos_fix_residual = calc_pos_fix_gauss_newton(processed_measurements, self.posfix_functions, min_measurements=min_measurements)
        if len(pos_fix) > 0:
          self.last_pos_fix = pos_fix[:3]
          self.last_pos_residual = pos_fix_residual
          self.last_pos_fix_t = t

      corrected_measurements = correct_measurements(processed_measurements, self.last_pos_fix, self.astro_dog) if self.last_pos_fix_t is not None else []

      self.update_localizer(self.last_pos_fix, t, corrected_measurements)
      kf_valid = all(self.kf_valid(t))
      ecef_pos = self.gnss_kf.x[GStates.ECEF_POS].tolist()
      ecef_vel = self.gnss_kf.x[GStates.ECEF_VELOCITY].tolist()

      pos_std = np.sqrt(abs(self.gnss_kf.P[GStates.ECEF_POS].diagonal())).tolist()
      vel_std = np.sqrt(abs(self.gnss_kf.P[GStates.ECEF_VELOCITY].diagonal())).tolist()

      meas_msgs = [create_measurement_msg(m) for m in corrected_measurements]
      dat = messaging.new_message("gnssMeasurements")
      measurement_msg = log.LiveLocationKalman.Measurement.new_message
      dat.gnssMeasurements = {
        "gpsWeek": report.gpsWeek,
        "gpsTimeOfWeek": report.rcvTow,
        "positionECEF": measurement_msg(value=ecef_pos, std=pos_std, valid=kf_valid),
        "velocityECEF": measurement_msg(value=ecef_vel, std=vel_std, valid=kf_valid),
        "positionFixECEF": measurement_msg(value=self.last_pos_fix, std=self.last_pos_residual, valid=self.last_pos_fix_t == t),
        "ubloxMonoTime": ublox_mono_time,
        "correctedMeasurements": meas_msgs
      }
      return dat
    elif ublox_msg.which == 'ephemeris':
      ephem = convert_ublox_ephem(ublox_msg.ephemeris)
      self.astro_dog.add_navs({ephem.prn: [ephem]})
      self.cache_ephemeris(t=ephem.epoch)
    # elif ublox_msg.which == 'ionoData':
    # todo add this. Needed to better correct messages offline. First fix ublox_msg.cc to send them.

  def update_localizer(self, est_pos, t: float, measurements: List[GNSSMeasurement]):
    # Check time and outputs are valid
    valid = self.kf_valid(t)
    if not all(valid):
      if not valid[0]:
        cloudlog.info("Init gnss kalman filter")
      elif not valid[1]:
        cloudlog.error("Time gap of over 10s detected, gnss kalman reset")
      elif not valid[2]:
        cloudlog.error("Gnss kalman filter state is nan")
      if len(est_pos) > 0:
        cloudlog.info(f"Reset kalman filter with {est_pos}")
        self.init_gnss_localizer(est_pos)
      else:
        cloudlog.info("Could not reset kalman filter")
        return
    if len(measurements) > 0:
      kf_add_observations(self.gnss_kf, t, measurements)
    else:
      # Ensure gnss filter is updated even with no new measurements
      self.gnss_kf.predict(t)

  def kf_valid(self, t: float):
    filter_time = self.gnss_kf.filter.filter_time
    return [filter_time is not None,
            filter_time is not None and abs(t - filter_time) < MAX_TIME_GAP,
            all(np.isfinite(self.gnss_kf.x[GStates.ECEF_POS]))]

  def init_gnss_localizer(self, est_pos):
    x_initial, p_initial_diag = np.copy(GNSSKalman.x_initial), np.copy(np.diagonal(GNSSKalman.P_initial))
    x_initial[GStates.ECEF_POS] = est_pos
    p_initial_diag[GStates.ECEF_POS] = 1000 ** 2
    self.gnss_kf.init_state(x_initial, covs_diag=p_initial_diag)

  def fetch_orbits(self, t: GPSTime, block):
    if t not in self.astro_dog.orbit_fetched_times and (self.last_fetch_orbits_t is None or t - self.last_fetch_orbits_t > SECS_IN_HR):
      astro_dog_vars = self.astro_dog.valid_const, self.astro_dog.auto_update, self.astro_dog.valid_ephem_types
      if self.orbit_fetch_future is None:
        self.orbit_fetch_future = self.orbit_fetch_executor.submit(get_orbit_data, t, *astro_dog_vars)
        if block:
          self.orbit_fetch_future.result()
      if self.orbit_fetch_future.done():
        ret = self.orbit_fetch_future.result()
        self.last_fetch_orbits_t = t
        if ret:
          self.astro_dog.orbits, self.astro_dog.orbit_fetched_times = ret
          self.cache_ephemeris(t=t)
        self.orbit_fetch_future = None
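# --- Added sketch (not from the original): fetch_orbits above follows a
# submit-once / poll-later pattern on a single-worker ProcessPoolExecutor.
# A minimal standalone version of that pattern might look like the following;
# fetch_slow_data and BackgroundFetcher are hypothetical names.
import time
from concurrent.futures import Future, ProcessPoolExecutor
from typing import Optional


def fetch_slow_data(tag):
    # hypothetical stand-in for an expensive call such as get_orbit_data
    time.sleep(1)
    return f"data for {tag}"


class BackgroundFetcher:
    def __init__(self):
        self.executor = ProcessPoolExecutor(max_workers=1)
        self.future: Optional[Future] = None

    def tick(self, tag):
        """Called on every cycle: start a fetch if none is running, harvest it once done."""
        if self.future is None:
            self.future = self.executor.submit(fetch_slow_data, tag)
        elif self.future.done():
            result = self.future.result()
            self.future = None
            return result
        return None


if __name__ == '__main__':
    fetcher = BackgroundFetcher()
    while True:
        data = fetcher.tick('gps')
        if data is not None:
            print(data)
            break
        time.sleep(0.1)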
Exemple #36
0
    fill = [fillAd, fillAe, fillCd, fillCe]
    H, A, B = matrizes.generate(b)
    matrix = matrizes.matrix()
    h = matrix[0:7, 5]
    iterations = 108

    allOccupancies = [1, 5, 10, 20, 30, 40, 50, 60, 90]
    allMethods = ['FIR', 'MF', 'MP', 'OMP', 'LS-OMP', 'GD', 'SSF', 'PCD', 'TAS', 'GDi', 'SSFi', 'PCDi', 'TASi']

    # occupancies utilized in some articles
    occupancies = [30, 60, 90]

    # main methods of all families tested
    methods = ['FIR', 'LS-OMP', 'TAS', 'PCDi']

    const = collections.OrderedDict(
        {'methods': methods, 'iterations': iterations, 'b': b, 'e': e, 'h': h, 'H': H, 'A': A, 'B': B, 'fill': fill,
         'matrix': matrix, 'window': window, 'totalSamples': totalSamples})

    testSystem = TestSystem(const)

    m = Manager()
    lock = m.Lock()
    pool = ProcessPoolExecutor()
    futures = [pool.submit(test, const, occupancy, lock) for occupancy in occupancies]
    for future in futures:
        testSystem.addData(future.result())

    # testSystem.graphViewer(methods, occupancies, 'ROC')
    testSystem.graphViewer(methods, occupancies, 'RMS')
Exemple #37
0
class DataRouter(object):
    def __init__(self,
                 project_dir=None,
                 max_training_processes=1,
                 response_log=None,
                 emulation_mode=None,
                 remote_storage=None,
                 component_builder=None,
                 model_server=None,
                 wait_time_between_pulls=None):
        self._training_processes = max(max_training_processes, 1)
        self._current_training_processes = 0
        self.responses = self._create_query_logger(response_log)
        self.project_dir = config.make_path_absolute(project_dir)
        self.emulator = self._create_emulator(emulation_mode)
        self.remote_storage = remote_storage
        self.model_server = model_server
        self.wait_time_between_pulls = wait_time_between_pulls

        if component_builder:
            self.component_builder = component_builder
        else:
            self.component_builder = ComponentBuilder(use_cache=True)

        self.project_store = self._create_project_store(project_dir)

        if six.PY3:
            # tensorflow sessions are not fork-safe,
            # and training processes have to be spawned instead of forked.
            # See https://github.com/tensorflow/tensorflow/issues/5448#issuecomment-258934405
            multiprocessing.set_start_method('spawn', force=True)

        self.pool = ProcessPool(self._training_processes)

    def __del__(self):
        """Terminates workers pool processes"""
        self.pool.shutdown()

    @staticmethod
    def _create_query_logger(response_log):
        """Create a logger that will persist incoming query results."""

        # Ensures different log files for different
        # processes in multi worker mode
        if response_log:
            # We need to generate a unique file name,
            # even in multiprocess environments
            timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
            log_file_name = "rasa_nlu_log-{}-{}.log".format(timestamp,
                                                            os.getpid())
            response_logfile = os.path.join(response_log, log_file_name)
            # Instantiate a standard python logger,
            # which we are going to use to log requests
            utils.create_dir_for_file(response_logfile)
            out_file = io.open(response_logfile, 'a', encoding='utf8')
            query_logger = Logger(
                    observer=jsonFileLogObserver(out_file, recordSeparator=''),
                    namespace='query-logger')
            # Prevents queries getting logged with parent logger
            # --> might log them to stdout
            logger.info("Logging requests to '{}'.".format(response_logfile))
            return query_logger
        else:
            # If the user didn't provide a logging directory, we won't log!
            logger.info("Logging of requests is disabled. "
                        "(No 'request_log' directory configured)")
            return None

    def _collect_projects(self, project_dir):
        if project_dir and os.path.isdir(project_dir):
            projects = os.listdir(project_dir)
        else:
            projects = []

        projects.extend(self._list_projects_in_cloud())
        return projects

    def _create_project_store(self,
                              project_dir):
        default_project = RasaNLUModelConfig.DEFAULT_PROJECT_NAME

        projects = self._collect_projects(project_dir)

        project_store = {}

        if self.model_server is not None:
            project_store[default_project] = load_from_server(
                    self.component_builder,
                    default_project,
                    self.project_dir,
                    self.remote_storage,
                    self.model_server,
                    self.wait_time_between_pulls
            )
        else:
            for project in projects:
                project_store[project] = Project(self.component_builder,
                                                 project,
                                                 self.project_dir,
                                                 self.remote_storage)

            if not project_store:
                project_store[default_project] = Project(
                        project=default_project,
                        project_dir=self.project_dir,
                        remote_storage=self.remote_storage
                )

        return project_store

    def _pre_load(self, projects):
        logger.debug("loading %s", projects)
        for project in self.project_store:
            if project in projects:
                self.project_store[project].load_model()

    def _list_projects_in_cloud(self):
        try:
            from rasa_nlu.persistor import get_persistor
            p = get_persistor(self.remote_storage)
            if p is not None:
                return p.list_projects()
            else:
                return []
        except Exception:
            logger.exception("Failed to list projects. Make sure you have "
                             "correctly configured your cloud storage "
                             "settings.")
            return []

    @staticmethod
    def _create_emulator(mode):
        """Create emulator for specified mode.

        If no emulator is specified, we will use the Rasa NLU format."""

        if mode is None:
            from rasa_nlu.emulators import NoEmulator
            return NoEmulator()
        elif mode.lower() == 'wit':
            from rasa_nlu.emulators.wit import WitEmulator
            return WitEmulator()
        elif mode.lower() == 'luis':
            from rasa_nlu.emulators.luis import LUISEmulator
            return LUISEmulator()
        elif mode.lower() == 'dialogflow':
            from rasa_nlu.emulators.dialogflow import DialogflowEmulator
            return DialogflowEmulator()
        else:
            raise ValueError("unknown mode : {0}".format(mode))

    @staticmethod
    def _tf_in_pipeline(model_config):
        # type: (RasaNLUModelConfig) -> bool
        from rasa_nlu.classifiers.embedding_intent_classifier import \
            EmbeddingIntentClassifier
        return EmbeddingIntentClassifier.name in model_config.component_names

    def extract(self, data):
        return self.emulator.normalise_request_json(data)

    def parse(self, data):
        project = data.get("project", RasaNLUModelConfig.DEFAULT_PROJECT_NAME)
        model = data.get("model")
        if project not in self.project_store:
            projects = self._list_projects(self.project_dir)

            cloud_provided_projects = self._list_projects_in_cloud()
            projects.extend(cloud_provided_projects)

            if project not in projects:
                raise InvalidProjectError(
                        "No project found with name '{}'.".format(project))
            else:
                try:
                    self.project_store[project] = Project(
                            self.component_builder, project,
                            self.project_dir, self.remote_storage)
                except Exception as e:
                    raise InvalidProjectError(
                            "Unable to load project '{}'. "
                            "Error: {}".format(project, e))

        time = data.get('time')
        response = self.project_store[project].parse(data['text'], time,
                                                     model)

        if self.responses:
            self.responses.info('', user_input=response, project=project,
                                model=response.get('model'))

        return self.format_response(response)

    @staticmethod
    def _list_projects(path):
        """List the projects in the path, ignoring hidden directories."""
        return [os.path.basename(fn)
                for fn in utils.list_subdirectories(path)]

    def parse_training_examples(self, examples, project, model):
        # type: (Optional[List[Message]], Text, Text) -> List[Dict[Text, Text]]
        """Parses a list of training examples to the project interpreter"""

        predictions = []
        for ex in examples:
            logger.debug("Going to parse: {}".format(ex.as_dict()))
            response = self.project_store[project].parse(ex.text,
                                                         None,
                                                         model)
            logger.debug("Received response: {}".format(response))
            predictions.append(response)

        return predictions

    def format_response(self, data):
        return self.emulator.normalise_response_json(data)

    def get_status(self):
        # This only counts the trainings started from this process;
        # when run in multi-worker mode, there might be other trainings
        # running in different processes that we don't know about.

        return {
            "max_training_processes": self._training_processes,
            "current_training_processes": self._current_training_processes,
            "available_projects": {
                name: project.as_dict()
                for name, project in self.project_store.items()
            }
        }

    def start_train_process(self,
                            data_file,  # type: Text
                            project,  # type: Text
                            train_config,  # type: RasaNLUModelConfig
                            model_name=None  # type: Optional[Text]
                            ):
        # type: (...) -> Deferred
        """Start a model training."""

        if not project:
            raise InvalidProjectError("Missing project name to train")

        if self._training_processes <= self._current_training_processes:
            raise MaxTrainingError

        if project in self.project_store:
            self.project_store[project].status = 1
        elif project not in self.project_store:
            self.project_store[project] = Project(
                    self.component_builder, project,
                    self.project_dir, self.remote_storage)
            self.project_store[project].status = 1

        def training_callback(model_path):
            model_dir = os.path.basename(os.path.normpath(model_path))
            self.project_store[project].update(model_dir)
            self._current_training_processes -= 1
            self.project_store[project].current_training_processes -= 1
            if (self.project_store[project].status == 1 and
                    self.project_store[project].current_training_processes ==
                    0):
                self.project_store[project].status = 0
            return model_dir

        def training_errback(failure):
            logger.warning(failure)
            target_project = self.project_store.get(
                    failure.value.failed_target_project)
            self._current_training_processes -= 1
            self.project_store[project].current_training_processes -= 1
            if (target_project and
                    self.project_store[project].current_training_processes ==
                    0):
                target_project.status = 0
            return failure

        logger.debug("New training queued")

        self._current_training_processes += 1
        self.project_store[project].current_training_processes += 1

        # tensorflow training is not executed in a separate thread on python 2,
        # as this may cause training to freeze
        if six.PY2 and self._tf_in_pipeline(train_config):
            try:
                logger.warning("Training a pipeline with a tensorflow "
                               "component. This blocks the server during "
                               "training.")
                model_path = do_train_in_worker(
                        train_config,
                        data_file,
                        path=self.project_dir,
                        project=project,
                        fixed_model_name=model_name,
                        storage=self.remote_storage)
                model_dir = os.path.basename(os.path.normpath(model_path))
                training_callback(model_dir)
                return model_dir
            except TrainingException as e:
                logger.warning(e)
                target_project = self.project_store.get(
                        e.failed_target_project)
                if target_project:
                    target_project.status = 0
                raise e
        else:
            result = self.pool.submit(do_train_in_worker,
                                      train_config,
                                      data_file,
                                      path=self.project_dir,
                                      project=project,
                                      fixed_model_name=model_name,
                                      storage=self.remote_storage)
            result = deferred_from_future(result)
            result.addCallback(training_callback)
            result.addErrback(training_errback)

            return result

    def evaluate(self, data, project=None, model=None):
        # type: (Text, Optional[Text], Optional[Text]) -> Dict[Text, Any]
        """Perform a model evaluation."""

        project = project or RasaNLUModelConfig.DEFAULT_PROJECT_NAME
        model = model or None
        file_name = utils.create_temporary_file(data, "_training_data")
        test_data = load_data(file_name)

        if project not in self.project_store:
            raise InvalidProjectError("Project {} could not "
                                      "be found".format(project))

        preds_json = self.parse_training_examples(test_data.intent_examples,
                                                  project,
                                                  model)

        predictions = [
            {"text": e.text,
             "intent": e.data.get("intent"),
             "predicted": p.get("intent", {}).get("name"),
             "confidence": p.get("intent", {}).get("confidence")}
            for e, p in zip(test_data.intent_examples, preds_json)
        ]

        y_true = [e.data.get("intent") for e in test_data.intent_examples]
        y_true = clean_intent_labels(y_true)

        y_pred = [p.get("intent", {}).get("name") for p in preds_json]
        y_pred = clean_intent_labels(y_pred)

        report, precision, f1, accuracy = get_evaluation_metrics(y_true,
                                                                 y_pred)

        return {
            "intent_evaluation": {
                "report": report,
                "predictions": predictions,
                "precision": precision,
                "f1_score": f1,
                "accuracy": accuracy}
        }

    def unload_model(self, project, model):
        # type: (Text, Text) -> Dict[Text]
        """Unload a model from server memory."""

        if project is None:
            raise InvalidProjectError("No project specified")
        elif project not in self.project_store:
            raise InvalidProjectError("Project {} could not "
                                      "be found".format(project))

        try:
            unloaded_model = self.project_store[project].unload(model)
            return unloaded_model
        except KeyError:
            raise InvalidProjectError("Failed to unload model {} "
                                      "for project {}.".format(model, project))
Exemple #38
0
def synthesize(hp, speaker_id='', num_sentences=0, ncores=1, topoutdir='', t2m_epoch=-1, ssrn_epoch=-1):
    '''
    topoutdir: store samples under here; defaults to hp.sampledir
    t2m_epoch and ssrn_epoch: default -1 means use latest. Otherwise go to archived models.
    '''
    assert hp.vocoder in ['griffin_lim', 'world'], 'Other vocoders than griffin_lim/world not yet supported'

    dataset = load_data(hp, mode="synthesis") #since mode != 'train' or 'validation', will load test_transcript rather than transcript
    fpaths, L = dataset['fpaths'], dataset['texts']
    position_in_phone_data = duration_data = labels = None # default
    if hp.use_external_durations:
        duration_data = dataset['durations']
        if num_sentences > 0:
            duration_data = duration_data[:num_sentences, :, :]

    if 'position_in_phone' in hp.history_type:
        ## TODO: combine + deduplicate with relevant code in train.py for making validation set
        def duration2position(duration, fractional=False):     
            ### very roundabout -- need to deflate A matrix back to integers:
            duration = duration.sum(axis=0)
            #print(duration)
            # sys.exit('evs')   
            positions = durations_to_position(duration, fractional=fractional)
            ###positions = end_pad_for_reduction_shape_sync(positions, hp)
            positions = positions[0::hp.r, :]         
            #print(positions)
            return positions

        position_in_phone_data = [duration2position(dur, fractional=('fractional' in hp.history_type)) \
                        for dur in duration_data]       
        position_in_phone_data = list2batch(position_in_phone_data, hp.max_T)



    # Ensure we aren't trying to generate more utterances than are actually in our test_transcript
    if num_sentences > 0:
        assert num_sentences <= len(fpaths)
        L = L[:num_sentences, :]
        fpaths = fpaths[:num_sentences]

    bases = [basename(fpath) for fpath in fpaths]

    if hp.merlin_label_dir:
        labels = []
        for fpath in fpaths:
            label = np.load("{}/{}".format(hp.merlin_label_dir, basename(fpath)+".npy"))
            if hp.select_central:
                central_ind = get_labels_indices(hp.merlin_lab_dim)
                label = label[:,central_ind==1] 
            labels.append(label)

        labels = list2batch(labels, hp.max_N)


    if speaker_id:
        speaker2ix = dict(zip(hp.speaker_list, range(len(hp.speaker_list))))
        speaker_ix = speaker2ix[speaker_id]

        ## Speaker codes are held in (batch, 1) matrix -- tiling is done inside the graph:
        speaker_data = np.ones((len(L), 1))  *  speaker_ix
    else:
        speaker_data = None
   
    if hp.turn_off_monotonic_for_synthesis: # if the FIA mechanism is turned off
        text_lengths = get_text_lengths(L)
        hp.text_lengths = text_lengths + 1
     
    # Load graph 
    ## TODO: generalise to combine other types of models into a synthesis pipeline?
    g1 = Text2MelGraph(hp, mode="synthesize"); print("Graph 1 (t2m) loaded")

    if hp.norm is None:
        t2m_layer_norm = False
        hp.norm = 'layer'
        hp.lr = 0.001
        hp.beta1 = 0.9
        hp.beta2 = 0.999
        hp.epsilon = 0.00000001
        hp.decay_lr = True
        hp.batchsize = {'t2m': 32, 'ssrn': 8}
    else:
        t2m_layer_norm = True

    g2 = SSRNGraph(hp, mode="synthesize"); print("Graph 2 (ssrn) loaded")

    if t2m_layer_norm == False:
        hp.norm = None
        hp.lr = 0.0002
        hp.beta1 = 0.5
        hp.beta2 = 0.9
        hp.epsilon = 0.000001
        hp.decay_lr = False
        hp.batchsize = {'t2m': 16, 'ssrn': 8}
    
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        ### TODO: specify epoch from comm line?
        ### TODO: t2m and ssrn from separate configs?

        if t2m_epoch > -1:
            restore_archived_model_parameters(sess, hp, 't2m', t2m_epoch)
        else:
            t2m_epoch = restore_latest_model_parameters(sess, hp, 't2m')

        if ssrn_epoch > -1:    
            restore_archived_model_parameters(sess, hp, 'ssrn', ssrn_epoch)
        else:
            ssrn_epoch = restore_latest_model_parameters(sess, hp, 'ssrn')

        # Pass input L through Text2Mel Graph
        t = start_clock('Text2Mel generating...')
        ### TODO: after further efficiency testing, remove this fork
        if 1:  ### efficient route -- only make K&V once  ## 3.86, 3.70, 3.80 seconds (2 sentences)
            text_lengths = get_text_lengths(L)
            K, V = encode_text(hp, L, g1, sess, speaker_data=speaker_data, labels=labels)
            Y, lengths, alignments = synth_codedtext2mel(hp, K, V, text_lengths, g1, sess, \
                                speaker_data=speaker_data, duration_data=duration_data, \
                                position_in_phone_data=position_in_phone_data,\
                                labels=labels)
        else: ## 5.68, 5.43, 5.38 seconds (2 sentences)
            Y, lengths = synth_text2mel(hp, L, g1, sess, speaker_data=speaker_data, \
                                            duration_data=duration_data, \
                                            position_in_phone_data=position_in_phone_data, \
                                            labels=labels)
        stop_clock(t)

        ### TODO: useful to test this?
        # print(Y[0,:,:])
        # print (np.isnan(Y).any())
        # print('nan1')
        # Then pass output Y of Text2Mel Graph through SSRN graph to get high res spectrogram Z.
        t = start_clock('Mel2Mag generating...')
        Z = synth_mel2mag(hp, Y, g2, sess)
        stop_clock(t) 

        if (np.isnan(Z).any()):  ### TODO: keep?
            Z = np.nan_to_num(Z)

        # Generate wav files
        if not topoutdir:
            topoutdir = hp.sampledir
        outdir = os.path.join(topoutdir, 't2m%s_ssrn%s'%(t2m_epoch, ssrn_epoch))
        if speaker_id:
            outdir += '_speaker-%s'%(speaker_id)
        safe_makedir(outdir)

        # Plot trimmed attention alignment with filename
        print("Plot attention, will save to following dir: %s"%(outdir))
        print("File |  CDP | Ain")
        for i, mag in enumerate(Z):
            outfile = os.path.join(outdir, bases[i])
            trimmed_alignment = alignments[i,:text_lengths[i],:lengths[i]]
            plot_alignment(hp, trimmed_alignment, utt_idx=i+1, t2m_epoch=t2m_epoch, dir=outdir, outfile=outfile)
            CDP = getCDP(trimmed_alignment)
            APin, APout = getAP(trimmed_alignment)
            print("%s | %.2f | %.2f"%( bases[i], CDP, APin))

        print("Generating wav files, will save to following dir: %s"%(outdir))

        
        assert hp.vocoder in ['griffin_lim', 'world'], 'Other vocoders than griffin_lim/world not yet supported'

        if ncores==1:
            for i, mag in tqdm(enumerate(Z)):
                outfile = os.path.join(outdir, bases[i] + '.wav')
                mag = mag[:lengths[i]*hp.r,:]  ### trim to generated length
                synth_wave(hp, mag, outfile)
        else:
            executor = ProcessPoolExecutor(max_workers=ncores)    
            futures = []
            for i, mag in tqdm(enumerate(Z)):
                outfile = os.path.join(outdir, bases[i] + '.wav')
                mag = mag[:lengths[i]*hp.r,:]  ### trim to generated length
                futures.append(executor.submit(synth_wave, hp, mag, outfile))
            proc_list = [future.result() for future in tqdm(futures)]
Exemple #39
0
    resample_to_16k(origin_wavpath, target_wavpath, num_workers=num_workers)

    # We only use the speakers listed below for this experiment.
    speaker_used = ['262', '272']
    speaker_used = ['p' + i for i in speaker_used]

    ## Next, extract the acoustic features (MCEPs, lf0) and compute the corresponding stats (means, stds).
    # Make dirs to contain the MCEPs
    os.makedirs(mc_dir_train, exist_ok=True)
    os.makedirs(mc_dir_test, exist_ok=True)

    num_workers = len(speaker_used)  #cpu_count()
    print("number of workers: ", num_workers)
    executor = ProcessPoolExecutor(max_workers=num_workers)

    work_dir = target_wavpath
    # spk_folders = os.listdir(work_dir)
    # print("processing {} speaker folders".format(len(spk_folders)))
    # print(spk_folders)

    futures = []
    for spk in speaker_used:
        spk_path = os.path.join(work_dir, spk)
        futures.append(
            executor.submit(
                partial(get_spk_world_feats, spk_path, mc_dir_train,
                        mc_dir_test, sample_rate)))
    result_list = [future.result() for future in tqdm(futures)]
    print(result_list)
    sys.exit(0)
Exemple #40
0
import os
import time
from concurrent.futures import ProcessPoolExecutor


def double(x):
    print(os.getpid())
    return x * 2


if __name__ == "__main__":
    print(os.getpid())
    values = [1, 2, 3, 4, 3]
    t0 = time.time()

    results = list(map(double, values))
    t1 = time.time()
    print(t1 - t0)
    executor = ProcessPoolExecutor()

    # submit() takes a callable plus its arguments; passing an already-evaluated
    # list is a bug, so submit one task per value instead.
    tasks = [executor.submit(double, v) for v in values]
    print("I am main program")
    t2 = time.time()
    print(t2 - t1)

    parallel_results = [task.result() for task in tasks]
    print(parallel_results)
    print(results)
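# --- Added note (not from the original): for this "apply one function to every
# value" shape, executor.map is the more idiomatic form; results come back in
# input order. A minimal sketch:
import os
from concurrent.futures import ProcessPoolExecutor


def double(x):
    print(os.getpid())
    return x * 2


if __name__ == "__main__":
    values = [1, 2, 3, 4, 3]
    with ProcessPoolExecutor() as executor:
        print(list(executor.map(double, values)))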
Exemple #41
0
    print('Making directories for MCEPs...')
    os.makedirs(mc_dir_train, exist_ok=True)
    os.makedirs(mc_dir_test, exist_ok=True)

    num_workers = len(speaker_dirs)
    print(f'Number of workers: {num_workers}')
    executer = ProcessPoolExecutor(max_workers=num_workers)

    futures = []
    if perform_data_split == 'n':
        # current wavs working with (train)
        working_train_dir = target_wavpath_train
        for spk in tqdm(speaker_dirs):
            print(speaker_dirs)
            spk_dir = os.path.join(working_train_dir, spk)
            futures.append(executer.submit(partial(process_spk, spk_dir, mc_dir_train)))

        # current wavs working with (eval)
        working_eval_dir = target_wavpath_eval
        for spk in tqdm(speaker_dirs):
            spk_dir = os.path.join(working_eval_dir, spk)
            futures.append(executer.submit(partial(process_spk, spk_dir, mc_dir_test)))
    else:
        # current wavs we are working with (all for data split)
        working_dir = target_wavpath
        for spk in tqdm(speaker_dirs):
            spk_dir = os.path.join(working_dir, spk)
            futures.append(executer.submit(partial(process_spk_with_split, spk_dir, mc_dir_train, mc_dir_test)))

    result_list = [future.result() for future in tqdm(futures)]
    print('Completed:')
Exemple #42
0
from concurrent.futures import ProcessPoolExecutor
import time


def sleeper(id_):
    print(f'{id_} started')
    time.sleep(2)
    print(f'{id_} ended')


pool = ProcessPoolExecutor(max_workers=5)

futures = []

for i in range(1, 13, 1):
    f = pool.submit(sleeper, i)
    futures.append(f)

for i in futures:
    i.result()
Exemple #43
0
def test_no_connection_sharing_among_processes(s3):
    executor = ProcessPoolExecutor()
    conn_id = executor.submit(_get_s3_id, s3).result()
    assert id(s3.connect()) != conn_id, \
        "Processes should not share S3 connections."
Exemple #44
0
    threads *= 4
tpool = ProcessPoolExecutor(threads)

# show device ids
rs = client.query("show series")
dev_ids = list(map(lambda x: x[0].split('=')[1],
                   rs.raw['series'][0]['values']))

import csv


def qry(dev_id: str):
    rs = client.query(
        f"select * from acc_data where dev_id='{dev_id}' and time >= '2019-03-26T02:00:00Z' AND time <= '2019-03-26T03:00:00Z'"
    )
    with open(f'{dev_id}_output.csv', 'w', newline='') as f:
        csv_writer = csv.writer(f)
        for line in rs.raw['series'][0]['values']:
            csv_writer.writerow(line)


start = time.time()

futures = []

for id in dev_ids:
    futures.append(tpool.submit(qry, id))

print("program end")
print("program processing time: ", time.time() - start)
Exemple #45
0

def setup_routes(app):
    app.router.add_get('/data', get_all_data)
    app.router.add_get('/data/{mac}', get_data)


if __name__ == '__main__':
    tags = {
        'F4:A5:74:89:16:57': 'kitchen',
        'CC:2C:6A:1E:59:3D': 'bedroom',
        'BB:2C:6A:1E:59:3D': 'livingroom'
    }

    m = Manager()
    q = m.Queue()

    # Start background process
    executor = ProcessPoolExecutor(1)
    executor.submit(run_get_data_background, list(tags.keys()), q)

    loop = asyncio.get_event_loop()

    # Start data updater
    loop.create_task(data_update(q))

    # Setup and start web application
    app = web.Application(loop=loop)
    setup_routes(app)
    web.run_app(app, host='0.0.0.0', port=5000)
Exemple #46
0
def run_in_process(sync_fn, *args):
    pool = ProcessPoolExecutor(max_workers=1)
    result = yield pool.submit(sync_fn, *args)
    pool.shutdown()
    return result
Exemple #47
0
def build_from_path(in_dir,
                    out_dir,
                    filelist_names,
                    num_workers=16,
                    tqdm=lambda x: x):
    wav_paths = []
    # for all speakers, count index and either add to train_list/eval_list/test_list
    # Create wav path list
    wav_paths = glob.glob(os.path.join(in_dir, 'wav_16000', '*', '*.wav'))

    books = glob.glob(os.path.join(in_dir, 'pron', '*.txt'))
    books.sort()
    texts2d = [[] for i in range(len(books))]
    for i in range(len(books)):
        with open(books[i], 'r', encoding='utf-8-sig') as f:
            lines = f.readlines()
        texts2d[i] = lines

    for i in range(len(texts2d)):
        for j in range(len(texts2d[i])):
            text = texts2d[i][j].strip()
            texts2d[i][j] = text

    path = os.path.join(in_dir, 'wav_22050')
    if not os.path.exists(path):
        os.makedirs(path)
    executor = ProcessPoolExecutor(max_workers=num_workers)
    futures = []
    futures_val = []
    futures_test = []
    index = 1
    for wav_path in wav_paths:
        wav_filename = os.path.basename(wav_path)
        lists = wav_filename.split('_')
        speaker = lists[0]
        book = int(lists[1][1:3]) - 1
        sentence = int(lists[2][1:3]) - 1
        try:
            text = texts2d[book][sentence]
        except IndexError:
            print('ERROR! OUT OF RANGE: {}'.format(wav_filename))
            continue
        out_path = wav_path.replace('wav_16000', 'wav_22050')
        dir = os.path.dirname(out_path)
        if not os.path.exists(dir):
            os.makedirs(dir)
        if int(index) % 400 == 0:
            futures_val.append(
                executor.submit(
                    partial(_process_utterance, wav_path, out_path, speaker,
                            text)))
        elif int(index) % 400 == 1:
            futures_test.append(
                executor.submit(
                    partial(_process_utterance, wav_path, out_path, speaker,
                            text)))
        else:
            futures.append(
                executor.submit(
                    partial(_process_utterance, wav_path, out_path, speaker,
                            text)))
        index += 1
    write_metadata([future.result() for future in tqdm(futures)], out_dir,
                   filelist_names[0])
    write_metadata([future.result() for future in tqdm(futures_val)], out_dir,
                   filelist_names[1])
    write_metadata([future.result() for future in tqdm(futures_test)], out_dir,
                   filelist_names[2])
Exemple #48
0
    def process_file_list(self, run_args):
        """
        Process a single image tile < 5000x5000 in size.
        """
        for variable, value in run_args.items():
            self.__setattr__(variable, value)
        assert self.mem_usage < 1.0 and self.mem_usage > 0.0

        # * depending on the number of samples and their size, this may be less efficient
        patterning = lambda x: re.sub("([\[\]])", "[\\1]", x)
        file_path_list = glob.glob(patterning("%s/*" % self.input_dir))
        file_path_list.sort()  # ensure same order
        assert len(file_path_list) > 0, 'No files detected in the input path'

        rm_n_mkdir(self.output_dir + '/json/')
        rm_n_mkdir(self.output_dir + '/mat/')
        rm_n_mkdir(self.output_dir + '/overlay/')
        if self.save_qupath:
            rm_n_mkdir(self.output_dir + "/qupath/")

        def proc_callback(results):
            """Post processing callback.
            
            Output format is implicit assumption, taken from `_post_process_patches`

            """
            img_name, pred_map, pred_inst, inst_info_dict, overlaid_img = results

            inst_type = [[k, v["type"]] for k, v in inst_info_dict.items()]
            inst_type = np.array(inst_type)
            mat_dict = {
                "inst_map": pred_inst,
                "inst_type": inst_type,
            }
            if self.nr_types is None:  # matlab does not have None type array
                mat_dict.pop("inst_type", None)

            if self.save_raw_map:
                mat_dict["raw_map"] = pred_map
            save_path = "%s/mat/%s.mat" % (self.output_dir, img_name)
            sio.savemat(save_path, mat_dict)

            save_path = "%s/overlay/%s.png" % (self.output_dir, img_name)
            cv2.imwrite(save_path, cv2.cvtColor(overlaid_img,
                                                cv2.COLOR_RGB2BGR))

            if self.save_qupath:
                nuc_val_list = list(inst_info_dict.values())
                nuc_type_list = np.array([v["type"] for v in nuc_val_list])
                nuc_coms_list = np.array([v["centroid"] for v in nuc_val_list])
                save_path = "%s/qupath/%s.tsv" % (self.output_dir, img_name)
                convert_format.to_qupath(save_path, nuc_coms_list,
                                         nuc_type_list, self.type_info_dict)

            save_path = "%s/json/%s.json" % (self.output_dir, img_name)
            self.__save_json(save_path, inst_info_dict, None)
            return img_name

        def detach_items_of_uid(items_list, uid, nr_expected_items):
            item_counter = 0
            detached_items_list = []
            remained_items_list = []
            while True:
                pinfo, pdata = items_list.pop(0)
                pinfo = np.squeeze(pinfo)
                if pinfo[-1] == uid:
                    detached_items_list.append([pinfo, pdata])
                    item_counter += 1
                else:
                    remained_items_list.append([pinfo, pdata])
                if item_counter == nr_expected_items:
                    break
            # do this to ensure the ordering
            remained_items_list = remained_items_list + items_list
            return detached_items_list, remained_items_list

        proc_pool = None
        if self.nr_post_proc_workers > 0:
            proc_pool = ProcessPoolExecutor(self.nr_post_proc_workers)

        while len(file_path_list) > 0:

            hardware_stats = psutil.virtual_memory()
            available_ram = getattr(hardware_stats, "available")
            available_ram = int(available_ram * self.mem_usage)
            # available_ram >> 20 for MB, >> 30 for GB

            # TODO: this portion looks clunky but seems hard to detach into separate func

            # * caching N-files into memory such that their expected (total) memory usage
            # * does not exceed the designated percentage of currently available memory
            # * the expected memory is a factor w.r.t original input file size and
            # * must be manually provided
            file_idx = 0
            use_path_list = []
            cache_image_list = []
            cache_patch_info_list = []
            cache_image_info_list = []
            while len(file_path_list) > 0:
                file_path = file_path_list.pop(0)

                img = cv2.imread(file_path)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                src_shape = img.shape

                img, patch_info, top_corner = _prepare_patching(
                    img, self.patch_input_shape, self.patch_output_shape, True)
                self_idx = np.full(patch_info.shape[0],
                                   file_idx,
                                   dtype=np.int32)
                patch_info = np.concatenate([patch_info, self_idx[:, None]],
                                            axis=-1)
                # ? may be expensive op
                patch_info = np.split(patch_info, patch_info.shape[0], axis=0)
                patch_info = [np.squeeze(p) for p in patch_info]

                # * this factor=5 is only applicable for HoVerNet
                expected_usage = sys.getsizeof(img) * 5
                available_ram -= expected_usage
                if available_ram < 0:
                    break

                file_idx += 1
                # if file_idx == 4: break
                use_path_list.append(file_path)
                cache_image_list.append(img)
                cache_patch_info_list.extend(patch_info)
                # TODO: refactor to explicit protocol
                cache_image_info_list.append(
                    [src_shape, len(patch_info), top_corner])

            # * apply neural net on cached data
            dataset = SerializeFileList(cache_image_list,
                                        cache_patch_info_list,
                                        self.patch_input_shape)

            dataloader = data.DataLoader(
                dataset,
                num_workers=self.nr_inference_workers,
                batch_size=self.batch_size,
                drop_last=False,
            )

            pbar = tqdm.tqdm(
                desc="Process Patches",
                leave=True,
                total=int(len(cache_patch_info_list) / self.batch_size) + 1,
                ncols=80,
                ascii=True,
                position=0,
            )

            accumulated_patch_output = []
            for batch_idx, batch_data in enumerate(dataloader):
                sample_data_list, sample_info_list = batch_data
                sample_output_list = self.run_step(sample_data_list)
                sample_info_list = sample_info_list.numpy()
                curr_batch_size = sample_output_list.shape[0]
                sample_output_list = np.split(sample_output_list,
                                              curr_batch_size,
                                              axis=0)
                sample_info_list = np.split(sample_info_list,
                                            curr_batch_size,
                                            axis=0)
                sample_output_list = list(
                    zip(sample_info_list, sample_output_list))
                accumulated_patch_output.extend(sample_output_list)
                pbar.update()
            pbar.close()

            # * parallely assemble the processed cache data for each file if possible
            future_list = []
            for file_idx, file_path in enumerate(use_path_list):
                image_info = cache_image_info_list[file_idx]
                file_ouput_data, accumulated_patch_output = detach_items_of_uid(
                    accumulated_patch_output, file_idx, image_info[1])

                # * detach this into func and multiproc dispatch it
                src_pos = image_info[
                    2]  # src top left corner within padded image
                src_image = cache_image_list[file_idx]
                src_image = src_image[src_pos[0]:src_pos[0] + image_info[0][0],
                                      src_pos[1]:src_pos[1] +
                                      image_info[0][1], ]

                base_name = pathlib.Path(file_path).stem
                file_info = {
                    "src_shape": image_info[0],
                    "src_image": src_image,
                    "name": base_name,
                }

                post_proc_kwargs = {
                    "nr_types": self.nr_types,
                    "return_centroids": True,
                }  # dynamicalize this

                overlay_kwargs = {
                    "draw_dot": self.draw_dot,
                    "type_colour": self.type_info_dict,
                    "line_thickness": 2,
                }
                func_args = (
                    self.post_proc_func,
                    post_proc_kwargs,
                    file_ouput_data,
                    file_info,
                    overlay_kwargs,
                )

                # dispatch for parallel post-processing
                if proc_pool is not None:
                    proc_future = proc_pool.submit(_post_process_patches,
                                                   *func_args)
                    # ! manually poll future and call callback later as there is no guarantee
                    # ! that the callback is called from main thread
                    future_list.append(proc_future)
                else:
                    proc_output = _post_process_patches(*func_args)
                    proc_callback(proc_output)

            if proc_pool is not None:
                # loop over all to check state a.k.a polling
                for future in as_completed(future_list):
                    # TODO: way to retrieve which file crashed ?
                    # ! silent crash, cancel all and raise error
                    if future.exception() is not None:
                        log_info("Silent Crash")
                        # ! cancel somehow leads to cascade error later
                        # ! so just poll it then crash once all future
                        # ! acquired for now
                        # for future in future_list:
                        #     future.cancel()
                        # break
                    else:
                        file_path = proc_callback(future.result())
                        log_info("Done Assembling %s" % file_path)
        return
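# --- Added sketch (not from the original): a standalone illustration of the
# memory-budgeting idea used above -- cache inputs only while their estimated
# footprint (a manual factor times the in-memory size) stays within a chosen
# fraction of the currently available RAM. load_file and estimate_factor are
# hypothetical placeholders.
import sys

import psutil


def load_file(path):
    # hypothetical stand-in for reading an image or array from disk
    return b"\x00" * 1024


def batch_by_memory(paths, mem_usage=0.2, estimate_factor=5):
    """Yield batches of loaded files whose estimated memory use fits the budget."""
    remaining = list(paths)
    while remaining:
        budget = int(psutil.virtual_memory().available * mem_usage)
        batch = []
        while remaining:
            data = load_file(remaining[0])
            budget -= sys.getsizeof(data) * estimate_factor
            if batch and budget < 0:
                break  # keep this file for the next batch
            remaining.pop(0)
            batch.append(data)
        yield batch


if __name__ == '__main__':
    for batch in batch_by_memory([f"img_{i}.png" for i in range(10)]):
        print(f"processing a batch of {len(batch)} cached files")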
Exemple #49
0
def process_pool_executor_handler(executor: ProcessPoolExecutor,
                                  manager: DownloadProcess,
                                  file_maps: Dict[str, str],
                                  directory: str) -> None:
    done_queue = JoinableQueue()

    def update_hook(future: Future):
        temp = future.result()
        if temp:
            for failed_links in temp:
                done_queue.put(failed_links)

    while manager.done_retries != manager.max_retries:
        print(
            f"Starting download {manager.get_total_links() - manager.get_total_downloaded_links_count()} links left"
        )
        available_cpus = [0, 1, 2, 3
                          ] if platform.system() == "Windows" else list(
                              os.sched_getaffinity(os.getpid()))
        print(
            f"available cpu's {available_cpus}, initializing {4 * manager.get_process_num()}"
            f" threads with {manager.get_thread_num()} links per "
            f"process")

        if len(manager.error_links):
            download_links = manager.error_links.copy()
            manager.error_links = []
        else:
            download_links = manager.get_download_links().copy()

        process_futures: List[Future] = []

        start = 0
        for temp_num in range(len(download_links)):
            end = start + manager.get_thread_num()

            if end > len(download_links):
                end = len(download_links)

            cpu_num = available_cpus[temp_num % len(available_cpus)]
            process_futures.append(
                executor.submit(start_threads, download_links[start:end],
                                file_maps, manager.get_session(), directory,
                                manager.http2, manager.debug, cpu_num))
            process_futures[-1].add_done_callback(update_hook)
            start = end

            if end >= len(download_links):
                break

        wait(process_futures)

        while not done_queue.empty():
            link = done_queue.get()
            manager.error_links.append(link)

        manager.set_total_downloaded_links_count(manager.get_total_links() -
                                                 len(manager.error_links))

        if manager.debug:
            print(
                f"Total downloaded links {manager.get_total_downloaded_links_count()}"
            )
            print(f"Error links generated {len(manager.error_links)}")

        if len(manager.error_links):
            manager.set_thread_num(
                int(
                    ceil((manager.get_total_links() -
                          manager.get_total_downloaded_links_count()) /
                         manager.get_process_num())))
            print(
                f"{manager.get_total_links()} was expected but "
                f"{manager.get_total_downloaded_links_count()} was downloaded."
            )
            manager.done_retries += 1
            print(f"Trying retry {manager.done_retries}")
        else:
            break
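# --- Added sketch (not from the original): a minimal standalone version of the
# pattern above, where a done-callback on each future pushes failed items onto a
# shared queue that feeds the next retry round. download_chunk is a hypothetical
# placeholder that pretends some links fail.
import random
from concurrent.futures import Future, ProcessPoolExecutor, wait
from multiprocessing import Manager


def download_chunk(links):
    # hypothetical placeholder: roughly half of the links "fail" and are returned
    return [link for link in links if random.random() < 0.5]


if __name__ == '__main__':
    manager = Manager()
    failed_queue = manager.Queue()

    def collect_failures(future: Future):
        # Runs in the parent process once the future finishes.
        for link in future.result():
            failed_queue.put(link)

    links = [f"http://example.com/{i}" for i in range(20)]
    with ProcessPoolExecutor(max_workers=4) as pool:
        futures = []
        for start in range(0, len(links), 5):
            fut = pool.submit(download_chunk, links[start:start + 5])
            fut.add_done_callback(collect_failures)
            futures.append(fut)
        wait(futures)

    # All callbacks have run by the time the executor has shut down.
    retry = []
    while not failed_queue.empty():
        retry.append(failed_queue.get())
    print(f"{len(retry)} links to retry")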
Exemple #50
0
                        continue
                if funcs:
                    await asyncio.wait(funcs)
            else:
                await asyncio.sleep(0.2)


def run_get_datas_background(queue):

    def handle_new_data(new_data):
        current_time = datetime.now()
        sensor_mac = new_data[0]
        sensor_data = new_data[1]

        if sensor_mac not in all_data or all_data[sensor_mac]['data'] != sensor_data:
            update_data = {'mac': sensor_mac, 'data': sensor_data, 'timestamp': current_time.isoformat()}
            all_data[sensor_mac] = update_data
            queue.put(update_data)

    RuuviTagSensor.get_datas(handle_new_data)


m = Manager()
q = m.Queue()

executor = ProcessPoolExecutor()
executor.submit(run_get_datas_background, q)

loop = asyncio.get_event_loop()
loop.run_until_complete(handle_queue(q))
Exemple #51
0
    pool = ProcessPoolExecutor(8)
    futures = []
    parmas = []

    for d in data:
        data_name = d[:-4]

        data_path = os.path.join(eval_path, data_name)
        fig_path = os.path.join(path, 'eval_result/result', data_name)

        for dir_name in sub_dirs:
            if not os.path.exists(os.path.join(fig_path, dir_name)):
                os.makedirs(os.path.join(fig_path, dir_name))

        parmas.append((data_path, data_name, fig_path))

    for p in parmas:
        for bits in [3, 4, 5, 6, 7]:
            futures.append(
                pool.submit(run,
                            p[0],
                            p[1],
                            True,
                            p[2],
                            bounded=False,
                            x_axis=[bits]))

    for f in as_completed(futures):
        print(f.result())

    pool.shutdown()
Exemple #52
0
    for utterance_id, wav_file in enumerate(wav_file_list):
        wav_file_path = os.path.join(file_path, wav_file)
        wav = audio.load_wav(wav_file_path)
        mel_spec = audio.melspectrogram(wav)

        save_file_name = str(speaker_id) + "_" + str(utterance_id) + ".npy"
        np.save(os.path.join(out_dataset, save_file_name), mel_spec)


if __name__ == "__main__":

    # # preprocess(0, "p225")
    # list_speaker = os.listdir(hp.origin_data)
    # # thrs = [threading.Thread(target=preprocess, args=[speaker_id, file_name])
    # #         for speaker_id, file_name in enumerate(list_speaker)]
    # # [thr.start() for thr in thrs]
    # # [thr.join() for thr in thrs]

    # executor = ProcessPoolExecutor(max_workers=cpu_count())
    # futures = [executor.submit(partial(preprocess, speaker_id, file_name))
    #            for speaker_id, file_name in enumerate(list_speaker)]
    # [future.result() for future in futures]

    list_speaker = os.listdir(hp.origin_data)
    executor = ProcessPoolExecutor(max_workers=cpu_count())
    futures = [
        executor.submit(partial(preprocess_test, speaker_id, file_name))
        for speaker_id, file_name in enumerate(list_speaker)
    ]
    [future.result() for future in futures]
Exemple #53
0
          end='')


def doing(thread2):
    print('Resting for a moment: {}'.format(t := random.random()))
    time.sleep(t)
    # for i in range(10):
    #     task = thread2.submit(work, i, random.randint(1, 5))
    #     task_list.append(task)
    #     task.running()
    # task.result()


if __name__ == '__main__':
    thread2 = ThreadPoolExecutor(max_workers=10)
    process1 = ProcessPoolExecutor(max_workers=10)
    pool = multiprocessing.Pool(30)
    task_list = []
    for j in range(100):
        pool.apply_async(doing, (thread2, ))
        task = process1.submit(doing, thread2)
        # task_list.append(task)
        # task.running()
        # task.result()
        # pool.close()
        # pool.join()
    print("\n线程都准备好了\t\n")
    process1.shutdown(wait=True)
    thread2.shutdown(wait=True)
    print("\t\n全部执行完毕")
Exemple #54
0
class OptimizeWorker:
    def __init__(self, config: Config):
        self.config = config
        self.model = None  # type: ChessModel
        self.loaded_filenames = set()
        self.loaded_data = deque(
        )  # this should just be a ring buffer i.e. queue of length 500,000 in AZ
        self.dataset = None
        self.optimizer = None
        self.executor = ProcessPoolExecutor(
            max_workers=config.trainer.cleaning_processes)

    def start(self):
        self.model = self.load_model()
        self.training()

    def training(self):
        self.compile_model()
        last_load_data_step = last_save_step = total_steps = self.config.trainer.start_total_steps
        self.load_play_data()

        while True:
            if self.dataset_size < self.config.trainer.min_data_size_to_learn:
                logger.info(
                    f"dataset_size={self.dataset_size} is less than {self.config.trainer.min_data_size_to_learn}"
                )
                sleep(60)
                self.load_play_data()
                continue
            #self.update_learning_rate(total_steps)
            steps = self.train_epoch(self.config.trainer.epoch_to_checkpoint)
            total_steps += steps
            #if last_save_step + self.config.trainer.save_model_steps < total_steps:
            self.save_current_model()
            last_save_step = total_steps

            # if last_load_data_step + self.config.trainer.load_data_steps < total_steps:
            #     self.load_play_data()
            #     last_load_data_step = total_steps

    def train_epoch(self, epochs):
        tc = self.config.trainer
        state_ary, policy_ary, value_ary = self.dataset
        self.model.model.fit(state_ary, [policy_ary, value_ary],
                             batch_size=tc.batch_size,
                             epochs=epochs,
                             shuffle=True)
        steps = (state_ary.shape[0] // tc.batch_size) * epochs
        return steps

    def compile_model(self):
        from keras.optimizers import SGD, Adam
        self.optimizer = Adam()  #SGD(lr=2e-1, momentum=0.9) # Adam better?
        losses = ['categorical_crossentropy',
                  'mean_squared_error']  # avoid overfit for supervised
        self.model.model.compile(optimizer=self.optimizer,
                                 loss=losses,
                                 loss_weights=self.config.trainer.loss_weights)

    def save_current_model(self):
        rc = self.config.resource
        model_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f")
        model_dir = os.path.join(
            rc.next_generation_model_dir,
            rc.next_generation_model_dirname_tmpl % model_id)
        os.makedirs(model_dir, exist_ok=True)
        config_path = os.path.join(model_dir,
                                   rc.next_generation_model_config_filename)
        weight_path = os.path.join(model_dir,
                                   rc.next_generation_model_weight_filename)
        self.model.save(config_path, weight_path)

    def load_play_data(self):
        filenames = get_game_data_filenames(self.config.resource)
        updated = False
        for filename in filenames:
            if filename in self.loaded_filenames:
                continue
            self.load_data_from_file(filename)
            updated = True

        # for filename in (self.loaded_filenames - set(filenames)):
        #     self.unload_data_of_file(filename)
        #     updated = True

        if updated:
            logger.debug("updating training dataset")
            self.dataset = self.collect_all_loaded_data()

    def collect_all_loaded_data(self):
        state_ary, policy_ary, value_ary = [], [], []
        while self.loaded_data:
            s, p, v = self.loaded_data.popleft().result()
            state_ary.append(s)
            policy_ary.append(p)
            value_ary.append(v)

        state_ary = np.concatenate(state_ary)
        policy_ary = np.concatenate(policy_ary)
        value_ary = np.concatenate(value_ary)
        return state_ary, policy_ary, value_ary

    def load_model(self):
        from chess_zero.agent.model_chess import ChessModel
        model = ChessModel(self.config)
        rc = self.config.resource

        dirs = get_next_generation_model_dirs(rc)
        if not dirs:
            logger.debug("loading best model")
            if not load_best_model_weight(model):
                raise RuntimeError("Best model can not loaded!")
        else:
            latest_dir = dirs[-1]
            logger.debug("loading latest model")
            config_path = os.path.join(
                latest_dir, rc.next_generation_model_config_filename)
            weight_path = os.path.join(
                latest_dir, rc.next_generation_model_weight_filename)
            model.load(config_path, weight_path)
        return model

    def load_data_from_file(self, filename):
        # try:
        logger.debug(f"loading data from {filename}")
        data = read_game_data_from_file(filename)
        self.loaded_data.append(
            self.executor.submit(convert_to_cheating_data,
                                 data))  # NOTE: use with SL (supervised learning) data
        self.loaded_filenames.add(filename)
        # except Exception as e:
        #     logger.warning(str(e))

    @property
    def dataset_size(self):
        if self.dataset is None:
            return 0
        return len(self.dataset[0])
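
The inline comment in __init__ suggests the buffer should be a fixed-size ring buffer of roughly 500,000 entries; a minimal sketch of that idea (an assumption, not the project's actual change) is a bounded deque:

from collections import deque

# once maxlen is reached, appending silently evicts the oldest entry
loaded_data = deque(maxlen=500_000)
for sample in range(600_000):
    loaded_data.append(sample)
print(len(loaded_data))  # 500000 -- only the most recent samples are kept
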
from concurrent.futures import ProcessPoolExecutor
from time import sleep
from validate_prime import is_prime, PRIMES

executor = ProcessPoolExecutor(4)
futures = [executor.submit(is_prime, p) for p in PRIMES[:6]]

while not all(f.done() for f in futures):
    print('doing something else while waiting')
    sleep(1)

print([f.result() for f in futures])
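
An alternative sketch of the same polling loop using concurrent.futures.wait, which blocks with a timeout instead of sleeping between checks (slow_square stands in for is_prime):

from concurrent.futures import ProcessPoolExecutor, wait


def slow_square(n):
    return n * n


if __name__ == '__main__':
    with ProcessPoolExecutor(4) as executor:
        futures = [executor.submit(slow_square, n) for n in range(6)]
        pending = set(futures)
        while pending:
            done, pending = wait(pending, timeout=1)
            print('doing something else;', len(pending), 'still pending')
        print([f.result() for f in futures])
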
    w_lines_cl = []
    for line in files_cl:
        wav_path, text = line[0], line[1]
        mel = get_mel(wav_path)
        save_mel_path = str(Path(save_mel_dir).joinpath(Path(wav_path).name.replace(".wav", ".npy")))
        np.save(save_mel_path, mel)
        w_line = "{}|{}\n".format(Path(save_mel_path).name, text)
        w_lines_cl.append(w_line)
    return w_lines_cl


if __name__ == '__main__':
    """
    注意:tengxun数据集采样率为16000
    """
    meta_file = '/home/huangjiahong/tmp/tts/dataset/api/combine_dataset/tengxun_for_pytorch_tactron2/meta.txt'
    save_mel_dir = '/home/huangjiahong/tmp/tts/dataset/api/combine_dataset/tengxun_for_pytorch_tactron2/mels'
    save_meta_file = '/home/huangjiahong/tmp/tts/dataset/api/combine_dataset/tengxun_for_pytorch_tactron2/train.txt'
    filelines = files_to_list(meta_file)

    # split the metadata into roughly 1000 chunks; guard against a zero chunk
    # size when there are fewer than 1000 lines
    chunk_size = max(1, len(filelines) // 1000)
    lines_groups = [filelines[i:i + chunk_size] for i in range(0, len(filelines), chunk_size)]
    executor = ProcessPoolExecutor(max_workers=4)
    all_task = [executor.submit(partial(process_groups, files_cl, save_mel_dir)) for files_cl in lines_groups]

    with open(save_meta_file, 'w', encoding='utf-8') as f:
        for task in tqdm(all_task):
            lines = task.result()
            for line in lines:
                f.write(line)
Exemple #57
0
def main():
    args = parse_arguments()
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    os.environ['PYTHONHASHSEED'] = str(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    worker_init = WorkerInitObj(args.seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.enabled = True
    device, args = setup_training(args)
    model, optimizer, criterion = prepare_model_and_optimizer(args, device)
    pool = ProcessPoolExecutor(1)
    train_iter = subsetDataloader(path=args.train_path,
                                  batch_size=args.batch_size,
                                  worker_init=worker_init)
    test_iter = subsetDataloader(path=args.val_path,
                                 batch_size=args.batch_size,
                                 worker_init=worker_init)

    print('-' * 50 + 'args' + '-' * 50)
    for k in list(vars(args).keys()):
        print('{0}: {1}'.format(k, vars(args)[k]))
    print('-' * 30)
    print(model)
    print('-' * 50 + 'args' + '-' * 50)

    global_step = 0
    global_auc = 0

    s_time_train = time.time()
    for epoch in range(args.epoch):

        dataset_future = pool.submit(subsetDataloader, args.train_path,
                                     args.batch_size, worker_init)

        for step, batch in enumerate(train_iter):

            model.train()
            labels = batch['label'].to(device).float()
            batch = {
                t: {k: v.to(device)
                    for k, v in d.items()}
                for t, d in batch.items() if isinstance(d, dict)
            }

            optimizer.zero_grad()
            logits = model(batch)
            # print('logits', logits)
            # print('label', labels)
            loss = criterion(logits, labels)

            loss.backward()
            optimizer.step()

            # evaluate
            if global_step != 0 and global_step % args.eval_freq == 0:
                s_time_eval = time.time()
                model.eval()
                auc = evaluate(model, test_iter, device)
                e_time_eval = time.time()
                print('-' * 68)
                print('Epoch:[{0}] Step:[{1}] AUC:[{2}] time:[{3}s]'.format(
                    epoch, global_step, format(auc, '.4f'),
                    format(e_time_eval - s_time_eval, '.4f')))

                if auc > global_auc:
                    model_to_save = model.module if hasattr(
                        model, 'module') else model
                    output_save_file = os.path.join(
                        args.output_dir, "{}_auc_{}_step_{}_ckpt.pt".format(
                            args.model_name, format(auc, '.4f'), global_step))

                    if os.path.exists(output_save_file):
                        os.system('rm -rf {}'.format(output_save_file))
                    torch.save(
                        {
                            'model': model_to_save.state_dict(),
                            'name': args.model_name
                        }, output_save_file)
                    print('Epoch:[{0}] Step:[{1}] SavePath:[{2}]'.format(
                        epoch, global_step, output_save_file))
                    global_auc = auc
                print('-' * 68)

            # log
            if global_step != 0 and global_step % args.log_freq == 0:
                e_time_train = time.time()
                print('Epoch:[{0}] Step:[{1}] Loss:[{2}] Lr:[{3}] time:[{4}s]'.
                      format(epoch, global_step, format(loss.item(), '.4f'),
                             format(optimizer.param_groups[0]['lr'], '.6'),
                             format(e_time_train - s_time_train, '.4f')))
                s_time_train = time.time()

            global_step += 1

        del train_iter
        train_iter = dataset_future.result(timeout=None)
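
The epoch loop above overlaps data loading with training by prefetching the next epoch's dataloader via pool.submit. A stripped-down sketch of that pattern, with a stand-in build_loader and hypothetical path, looks like this:

import time
from concurrent.futures import ProcessPoolExecutor


def build_loader(path):
    # stand-in for subsetDataloader: pretend construction is expensive
    time.sleep(1)
    return list(range(5))


if __name__ == '__main__':
    pool = ProcessPoolExecutor(1)
    train_iter = build_loader("train.bin")  # hypothetical path
    for epoch in range(3):
        # kick off construction of the next epoch's loader in the background
        future = pool.submit(build_loader, "train.bin")
        for batch in train_iter:
            pass  # training step would go here
        train_iter = future.result()  # swap in the prefetched loader
    pool.shutdown()
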
Exemple #58
0
class DataRouter(object):
    def __init__(self, config, component_builder):
        self._training_processes = max(config['max_training_processes'], 1)
        self.config = config
        self.responses = self._create_query_logger(config)
        self.model_dir = config['path']
        self.emulator = self._create_emulator()
        self.component_builder = component_builder if component_builder else ComponentBuilder(
            use_cache=True)
        self.project_store = self._create_project_store()
        self.pool = ProcessPool(self._training_processes)

    def __del__(self):
        """Terminates workers pool processes"""
        self.pool.shutdown()

    def _create_query_logger(self, config):
        """Creates a logger that will persist incoming queries and their results."""

        response_log_dir = config['response_log']
        # Ensures different log files for different processes in multi-worker mode
        if response_log_dir:
            # We need to generate a unique file name, even in multiprocess environments
            timestamp = datetime.datetime.now().strftime('%Y%m%d-%H%M%S')
            log_file_name = "rasa_nlu_log-{}-{}.log".format(
                timestamp, os.getpid())
            response_logfile = os.path.join(response_log_dir, log_file_name)
            # Instantiate a standard python logger, which we are going to use to log requests
            utils.create_dir_for_file(response_logfile)
            query_logger = Logger(observer=jsonFileLogObserver(
                io.open(response_logfile, 'a', encoding='utf8')),
                                  namespace='query-logger')
            # Prevents queries getting logged with parent logger --> might log them to stdout
            logger.info("Logging requests to '{}'.".format(response_logfile))
            return query_logger
        else:
            # If the user didn't provide a logging directory, we won't log!
            logger.info(
                "Logging of requests is disabled. (No 'request_log' directory configured)"
            )
            return None

    def _collect_projects(self):
        if os.path.isdir(self.config['path']):
            projects = os.listdir(self.config['path'])
        else:
            projects = []

        projects.extend(self._list_projects_in_cloud())
        return projects

    def _create_project_store(self):
        projects = self._collect_projects()

        project_store = {}

        for project in projects:
            project_store[project] = Project(self.config,
                                             self.component_builder, project)

        if not project_store:
            project_store[RasaNLUConfig.DEFAULT_PROJECT_NAME] = Project(
                self.config)
        return project_store

    def _list_projects_in_cloud(self):
        try:
            from rasa_nlu.persistor import get_persistor
            p = get_persistor(self.config)
            if p is not None:
                return p.list_projects()
            else:
                return []
        except Exception:
            logger.exception("Failed to list projects.")
            return []

    def _create_emulator(self):
        """Sets which NLU webservice to emulate among those supported by Rasa"""

        mode = self.config['emulate']
        if mode is None:
            from rasa_nlu.emulators import NoEmulator
            return NoEmulator()
        elif mode.lower() == 'wit':
            from rasa_nlu.emulators.wit import WitEmulator
            return WitEmulator()
        elif mode.lower() == 'luis':
            from rasa_nlu.emulators.luis import LUISEmulator
            return LUISEmulator()
        elif mode.lower() == 'dialogflow':
            from rasa_nlu.emulators.dialogflow import DialogflowEmulator
            return DialogflowEmulator()
        else:
            raise ValueError("unknown mode : {0}".format(mode))

    def extract(self, data):
        return self.emulator.normalise_request_json(data)

    def parse(self, data):
        project = data.get("project") or RasaNLUConfig.DEFAULT_PROJECT_NAME
        model = data.get("model")

        if project not in self.project_store:
            projects = self._list_projects(self.config['path'])

            cloud_provided_projects = self._list_projects_in_cloud()
            projects.extend(cloud_provided_projects)

            if project not in projects:
                raise InvalidProjectError(
                    "No project found with name '{}'.".format(project))
            else:
                try:
                    self.project_store[project] = Project(
                        self.config, self.component_builder, project)
                except Exception as e:
                    raise InvalidProjectError(
                        "Unable to load project '{}'. Error: {}".format(
                            project, e))

        time = data.get('time')
        response, used_model = self.project_store[project].parse(
            data['text'], time, model)

        if self.responses:
            self.responses.info('',
                                user_input=response,
                                project=project,
                                model=used_model)

        return self.format_response(response)

    @staticmethod
    def _list_projects(path):
        """List the projects in the path, ignoring hidden directories."""
        return [os.path.basename(fn) for fn in utils.list_subdirectories(path)]

    @staticmethod
    def create_temporary_file(data, suffix=""):
        """Creates a tempfile.NamedTemporaryFile object for data"""

        if PY3:
            f = tempfile.NamedTemporaryFile("w+",
                                            suffix=suffix,
                                            delete=False,
                                            encoding="utf-8")
            f.write(data)
        else:
            f = tempfile.NamedTemporaryFile("w+", suffix=suffix, delete=False)
            f.write(data.encode("utf-8"))

        f.close()
        return f

    def parse_training_examples(self, examples, project, model):
        # type: (Optional[List[Message]], Text, Text) -> List[Dict[Text, Text]]
        """Parses a list of training examples to the project interpreter"""

        predictions = []
        for ex in examples:
            logger.debug("Going to parse: {}".format(ex.as_dict()))
            response, _ = self.project_store[project].parse(
                ex.text, None, model)
            logger.debug("Received response: {}".format(response))
            predictions.append(response)

        return predictions

    def format_response(self, data):
        return self.emulator.normalise_response_json(data)

    def get_status(self):
        # This will only count the trainings started from this process. If run in
        # multi-worker mode, there might be other trainings running in different
        # processes that we don't know about.

        return {
            "available_projects": {
                name: project.as_dict()
                for name, project in self.project_store.items()
            }
        }

    def start_train_process(self, data, config_values):
        # type: (Text, Dict[Text, Any]) -> Deferred
        """Start a model training."""

        f = self.create_temporary_file(data, "_training_data")
        # TODO: fix config handling
        _config = self.config.as_dict()
        for key, val in config_values.items():
            _config[key] = val
        _config["data"] = f.name
        train_config = RasaNLUConfig(cmdline_args=_config)

        project = _config.get("project")
        if not project:
            raise InvalidProjectError("Missing project name to train")
        elif project in self.project_store:
            if self.project_store[project].status == 1:
                raise AlreadyTrainingError
            else:
                self.project_store[project].status = 1
        elif project not in self.project_store:
            self.project_store[project] = Project(self.config,
                                                  self.component_builder,
                                                  project)
            self.project_store[project].status = 1

        def training_callback(model_path):
            model_dir = os.path.basename(os.path.normpath(model_path))
            self.project_store[project].update(model_dir)
            return model_dir

        def training_errback(failure):
            target_project = self.project_store.get(
                failure.value.failed_target_project)
            if target_project:
                target_project.status = 0
            return failure

        logger.debug("New training queued")

        result = self.pool.submit(do_train_in_worker, train_config)
        result = deferred_from_future(result)
        result.addCallback(training_callback)
        result.addErrback(training_errback)

        return result

    def evaluate(self, data, project=None, model=None):
        # type: (Text, Optional[Text], Optional[Text]) -> Dict[Text, Any]
        """Perform a model evaluation."""

        project = project or RasaNLUConfig.DEFAULT_PROJECT_NAME
        model = model or None
        f = self.create_temporary_file(data, "_training_data")
        test_data = load_data(f.name)

        if project not in self.project_store:
            raise InvalidProjectError("Project {} could not "
                                      "be found".format(project))

        preds_json = self.parse_training_examples(test_data.intent_examples,
                                                  project, model)

        predictions = [{
            "text": e.text,
            "intent": e.data.get("intent"),
            "predicted": p.get("intent", {}).get("name"),
            "confidence": p.get("intent", {}).get("confidence")
        } for e, p in zip(test_data.intent_examples, preds_json)]

        y_true = [e.data.get("intent") for e in test_data.intent_examples]
        y_true = clean_intent_labels(y_true)

        y_pred = [p.get("intent", {}).get("name") for p in preds_json]
        y_pred = clean_intent_labels(y_pred)

        report, precision, f1, accuracy = get_evaluation_metrics(
            y_true, y_pred)

        return {
            "intent_evaluation": {
                "report": report,
                "predictions": predictions,
                "precision": precision,
                "f1_score": f1,
                "accuracy": accuracy
            }
        }
Exemple #59
0
class BokehTornado(TornadoApplication):
    ''' A Tornado Application used to implement the Bokeh Server.

        The Server class is the main public interface; this class holds the
        Tornado implementation details.

    Args:
        applications (dict of str : bokeh.application.Application) : map from paths to Application instances
            The application is used to create documents for each session.
        extra_patterns (seq[tuple]) : tuples of (str, http or websocket handler)
            Use this argument to add additional endpoints to custom deployments
            of the Bokeh Server.

    '''

    def __init__(self, applications, io_loop=None, extra_patterns=None):
        if io_loop is None:
            io_loop = IOLoop.current()
        self._loop = io_loop

        self._resources = {}

        # Wrap applications in ApplicationContext
        self._applications = dict()
        for k,v in applications.items():
            self._applications[k] = ApplicationContext(v, self._loop)

        extra_patterns = extra_patterns or []
        relative_patterns = []
        for key in applications:
            for p in per_app_patterns:
                if key == "/":
                    route = p[0]
                else:
                    route = key + p[0]
                relative_patterns.append((route, p[1], { "application_context" : self._applications[key] }))
        websocket_path = None
        for r in relative_patterns:
            if r[0].endswith("/ws"):
                websocket_path = r[0]
        if not websocket_path:
            raise RuntimeError("Couldn't find websocket path")
        for r in relative_patterns:
            r[2]["bokeh_websocket_path"] = websocket_path

        all_patterns = extra_patterns + relative_patterns + toplevel_patterns
        log.debug("Patterns are: %r", all_patterns)
        super(BokehTornado, self).__init__(all_patterns, **settings)

        self._clients = set()
        self._executor = ProcessPoolExecutor(max_workers=4)
        self._loop.add_callback(self._start_async)
        self._stats_job = PeriodicCallback(self.log_stats, 15.0 * 1000, io_loop=self._loop)
        self._stats_job.start()
        self._unused_session_linger_seconds = 60*30
        self._cleanup_job = PeriodicCallback(self.cleanup_sessions, 17.0 * 1000, io_loop=self._loop)
        self._cleanup_job.start()

    @property
    def io_loop(self):
        return self._loop

    def root_url_for_request(self, request):
        # If we add a "whole server prefix," we'd put that on here too
        return request.protocol + "://" + request.host + "/"

    def websocket_url_for_request(self, request, websocket_path):
        protocol = "ws"
        if request.protocol == "https":
            protocol = "wss"
        return protocol + "://" + request.host + websocket_path

    def resources(self, request):
        root_url = self.root_url_for_request(request)
        if root_url not in self._resources:
            self._resources[root_url] = Resources(mode="server", root_url=root_url)
        return self._resources[root_url]

    def start(self):
        ''' Start the Bokeh Server application main loop.

        Args:

        Returns:
            None

        Notes:
            Keyboard interrupts or sigterm will cause the server to shut down.

        '''
        try:
            self._loop.start()
        except KeyboardInterrupt:
            print("\nInterrupted, shutting down")

    def stop(self):
        ''' Stop the Bokeh Server application.

        Returns:
            None

        '''
        self._loop.stop()

    @property
    def executor(self):
        return self._executor

    def new_connection(self, protocol, socket, application_context, session):
        connection = ServerConnection(protocol, socket, application_context, session)
        self._clients.add(connection)
        return connection

    def client_lost(self, connection):
        self._clients.discard(connection)
        connection.detach_session()

    def get_session(self, app_path, session_id):
        if app_path not in self._applications:
            raise ValueError("Application %s does not exist on this server" % app_path)
        return self._applications[app_path].get_session(session_id)

    def cleanup_sessions(self):
        for app in self._applications.values():
            app.cleanup_sessions(self._unused_session_linger_seconds)

    def log_stats(self):
        log.debug("[pid %d] %d clients connected", os.getpid(), len(self._clients))

    @gen.coroutine
    def run_in_background(self, _func, *args, **kwargs):
        """
        Run a synchronous function in the background without disrupting
        the main thread. Useful for long-running jobs.
        """
        res = yield self._executor.submit(_func, *args, **kwargs)
        raise gen.Return(res)

    @gen.coroutine
    def _start_async(self):
        try:
            atexit.register(self._atexit)
            signal.signal(signal.SIGTERM, self._sigterm)
        except Exception:
            self.exit(1)

    _atexit_ran = False
    def _atexit(self):
        if self._atexit_ran:
            return
        self._atexit_ran = True

        self._stats_job.stop()
        IOLoop.clear_current()
        loop = IOLoop()
        loop.make_current()
        loop.run_sync(self._cleanup)

    def _sigterm(self, signum, frame):
        print("Received SIGTERM, shutting down")
        self.stop()
        self._atexit()

    @gen.coroutine
    def _cleanup(self):
        log.debug("Shutdown: cleaning up")
        self._executor.shutdown(wait=False)
        self._clients.clear()
def make_figures(path, limits, ncores, fudge_factor, scale):
    # look for pump-probe data file
    path = Path(path)
    pool = ProcessPoolExecutor(max_workers=ncores)
    #rcParams.update(params)

    try:
        ddfile = h5py.File(str(path/'pump-probe.h5'), 'r')
        absfile = h5py.File(str(path/'absorption.h5'), 'r')
    except FileNotFoundError as e:
        print('Data files not found in directory {!s}'.format(path))
        return

    if scale < 0:
        scale = None

    # load ref
    ddref = np.array(ddfile['reference']).imag
    shape = (100, *ddref.shape)

    # calculate average pump-probe
    tmp = da.from_array(ddfile['00000/data'], chunks=shape)
    pts_used = tmp.shape[0] - 1 # assume we did Stark averaging
    rdataon = tmp[:pts_used].imag.mean(axis=0)
    rdataoff = tmp[pts_used].imag # last one is the field-off data
    dd = StarkData(*dask.compute(rdataon, rdataoff))
    w3, w1 = np.array(ddfile['w3']), np.array(ddfile['w1'])

    # load evecs
    energies = np.array(ddfile['meta/one band energies'])
    nstates = energies.shape[0]
    evecs2 = np.array(ddfile['meta/ge eigenvectors'])**2
    reorgs = np.array(ddfile['meta/reorganization energy matrix'])
    sbcouplingdiag = np.diag(ddfile['meta/sb coupling diagonal'])
    sbcouplingoffdiag = np.diag(ddfile['meta/sb coupling off-diagonal'])
    redfield = np.array(ddfile['meta/redfield relaxation matrix'])
    reorgs = np.diag(reorgs)[1:nstates+1]
    redfield = np.diag(redfield)[1:nstates+1]
    dephasingmat = np.array(ddfile['meta/lifetime dephasing matrix'])[:, ::2] + \
                   1j*np.array(ddfile['meta/lifetime dephasing matrix'])[:, 1::2]
    mu2_trace = np.linalg.norm(np.array(ddfile['00000/meta/ge dipoles'])[..., 2:], axis=-1)**2

    cfg = QcfpConfig.from_yaml(str(np.array(ddfile['cfg'])))
    if not cfg.include_complex_lifetimes:
        dephasingmat = dephasingmat.real

    imagdeph = dephasingmat[1:nstates+1,0].imag

    fixed_energies = energies - reorgs
    fixed_energies2 = energies - reorgs + imagdeph + fudge_factor
    # fudge_factor is the calibrated correction factor that comes from Stokes
    # shift, which moves the location of the monomer

    reorgs_trace = np.diagonal(ddfile['00000/meta/reorganization energy matrix'],
                               axis1=1, axis2=2)[:, 1:nstates+1]
    evecs2_trace = np.array(ddfile['00000/meta/ge eigenvectors'])**2

    energies_trace = np.array(ddfile['00000/meta/one band energies'])
    dephasingmat_trace = np.array(ddfile['00000/meta/lifetime dephasing '
                                         'matrix'])[:,:,::2] + \
                   1j*np.array(ddfile['00000/meta/lifetime dephasing '
                                      'matrix'])[:,:,1::2]

    if not cfg.include_complex_lifetimes:
        dephasingmat_trace = dephasingmat_trace.real

    imagdeph_trace = dephasingmat_trace[:, 1:nstates+1,0].imag
    corr_energies = energies_trace - reorgs_trace + imagdeph_trace + fudge_factor

    # prepare folder for writing things
    figpath = (path / 'figures')
    figpath.mkdir(exist_ok=True)

    with (figpath / 'eigen-energies.info').open('w') as f:
        print('Eigen-energies:', energies, file=f)
        print('GE reorganization energies:', reorgs, file=f)
        print('Reorg\'ed energies:', fixed_energies, file=f)
        print('Dephasing: ', imagdeph, file=f)
        print('Reorg\'ed energies + deph + fudge:', fixed_energies2, file=f)
        print(file=f)
        for i in range(evecs2.shape[0]):
            print('Localization of eigenstate {:d}:'.format(i), file=f)
            print(evecs2[i, :], file=f)
            print(file=f)
        print('S-B diagonal couplings:', sbcouplingdiag, file=f)
        print('S-B off-diagonal couplings:', sbcouplingoffdiag, file=f)
        print(file=f)

    # make diagnostic plots to make sure rotational averaging matches analytic
    s = str(figpath / '2d-reference.png')
    pool.submit(plot_2d, w1=w1, w3=w3, signal=ddref, path=s, axlim=limits,
            scale=scale)
    #plot_2d(w1=w1, w3=w3, signal=ddref, path=s, axlim=limits)

    #s = str(figpath / '2d-reference-old.png')
    #pool.submit(plot_result, w1=w1, w3=w3, signal=ddref, path=s,
    #            show=False)

    s = str(figpath / '2d-fieldon.png')
    pool.submit(plot_2d, w1=w1, w3=w3, signal=dd.fieldon, path=s, axlim=limits,
            scale=scale)

    s = str(figpath / '2d-fieldoff.png')
    pool.submit(plot_2d, w1=w1, w3=w3, signal=dd.fieldoff, path=s,
            axlim=limits,
            scale=scale)

    s = str(figpath / '2d-stark.png')
    pool.submit(plot_2d, w1=w1, w3=w3, signal=dd.fieldon-dd.fieldoff, path=s,
            axlim=limits,
            scale=scale)

    for i in range(nstates):
        s = str(figpath / '2d-evecs{:d}.png'.format(i))
        pool.submit(plot_evecs, corr_energies, evecs2_trace, i, s, axlim=limits)

    dd_projection = -(ddref).sum(axis=1)
    ddess_projection = -(dd.fieldon - dd.fieldoff).sum(axis=1)

    # do the same for absorption
    absref = np.array(absfile['reference'])
    shape = (100, *absref.shape)

    tmp = da.from_array(absfile['00000/data'], chunks=shape)
    pts_used = tmp.shape[0] - 1
    rdataon = tmp[:pts_used].mean(axis=0)
    rdataoff = tmp[pts_used]
    abs = StarkData(*dask.compute(rdataon, rdataoff))
    w3 = np.array(absfile['w3'])

    eigenenergies = {'with dephasing': fixed_energies2/1e3,
                     'without dephasing': fixed_energies/1e3}

    # add the localization plot
    s = str(figpath / 'linear-localization.png')
    fig, (ax1, ax2) = subplots(2, 1, sharex=True)
    for i in range(0, nstates):
        weights_trace = mu2_trace*evecs2_trace[:, i, :]
        heights, bins = np.histogram(energies_trace.reshape(-1)/1e3,
                                     bins=80,
                                     weights=weights_trace.reshape(-1),
                                     density=False)
        widths = np.diff(bins)
        ax1.bar(bins[:-1], heights/heights.max(), widths, alpha=0.8,
                label='site {:d}'.format(i+1))

    ax1.legend()

    ax2.plot(w3/1e3, abs.fieldoff, label='field off')
    ax2.plot(w3/1e3, abs.fieldon, label='field on')
    ax2.plot(w3/1e3, abs.fieldon - abs.fieldoff, label='stark')
    ax2.set_xlabel(r'$\omega_t$ ($\times 10^3\ \mathrm{cm}^{-1}$)')
    ax2.set_xlim(*limits)

    ax2.legend()
    fig.savefig(str(s))

    s = str(figpath / 'linear-reference.png')
    pool.submit(plot_linear, w3=w3, signal=absref, path=s,
                axlim=limits, eigenenergies=eigenenergies,
                scale=scale)

    s = str(figpath / 'linear-fieldoff.png')
    pool.submit(plot_linear, w3=w3, signal=abs.fieldoff, path=s,
                axlim=limits, eigenenergies=eigenenergies,
                scale=scale)

    s = str(figpath / 'linear-fieldon.png')
    pool.submit(plot_linear, w3=w3, signal=abs.fieldon, path=s,
                axlim=limits, eigenenergies=eigenenergies,
                scale=scale)

    s = str(figpath / 'linear-stark.png')
    pool.submit(plot_linear, w3=w3, signal=abs.fieldon - abs.fieldoff, path=s,
                axlim=limits, eigenenergies=eigenenergies, scale=scale)

    s = str(figpath / 'linear-projections.png')
    ax, scale2 = plot_linear(w3=w3, signal=abs.fieldoff, path=s, axlim=limits)
    plot_linear(w3=w3, signal=dd_projection, path=s, ax=ax, axlim=limits,
            eigenenergies=eigenenergies, scale=scale)

    s = str(figpath / 'linear-stark-projections.png')
    ax, scale2 = plot_linear(w3=w3, signal=abs.fieldon - abs.fieldoff, path=s,
            axlim=limits, scale=scale)
    plot_linear(w3=w3, signal=ddess_projection, path=s, ax=ax,
                axlim=limits, eigenenergies=eigenenergies, scale=scale)
    print('Submitted all figure-rendering tasks to the process pool')

    pool.shutdown(wait=True)