class _Worker(object):
    def __init__(self, protocol=None):
        self.protocol = protocol
        self.pool = ProcessPoolExecutor(max_workers=1)
        self.pool.submit(id, 42).result()  # start the worker process

    def run(self, func, *args, **kwargs):
        """Synchronous remote function call"""
        input_payload = dumps((func, args, kwargs), protocol=self.protocol)
        result_payload = self.pool.submit(
            call_func, input_payload, self.protocol).result()
        result = loads(result_payload)
        if isinstance(result, BaseException):
            raise result
        return result

    def memsize(self):
        workers_pids = [p.pid if hasattr(p, "pid") else p
                        for p in list(self.pool._processes)]
        num_workers = len(workers_pids)
        if num_workers == 0:
            return 0
        elif num_workers > 1:
            raise RuntimeError("Unexpected number of workers: %d" % num_workers)
        return psutil.Process(workers_pids[0]).memory_info().rss

    def close(self):
        self.pool.shutdown(wait=True)
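A minimal usage sketch for the _Worker class above (an assumption, not part of the original source): it presumes `dumps`/`loads` are pickle-style serializers and that `call_func` and `psutil` are importable in the same module, as the method bodies imply.

import math

worker = _Worker(protocol=2)
try:
    # The callable and its arguments are pickled, executed in the single
    # worker process, and the (possibly exceptional) result is shipped back.
    print(worker.run(math.factorial, 10))   # 3628800
    print(worker.memsize())                 # resident set size of the worker, in bytes
finally:
    worker.close()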
def parallel(self, parallel):
    # shutdown any previous executor if we are managing it
    if getattr(self, '_managing_executor', False):
        self._executor.shutdown()

    self._parallel = parallel
    self._managing_executor = False

    if parallel is False:
        self._executor = None
        return

    if parallel is True:
        from concurrent.futures import ProcessPoolExecutor
        self._executor = ProcessPoolExecutor()
        self._managing_executor = True
        return

    if isinstance(parallel, numbers.Number):
        from concurrent.futures import ProcessPoolExecutor
        self._executor = ProcessPoolExecutor(parallel)
        self._managing_executor = True
        return

    # assume a pool-executor has been supplied
    self._executor = parallel
def build_from_path(hparams, input_dirs, mel_dir, linear_dir, wav_dir, n_jobs=12, tqdm=lambda x: x):
    """
    Preprocesses the speech dataset from a given input path to given output directories

    Args:
        - hparams: hyper parameters
        - input_dirs: input directories that contain the files to preprocess
        - mel_dir: output directory of the preprocessed speech mel-spectrogram dataset
        - linear_dir: output directory of the preprocessed speech linear-spectrogram dataset
        - wav_dir: output directory of the preprocessed speech audio dataset
        - n_jobs: Optional, number of worker processes to parallelize across
        - tqdm: Optional, provides a nice progress bar

    Returns:
        - A list of tuples describing the train examples. This should be written to train.txt
    """
    # We use ProcessPoolExecutor to parallelize across processes; this is just for
    # optimization purposes and it can be omitted.
    executor = ProcessPoolExecutor(max_workers=n_jobs)
    futures = []
    index = 1
    for input_dir in input_dirs:
        with open(os.path.join(input_dir, 'metadata.csv'), encoding='utf-8') as f:
            for line in f:
                parts = line.strip().split('|')
                wav_path = os.path.join(input_dir, 'wavs', '{}.wav'.format(parts[0]))
                text = parts[2]
                futures.append(executor.submit(partial(
                    _process_utterance, mel_dir, linear_dir, wav_dir, index, wav_path, text, hparams)))
                index += 1

    return [future.result() for future in tqdm(futures) if future.result() is not None]
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
    '''Preprocesses the LJ Speech dataset from a given input path into a given output directory.

    Args:
        in_dir: The directory where you have downloaded the LJ Speech dataset
        out_dir: The directory to write the output into
        num_workers: Optional number of worker processes to parallelize across
        tqdm: You can optionally pass tqdm to get a nice progress bar

    Returns:
        A list of tuples describing the training examples. This should be written to train.txt
    '''
    # We use ProcessPoolExecutor to parallelize across processes. This is just an optimization and you
    # can omit it and just call _process_utterance on each input if you want.
    executor = ProcessPoolExecutor(max_workers=num_workers)
    futures = []
    index = 1
    with open(os.path.join(in_dir, 'metadata.csv'), encoding='utf-8') as f:
        for line in f:
            parts = line.strip().split('|')
            wav_path = os.path.join(in_dir, 'wavs', '%s.wav' % parts[0])
            text = parts[2]
            futures.append(executor.submit(partial(_process_utterance, out_dir, index, wav_path, text)))
            index += 1
    return [future.result() for future in tqdm(futures)]
def __call__(self, workflow, input_artifact_filepaths, parameter_references, output_artifact_filepaths):
    input_artifact_abs_filepaths = \
        {k: os.path.abspath(v) for k, v in input_artifact_filepaths.items()}
    output_artifact_abs_filepaths = \
        {k: os.path.abspath(v) for k, v in output_artifact_filepaths.items()}

    job = workflow.to_script(input_artifact_abs_filepaths,
                             parameter_references,
                             output_artifact_abs_filepaths)

    temp_dir = tempfile.mkdtemp()
    pool = ProcessPoolExecutor(max_workers=1)
    py_filename = os.path.join(temp_dir, 'job.py')
    with open(py_filename, 'w') as py_file:
        py_file.write(job.code)

    # TODO: handle subprocess exceptions
    future = pool.submit(subprocess.run,
                         [self._python_executable, py_filename],
                         stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    # TODO: handle callback exceptions
    # TODO: make sure that tempdir is cleaned up even if there is an
    # exception in pool.submit or the callback
    future.add_done_callback(lambda _: shutil.rmtree(temp_dir))
    return future
def main():
    parser = argparse.ArgumentParser()
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--filter", action="store_true",
                       help="act as a filter")
    group.add_argument("--transform", metavar="MAPPING",
                       type=argparse.FileType("r"),
                       help="transform all files given in the mapping file")
    parser.add_argument("--srcprefix", metavar="PREFIX", default="",
                        help="when transforming data files prepend this PREFIX to source paths")
    parser.add_argument("--dstprefix", metavar="PREFIX", default="",
                        help="when transforming data files prepend this PREFIX to destination paths")
    args = parser.parse_args()

    if args.filter:
        check_stream(sys.stdin, sys.stdout)
    else:
        exe = Executor()
        res = []
        for lineno, line in enumerate(args.transform):
            line = line.split('#', 1)[0]  # comment
            line = line.rstrip()          # trailing space or newline
            match = re.match(r'^(\S+):\s*(\S+)$', line)
            if not match:
                raise ValueError("syntax error on line %d" % (lineno + 1))
            destination, source = match.groups()
            source = os.path.join(args.srcprefix, source)
            destination = os.path.join(args.dstprefix, destination)
            res.append(exe.submit(transform, source, destination))
        while res:
            res.pop(0).result()  # propagate exceptions
def __init__(self, apiurl, apiversion, charmworldurl=None, io_loop=None):
    """Initialize the deployer.

    The apiurl argument is the URL of the juju-core WebSocket server.
    The apiversion argument is the Juju API version (e.g. "go").
    """
    self._apiurl = apiurl
    self._apiversion = apiversion
    if charmworldurl is not None and not charmworldurl.endswith('/'):
        charmworldurl = charmworldurl + '/'
    self._charmworldurl = charmworldurl
    if io_loop is None:
        io_loop = IOLoop.current()
    self._io_loop = io_loop
    # Deployment validation and importing executors.
    self._validate_executor = ProcessPoolExecutor(1)
    self._run_executor = ProcessPoolExecutor(1)
    # An observer instance is used to watch the deployments progress.
    self._observer = utils.Observer()
    # Queue stores the deployment identifiers corresponding to the
    # currently started/queued jobs.
    self._queue = []
    # The futures attribute maps deployment identifiers to Futures.
    self._futures = {}
class ThreadPool(object):
    '''Thread pool implementation'''

    def __init__(self, thread_num=1, process_num=1, q_size=2000, daemon=True):
        self.thread_pool = _ThreadPoolExecutor(thread_num, daemon)
        self.process_pool = ProcessPoolExecutor(process_num)
        self.result_queue = Queue(q_size)

    def wait(self, threads=[]):
        thread_wait(threads)

    def add_thread(self, target, args=()):
        result = self.thread_pool.submit(target, *args)
        return result

    def add_process(self, target, args=()):
        result = self.process_pool.submit(target, *args)
        return result

    def thread_map(self, target, args=[]):
        return [self.thread_pool.submit(target, arg) for arg in args]

    def process_map(self, target, args=[]):
        return self.process_pool.map(target, args)

    def map(self, target, args=[]):
        return self.process_map(target, args)
def main(): """ Makes banner requests with a ThreadPoolExecutor. """ arg_parser = ArgumentParser() arg_parser.add_argument("--ip", help="IP address", required=True) arg_parser.add_argument("--pool", help="Executor pool type", choices=("thread", "process"), required=True) arg_parser.add_argument( "--workers", help="Number of executor workers", type=int, choices=range(1, 9), required=True ) args = arg_parser.parse_args() ip = args.ip pool = args.pool workers = args.workers if pool == "process": executor = ProcessPoolExecutor(max_workers=workers) elif pool == "thread": executor = ThreadPoolExecutor(max_workers=workers) for i in range(1, 256): for port in get_ports(): executor.submit(banner_request, "{0}.{1}".format(ip, i), port) print("[!] Finished spawning banner requests")
def on_message(self, message):
    print len(message)
    result = yield tornado.gen.Task(self.process_message, message)
    return
    # Note: everything below the bare return is unreachable; the original
    # author apparently disabled the process-pool path this way.
    pool = ProcessPoolExecutor()
    fut = pool.submit(call_process, message)
    ret = yield fut
    pool.shutdown()
def main(chunk):
    nums = range(1, 1000)
    pool = ProcessPoolExecutor()
    count = 0
    returned_iterator = pool.map(is_prime, nums, timeout=None, chunksize=chunk)
    for result in returned_iterator:
        if result:
            count += 1
    return count
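main() above relies on an is_prime predicate defined at module level so that the process pool can pickle it for the worker processes; a minimal sketch of that assumed helper and a call site (names and values here are assumptions):

def is_prime(n):
    # Simple trial division; any picklable, module-level predicate would do.
    if n < 2:
        return False
    for d in range(2, int(n ** 0.5) + 1):
        if n % d == 0:
            return False
    return True

if __name__ == '__main__':
    print(main(chunk=50))  # number of primes below 1000 (168)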
def run_simulation(datasets, workers_num):
    workers = [TroiaWebDemoUser(get_troia_client(), "TES_TROJ_JID_" + str(i))
               for i in xrange(workers_num)]
    for worker in workers:
        worker.set_datasets(datasets)
    executor = ProcessPoolExecutor(workers_num)
    # maap = map
    maap = lambda *args, **kwargs: list(executor.map(*args, **kwargs))
    maap(exec_fun, workers, repeat(ITERATIONS, workers_num))
def splice_gmaps(threadpool, tilefolder, tempfiles, name): processpool = ProcessPoolExecutor() caption = "Rendering Zoom Layers {}".format(name) loadingbar = Bar(caption=caption) loadingbar.set_progress(0, caption) pygame.display.update() side = 1600 zoom_levels = 4 factor = 2 ** (zoom_levels - 1) masterside = side * factor plates = generate_plate_coords(factor, tempfiles) master_surface = pygame.Surface((masterside, masterside)) done = 0 total = len(tempfiles) + len(plates) * sum((4 ** x for x in range(zoom_levels))) fraction = 100 / total def render_base_to_master(task): imgdata, size, location = task.result() tempsurf = pygame.image.frombuffer(imgdata, size, "RGB") master_surface.blit(tempsurf, location) tasks = [] for masterpos, pieces in plates.items(): master_surface.fill((132, 170, 248)) for x, y in pieces: task = processpool.submit(unpack, tempfiles, x, y, ((x % factor) * side, (y % factor) * side)) tasks.append(threadpool.submit(render_base_to_master, task)) tasks.append(task) current_area = masterside for task in tasks: task.result() done += 0.5 loadingbar.set_progress(done * fraction, caption + " %4d of %4d" % (done, total)) for z in range(zoom_levels): tasks = [] pieces = masterside // current_area x_off = masterpos[0] * pieces y_off = masterpos[1] * pieces for xp in range(pieces): for yp in range(pieces): temp = pygame.Surface.subsurface(master_surface, (xp * current_area, yp * current_area, current_area, current_area)) filename = "screen_{}_{}_{}.png".format(z + 1, x_off + xp, y_off + yp) data = pygame.image.tostring(temp, "RGB") tasks.append(processpool.submit(render_plate, data, tilefolder, temp.get_size(), side, filename)) for task in tasks: task.result() done += 1 loadingbar.set_progress(done * fraction, caption + " %4d of %4d" % (done, total)) current_area //= 2 processpool.shutdown()
def _run(self, instance_id: str, service_id: str, plan_id: str,
         accepts_incomplete: bool, func: Callable, *func_args) -> Any:
    # The _match_synchronicity call must come first because it may raise an exception
    sync = self._match_synchronicity(service_id, plan_id, accepts_incomplete)
    executor = ProcessPoolExecutor(max_workers=1)
    future = executor.submit(func, *func_args)
    if sync:
        return future.result(timeout=59)
    else:
        self.async_ops[instance_id] = future
        raise ProvisioningAsynchronously
def post(self):
    file = self.request.files['file'][0]
    hark.client.login()
    hark.client.createSession(default_hark_config)

    log.info("Uploading asynchronously")
    pool = ProcessPoolExecutor(max_workers=2)
    future = pool.submit(async_upload, file)
    yield future
    pool.shutdown()

    log.info("Rendering visualization page")
    self.render('visualize.html')
def compute_pi(nr_tries=10000, pool_size=None, constructor=None):
    if not constructor:
        executor = ProcessPoolExecutor(max_workers=pool_size)
    else:
        executor = constructor(max_workers=pool_size)
    # Resolve the default pool size before it is used to split the work;
    # otherwise nr_tries//pool_size fails when pool_size is None.
    if not pool_size:
        pool_size = multiprocessing.cpu_count()
    args = [(nr_tries // pool_size, ) for _ in range(pool_size)]
    results = executor.map(partial_pi, args)
    return sum(results) / pool_size
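compute_pi assumes a module-level partial_pi helper that Monte-Carlo-estimates pi from a share of the samples; a sketch of that assumption (the name and the one-tuple argument convention follow the map call above, the body is hypothetical):

import random

def partial_pi(args):
    # Receives the (nr_tries,) tuple that compute_pi passes via executor.map.
    nr_tries, = args
    hits = 0
    for _ in range(nr_tries):
        x, y = random.random(), random.random()
        if x * x + y * y <= 1.0:
            hits += 1
    return 4.0 * hits / nr_tries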
def main():
    numbers = [
        (1963309, 2265973), (2030677, 3814172),
        (1551645, 2229620), (2039045, 2020802)
    ]
    start = time()
    pool = ProcessPoolExecutor(max_workers=2)
    results = list(pool.map(gcd, numbers))
    end = time()
    print('Took %.3f seconds' % (end - start))
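The map call above hands each pair to the worker as a single tuple, so the gcd helper it assumes must unpack the pair itself; a hypothetical sketch of such a helper (the brute-force body is an assumption, only the calling convention comes from the snippet):

def gcd(pair):
    # Unpack the (a, b) tuple passed by pool.map and brute-force the
    # greatest common divisor, which keeps the worker CPU-bound.
    a, b = pair
    low = min(a, b)
    for i in range(low, 0, -1):
        if a % i == 0 and b % i == 0:
            return i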
def generate_stocks(freq=pd.Timedelta(seconds=60), directory=None):
    from concurrent.futures import ProcessPoolExecutor, wait
    e = ProcessPoolExecutor()
    if os.path.exists(os.path.join('data', 'daily')):
        glob_path = os.path.join('data', 'daily', '*')
    else:
        glob_path = os.path.join(daily_dir, '*')
    filenames = sorted(glob(glob_path))

    futures = [e.submit(generate_stock, fn, directory=directory, freq=freq)
               for fn in filenames]
    wait(futures)
def build_from_path(in_dir, out_dir, num_workers=1):
    executor = ProcessPoolExecutor(max_workers=num_workers)
    futures = []
    index = 1
    with open(os.path.join(in_dir, 'metadata.csv'), encoding='utf-8') as f:
        for line in f:
            parts = line.strip().split('|')
            wav_path = os.path.join(in_dir, 'wavs', '%s.wav' % parts[0])
            text = parts[2]
            futures.append(executor.submit(
                partial(_process_utterance, out_dir, index, wav_path, text)))
            index += 1
    return [future.result() for future in futures]
class ConcurrentDownloader(BaseDownloader, ConcurrentMixin):
    """Concurrent ProcessPoolExecutor downloader

    :param pool_size: size of the ProcessPoolExecutor
    :param timeout: request timeout in seconds
    """
    def __init__(
            self, worker_class,
            worker_kwargs=None, pool_size=5, middlewares=None,):

        # configure executor
        self.pool_size = pool_size
        self.executor = ProcessPoolExecutor(max_workers=self.pool_size)

        # prepare worker params
        self.worker_params = {
            'worker_class': worker_class,
            'worker_kwargs': worker_kwargs or {},
        }

        # ctrl-c support for python2.x
        # trap sigint
        signal.signal(signal.SIGINT, lambda s, f: s)

        super(ConcurrentDownloader, self).__init__(
            middlewares=middlewares
        )

    def get(self, requests):
        for request in requests:
            # delegate request processing to the executor
            future = self.executor.submit(
                _run_download_worker, self.worker_params, request,
            )

            # build Planned object
            done_future = Planned()

            # when the executor finishes the request, fire done_future
            future.add_done_callback(
                partial(self._done, request, done_future)
            )

            yield done_future

    def get_workers_count(self):
        return self.pool_size

    def stop(self):
        self.executor.shutdown()
def precompute_to_stream(self, stream, logger): """ File format: int64: nnz in total padding to 128 bytes double[ni]: x_squared double[(lmax + 1) * ni]: Lambda_0 double[(lmax + 1) * ni]: Lambda_1 ushort[(lmax + 1)**2]: i_stops Format of i_stops is m-major ordering, but with, additionally, even coefficents all coming before the odd ones. """ from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor executor = ProcessPoolExecutor(max_workers=8) start_pos = stream.tell() for i in range(2 * (self.lmax + 1)): write_int64(stream, 0) write_array(stream, self.x_squared) futures = [] for m in range(self.lmax + 1): for odd in [0, 1]: futures.append(executor.submit(precompute_single, self.thetas, self.lmax, self.epsilon_legendre, m, odd)) nnz_total = 0 Lambda_1_list = [] i_stops_list = [] nnz_list = [] it = iter(futures) for m in range(self.lmax + 1): for odd in [0, 1]: Lambda_0, Lambda_1, i_stops, nnz = it.next().result() logger.info('Got %s m=%d' % (['even', 'odd'][odd], m)) write_array(stream, Lambda_0) Lambda_1_list.append(Lambda_1) i_stops_list.append(i_stops) nnz_list.append(nnz) nnz_total += nnz for arr in Lambda_1_list: write_array(stream, arr) for arr in i_stops_list: write_array(stream, arr) end_pos = stream.tell() stream.seek(start_pos) for nnz in nnz_list: write_int64(stream, nnz) stream.seek(end_pos) return nnz_total
def __init__(self, project_dir=None, max_training_processes=1, response_log=None, emulation_mode=None, remote_storage=None, component_builder=None, model_server=None, wait_time_between_pulls=None): self._training_processes = max(max_training_processes, 1) self._current_training_processes = 0 self.responses = self._create_query_logger(response_log) self.project_dir = config.make_path_absolute(project_dir) self.emulator = self._create_emulator(emulation_mode) self.remote_storage = remote_storage self.model_server = model_server self.wait_time_between_pulls = wait_time_between_pulls if component_builder: self.component_builder = component_builder else: self.component_builder = ComponentBuilder(use_cache=True) self.project_store = self._create_project_store(project_dir) # tensorflow sessions are not fork-safe, # and training processes have to be spawned instead of forked. See # https://github.com/tensorflow/tensorflow/issues/5448#issuecomment # -258934405 multiprocessing.set_start_method('spawn', force=True) self.pool = ProcessPool(self._training_processes)
def spark_submit(exec_string, log_file, driver_path):
    """
    asynchronously run the pyspark/sparktk submitted script while writing the logs
    to the log_file for the app
    :param exec_string: the command that is going to be run
    :param log_file: the file containing command(script) logs while running
    :param driver_path: the path to the main sparktk/pyspark script within the uploads folder
    :return: None
    """
    print "Entering spark_submit"
    mark_submitted(driver_path)
    pool = Pool(max_workers=1)
    cmd_string = "%s >>%s 2>&1" % (exec_string, log_file)
    print "CMD string is %s" % (cmd_string)
    future = pool.submit(subprocess.call, cmd_string, shell=True)
    future.driver_path = driver_path
    future.add_done_callback(mark_completed)
def __init__(self, scoring_model, extractor, cpu_workers=None,
             io_workers=None, batch_size=50):
    self.scoring_model = scoring_model
    self.extractor = extractor
    self.cpu_workers = \
        int(cpu_workers) if cpu_workers is not None else cpu_count()
    self.batch_size = int(batch_size)

    if io_workers is not None:
        self.io_workers = int(io_workers)
    else:
        self.io_workers = max(self.MIN_IO_WORKERS,
                              min(self.MAX_IO_WORKERS,
                                  int(self.cpu_workers *
                                      self.IO_WORKER_MULTIPLIER)))

    logger.info("Starting up IO thread pool with {0} workers"
                .format(self.io_workers))
    self.scores_ex = ThreadPoolExecutor(max_workers=self.io_workers)

    logger.info("Starting up CPU process pool with {0} workers"
                .format(self.cpu_workers))
    self.process_ex = ProcessPoolExecutor(max_workers=self.cpu_workers)

    roots = dependencies.dig(self.scoring_model.features)
    self.root_datasources = [d for d in roots if isinstance(d, Datasource)]
def initialize(self, io_loop=None, keep_alive_milliseconds=37000, # how often to check for unused sessions check_unused_sessions_milliseconds=17000, # how long unused sessions last unused_session_lifetime_milliseconds=15000, # how often to log stats stats_log_frequency_milliseconds=15000, **kw): if io_loop is None: io_loop = IOLoop.current() self._loop = io_loop for app_context in self._applications.values(): app_context._loop = self._loop self._clients = set() self._executor = ProcessPoolExecutor(max_workers=4) self._loop.add_callback(self._start_async) self._stats_job = PeriodicCallback(self.log_stats, stats_log_frequency_milliseconds, io_loop=self._loop) self._unused_session_linger_milliseconds = unused_session_lifetime_milliseconds self._cleanup_job = PeriodicCallback(self.cleanup_sessions, check_unused_sessions_milliseconds, io_loop=self._loop) if keep_alive_milliseconds > 0: self._ping_job = PeriodicCallback(self.keep_alive, keep_alive_milliseconds, io_loop=self._loop) else: self._ping_job = None
def build_from_path(in_dir, out_dir, num_workers=1, tqdm=lambda x: x):
    executor = ProcessPoolExecutor(max_workers=num_workers)
    futures = []
    index = 1
    for book in books:
        with open(os.path.join(in_dir, book, 'sentence_index.txt')) as f:
            for line in f:
                parts = line.strip().split('\t')
                # Compare with != rather than "is not": identity comparison
                # against a string literal is unreliable.
                if line[0] != '#' and len(parts) == 8 and float(parts[3]) > _min_confidence:
                    wav_path = os.path.join(in_dir, book, 'wav', '%s.wav' % parts[0])
                    labels_path = os.path.join(in_dir, book, 'lab', '%s.lab' % parts[0])
                    text = parts[5]
                    task = partial(_process_utterance, out_dir, index, wav_path, labels_path, text)
                    futures.append(executor.submit(task))
                    index += 1
    results = [future.result() for future in tqdm(futures)]
    return [r for r in results if r is not None]
def make_arch_db():
    executor = ProcessPoolExecutor(max_workers=8)
    by = 10000
    m = 60000000
    #by = 2000
    #m = 10000
    e = executor.map(process_range, zip(range(0, m, by), range(by, m + by, by)))
    executor.shutdown()
    print('done calculating architectures')
    pfam_sets = merge(e)
    print(len(pfam_sets))
    gsave(pfam_sets, 'pfam_sets.pkl.gz')

    # mongodb
    db = MongoClient('wl-cmadmin', 27017).ArchDB_Pfam_071414.ArchDB_Pfam_071414
    db.insert(map(lambda item: {'_id': min(item[1]), 'pID': list(item[1]), 'Pfam': item[0]},
                  pfam_sets.items()))
    db.ensure_index('pID')
    db.ensure_index('Pfam')
def __init__(self, applications, io_loop=None, extra_patterns=None, # heroku, nginx default to 60s timeout, so well less than that keep_alive_milliseconds=37000): if io_loop is None: io_loop = IOLoop.current() self._loop = io_loop if keep_alive_milliseconds < 0: # 0 means "disable" raise ValueError("keep_alive_milliseconds must be >= 0") self._resources = {} # Wrap applications in ApplicationContext self._applications = dict() for k,v in applications.items(): self._applications[k] = ApplicationContext(v, self._loop) extra_patterns = extra_patterns or [] relative_patterns = [] for key in applications: app_patterns = [] for p in per_app_patterns: if key == "/": route = p[0] else: route = key + p[0] app_patterns.append((route, p[1], { "application_context" : self._applications[key] })) websocket_path = None for r in app_patterns: if r[0].endswith("/ws"): websocket_path = r[0] if not websocket_path: raise RuntimeError("Couldn't find websocket path") for r in app_patterns: r[2]["bokeh_websocket_path"] = websocket_path relative_patterns.extend(app_patterns) all_patterns = extra_patterns + relative_patterns + toplevel_patterns log.debug("Patterns are: %r", all_patterns) super(BokehTornado, self).__init__(all_patterns, **settings) self._clients = set() self._executor = ProcessPoolExecutor(max_workers=4) self._loop.add_callback(self._start_async) self._stats_job = PeriodicCallback(self.log_stats, 15.0 * 1000, io_loop=self._loop) self._unused_session_linger_seconds = 60*30 self._cleanup_job = PeriodicCallback(self.cleanup_sessions, 17.0 * 1000, io_loop=self._loop) if keep_alive_milliseconds > 0: self._ping_job = PeriodicCallback(self.keep_alive, keep_alive_milliseconds, io_loop=self._loop) else: self._ping_job = None
def __init__(self, config, component_builder):
    self._training_processes = (config['max_training_processes']
                                if config['max_training_processes'] > 0 else 1)
    self.config = config
    self.responses = self._create_query_logger(config)
    self.model_dir = config['path']
    self.emulator = self._create_emulator()
    self.component_builder = component_builder if component_builder else ComponentBuilder(use_cache=True)
    self.project_store = self._create_project_store()
    self.pool = ProcessPool(self._training_processes)
def __init__(self, pop_size, problem):
    self.problem = problem
    self.pop = [Network.random_network() for i in range(pop_size)]
    self.fitness_cache = {}
    self.best = None
    self.nt = NetTester(problem)
    self.pp = ProcessPoolExecutor(max_workers=4)
    self.ntf = NetworkTesterFactory(problem)
    self.pop_size = pop_size
# -*- coding: utf-8 -*- import requests import hashlib import base64 import re import json import time from functools import lru_cache from concurrent.futures import ProcessPoolExecutor from requests import Session from requests_futures.sessions import FuturesSession session = FuturesSession(executor=ProcessPoolExecutor(max_workers=10), session=Session()) def netease_hymn(): return """ player's Game Over, u can abandon. u get pissed, get pissed, Hallelujah my King! errr oh! f**k ohhh!!!! """ def encrypted_id(dfsId): x = [ord(i[0]) for i in netease_hymn().split()] y = ''.join([chr(i - 61) if i > 96 else chr(i + 32) for i in x]) byte1 = bytearray(y, encoding='ascii') byte2 = bytearray(str(dfsId), encoding='ascii')
def parse_cmds_in_parallel(self, cmds, unwrap, total_cmds=None): if os.environ.get("CLADE_DEBUG"): if total_cmds: self.log("Parsing {} commands".format(total_cmds)) for cmd in cmds: unwrap(self, cmd) return if self.conf.get("cpu_count"): max_workers = self.conf.get("cpu_count") else: max_workers = os.cpu_count() # cmds is eather list, tuple or generator if type(cmds) is list or type(cmds) is tuple: total_cmds = len(cmds) # Print progress only of we know total number of commands if total_cmds: self.log("Parsing {} commands".format(total_cmds)) with ProcessPoolExecutor(max_workers=max_workers) as p: chunk_size = 2000 futures = [] finished_cmds = 0 # Submit commands to executor in chunks for cmd_chunk in self.__get_cmd_chunk(cmds, chunk_size=chunk_size): chunk_futures = [] for cmd in cmd_chunk: f = p.submit(unwrap, self, cmd) chunk_futures.append(f) futures.append(f) while True: if not futures: break done_futures = [x for x in futures if x.done()] # Remove all futures that are already completed # to reduce memory usage futures = [x for x in futures if not x.done()] # Track progress (only if stdout is not redirected) if total_cmds and sys.stdout.isatty( ) and self.conf["log_level"] in ["INFO", "DEBUG"]: finished_cmds += len(done_futures) msg = "\t [{:.0f}%] {} of {} commands are parsed".format( finished_cmds / total_cmds * 100, finished_cmds, total_cmds, ) print(msg, end="\r") # Check return value of all finished futures for f in done_futures: try: f.result() except Exception as e: raise RuntimeError( "Something happened in the child process: {}". format(e)) # Submit next chunk if the current one is almost processed finished_chunk_cmds = len( [x for x in chunk_futures if x.done()]) if finished_chunk_cmds > (chunk_size - chunk_size // 10): break # Save a little bit of CPU time # skip sleep only for very small projects time.sleep(0.1) if total_cmds and sys.stdout.isatty( ) and self.conf["log_level"] in ["INFO", "DEBUG"]: print(" " * 79, end="\r")
def Main():
    pool = ProcessPoolExecutor(numThreads)
    result = pool.map(ffmpegConvert, fileList)
error2: error type2 file empty error3: error type3 do not have needed data """ import rsa, json, requests, os, redis, zipfile, shutil, time, re import numpy as np # from retry import retry import pandas as pd # import Crypto.PublicKey.RSA import base64, pymysql from datetime import datetime from concurrent.futures import ThreadPoolExecutor, as_completed, ProcessPoolExecutor from .mongo_con import MongoDb THREAD_POOL = ThreadPoolExecutor(6) PROCESS_POOL = ProcessPoolExecutor(2) redis_pool_6 = redis.ConnectionPool(host='127.0.0.1', port=6379, db=6, password='******', decode_responses=True) redis_pool_7 = redis.ConnectionPool(host='127.0.0.1', port=6379, db=7, password='******', decode_responses=True) red = redis.StrictRedis(connection_pool=redis_pool_7) red_station_status = redis.StrictRedis(connection_pool=redis_pool_6) exchange_rate = { 'CA': 0.7519,
def main(): # command-line options define("debug", default=False, help="run in debug mode", type=bool) define("no_cache", default=False, help="Do not cache results", type=bool) define( "localfiles", default="", help= "Allow to serve local files under /localfile/* this can be a security risk", type=str) define("port", default=5000, help="run on the given port", type=int) define("cache_expiry_min", default=10 * 60, help="minimum cache expiry (seconds)", type=int) define("cache_expiry_max", default=2 * 60 * 60, help="maximum cache expiry (seconds)", type=int) define("mc_threads", default=1, help="number of threads to use for Async Memcache", type=int) define("threads", default=1, help="number of threads to use for rendering", type=int) define("processes", default=0, help="use processes instead of threads for rendering", type=int) define("frontpage", default=FRONTPAGE_JSON, help="path to json file containing frontpage content", type=str) tornado.options.parse_command_line() # NBConvert config config = Config() config.HTMLExporter.template_file = 'basic' config.NbconvertApp.fileext = 'html' config.CSSHTMLHeaderTransformer.enabled = False # don't strip the files prefix - we use it for redirects # config.Exporter.filters = {'strip_files_prefix': lambda s: s} # DEBUG env implies both autoreload and log-level if os.environ.get("DEBUG"): options.debug = True logging.getLogger().setLevel(logging.DEBUG) # setup memcache mc_pool = ThreadPoolExecutor(options.mc_threads) if options.processes: # can't pickle exporter instances, exporter = HTMLExporter pool = ProcessPoolExecutor(options.processes) else: exporter = HTMLExporter(config=config, log=log.app_log) pool = ThreadPoolExecutor(options.threads) memcache_urls = os.environ.get('MEMCACHIER_SERVERS', os.environ.get('MEMCACHE_SERVERS')) if options.no_cache: log.app_log.info("Not using cache") cache = MockCache() elif pylibmc and memcache_urls: kwargs = dict(pool=mc_pool) username = os.environ.get('MEMCACHIER_USERNAME', '') password = os.environ.get('MEMCACHIER_PASSWORD', '') if username and password: kwargs['binary'] = True kwargs['username'] = username kwargs['password'] = password log.app_log.info("Using SASL memcache") else: log.app_log.info("Using plain memecache") cache = AsyncMultipartMemcache(memcache_urls.split(','), **kwargs) else: log.app_log.info("Using in-memory cache") cache = DummyAsyncCache() # setup tornado handlers and settings template_path = pjoin(here, 'templates') static_path = pjoin(here, 'static') env = Environment(loader=FileSystemLoader(template_path)) env.filters['markdown'] = markdown.markdown try: git_data = git_info(here) except Exception as e: app_log.error("Failed to get git info: %s", e) git_data = {} else: git_data['msg'] = escape(git_data['msg']) if options.no_cache: # force jinja to recompile template every time env.globals.update(cache_size=0) env.globals.update( nrhead=nrhead, nrfoot=nrfoot, git_data=git_data, ipython_info=ipython_info(), len=len, ) AsyncHTTPClient.configure(HTTPClientClass) client = AsyncHTTPClient() github_client = AsyncGitHubClient(client) # load frontpage sections with io.open(options.frontpage, 'r') as f: frontpage_sections = json.load(f) # cache frontpage links for the maximum allowed time max_cache_uris = {''} for section in frontpage_sections: for link in section['links']: max_cache_uris.add('/' + link['target']) settings = dict( log_function=log_request, jinja2_env=env, static_path=static_path, client=client, github_client=github_client, exporter=exporter, config=config, cache=cache, 
cache_expiry_min=options.cache_expiry_min, cache_expiry_max=options.cache_expiry_max, max_cache_uris=max_cache_uris, frontpage_sections=frontpage_sections, pool=pool, gzip=True, render_timeout=20, localfile_path=os.path.abspath(options.localfiles), fetch_kwargs=dict(connect_timeout=10, ), ) # create and start the app if options.localfiles: log.app_log.warning( "Serving local notebooks in %s, this can be a security risk", options.localfiles) # use absolute or relative paths: handlers.insert(0, (r'/localfile/(.*)', LocalFileHandler)) app = web.Application(handlers, debug=options.debug, **settings) http_server = httpserver.HTTPServer(app, xheaders=True) log.app_log.info("Listening on port %i", options.port) http_server.listen(options.port) ioloop.IOLoop.instance().start()
from PIL import Image as PILImage from nio.crypto import AsyncDataT as File from nio.crypto import async_generator_from_data if sys.version_info >= (3, 7): from contextlib import asynccontextmanager else: from async_generator import asynccontextmanager AsyncOpenFile = Union[AsyncTextIOWrapper, AsyncBufferedReader] Size = Tuple[int, int] BytesOrPIL = Union[bytes, PILImage.Image] auto = autostr COMPRESSION_POOL = ProcessPoolExecutor() class AutoStrEnum(Enum): """An Enum where auto() assigns the member's name instead of an integer. Example: >>> class Fruits(AutoStrEnum): apple = auto() >>> Fruits.apple.value "apple" """ @staticmethod def _generate_next_value_(name, *_): return name
from concurrent.futures import ProcessPoolExecutor

import requests


def fetch_async(url):
    response = requests.get(url)
    return response


url_list = ['http://www.github.com', 'http://www.bing.com']
pool = ProcessPoolExecutor(5)

for url in url_list:
    pool.submit(fetch_async, url)
pool.shutdown(wait=True)
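A small variant of the snippet above (an assumption, not part of the original) that keeps the submitted futures so the responses can actually be read back, and guards the submission for spawn-based platforms:

from concurrent.futures import ProcessPoolExecutor

import requests


def fetch_async(url):
    return requests.get(url)


if __name__ == '__main__':
    url_list = ['http://www.github.com', 'http://www.bing.com']
    with ProcessPoolExecutor(5) as pool:
        futures = [pool.submit(fetch_async, url) for url in url_list]
    # Leaving the with-block waits for completion, so the results are ready.
    for future in futures:
        print(future.result().status_code)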
layer_dir=layer_dir, resolution=resolution, atlas_name=atlas_name.lower()) elif step == 'step1': print("step 1") vol = CloudVolume(f'file://{layer_dir}') if atlas_name == 'paxinos': done_files = set([ int(y) for y in os.listdir(progress_dir) ]) all_files = set(range(vol.bounds.minpt.y, vol.bounds.maxpt.y)) to_upload = [ int(y) for y in list(all_files.difference(done_files)) ] done_files = set([ int(z) for z in os.listdir(progress_dir)]) all_files = set(range(vol.bounds.minpt.z,vol.bounds.maxpt.z)) to_upload = [ int(z) for z in list(all_files.difference(done_files)) ] to_upload.sort() print("Have {len(to_upload)} slices remaining to upload",to_upload) if atlas_name == 'paxinos': with ProcessPoolExecutor(max_workers=4) as executor: for job in executor.map(process_paxinos_slice,to_upload): try: print(job) except Exception as exc: print(f'generated an exception: {exc}') else: with ProcessPoolExecutor(max_workers=4) as executor: for job in executor.map(process_slice,to_upload): try: print(job) except Exception as exc: print(f'generated an exception: {exc}')
def run(self, train_base_models=True): start_time = time.time() from concurrent.futures import ProcessPoolExecutor pool = ProcessPoolExecutor(max_workers=self.n_workers) X, y = [], [] c = [] inc = 1. X_l, y_l = [], [] weight = np.array([1 / self.K] * (self.K + 1)) config_evaluated = [] config_space = get_benchmark_configspace('covtype_svm') # Initialize config L. config_L = sample_configurations(config_space, self.num_L_init) if train_base_models: func_configs = list() for iter_t in range(self.K): print('Build mid fidelity model', iter_t) func_configs.append(True) func_configs.append(False) training_data = self.run_parallel_async(pool, self.mini_smac, func_configs) with open('data/xgb/base_%s_data.pkl' % self.method_name, 'wb') as f: pickle.dump(training_data, f) else: with open('data/xgb/base_tse_data_%d.pkl' % 10, 'rb') as f: training_data = pickle.load(f) print('Load training data for M evaluations!') # Create base models. base_models = list() config_space = get_benchmark_configspace('covtype_svm') types, bounds = get_types(config_space) for iter_t in range(self.K + 1): config_x, config_y = training_data[iter_t] model = RandomForestWithInstances(types=types, bounds=bounds) model.train(config_x, config_y) base_models.append(model) low_fidelity_model = base_models[self.K] X_l.extend(training_data[self.K][0].tolist()) y_l.extend(training_data[self.K][1].tolist()) print('Base model building finished!') # The framework of TSE. for iter_t in range(self.iter_H): print('Iteration in TSE', iter_t) # Sample a batch of configurations according to tse model. configs = sample_configurations(config_space, self.iter_L * 10) config_arrays = convert_configurations_to_array(configs) perfs, _ = low_fidelity_model.predict(config_arrays) perfs = perfs[:, 0] if len(y) > 3: preds = [] for i in range(self.K): m, _ = base_models[i].predict(config_arrays) preds.append(m[:, 0].tolist()) preds = np.array(preds).T preds = np.mat(np.hstack((preds, np.ones((len(configs), 1))))) # Add the delta. delta = preds * np.mat(weight.reshape(-1, 1)) perfs += delta.getA()[:, 0] configs_candidate = [] indexes = np.argsort(perfs)[:self.iter_L] for index in indexes: configs_candidate.append(configs[index]) # Evaluate the low-fidelity configurations. print('=' * 10 + 'Evaluating the low-fidelity configurations') config_params = [] for config in configs_candidate: config_params.append((config.get_dictionary(), self.s_min)) result_perf = self.run_parallel_async(pool, self.objective_function, config_params) for index, item in enumerate(result_perf): X_l.append(configs_candidate[index].get_array().tolist()) y_l.append(item[0]) print(np.array(X_l).shape, np.array(y_l, dtype=np.float64).shape) # Update f_L. print('=' * 10 + 'Retrain the f_L') low_fidelity_model.train(np.array(X_l), np.array(y_l, dtype=np.float64)) config_L.extend(configs_candidate) configs_input = [] for config in config_L: if config not in config_evaluated: configs_input.append(config) # Choose the next configuration. config_arrays = convert_configurations_to_array(configs_input) perfs, _ = low_fidelity_model.predict(config_arrays) perfs = perfs[:, 0] if len(y) > 3: preds = [] for i in range(self.K): m, _ = base_models[i].predict(config_arrays) preds.append(m[:, 0].tolist()) preds = np.array(preds).T preds = np.mat( np.hstack((preds, np.ones((len(configs_input), 1))))) # Add the delta. delta = preds * np.mat(weight.reshape(-1, 1)) perfs += delta.getA()[:, 0] next_config = configs_input[np.argmin(perfs)] # Evaluate this config with a high-fidelity setting. 
print('=' * 10 + 'Evaluate the high-fidelity configuration') perf, _ = self.objective_function( (next_config.get_dictionary(), self.s_max)) X.append(next_config) y.append(perf) if perf < inc: inc = perf c.append([time.time() - start_time, inc]) print('Current inc', inc) if len(y) < 3: continue # Learn the weight in TSE. Z = [] for i in range(self.K): m, v = base_models[i].predict( convert_configurations_to_array(X)) Z.append(m[:, 0].tolist()) Z = np.mat(np.hstack((np.array(Z).T, np.ones((len(y), 1))))) f = np.mat(np.array(y).reshape((-1, 1))) # Compute the weight. try: ZtZ_inv = np.linalg.inv(Z.T * Z) weight = (ZtZ_inv * Z.T * f)[:, 0] print('The weight updated is', weight) except np.linalg.LinAlgError as err: if 'Singular matrix' in str(err): print( 'Singular matrix encountered, and do not update the weight!' ) else: raise ValueError('Unexpected error!') # Save the result. np.save(self.file_path, np.transpose(np.array(c))) plt.plot(np.array(c)[:, 0], np.array(c)[:, 1]) plt.xlabel('time_elapsed (s)') plt.ylabel('validation error') plt.savefig("data/xgb/%s.png" % self.method_name) if time.time() - start_time > self.runtime_limit: print('Runtime budget meets!') break pool.shutdown(wait=True)
def main(): print("Main method called") files = glob.glob('root/zipfiles/*') for f in files: os.remove(f) shutil.rmtree('data/books') if not os.path.exists('data/books'): os.makedirs('data/books') tar = tarfile.open(name="root/archive.tar") tar.extractall() dir_name = "root/zipfiles" extension = ".zip" for item in os.listdir(dir_name): if item.endswith(extension): file_name = dir_name + "/" + item zip_ref = zipfile.ZipFile(file_name) # create zipfile object try: zip_ref.extractall("data/books") # extract file to dir print(file_name) except NotImplementedError: print("Could not unzip: " + file_name + " - continuing") zip_ref.close() dir_name = "data/books" extension = ".txt" cities_csv = pd.read_csv('data/cities/cities15000.csv', header=0, sep=';', usecols=['englishName', 'latitude', 'longitude']) books = list() authors = set() complete_author_list = list() cities = set() th_ex = ProcessPoolExecutor(max_workers=6) futures = [] count = 0 for root, directories, files in os.walk(dir_name): for file in files: if file.endswith(extension): count = count + 1 future = th_ex.submit(get_results, root, file, cities_csv, count) futures.append(future) pp.pprint(count) th_ex.shutdown(wait=True) print("Done with everything. Unwrapping results...") broken_count = 0 for th in futures: try: result = th.result() books.append(result[0]) for author in result[1]: authors.add(author) for city in result[2]: cities.add(city) except concurrent.futures.process.BrokenProcessPool: broken_count = broken_count + 1 print("BrokenProcessPool: #%d" % broken_count) for idx, val in enumerate(authors): complete_author_list.append((idx, val)) with open('data/csv/neo-cities.csv', 'w+', newline='', encoding='utf-8') as csv_file: writer = csv.writer(csv_file, delimiter='|', escapechar='\\') writer.writerow(['name', 'location:ID(Location-ID)']) for city in cities: writer.writerow(city) with open('data/csv/neo-books.csv', 'w+', newline='', encoding='utf-8') as csv_file: writer = csv.writer(csv_file, delimiter='|', escapechar='\\') writer.writerow(['bookId:ID(Book-ID)', 'title']) for value in books: title = value['title'] title = title.rstrip() title = " ".join(title.split()) writer.writerow([value['id'], title]) with open('data/csv/neo-authors.csv', 'w+', newline='', encoding='utf-8') as csv_file: writer = csv.writer(csv_file, delimiter='|', escapechar='\\') writer.writerow(['authorId:ID(Author-ID)', 'name']) for author in complete_author_list: auth = " ".join(author[1].split()) if auth or auth is not None: writer.writerow([author[0], auth]) with open('data/csv/neo-books-cities.csv', 'w+', newline='') as csv_file: writer = csv.writer(csv_file, delimiter='|') writer.writerow([':START_ID(Book-ID)', ':END_ID(Location-ID)']) for book in books: for city in book['cities']: writer.writerow([book['id'], city[1]]) with open('data/csv/neo-books-authors.csv', 'w+', newline='') as csv_file: writer = csv.writer(csv_file, delimiter='|') writer.writerow([':START_ID(Book-ID)', ':END_ID(Author-ID)']) for book in books: for author in book['authors']: auth = [ item for item in complete_author_list if item[1] == author ] writer.writerow([book['id'], auth[0][0]]) with open('data/csv/postgres-cities.csv', 'w+', newline='', encoding='utf-8') as csv_file: writer = csv.writer(csv_file, delimiter=';', escapechar='\\') writer.writerow(['name', 'location']) for city in cities: writer.writerow(city) with open('data/csv/postgres-books.csv', 'w+', newline='', encoding='utf-8') as csv_file: writer = csv.writer(csv_file, delimiter='|', escapechar='\\') 
writer.writerow(['book_id', 'title']) for value in books: title = value['title'] title = title.rstrip() title = " ".join(title.split()) writer.writerow([value['id'], title]) with open('data/csv/postgres-authors.csv', 'w+', newline='', encoding='utf-8') as csv_file: writer = csv.writer(csv_file, delimiter='|', escapechar='\\') writer.writerow(['author_id', 'name']) for author in complete_author_list: auth = " ".join(author[1].split()) if auth or auth is not None: writer.writerow([author[0], auth]) with open('data/csv/postgres-books-cities.csv', 'w+', newline='') as csv_file: writer = csv.writer(csv_file, delimiter='|') writer.writerow(['book_id', 'location']) for book in books: for city in book['cities']: writer.writerow([book['id'], city[1]]) with open('data/csv/postgres-books-authors.csv', 'w+', newline='') as csv_file: writer = csv.writer(csv_file, delimiter='|') writer.writerow(['book_id', 'author_id']) for book in books: for author in book['authors']: auth = [ item for item in complete_author_list if item[1] == author ] writer.writerow([book['id'], auth[0][0]])
def __init__(self, max_concurrent=5):
    self.pool = ProcessPoolExecutor(max_workers=max_concurrent)
    self.queue = []
executor=executor) async def benchmark_signature_validation_parallel(n, executor): tasks = [ verify_signature_async(hash, signature, public_key, executor=executor) for i in range(n) ] await asyncio.gather(*tasks) if __name__ == "__main__": loop = asyncio.get_event_loop() te = ThreadPoolExecutor(max_workers=1) pe = ProcessPoolExecutor(max_workers=1) n = 1000 # Blake2b serial start = time.time() sync_benchmark_blake2b(n) print(f"blake2b, n={n} sync done in {(time.time()-start)*1000} ms") start = time.time() fut: asyncio.Future = asyncio.ensure_future(benchmark_blake2b(n, te)) loop.run_until_complete(fut) print(f"blake2b, n={n} thread done in {(time.time()-start)*1000} ms") start = time.time() fut: asyncio.Future = asyncio.ensure_future(benchmark_blake2b(n, pe)) loop.run_until_complete(fut)
resource_url = "https://www.dy2018.com" + href.attrs['href'] getInfo(title, resource_url) def getInfo(title, resource_url): response2 = requests.get(resource_url, verify=False, headers=header) soup2 = BeautifulSoup( response2.text.encode(response2.encoding).decode('gbk'), "html.parser") for links in soup2.find_all("td", attrs={"style": "WORD-WRAP: break-word"}): print("{0}++{1}".format(title, links.text)) conn = pymysql.connect(host='127.0.0.1', port=3333, user='******', passwd='Xiaoxian0910', db='airasia', charset='utf8') cursor = conn.cursor() sql = "INSERT INTO dy2018.`bikan` (name,link) VALUES(%s,%s)" cursor.execute(sql, (title, links.text)) conn.commit() cursor.close() conn.close() # links = re.findall(r'magnet:\?xt=urn:btih:(?:[A-Z]|[0-9])*',response2) # for link in links: # print(link) if __name__ == "__main__": with ProcessPoolExecutor(max_workers=10) as pool: pool.map(Gen_url, urls)
# pool.submit(fetch_request, url)
# pool.shutdown(True)

# Process pool + callback function
from concurrent.futures import ProcessPoolExecutor
import requests


def fetch_async(url):
    response = requests.get(url)
    return response


def callback(future):
    print(future.result().text)


url_list = [
    'http://www.baidu.com', 'http://www.bing.com', 'http://www.cnblogs.com/'
]

pool = ProcessPoolExecutor(5)

if __name__ == '__main__':
    for url in url_list:
        v = pool.submit(fetch_async, url)
        v.add_done_callback(callback)
    pool.shutdown()
def closest_point_of_approach( traffic: Traffic, lateral_separation: float, vertical_separation: float, projection: Union[pyproj.Proj, crs.Projection, None] = None, round_t: str = "d", max_workers: int = 4, ) -> CPA: """ Computes a CPA dataframe for all pairs of trajectories candidates for being separated by less than lateral_separation in vertical_separation. In order to be computed efficiently, the method needs the following parameters: - projection: a first filtering is applied on the bounding boxes of trajectories, expressed in meters. You need to provide a decent projection able to approximate distances by Euclide formula. By default, EuroPP() projection is considered, but a non explicit argument will raise a warning. - round_t: an additional column will be added in the DataFrame to group trajectories by relevant time frames. Distance computations will be considered only between trajectories flown in the same time frame. By default, the 'd' pandas freq parameter is considered, to group trajectories by day, but other ways of splitting ('h') may be more relevant and impact performance. - max_workers: distance computations are spread over a given number of processors. """ if projection is None: logging.warn("Defaulting to projection EuroPP()") projection = crs.EuroPP() if isinstance(projection, crs.Projection): projection = pyproj.Proj(projection.proj4_init) def yield_pairs(t_chunk: Traffic): """ This function yields all pairs of possible candidates for a CPA calculation. """ # combinations types Iterator[Tuple[T, ...]] for first, second in cast(Iterator[Tuple[Flight, Flight]], combinations(t_chunk, 2)): # cast are necessary because of the lru_cache × property bug if (cast(pd.Timestamp, first.start) > cast( pd.Timestamp, second.stop)) or (cast( pd.Timestamp, second.start) > cast( pd.Timestamp, first.stop)): # Flights must fly at the same time continue if (first.min("altitude") > second.max("altitude") + vertical_separation): # Bounding boxes in altitude must cross continue if (second.min("altitude") > first.max("altitude") + vertical_separation): # Bounding boxes in altitude must cross continue if first.min("x") > second.max("x") + lateral_separation: # Bounding boxes in x must cross continue if second.min("x") > first.max("x") + lateral_separation: # Bounding boxes in x must cross continue if first.min("y") > second.max("y") + lateral_separation: # Bounding boxes in y must cross continue if second.min("y") > first.max("y") + lateral_separation: # Bounding boxes in y must cross continue # Next step is to check the 2D footprint of the trajectories # intersect. Before computing the intersection we bufferize the # trajectories by half the requested separation. first_shape = first.project_shape(projection) second_shape = second.project_shape(projection) if first_shape is None or second_shape is None: continue first_shape = first_shape.simplify(1e3).buffer(lateral_separation / 2) second_shape = first_shape.simplify(1e3).buffer( lateral_separation / 2) if first_shape.intersects(second_shape): yield first, second t_xyt = (traffic.airborne().compute_xy(projection).assign( round_t=lambda df: df.timestamp.dt.round(round_t))) cumul = list() # Multiprocessing is implemented on each timerange slot only. # TODO: it would probably be more efficient to multiprocess over each # t_chunk rather than multiprocess the distance computation. 
for _, t_chunk in tqdm(t_xyt.groupby("round_t"), total=len(set(t_xyt.data.round_t))): with ProcessPoolExecutor(max_workers=max_workers) as executor: tasks = { # TODO submit(Flight.distance, first, second) executor.submit(first.distance, second): ( first.flight_id, second.flight_id, ) for (first, second) in yield_pairs(Traffic(t_chunk)) } for future in as_completed(tasks): cumul.append(future.result()) return CPA(pd.concat(cumul, sort=False))
yearAry = ["15", "16"] monthAry = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31] base_url1 = "http://agora.ex.nii.ac.jp/digital-typhoon/globe/color/20{}/8192x8192/MTS1{}00.globe.0.jpg" base_url2 = "http://agora.ex.nii.ac.jp/digital-typhoon/globe/color/20{}/8192x8192/MTS2{}00.globe.0.jpg" base_url3 = "http://agora.ex.nii.ac.jp/digital-typhoon/globe/color/20{}/8192x8192/HMW8{}00.globe.0.jpg" path = "/Users/iii/Desktop/imgs/" queue = Queue() for s_year in yearAry: for idx, month_num in enumerate(monthAry): if idx + 1 < 7: continue month = idx + 1 s_month = "0" + str(idx + 1) if idx + 1 < 10 else str(idx + 1) for date in range(month_num): s_date = "0" + str(date + 1) if date + 1 < 10 else str(date + 1) s_total = s_year + "," + s_year + s_month + s_date queue.put(s_total) # threads = map(lambda i: Thread(target=worker), xrange(NUM_THREADS)) # map(lambda th: th.start(), threads) # map(lambda th: th.join(), threads) cpus = multiprocessing.cpu_count() print cpus with ProcessPoolExecutor(max_workers=cpus) as executor: while not queue.empty(): page = queue.get() executor.submit(crawler, page)
def __init__( self, config: Config, loop, *, orm_base=None, using_box: Box = None, ) -> None: """Initialize""" logging.config.dictConfig(config.LOGGING) logger = logging.getLogger(f'{__name__}.Bot.__init__') logger.info('start') Namespace._bot = self self.process_pool_executor = ProcessPoolExecutor() self.thread_pool_executor = ThreadPoolExecutor() logger.info('connect to DB') config.DATABASE_ENGINE = get_database_engine(config) logger.info('connect to memcache') self.mc = aiomcache.Client( host=config.CACHE['HOST'], port=config.CACHE['PORT'], ) self.cache = Cache(self.mc, config.CACHE.get('PREFIX', 'YUI_')) logger.info('import apps') for app_name in config.APPS: logger.debug('import apps: %s', app_name) importlib.import_module(app_name) self.config = config self.loop = loop self.loop.set_debug(self.config.DEBUG) self.orm_base = orm_base or Base self.box = using_box or box self.queue: asyncio.Queue = asyncio.Queue() self.api = SlackAPI(self) self.channels: list[PublicChannel] = [] self.ims: list[DirectMessageChannel] = [] self.groups: list[PrivateChannel] = [] self.users: list[User] = [] self.restart = False self.is_ready = False self.method_last_call: defaultdict[str, datetime] = defaultdict(now) self.method_queue: defaultdict[str, list] = defaultdict(list) self.config.check( self.box.config_required, self.box.channel_required, self.box.channels_required, self.box.user_required, self.box.users_required, ) if self.config.REGISTER_CRONTAB: logger.info('register crontab') self.register_tasks()
def _run_tests(all_tests, log_name_base, extra_args): global stop, executor, futures, system_compiler xmlname = log_name_base + '.xml' junit_root = ET.Element('testsuites') conf_time = 0 build_time = 0 test_time = 0 passing_tests = 0 failing_tests = 0 skipped_tests = 0 commands = (compile_commands, clean_commands, install_commands, uninstall_commands) try: # This fails in some CI environments for unknown reasons. num_workers = multiprocessing.cpu_count() except Exception as e: print( 'Could not determine number of CPUs due to the following reason:' + str(e)) print('Defaulting to using only one process') num_workers = 1 # Due to Ninja deficiency, almost 50% of build time # is spent waiting. Do something useful instead. # # Remove this once the following issue has been resolved: # https://github.com/mesonbuild/meson/pull/2082 num_workers *= 2 executor = ProcessPoolExecutor(max_workers=num_workers) for name, test_cases, skipped in all_tests: current_suite = ET.SubElement(junit_root, 'testsuite', { 'name': name, 'tests': str(len(test_cases)) }) print() if skipped: print(bold('Not running %s tests.' % name)) else: print(bold('Running %s tests.' % name)) print() futures = [] for t in test_cases: # Jenkins screws us over by automatically sorting test cases by name # and getting it wrong by not doing logical number sorting. (testnum, testbase) = os.path.split(t)[-1].split(' ', 1) testname = '%.3d %s' % (int(testnum), testbase) should_fail = False if name.startswith('failing'): should_fail = name.split('failing-')[1] result = executor.submit(run_test, skipped, t, extra_args, system_compiler, backend, backend_flags, commands, should_fail) futures.append((testname, t, result)) for (testname, t, result) in futures: sys.stdout.flush() result = result.result() if result is None or 'MESON_SKIP_TEST' in result.stdo: print(yellow('Skipping:'), t) current_test = ET.SubElement(current_suite, 'testcase', { 'name': testname, 'classname': name }) ET.SubElement(current_test, 'skipped', {}) skipped_tests += 1 else: without_install = "" if len( install_commands) > 0 else " (without install)" if result.msg != '': print( red('Failed test{} during {}: {!r}'.format( without_install, result.step.name, t))) print('Reason:', result.msg) failing_tests += 1 if result.step == BuildStep.configure and result.mlog != no_meson_log_msg: # For configure failures, instead of printing stdout, # print the meson log if available since it's a superset # of stdout and often has very useful information. failing_logs.append(result.mlog) else: failing_logs.append(result.stdo) failing_logs.append(result.stde) else: print('Succeeded test%s: %s' % (without_install, t)) passing_tests += 1 conf_time += result.conftime build_time += result.buildtime test_time += result.testtime total_time = conf_time + build_time + test_time log_text_file(logfile, t, result.stdo, result.stde) current_test = ET.SubElement( current_suite, 'testcase', { 'name': testname, 'classname': name, 'time': '%.3f' % total_time }) if result.msg != '': ET.SubElement(current_test, 'failure', {'message': result.msg}) stdoel = ET.SubElement(current_test, 'system-out') stdoel.text = result.stdo stdeel = ET.SubElement(current_test, 'system-err') stdeel.text = result.stde print("\nTotal configuration time: %.2fs" % conf_time) print("Total build time: %.2fs" % build_time) print("Total test time: %.2fs" % test_time) ET.ElementTree(element=junit_root).write(xmlname, xml_declaration=True, encoding='UTF-8') return passing_tests, failing_tests, skipped_tests
def main(): # Make repository parser = get_parser() args = parser.parse_args() out_reg, out_autoreg = make_repo_from_parser(args) # Prepare model labels if (not args.with_init) and (args.with_forcing): label_add = "(no init)" elif (args.with_init) and (not args.with_forcing): label_add = "(no forcing)" elif (args.with_init) and (args.with_forcing): label_add = "" elif (not args.with_init) and (not args.with_forcing): label_add = "(no init, no forcing)" # Initialize result dicts reg_results = {"label": "lin reg " + label_add, "scores": []} shuffled_results = {"word_freqs": [], "word_lengths": []} autoreg_results = {"label": "lin autoreg " + label_add, "scores": []} # Loop over subjects (in parallel) with ProcessPoolExecutor(args.n_workers) as pool: pendings = [] for sub in range(args.n_subjects): pendings.append( pool.submit( eval_lin_models, sub, args.data, out_reg, out_autoreg, with_forcing=args.with_forcing, with_init=args.with_init, shuffle=args.shuffle)) for pending in tqdm.tqdm(pendings): (score_linreg, score_linautoreg, shuffled) = pending.result() # stack results in lists reg_results["scores"].append(score_linreg) autoreg_results["scores"].append(score_linautoreg) for key, score_shuffled in shuffled.items(): shuffled_results[key].append(score_shuffled) # Making numpy arrays from lists reg_results["scores"] = np.array(reg_results["scores"]) autoreg_results["scores"] = np.array(autoreg_results["scores"]) for key in shuffled_results.keys(): shuffled_results[key] = np.array(shuffled_results[key]) # # Converting to torch arrays # reg_results["scores"] = torch.from_numpy(reg_results["scores"]) # autoreg_results["scores"] = torch.from_numpy(autoreg_results["scores"]) # for key in shuffled_results.keys(): # shuffled_results[key] = torch.from_numpy(shuffled_results[key]) # Save torch.save(reg_results, out_reg / "reference_metrics.th") torch.save(autoreg_results, out_autoreg / "reference_metrics.th") if args.shuffle: for key, value in shuffled_results.items(): torch.save({'scores': value, 'label': 'lin reg ' + label_add}, out_reg / f"shuffled_{key}_metrics.th")
total_articles = len(article_urls)
processed_articles = get_processed_articles_list()
total_processed = len(processed_articles)
print()
print(f'Urls loaded: {total_articles}')
print(f'Processed: {total_processed}')
print()
urls_to_process = [art['url'] for art in article_urls]
print(f'Urls to process: {len(urls_to_process)}')
for done_art in processed_articles:
    if done_art['url'] in urls_to_process:
        urls_to_process.remove(done_art['url'])
print('Starting to read articles...')
futures = []  # futures for the submitted url batches
with ProcessPoolExecutor(4) as executor:
    for article_url_batch in batch(urls_to_process, 20):
        urls = list(article_url_batch)
        futures.append(
            executor.submit(run_get_page_articles_process_batch, urls))
    for future in as_completed(futures):
        result = future.result()
        if result is None:
            print('Article process result is None')
        else:
            print(f'Writing {len(result)} articles to articles file')
            total_processed = total_processed + len(result)
            write_processed_articles_to_file(result)
            print(
                f'Processed {total_processed}/{total_articles} articles...')
def test_no_connection_sharing_among_processes(s3): executor = ProcessPoolExecutor() conn_id = executor.submit(_get_s3_id, s3).result() assert id(s3.connect()) != conn_id, \ "Processes should not share S3 connections."
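# Minimal stdlib-only sketch (an assumption, not taken from the test suite above)
# of the same idea: objects shipped to a ProcessPoolExecutor worker are pickled and
# rebuilt in the child, so their id() there differs from the parent's.
import os
from concurrent.futures import ProcessPoolExecutor


def _describe(obj):
    # Runs inside the worker: report the worker pid and the rebuilt object's id.
    return os.getpid(), id(obj)


if __name__ == "__main__":
    payload = {"endpoint": "https://example.invalid"}
    with ProcessPoolExecutor(max_workers=1) as pool:
        worker_pid, remote_id = pool.submit(_describe, payload).result()
    assert worker_pid != os.getpid()
    assert remote_id != id(payload)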
def _fit_model(RV, df_data, keys_d=None, kwrgs_pp={}, stat_model=tuple,
               lags_i=list, verbosity=0):
    #%%
    #    stat_model = fc.stat_model_l[0]
    #    RV = fc.TV
    #    lags_i = [1]
    #    kwrgs_pp={}
    #    keys_d=None
    #    df_data = fc.df_data
    #
    # do forecasting across lags
    splits = df_data.index.levels[0]
    y_pred_all = []
    y_pred_c = []
    models = []

    # store target variable (continuous and binary in y_ts dict)
    if hasattr(RV, 'RV_bin_fit'):
        y_ts = {'cont': RV.RV_ts_fit, 'bin': RV.RV_bin_fit}
    else:
        y_ts = {'cont': RV.RV_ts_fit}

    from time import time
    t0 = time()
    futures = {}
    with ProcessPoolExecutor(max_workers=max_cpu) as pool:
        for lag in lags_i:
            for split in splits:
                fitkey = f'{lag}_{split}'
                futures[fitkey] = pool.submit(fit, y_ts, df_data, lag, split,
                                              stat_model=stat_model,
                                              keys_d=keys_d,
                                              kwrgs_pp=kwrgs_pp,
                                              verbosity=verbosity)
        results = {key: future.result() for key, future in futures.items()}

    # unpack results
    models = dict()
    for lag in lags_i:
        y_pred_l = []
        model_lag = dict()
        for split in splits:
            prediction, model = results[f'{lag}_{split}']
            # store model
            model_lag[f'split_{split}'] = model

            # retrieve original input data
            df_norm = model.X
            TestRV = (df_norm['TrainIsTrue'] == False)[df_norm['y_pred']]
            y_pred_l.append(prediction[TestRV.values])

            if lag == lags_i[0]:
                # ensure that RV timeseries matches y_pred
                TrainRV = (df_norm['TrainIsTrue'])[df_norm['y_pred']]
                RV_bin = RV.RV_bin.loc[TrainRV.index]

                # predicting RV might not be possible
                # determining climatological prevalence in training data
                y_c_mask = np.logical_and(TrainRV, RV_bin.squeeze() == 1)
                y_clim_val = RV_bin[y_c_mask].size / RV_bin.size
                # filling test years with clim of training data
                y_clim = RV_bin[TestRV == True].copy()
                y_clim[:] = y_clim_val
                y_pred_c.append(y_clim)

        models[f'lag_{lag}'] = model_lag

        y_pred_l = pd.concat(y_pred_l)
        y_pred_l = y_pred_l.sort_index()

        if lag == lags_i[0]:
            y_pred_c = pd.concat(y_pred_c)
            y_pred_c = y_pred_c.sort_index()

        y_pred_all.append(y_pred_l)
    y_pred_all = pd.concat(y_pred_all, axis=1)
    print("\n")
    print(time() - t0)
    print(f'{stat_model} ')
    #%%
    return y_pred_all, y_pred_c, models
def submit_qca_optimization_dataset(
        dataset_name=None,
        metadata=None,
        compute_spec=None,
        input_molecules=None,
        server="from_file",
        threads=None,
        compute_tag=None,
        priority="normal",
        skip_compute=False,
):
    """
    Create or update an optimization dataset.

    Parameters
    ----------
    dataset_name : str
        The name of the dataset. This is needed if the dataset already exists and
        no metadata is supplied. Useful when e.g. adding computes or molecules to
        an existing dataset.
    metadata : str
        A filename specifying the metadata needed to create a new dataset, in JSON
        format. An example metadata has the following format:
        {
            "submitter": "trevorgokey",
            "creation_date": "2020-09-18",
            "collection_type": "OptimizationDataset",
            "dataset_name": "OpenFF Sandbox CHO PhAlkEthOH v1.0",
            "short_description": "A diverse set of CHO molecules",
            "long_description_url": "https://github.com/openforcefield/qca-dataset-submission/tree/master/submissions/2020-09-18-OpenFF-Sandbox-CHO-PhAlkEthOH",
            "long_description": "This dataset contains an expanded set of the AlkEthOH and PhEthOH datasets, which were used in the original derivation of the smirnoff99Frosst parameters.",
            "elements": ["C", "H", "O"],
            "change_log": [
                {"author": "trevorgokey",
                 "date": "2020-09-18",
                 "version": "1.0",
                 "description": "A diverse set of CHO molecules. The molecules in this set were generated to include all stereoisomers if chirality was ambiguous from the SMILES input. Conformations were generated which had an RMSD of at least 4 Angstroms from all other conformers"}
            ]
        }
    compute_spec : str
        A filename specifying the compute specifications for the dataset, in JSON
        format.
    input_molecules : str
        A filename specifying the molecules to load into the dataset as entries,
        in JSON format.
    server : str
        The server URI to connect to. The special value 'from_file' will read from
        the default server connection config file for e.g. authentication.
    threads : int
        The number of threads to use when contacting the server.
    compute_tag : str
        The compute tag used to match computations with compute managers. For
        OpenFF calculations, this should be "openff".
    priority : str
        The priority of new calculations to submit. This must be either "low",
        "normal", or "high".
skip_compute : bool Do not submit the tasks after the molecules and compute specifications have been added Returns ------- None """ ds_type = "OptimizationDataset" ds_name = dataset_name if server == "from_file": # Connect to a server that needs authentication client = ptl.FractalClient().from_file() elif server is not None: # Use a custom server, possibly a local, private server client = ptl.FractalClient(server, verify=False) else: # Use the default public MOLSSI server client = ptl.FractalClient() try: ds = client.get_collection(ds_type, ds_name) logger.info("\nDataset loaded with the following metadata:") logger.info(pformat(ds.data.metadata)) except KeyError: assert metadata is not None metadata = json.load(open(metadata)) metadata["collection_type"] = ds_type if ds_name is not None: metadata["dataset_name"] = ds_name else: ds_name = metadata["dataset_name"] ds = getattr(ptl.collections, ds_type)( ds_name, client=client, metadata=metadata, description=metadata["short_description"], tags=["openff"], tagline=metadata["short_description"], ) logger.info("\nDataset created with the following metadata:") logger.info(pformat(metadata)) if compute_spec is not None: specs = json.load(open(compute_spec)) add_compute_specs(ds, specs) if input_molecules is not None: pool = ProcessPoolExecutor(max_workers=threads) new_mols = 0 new_calcs = 0 total_calcs = 0 logger.info("\nLoading {} into QCArchive...".format(input_molecules)) if input_molecules.endswith("lzma") or input_molecules.endswith("xz"): input_ds = json.load(lzma.open(input_molecules, "rt")) elif input_molecules.endswith("bz2"): input_ds = json.load(bz2.open(input_molecules, "rt")) else: input_ds = json.load(open(input_molecules)) logger.info("Number of unique molecules: {}".format(len(input_ds))) work = [] for j, index in enumerate(input_ds): for i, mol in enumerate(input_ds[index], 1): work_unit = pool.submit(submit, *(ds, index, mol, i)) work.append(work_unit) ds.save() ids = [] new_entries = 0 iterable = enumerate(as_completed(work)) if logger.getEffectiveLevel() >= logging.INFO: iterable = tqdm.tqdm(iterable, total=len(work), ncols=80, desc="Entries") for j, unit in iterable: unique_id, success = unit.result() new_entries += int(success) ids.append(unique_id) new_mols += len(input_ds) new_calcs += new_entries total_calcs += len(ids) logger.info("\nNumber of new entries: {}/{}".format( new_entries, len(ids))) stride = 20 # Only submit tasks that were explicitly given as parameters if compute_spec is not None and not skip_compute: new_tasks = 0 for qc_spec_name in specs: out_str = ( "\nSubmitting calculations in batches of {} for specification {}" ) logger.info(out_str.format(stride, qc_spec_name)) work = [] args = (qc_spec_name, ) kwargs = dict(priority=priority, tag=compute_tag) for entry_list in chunk(ids, stride): kwargs["subset"] = entry_list work_unit = pool.submit(ds.compute, *args, **kwargs) work.append(work_unit) iterable = as_completed(work) if logger.getEffectiveLevel() >= logging.INFO: iterable = tqdm.tqdm(iterable, total=len(work), ncols=80, desc="Tasks") for unit in iterable: submitted = unit.result() new_tasks += submitted logger.info("\nNumber of new tasks: {}".format(new_tasks)) pool.shutdown(wait=True)
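# Hedged aside: the chunk() helper used above to batch entry ids is not shown in
# this excerpt; the following is one plausible, generic implementation (an
# assumption, not the project's own code).
def chunk(items, size):
    """Yield consecutive slices of at most `size` elements."""
    for start in range(0, len(items), size):
        yield items[start:start + size]


# e.g. list(chunk(list(range(7)), 3)) -> [[0, 1, 2], [3, 4, 5], [6]]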
def do_nothing(*args, **kwargs): pass def disable_stdout(): import os import sys f = open(os.devnull, 'w') sys.stdout = f sys.stdout.flush = do_nothing sys.stdout.write = do_nothing tx = TaskExecutor.load("conf/config.yaml", multi_process=False) proc_pool = ProcessPoolExecutor(max_workers=64) #proc_pool = ThreadPoolExecutor(max_workers=64) tick_buffer = BufferedDataProcessor(num_worker=4) history_buffer = BufferedDataProcessor(num_worker=8) history_index_buffer = BufferedDataProcessor(num_worker=1) def logtime(key): return lambda t: logging.debug("%s: %.3fs", key, t) @tick_buffer.on_combine @history_buffer.on_combine @history_index_buffer.on_combine def df_merge(a, b): return pd.concat([a, b])
def process_pool_executor_handler(executor: ProcessPoolExecutor, manager: DownloadProcess, file_maps: Dict[str, str], directory: str, progress_bar_queue) -> None: done_queue = JoinableQueue() def update_hook(future: Future): temp = future.result() if temp: for failed_links in temp: done_queue.put(failed_links) while manager.done_retries != manager.max_retries: print( f"Starting download {manager.get_total_links() - manager.get_total_downloaded_links_count()} links left" ) available_cpus = list(os.sched_getaffinity( os.getpid())) if platform.system() == "Linux" else [0, 1, 2, 3] print( f"available cpu's {available_cpus}, initializing {4 * manager.get_process_num()}" f" threads with {manager.get_thread_num()} links per " f"process") if len(manager.error_links): download_links = manager.error_links.copy() manager.error_links = [] else: download_links = manager.get_download_links().copy() process_futures: List[Future] = [] start = 0 for temp_num in range(len(download_links)): end = start + manager.get_thread_num() if end > len(download_links): end = len(download_links) cpu_num = available_cpus[temp_num % len(available_cpus)] process_futures.append( executor.submit(start_threads, download_links[start:end], file_maps, manager.get_session(), directory, manager.http2, progress_bar_queue, manager.debug, cpu_num)) process_futures[-1].add_done_callback(update_hook) start = end if end >= len(download_links): break wait(process_futures) while not done_queue.empty(): link = done_queue.get() manager.error_links.append(link) manager.set_total_downloaded_links_count(manager.get_total_links() - len(manager.error_links)) if manager.debug: print( f"Total downloaded links {manager.get_total_downloaded_links_count()}" ) print(f"Error links generated {len(manager.error_links)}") if len(manager.error_links): manager.set_thread_num( int( ceil((manager.get_total_links() - manager.get_total_downloaded_links_count()) / manager.get_process_num()))) print( f"\n{manager.get_total_links()} was expected but " f"{manager.get_total_downloaded_links_count()} was downloaded." ) manager.done_retries += 1 print(f"Trying retry {manager.done_retries}") else: break
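# Hedged sketch (much simplified from the handler above): Future.add_done_callback
# fires in the parent process (in an executor helper thread) once a worker finishes,
# so a thread-safe queue.Queue in the parent is enough to collect failed items for a
# retry pass. All names below are illustrative, not taken from the source.
import queue
from concurrent.futures import ProcessPoolExecutor, wait


def _download(url):
    # Stand-in for a real download; pretend odd-length URLs fail and are returned.
    return [] if len(url) % 2 == 0 else [url]


if __name__ == "__main__":
    failed = queue.Queue()

    def on_done(future):
        for url in future.result():
            failed.put(url)

    urls = ["http://a", "http://bb", "http://ccc"]
    with ProcessPoolExecutor(max_workers=2) as pool:
        futures = [pool.submit(_download, u) for u in urls]
        for f in futures:
            f.add_done_callback(on_done)
        wait(futures)
    retry = [failed.get() for _ in range(failed.qsize())]
    print(retry)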
def setup_routes(app): app.router.add_get('/data', get_all_data) app.router.add_get('/data/{mac}', get_data) if __name__ == '__main__': tags = { 'F4:A5:74:89:16:57': 'kitchen', 'CC:2C:6A:1E:59:3D': 'bedroom', 'BB:2C:6A:1E:59:3D': 'livingroom' } m = Manager() q = m.Queue() # Start background process executor = ProcessPoolExecutor(1) executor.submit(run_get_data_background, list(tags.keys()), q) loop = asyncio.get_event_loop() # Start data updater loop.create_task(data_update(q)) # Setup and start web application app = web.Application(loop=loop) setup_routes(app) web.run_app(app, host='0.0.0.0', port=5000)
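# Hedged sketch (assumed behaviour; data_update above is not shown): one way to
# consume a multiprocessing.Manager queue from asyncio without blocking the event
# loop is to poll it with get_nowait() inside a periodic task, which is presumably
# what the data_update task does with the queue filled by the background process.
import asyncio
import queue


async def drain_queue(q, store, interval=1.0):
    """Periodically move (mac, reading) pairs from a Manager queue into a dict."""
    while True:
        try:
            while True:
                mac, reading = q.get_nowait()
                store[mac] = reading
        except queue.Empty:
            pass
        await asyncio.sleep(interval)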
imgs_list = soup.find_all('img') if len(imgs_list) != 0: img_url = 'https:' + imgs_list[-1]['src'] else: img_url = 'error_img' return url + '\t' + img_url # imgs['url'] = imgs.img.apply(crawl) imgs = pd.read_csv(os.path.join(path, 'train_all_json/train_img.txt'), header=None, names=['img']) # img count 305613 for j in range(0, 3050): print('save:', j * 100, (j + 1) * 100) f = open(os.path.join(path, 'img_url.txt'), 'a+') with ProcessPoolExecutor(8) as pool: if j == 3049: p = pool.map(crawl, imgs.img[j * 100:]) else: p = pool.map(crawl, imgs.img[j * 100:(j + 1) * 100]) for i in p: f.write(i + '\n') f.close() # download image def get_image(img, url): usr = img.split('/')[-2] file_suffix = img.split('/')[-1] + '.jpg' file_path = os.path.join(path, 'train', usr) try:
print('Available pages:\n')
for page in os.listdir('/opt/snare/pages/'):
    print('\t- {}'.format(page))
print('\nuse with --page-dir {page_name}\n\n')
exit()
if not os.path.exists('/opt/snare/pages/' + args.page_dir):
    print("--page-dir: {0} does not exist".format(args.page_dir))
    exit()
if not os.path.exists('/opt/snare/pages/' + args.page_dir + "/" +
                      args.index_page):
    print("can't create meta tag")
else:
    add_meta_tag(args.page_dir, args.index_page)
loop = asyncio.get_event_loop()
loop.run_until_complete(check_tanner())
pool = ProcessPoolExecutor(max_workers=multiprocessing.cpu_count())
compare_version_fut = None
if args.auto_update is True:
    timeout = parse_timeout(args.update_timeout)
    compare_version_fut = loop.run_in_executor(pool, compare_version_info,
                                               timeout)
if args.host_ip == 'localhost' and args.interface:
    host_ip = ni.ifaddresses(args.interface)[2][0]['addr']
else:
    host_ip = args.host_ip
future = loop.create_server(
    lambda: HttpRequestHandler(args, debug=args.debug, keep_alive=75),
    args.interface, int(args.port))
srv = loop.run_until_complete(future)
drop_privileges()
def __iter__(self): from concurrent.futures import ProcessPoolExecutor with ProcessPoolExecutor() as executor: return _coconut.iter( _coconut.list(executor.map(self.func, *self.iters)))
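# Hedged aside: when the mapped function is cheap and the iterables are long,
# ProcessPoolExecutor.map accepts a chunksize argument (since Python 3.5) that
# batches items per pickle round-trip and can cut inter-process overhead
# substantially. A minimal sketch, not related to the class above:
from concurrent.futures import ProcessPoolExecutor


def _double(x):
    return 2 * x


if __name__ == "__main__":
    with ProcessPoolExecutor() as executor:
        results = list(executor.map(_double, range(10_000), chunksize=256))
    print(results[:5])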
def main(): args = parse_arguments() if args.use_env and 'LOCAL_RANK' in os.environ: args.local_rank = int(os.environ['LOCAL_RANK']) random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) device, args = setup_training(args) dllogger.log(step="PARAMETER", data={"Config": [str(args)]}) # Prepare optimizer model, optimizer, lr_scheduler, checkpoint, global_step = prepare_model_and_optimizer( args, device) if is_main_process(): dllogger.log(step="PARAMETER", data={"SEED": args.seed}) raw_train_start = time.time() if args.do_train: if is_main_process(): dllogger.log(step="PARAMETER", data={"train_start": True}) dllogger.log(step="PARAMETER", data={"batch_size_per_gpu": args.train_batch_size}) dllogger.log(step="PARAMETER", data={"learning_rate": args.learning_rate}) model.train() most_recent_ckpts_paths = [] average_loss = 0.0 # averaged loss every args.log_freq steps epoch = 0 training_steps = 0 pool = ProcessPoolExecutor(1) # Note: We loop infinitely over epochs, termination is handled via iteration count while True: thread = None if not args.resume_from_checkpoint or epoch > 0 or ( args.phase2 and global_step < 1) or args.init_checkpoint: files = [ os.path.join(args.input_dir, f) for f in os.listdir(args.input_dir) if os.path.isfile(os.path.join(args.input_dir, f)) and 'training' in f ] files.sort() num_files = len(files) random.shuffle(files) f_start_id = 0 else: f_start_id = checkpoint['files'][0] files = checkpoint['files'][1:] args.resume_from_checkpoint = False num_files = len(files) shared_file_list = {} if torch.distributed.is_initialized( ) and torch.distributed.get_world_size() > num_files: remainder = torch.distributed.get_world_size() % num_files data_file = files[ (f_start_id * torch.distributed.get_world_size() + torch.distributed.get_rank() + remainder * f_start_id) % num_files] else: data_file = files[ (f_start_id * torch.distributed.get_world_size() + torch.distributed.get_rank()) % num_files] previous_file = data_file train_data = pretraining_dataset(data_file, args.max_predictions_per_seq) train_sampler = RandomSampler(train_data) train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=args.train_batch_size * args.n_gpu, num_workers=4, pin_memory=True) # shared_file_list["0"] = (train_dataloader, data_file) overflow_buf = None if args.allreduce_post_accumulation: overflow_buf = torch.cuda.IntTensor([0]) if len(files) == 1: f_start_id = -1 for f_id in range(f_start_id + 1, len(files)): if torch.distributed.get_world_size() > num_files: data_file = files[ (f_id * torch.distributed.get_world_size() + torch.distributed.get_rank() + remainder * f_id) % num_files] else: data_file = files[ (f_id * torch.distributed.get_world_size() + torch.distributed.get_rank()) % num_files] previous_file = data_file dataset_future = pool.submit(create_pretraining_dataset, data_file, args.max_predictions_per_seq, shared_file_list, args) train_iter = tqdm(train_dataloader, desc="Iteration" ) if is_main_process() else train_dataloader for step, batch in enumerate(train_iter): training_steps += 1 batch = [t.to(device) for t in batch] input_ids, segment_ids, input_mask, masked_lm_labels, next_sentence_labels = batch loss = model( input_ids=input_ids, token_type_ids=segment_ids, attention_mask=input_mask, masked_lm_labels=masked_lm_labels, next_sentence_label=next_sentence_labels, checkpoint_activations=args.checkpoint_activations) if args.n_gpu > 1: loss = loss.mean() # mean() to average on multi-gpu. 
divisor = args.gradient_accumulation_steps if args.gradient_accumulation_steps > 1: if not args.allreduce_post_accumulation: # this division was merged into predivision loss = loss / args.gradient_accumulation_steps divisor = 1.0 if args.fp16: with amp.scale_loss( loss, optimizer, delay_overflow_check=args. allreduce_post_accumulation) as scaled_loss: scaled_loss.backward() else: loss.backward() average_loss += loss.item() if training_steps % args.gradient_accumulation_steps == 0: lr_scheduler.step() # learning rate warmup global_step = take_optimizer_step( args, optimizer, model, overflow_buf, global_step) if global_step >= args.max_steps: train_time_raw = time.time() - raw_train_start last_num_steps = int( training_steps / args.gradient_accumulation_steps) % args.log_freq last_num_steps = args.log_freq if last_num_steps == 0 else last_num_steps average_loss = torch.tensor( average_loss, dtype=torch.float32).cuda() average_loss = average_loss / (last_num_steps * divisor) if (torch.distributed.is_initialized()): average_loss /= torch.distributed.get_world_size() torch.distributed.all_reduce(average_loss) final_loss = average_loss.item() if is_main_process(): dllogger.log(step=( epoch, training_steps / args.gradient_accumulation_steps, ), data={"final_loss": final_loss}) elif training_steps % ( args.log_freq * args.gradient_accumulation_steps) == 0: if is_main_process(): dllogger.log( step=( epoch, global_step, ), data={ "average_loss": average_loss / (args.log_freq * divisor), "step_loss": loss.item() * args.gradient_accumulation_steps / divisor, "learning_rate": optimizer.param_groups[0]['lr'] }) average_loss = 0 if global_step >= args.max_steps or training_steps % ( args.num_steps_per_checkpoint * args.gradient_accumulation_steps) == 0: if is_main_process() and not args.skip_checkpoint: # Save a trained model dllogger.log(step="PARAMETER", data={"checkpoint_step": global_step}) model_to_save = model.module if hasattr( model, 'module' ) else model # Only save the model it-self if args.resume_step < 0 or not args.phase2: output_save_file = os.path.join( args.output_dir, "ckpt_{}.pt".format(global_step)) else: output_save_file = os.path.join( args.output_dir, "ckpt_{}.pt".format(global_step + args.phase1_end_step)) if args.do_train: torch.save( { 'model': model_to_save.state_dict(), 'optimizer': optimizer.state_dict(), 'master params': list(amp.master_params(optimizer)), 'files': [f_id] + files }, output_save_file) most_recent_ckpts_paths.append( output_save_file) if len(most_recent_ckpts_paths) > 3: ckpt_to_be_removed = most_recent_ckpts_paths.pop( 0) os.remove(ckpt_to_be_removed) if global_step >= args.max_steps: del train_dataloader # thread.join() return args, final_loss, train_time_raw del train_dataloader # thread.join() # Make sure pool has finished and switch train_dataloader # NOTE: Will block until complete train_dataloader, data_file = dataset_future.result( timeout=None) epoch += 1
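# Hedged sketch of the prefetch pattern used in the training loop above, with
# generic names and no torch dependency (an illustration, not the original code):
# a single-worker ProcessPoolExecutor loads the next data shard while the current
# shard is being consumed, and result() swaps them in at the shard boundary.
from concurrent.futures import ProcessPoolExecutor


def load_shard(path):
    # Stand-in for create_pretraining_dataset: pretend a shard is a list of ints.
    return [hash((path, i)) % 100 for i in range(4)]


def consume(shard):
    # Stand-in for iterating the DataLoader and running training steps.
    return sum(shard)


if __name__ == "__main__":
    paths = [f"shard_{i}.hdf5" for i in range(3)]
    with ProcessPoolExecutor(max_workers=1) as pool:
        current = load_shard(paths[0])
        for next_path in paths[1:] + [None]:
            prefetch = pool.submit(load_shard, next_path) if next_path else None
            consume(current)                 # work on the shard already in memory
            if prefetch is not None:
                current = prefetch.result()  # block only at the shard boundary
    print("done")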