async def apply(self, fn, *args):
    """
    Like multiprocessing.Pool.apply_async, but:

    * is an asyncio coroutine
    * terminates the process if cancelled
    """
    while not self._free:
        await self._change.wait()
        self._change.clear()
    pool = usable_pool = self._free.pop()
    self._working.add(pool)

    loop = asyncio.get_event_loop()
    fut = loop.create_future()

    def _on_done(obj):
        loop.call_soon_threadsafe(fut.set_result, obj)

    def _on_err(err):
        loop.call_soon_threadsafe(fut.set_exception, err)

    pool.apply_async(fn, args, callback=_on_done, error_callback=_on_err)

    try:
        return await fut
    except asyncio.CancelledError:
        pool.terminate()
        usable_pool = self._new_pool()
    finally:
        self._working.remove(pool)
        self._free.add(usable_pool)
        self._change.set()
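# A standalone sketch of the same callback-to-Future bridge the coroutine above
# relies on. Everything here (the pool handling, the _square helper, the function
# name) is illustrative and not part of the original class; it only shows how
# Pool.apply_async callbacks can resolve an asyncio Future.
import asyncio
import multiprocessing


def _square(x):
    return x * x


async def apply_async_as_coroutine(pool, fn, *args):
    loop = asyncio.get_running_loop()
    fut = loop.create_future()
    # apply_async callbacks fire on a pool-internal thread, so hop back onto
    # the event loop thread before touching the Future.
    pool.apply_async(
        fn, args,
        callback=lambda res: loop.call_soon_threadsafe(fut.set_result, res),
        error_callback=lambda exc: loop.call_soon_threadsafe(fut.set_exception, exc),
    )
    return await fut


async def _demo():
    # The with-block terminates the pool on exit.
    with multiprocessing.Pool(processes=2) as pool:
        print(await apply_async_as_coroutine(pool, _square, 7))  # 49


if __name__ == "__main__":
    asyncio.run(_demo())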
def _run_tests(self):
    pool = multiprocessing.pool.ThreadPool(processes=self.suite_concurrency)
    outstanding_suites = []
    for suite in self.suite_runners:
        suite.task = pool.apply_async(suite.run)
        outstanding_suites.append(suite)

    ret = True
    try:
        while len(outstanding_suites) > 0:
            for suite in list(outstanding_suites):
                if suite.timed_out():
                    msg = "Task %s not finished within timeout %s" % (
                        suite.name, suite.suite.timeout_minutes)
                    logging.error(msg)
                    raise Exception(msg)
                task = suite.task
                if task.ready():
                    this_task_ret = task.get()
                    outstanding_suites.remove(suite)
                    if this_task_ret:
                        logging.info("Suite %s succeeded.", suite.name)
                    else:
                        logging.info("Suite %s failed.", suite.name)
                        ret = False
            time.sleep(5)
    except KeyboardInterrupt:
        logging.info("\n\nDetected KeyboardInterrupt; shutting down!\n\n")
        raise
    finally:
        pool.terminate()
    return ret
def finalize(self):
    pool = self._pool
    self._next = None
    self._pool = None
    if pool is not None:
        pool.terminate()
def find_pfm_externals(all_outputs, py_exe, no_py_mods, no_sdl_mods, py_path, sdl_path):
    def pool_cb(output, asset_category, source_path, asset_paths):
        for asset_path in asset_paths:
            asset_key = str(asset_path.relative_to(source_path))
            output.setdefault(asset_category, {}).setdefault(asset_key, {})

    pool = multiprocessing.pool.Pool(initializer=_utils.multiprocess_init)
    try:
        for output in all_outputs.values():
            pfm_names = [pathlib.Path(i).stem for i in output.get("python", {}).keys()]
            py_cb = functools.partial(pool_cb, output, "python", py_path)
            sdl_cb = functools.partial(pool_cb, output, "sdl", sdl_path)
            if not no_sdl_mods:
                pool.apply_async(find_pfm_sdlmods, (sdl_path, pfm_names),
                                 callback=sdl_cb, error_callback=log_exception)
            if not no_py_mods:
                pool.apply_async(find_pfm_dependency_pymodules, (py_exe, py_path, pfm_names),
                                 callback=py_cb, error_callback=log_exception)
    except:
        pool.terminate()
        pool.join()
        raise
    else:
        # Ensure all jobs finish
        pool.close()
        pool.join()
def download_imgs(imgurls, dest_dir):
    jobs = []
    for url in imgurls:
        filename = urllib.parse.urlparse(url).path.split('/')[-1]
        if not filename:
            logging.error('Filename cannot be obtained from url: %s', url)
            continue
        dest_path = os.path.join(dest_dir, filename)
        if not os.path.exists(dest_path):
            jobs.append((url, dest_path))

    pool = multiprocessing.pool.ThreadPool(os.cpu_count() * THREAD_PER_CPU)
    try:
        # map() blocks until every download finishes, so it must sit inside the
        # try block for the KeyboardInterrupt handler below to be reachable.
        pool.map(download_img_worker, jobs)
        pool.close()
        pool.join()
    except KeyboardInterrupt:
        print('KeyboardInterrupt!')
        pool.terminate()
        pool.join()
    logging.debug('download finished')
def multiprocessing_timeout(
    self, wrapped_func: Callable[..., Any], args: Any, kwargs: Any
) -> Any:
    """
    Multiprocessing method for timeouts; works in threads and on Windows

    Args:
        wrapped_func: function being decorated
        args: positional arguments for the decorated function
        kwargs: keyword arguments for the decorated function

    Returns:
        Any: result of decorated function

    Raises:
        N/A

    """
    pool = multiprocessing.pool.ThreadPool(processes=1)
    future = pool.apply_async(wrapped_func, args, kwargs)
    try:
        result = future.get(timeout=self.timeout_duration)
        pool.terminate()
        return result
    except multiprocessing.context.TimeoutError:
        pool.terminate()
        self._handle_timeout()
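# A minimal, self-contained sketch of the same timeout pattern used above: run
# the work in a one-thread pool, wait with a deadline, and terminate the pool
# either way. The helper name and the 0.5s/2s timings are made up for
# illustration. Note that terminate() on a ThreadPool discards the pool but
# cannot actually kill a worker thread that is still running.
import multiprocessing.pool
import time


def run_with_timeout(fn, timeout, *args, **kwargs):
    pool = multiprocessing.pool.ThreadPool(processes=1)
    try:
        return pool.apply_async(fn, args, kwargs).get(timeout=timeout)
    except multiprocessing.TimeoutError:
        return None  # caller decides how to report the timeout
    finally:
        pool.terminate()


def _slow():
    time.sleep(2)
    return "done"


print(run_with_timeout(_slow, timeout=0.5))  # -> None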
def get_kmers_for_all_files(all_files, all_hosts, k):
    kmers_for_files_dict = {}
    kmers_sequences_only_for_de_bruijn = []

    read_files_for_hosts = dict()
    for host in all_hosts:
        read_files_for_hosts[host] = []
        for file in all_files:
            if host.replace('__', '_') in file:
                read_files_for_hosts[host].append(file)

    ## Don't need this for 1 pair test
    for host, read_files_for_host in read_files_for_hosts.items():
        if len(read_files_for_host) == 0:
            print("Error: no read files for host %s" % host)
            sys.exit()

    get_kmers_for_all_files_parallel_inputs = []
    for host in all_hosts:
        get_kmers_for_all_files_parallel_inputs.append((host, read_files_for_hosts[host]))

    with closing(NoDaemonPool(processes=100)) as pool:
        kmers_with_counts = pool.map(get_file_kmers_with_counts,
                                     get_kmers_for_all_files_parallel_inputs)
        pool.terminate()

    for host, kmers, current_kmers_sequences_only_for_de_bruijn in kmers_with_counts:
        kmers_for_files_dict.setdefault(host, []).append(kmers)
        kmers_sequences_only_for_de_bruijn.extend(current_kmers_sequences_only_for_de_bruijn)

    return (kmers_for_files_dict, kmers_sequences_only_for_de_bruijn)
def system_status():
    from status_checks import run_checks

    class WebOutput:
        def __init__(self):
            self.items = []

        def add_heading(self, heading):
            self.items.append({"type": "heading", "text": heading, "extra": []})

        def print_ok(self, message):
            self.items.append({"type": "ok", "text": message, "extra": []})

        def print_error(self, message):
            self.items.append({"type": "error", "text": message, "extra": []})

        def print_warning(self, message):
            self.items.append({"type": "warning", "text": message, "extra": []})

        def print_line(self, message, monospace=False):
            self.items[-1]["extra"].append({"text": message, "monospace": monospace})

    output = WebOutput()
    # Create a temporary pool of processes for the status checks
    pool = multiprocessing.pool.Pool(processes=5)
    run_checks(False, env, output, pool)
    pool.terminate()
    return json_response(output.items)
def run(self) -> float:
    if self._complete.is_set():
        raise StopIteration("This runner has already been used")
    if self._running:
        raise StopIteration("This runner is being executed")
    self._running = True

    if (self._threads != 1 or self._processes != 1) and self._optimize_workers:
        t = time.time()
        result = self._function(*next(self._raw_function_arguments))
        time_spent = time.time() - t
        if self._check_function(result):
            self._success_function(result)
        if time_spent < self.__speed_reference:
            self._threads = 1
            self._processes = 1
            self._function_arguments = self._raw_function_arguments
    else:
        self._function_arguments = self._raw_function_arguments

    if self._threads == self._processes and self._threads == 1:
        self._function_arguments: collections.Iterable
        start = time.time()
        for args in self._function_arguments:
            output = self._function(*args)
            if self._check_function(output):
                self._success_function(output)
        return time.time() - start

    self._function_arguments = Queue(self._raw_function_arguments)

    if self._processes == 1 or self._threads == 1:
        if self._processes > self._threads:
            self._threads = self._processes
        self._blocking_success = True
        start = time.time()
        self._process_worker()
        return time.time() - start

    self._blocking_success = False
    self._success_sync_queue = multiprocessing.Queue()
    sync_thread = threading.Thread(target=self._sync_success)
    sync_thread.start()

    if any(platform in sys.platform for platform in ("win", "ios")) or self._processes_as_threads:
        process_pool = multiprocessing.pool.ThreadPool
    else:
        process_pool = multiprocessing.pool.Pool

    start = time.time()
    pool = process_pool(processes=self._processes)
    pool.imap_unordered(lambda f: f(),
                        (self._process_worker for _ in range(self._processes)),
                        chunksize=self._processes)
    pool.close()
    pool.join()
    pool.terminate()

    self._complete.set()
    self._function_arguments.stop()
    self._function_arguments.join()
    sync_thread.join()
    self._running = False
    return time.time() - start
def run_calls(fun, list_of_args, extra_args=(), pool_type='processes',
              nb_workers=multiprocessing.cpu_count(), timeout=60,
              verbose=True, initializer=None, initargs=None):
    """
    Run a function several times in parallel with different inputs.

    Args:
        fun: function to be called several times in parallel.
        list_of_args: list of (first positional) arguments passed to fun, one
            per call
        extra_args: tuple containing extra arguments to be passed to fun
            (same value for all calls)
        pool_type: either 'processes' or 'threads'
        nb_workers: number of calls run simultaneously
        timeout: number of seconds allowed per function call
        verbose: either True (show the amount of computed calls) or False
        initializer, initargs (optional): if initializer is not None then each
            worker process will call initializer(*initargs) when it starts

    Return:
        list of outputs
    """
    if pool_type == 'processes':
        pool = multiprocessing.Pool(nb_workers, initializer, initargs)
    elif pool_type == 'threads':
        pool = multiprocessing.pool.ThreadPool(nb_workers)
    else:
        # Abort instead of falling through with an undefined pool.
        print('ERROR: unknown pool_type "{}"'.format(pool_type))
        sys.exit(1)

    results = []
    outputs = []
    if verbose:
        show_progress.counter = 0
        show_progress.total = len(list_of_args)
    for x in list_of_args:
        if type(x) == tuple:
            args = x + extra_args
        else:
            args = (x,) + extra_args
        results.append(pool.apply_async(fun, args=args,
                                        callback=show_progress if verbose else None))

    for r in results:
        try:
            outputs.append(r.get(timeout))
        except KeyboardInterrupt:
            pool.terminate()
            sys.exit(1)

    pool.close()
    pool.join()
    return outputs
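# A hypothetical call of run_calls() above, using a throwaway worker function;
# verbose=False so the example does not depend on the module's show_progress
# helper. The input values and offset are arbitrary.
def _add(x, offset):
    return x + offset


outputs = run_calls(_add, list(range(10)), extra_args=(100,),
                    pool_type='threads', nb_workers=4, timeout=10, verbose=False)
print(outputs)  # [100, 101, ..., 109]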
def ScopedPool(*args, **kwargs):
    """Context Manager which returns a multiprocessing.pool instance which
    correctly deals with thrown exceptions.

    *args - Arguments to multiprocessing.pool

    Kwargs:
      kind ('threads', 'procs') - The type of underlying coprocess to use.
      **etc - Arguments to multiprocessing.pool
    """
    if kwargs.pop('kind', None) == 'threads':
        pool = multiprocessing.pool.ThreadPool(*args, **kwargs)
    else:
        orig, orig_args = kwargs.get('initializer'), kwargs.get('initargs', ())
        kwargs['initializer'] = _ScopedPool_initer
        kwargs['initargs'] = orig, orig_args
        pool = multiprocessing.pool.Pool(*args, **kwargs)

    try:
        yield pool
        pool.close()
    except:
        pool.terminate()
        raise
    finally:
        pool.join()
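# Hypothetical usage of ScopedPool above, assuming it is wrapped with
# contextlib.contextmanager in its module (it yields the pool). On a clean exit
# the pool is closed and joined; on an exception it is terminated instead.
with ScopedPool(4, kind='threads') as pool:
    print(pool.map(str, range(5)))  # ['0', '1', '2', '3', '4']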
def bootstrap_on_subdata(subdata, pop=None, tests=None, cutoffs=None, do_trims=None, b_reps=None):
    nogenes = len(subdata.index.values)
    if nogenes <= 0:
        results = pd.DataFrame(index=[0])
        results['nogenes'] = 0
    else:
        if do_trims is None:
            do_trims = [True, False]
        if cutoffs is None:
            cutoffs = [0.05, 0.15]
        if tests is None:
            tests = ['eMKT', 'aMKT']
        if b_reps is None:
            b_reps = 100

        pars = [(subdata.sample(n=nogenes, replace=True), pop, tests, cutoffs, do_trims)
                for _ in range(b_reps)]

        pool = MyPool(processes=1)  # multiprocessing.cpu_count())
        results_list = pool.starmap(mkt_on_subdata, pars)
        pool.terminate()

        results = pd.concat(results_list, axis=0, ignore_index=True)

    return results
def fast_process(fn_t, fn_read, shape, tally_depth, ds, iw, ih, categories, fieldmap,
                 thresh, labelcat, batch_size, ahead, verbose, parallel):
    psize = int(numpy.ceil(float(ds.size()) / parallel))
    ranges = [(s, min(ds.size(), s + psize))
              for s in range(0, ds.size(), psize) if s < ds.size()]
    parallel = len(ranges)
    original_sigint_handler = setup_sigint()
    # pool = multiprocessing.Pool(processes=parallel, initializer=setup_sigint)
    pool = multiprocessing.pool.ThreadPool(processes=parallel)
    restore_sigint(original_sigint_handler)
    # Precache memmapped files
    blobdata = cached_memmap(fn_read, mode='r', dtype='float32', shape=shape)
    count_t = cached_memmap(fn_t, mode='r+', dtype='int32',
                            shape=(ds.size(), tally_depth, 3))
    data = [
        (fn_t, fn_read, shape, tally_depth, ds, iw, ih, categories, fieldmap,
         thresh, labelcat, batch_size, ahead, verbose, True) + r
        for r in ranges]
    try:
        result = pool.map_async(individual_process, data)
        result.get(31536000)
    except KeyboardInterrupt:
        print("Caught KeyboardInterrupt, terminating workers")
        pool.terminate()
        raise
    else:
        pool.close()
        pool.join()
def test_no_thread_pool():
    pool = xmon_stepper.ThreadlessPool()
    result = pool.map(lambda x: x + 1, range(10))
    assert result == [x + 1 for x in range(10)]
    # No ops.
    pool.terminate()
    pool.join()
def hyperpol_current(self, model, delay, dur, section_stim, loc_stim, section_rec, loc_rec):
    # find a hyperpolarizing current to stop spontaneous firing
    upper_bound = -0.02
    lower_bound = -0.01
    h_amps = numpy.arange(lower_bound, upper_bound, -0.002)
    hyperpol_amp = None  # stays None if no tested amplitude silences the cell

    for h_amp in h_amps:
        # I use multiprocessing to keep every NEURON related task in independent processes
        pool = multiprocessing.Pool(1, maxtasksperchild=1)
        trace = pool.apply(self.run_cclamp_on_soma,
                           args=(model, 0, h_amp, delay, dur, section_stim,
                                 loc_stim, section_rec, loc_rec))
        pool.terminate()
        pool.join()
        del pool

        print('h_amp: ' + str(h_amp))
        spikecount = self.spikecount(delay, dur, trace)

        if spikecount == 0:
            hyperpol_amp = h_amp
            break

    return hyperpol_amp
def generate_prediction(self, model, verbose=False):
    """Implementation of sciunit.Test.generate_prediction."""
    efel.reset()

    pool = multiprocessing.Pool(self.npool, maxtasksperchild=1)
    # amps = numpy.arange(0, 3.55, 0.05)
    amps = numpy.arange(0, 1.65, 0.05)

    cclamp_ = functools.partial(self.cclamp, model, delay=500, dur=1000)
    results = pool.map(cclamp_, amps, chunksize=1)
    # results = result.get()

    pool.terminate()
    pool.join()
    del pool

    # needed to avoid overlapping of saved images when the test is run on multiple models in a for loop
    plt.close('all')

    I_maxNumAP, I_below_depol_block, Veq = self.find_Ith_Veq(model, results, amps)

    prediction = {'model_I_maxNumAP': float(I_maxNumAP) * nA,
                  'model_I_below_depol_block': float(I_below_depol_block) * nA,
                  'model_Veq': float(Veq) * mV}

    efel.reset()
    return prediction
def mkt_on_df(gene_df, data_df, label=None, pops=None, tests=None, cutoffs=None,
              do_trims=None, bootstrap=None, b_reps=None):
    if do_trims is None:
        do_trims = [True, False]
    if cutoffs is None:
        cutoffs = [0.05, 0.15]
    if tests is None:
        tests = ['eMKT', 'aMKT']
    if pops is None:
        pops = ['AFR', 'EUR']
    if bootstrap is None:
        bootstrap = False
    if b_reps is None:
        b_reps = 100

    pars = [(gene_df.iloc[:, i], data_df, pops, tests, cutoffs, do_trims, bootstrap, b_reps)
            for i in range(len(gene_df.columns.values))]

    # Loads the models for all the parameters parsed using multiprocessing to speed up computations
    pool = MyPool(processes=4)  # multiprocessing.cpu_count())
    results_list = pool.starmap(mkt_on_col, pars)
    pool.terminate()

    results = pd.concat(results_list, axis=0, ignore_index=True)

    if label is not None:
        results['label'] = label

    return results
def binsearch(self, model, stim_range, delay, dur, section_stim, loc_stim, section_rec, loc_rec):
    c_minmax = stim_range
    c_step_start = 0.01
    c_step_stop = 0.002

    found = False
    spikecounts = []
    amplitudes = []

    while c_step_start >= c_step_stop and not found:
        c_stim = numpy.arange(c_minmax[0], c_minmax[1], c_step_start)
        first = 0
        last = numpy.size(c_stim, axis=0) - 1

        while first <= last and not found:
            midpoint = (first + last) // 2
            amplitude = c_stim[midpoint]

            result = []
            # I use multiprocessing to keep every NEURON related task in independent processes
            pool = multiprocessing.Pool(1, maxtasksperchild=1)
            traces = pool.apply(self.run_cclamp_on_soma,
                                args=(model, amplitude, delay, dur, section_stim,
                                      loc_stim, section_rec, loc_rec))
            pool.terminate()
            pool.join()
            del pool

            spikecount = self.spikecount(delay, dur, traces)
            amplitudes.append(amplitude)
            spikecounts.append(spikecount)

            # if spikecount >= 10 and spikecount <= 20:
            if spikecount == 15:
                found = True
            else:
                # if spikecount > 20:
                if spikecount > 15:
                    last = midpoint - 1
                # elif spikecount < 10:
                elif spikecount < 15:
                    first = midpoint + 1

        c_step_start = c_step_start / 2

    if not found:
        # we choose the one that is nearest to 15, but not 0
        amp_index = min((p for p in range(len(spikecounts)) if spikecounts[p] != 0),
                        key=lambda i: abs(spikecounts[i] - 15.0))
        # print list(p for p in range(len(spikecounts)) if spikecounts[p] != 0)
        # this gives the indices where spikecount is not 0, then i takes up these values
        # print amp_index
        amplitude = amplitudes[amp_index]
        spikecount = spikecounts[amp_index]

    binsearch_result = [found, amplitude, spikecount]
    # print binsearch_result
    return binsearch_result
def _close(self):
    for pool in self._managed_pools:
        pool.terminate()

    for pool in self._managed_pools:
        pool.join()

    self._aliases.clear()
    self._aliases_per_pool.clear()
    self._managed_pools.clear()
def shutdown(self):
    """
    Gracefully terminate a running runtime.
    """
    self.ready.clear()
    self.shutdown_send.send(self.SHUTDOWN_TRIGGER)  # trigger background thread to shutdown
    for pool in self.pools:
        if pool.is_alive():
            pool.terminate()
            pool.join()
def _create_process_pool(processes=1):
    if _MULTIPROCESSING and processes:
        logger.info("creating pool with %i workers", processes)
        pool = multiprocessing.pool.Pool(processes=processes)
    else:
        logger.info("creating dummy pool")
        pool = DummyPool()
    yield pool
    pool.terminate()
def s3_iter_bucket(bucket, prefix="", accept_key=lambda key: True, key_limit=None, workers=16): """ Iterate and download all S3 files under `bucket/prefix`, yielding out `(key, key content)` 2-tuples (generator). `accept_key` is a function that accepts a key name (unicode string) and returns True/False, signalling whether the given key should be downloaded out or not (default: accept all keys). If `key_limit` is given, stop after yielding out that many results. The keys are processed in parallel, using `workers` processes (default: 16), to speed up downloads greatly. If multiprocessing is not available, thus NO_MULTIPROCESSING is True, this parameter will be ignored. Example:: >>> mybucket = boto.connect_s3().get_bucket('mybucket') >>> # get all JSON files under "mybucket/foo/" >>> for key, content in s3_iter_bucket(mybucket, prefix='foo/', accept_key=lambda key: key.endswith('.json')): ... print key, len(content) >>> # limit to 10k files, using 32 parallel workers (default is 16) >>> for key, content in s3_iter_bucket(mybucket, key_limit=10000, workers=32): ... print key, len(content) """ total_size, key_no = 0, -1 keys = (key for key in bucket.list(prefix=prefix) if accept_key(key.name)) if NO_MULTIPROCESSING: logger.info("iterating over keys from %s without multiprocessing" % bucket) iterator = imap(s3_iter_bucket_process_key, keys) else: logger.info("iterating over keys from %s with %i workers" % (bucket, workers)) pool = multiprocessing.pool.Pool(processes=workers) iterator = pool.imap_unordered(s3_iter_bucket_process_key, keys) for key_no, (key, content) in enumerate(iterator): if key_no % 1000 == 0: logger.info( "yielding key #%i: %s, size %i (total %.1fMB)" % (key_no, key, len(content), total_size / 1024.0 ** 2) ) yield key, content key.close() total_size += len(content) if key_limit is not None and key_no + 1 >= key_limit: # we were asked to output only a limited number of keys => we're done break if not NO_MULTIPROCESSING: pool.terminate() logger.info("processed %i keys, total size %i" % (key_no + 1, total_size))
def prepare_packages(all_outputs, client_path, scripts_path, **kwargs):
    pool = multiprocessing.pool.Pool(initializer=_utils.multiprocess_init)
    try:
        missing_assets = []
        for package_name, package_dict in all_outputs.items():
            for asset_category, assets in package_dict.items():
                for asset_filename, asset_dict in assets.items():
                    asset_source_path = make_asset_path(asset_category, asset_filename,
                                                        client_path=client_path,
                                                        scripts_path=scripts_path)
                    if not asset_source_path.exists():
                        missing_assets.append((package_name, asset_category, asset_filename))
                        logging.warning(f"Asset '{asset_source_path.name}' (used in '{package_name}') is missing from the client.")
                        continue

                    # Fill in some information from the filesystem.
                    stat = asset_source_path.stat()
                    asset_dict["modify_time"] = int(stat.st_mtime)
                    asset_dict["size"] = stat.st_size

                    # Command line specs
                    for key, value in kwargs.items():
                        if value is not None:
                            asset_dict[key] = str(value)

                    # Now we submit slow operations to the process pool.
                    def pool_cb(asset_dict, key, value):
                        asset_dict[key] = value

                    md5_complete = functools.partial(pool_cb, asset_dict, "hash_md5")
                    sha2_complete = functools.partial(pool_cb, asset_dict, "hash_sha2")
                    pool.apply_async(_utils.hash_md5, (asset_source_path,),
                                     callback=md5_complete, error_callback=log_exception)
                    pool.apply_async(_utils.hash_sha2, (asset_source_path,),
                                     callback=sha2_complete, error_callback=log_exception)

        # Discard any missing thingos from our asset map and it will be very nearly final.
        for package_name, asset_category, asset_filename in missing_assets:
            all_outputs[package_name][asset_category].pop(asset_filename)
        for package_name in tuple(all_outputs.keys()):
            package_dict = all_outputs[package_name]
            for asset_category in tuple(package_dict.keys()):
                if not package_dict[asset_category]:
                    package_dict.pop(asset_category)
            if not package_dict:
                all_outputs.pop(package_name)
    except:
        pool.terminate()
        pool.join()
        raise
    else:
        # Wait for the pool to finish
        pool.close()
        pool.join()

    return not bool(missing_assets)
def s3_iter_bucket(bucket, prefix='', accept_key=lambda key: True, key_limit=None, workers=16, retries=3):
    """
    Iterate and download all S3 files under `bucket/prefix`, yielding out
    `(key, key content)` 2-tuples (generator).

    `accept_key` is a function that accepts a key name (unicode string) and
    returns True/False, signalling whether the given key should be downloaded out
    or not (default: accept all keys).

    If `key_limit` is given, stop after yielding out that many results.

    The keys are processed in parallel, using `workers` processes (default: 16),
    to speed up downloads greatly. If multiprocessing is not available, thus
    MULTIPROCESSING is False, this parameter will be ignored.

    Example::

      >>> mybucket = boto.connect_s3().get_bucket('mybucket')

      >>> # get all JSON files under "mybucket/foo/"
      >>> for key, content in s3_iter_bucket(mybucket, prefix='foo/', accept_key=lambda key: key.endswith('.json')):
      ...     print key, len(content)

      >>> # limit to 10k files, using 32 parallel workers (default is 16)
      >>> for key, content in s3_iter_bucket(mybucket, key_limit=10000, workers=32):
      ...     print key, len(content)
    """
    total_size, key_no = 0, -1
    keys = ({'key': key, 'retries': retries}
            for key in bucket.list(prefix=prefix) if accept_key(key.name))

    if MULTIPROCESSING:
        logger.info("iterating over keys from %s with %i workers" % (bucket, workers))
        pool = multiprocessing.pool.Pool(processes=workers)
        iterator = pool.imap_unordered(s3_iter_bucket_process_key_with_kwargs, keys)
    else:
        logger.info("iterating over keys from %s without multiprocessing" % bucket)
        iterator = imap(s3_iter_bucket_process_key_with_kwargs, keys)

    for key_no, (key, content) in enumerate(iterator):
        if key_no % 1000 == 0:
            logger.info("yielding key #%i: %s, size %i (total %.1fMB)" %
                        (key_no, key, len(content), total_size / 1024.0 ** 2))

        yield key, content
        key.close()
        total_size += len(content)

        if key_limit is not None and key_no + 1 >= key_limit:
            # we were asked to output only a limited number of keys => we're done
            break

    if MULTIPROCESSING:
        pool.terminate()

    logger.info("processed %i keys, total size %i" % (key_no + 1, total_size))
def getScoreAl(album, word):
    pool = MyPool()  # creating multiple processes for each song
    score = 0
    trackList = getTracks(album)
    d = dict((el, word) for el in trackList)
    for sc in pool.starmap(getScoreTr, d.items()):  # adding score for each track
        score += sc
    pool.terminate()  # terminating for safe operation
    return score
def closing(pool: Pool) -> Iterator[Pool]:
    """Return a context manager making sure the pool closes properly."""
    try:
        yield pool
    finally:
        # For Pool.imap*, close and join are needed
        # for the returned iterator to begin yielding.
        pool.close()
        pool.join()
        pool.terminate()
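# Hypothetical usage of the closing() helper above, assuming it is registered
# as a contextlib.contextmanager in its module. Results are consumed inside the
# with block; close(), join(), and terminate() run when the block exits.
from multiprocessing.pool import ThreadPool

with closing(ThreadPool(4)) as pool:
    for value in pool.imap_unordered(abs, [-3, -2, -1]):
        print(value)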
def __call__(self, net_lists):
    evaluations = np.zeros(len(net_lists))
    for i in np.arange(0, len(net_lists), self.gpu_num):
        process_num = np.min((i + self.gpu_num, len(net_lists))) - i
        pool = NoDaemonProcessPool(process_num)
        arg_data = [(cnn_eval, net_lists[i + j], j, self.epoch_num, self.batchsize,
                     self.dataset, self.verbose, self.imgSize, self.datapath, self.seed)
                    for j in range(process_num)]
        evaluations[i:i + process_num] = pool.map(arg_wrapper_mp, arg_data)
        pool.terminate()

    return evaluations
def main():
    repatching("threading")
    pool = multiprocessing.pool.Pool()
    rng = random.Random()
    rng.seed(random.SystemRandom().random())
    for i in range(2):
        pool.apply_async(delay_print, (rng.randrange(3, 5),))
    pool.close()
    pool.join()
    pool.terminate()
def cleanup():
    pool.terminate()
    for p in processes:
        try:
            p.terminate()
        except OSError:
            # We can fail to terminate if the process is already finished, so
            # ignore such failures.
            pass
    processes.clear()
def getScore(artist, word):
    pool = MyPool()  # multiple processes for each album
    score = 0
    al = []
    albums = getAlbums(artist)
    d = dict((el, word) for el in albums)
    for sc in pool.starmap(getScoreAl, d.items()):  # adding score for each album
        score = score + sc
    pool.terminate()  # terminating for safe operation
    return score
def loader(df):
    if not os.path.exists(OUT_DIR):
        os.mkdir(OUT_DIR)

    key_url_list = parse_data(df)
    pool = multiprocessing.pool.ThreadPool(processes=NUM_WORKERS)
    failures = sum(pool.imap_unordered(Downloader(), key_url_list))
    print('Total number of download failures: %s out of %s' % (failures, len(key_url_list)))
    pool.close()
    pool.terminate()
def buildList(self):
    """
    Build the artifact "list" from sources defined in the given configuration.

    :returns: Dictionary described above.
    """
    priority = 0
    pool_dict = {}

    for source in self.configuration.artifactSources:
        priority += 1
        pool = pool_dict.setdefault(source['type'],
                                    ThreadPool(self.MAX_THREADS_DICT[source['type']]))
        pool.apply_async(self._read_artifact_source, args=[source, priority],
                         callback=self._add_result)

    for pool in pool_dict.values():
        pool.close()

    at_least_1_runs = True
    all_keys = range(1, len(self.configuration.artifactSources) + 1)
    finished = False
    while at_least_1_runs:
        for i in range(30):
            time.sleep(1)

            if not self.errors.empty():
                for pool in pool_dict.values():
                    logging.debug("Terminating pool %s", str(pool))
                    pool.terminate()
                finished = True
                break

        at_least_1_runs = False
        if not finished:
            self.results_lock.acquire()
            finished = sorted(list(self.results.keys()))
            self.results_lock.release()

            if all_keys != finished:
                logging.debug("Still waiting for priorities %s to finish",
                              str(list(set(all_keys) - set(finished))))
                at_least_1_runs = True

    for pool in pool_dict.values():
        if pool._state != multiprocessing.pool.TERMINATE:
            pool.join()

    if not self.errors.empty():
        raise RuntimeError("%i error(s) occurred during reading of artifact list."
                           % self.errors.qsize())

    return self._get_artifact_list()
def thread_pool(size):
    """
    A context manager that yields a thread pool of the given size. On normal closing,
    this context manager closes the pool and joins all threads in it. On exceptions, the pool
    will be terminated but threads won't be joined.
    """
    pool = multiprocessing.pool.ThreadPool(processes=size)
    try:
        yield pool
    except:
        pool.terminate()
        raise
    else:
        pool.close()
        pool.join()
def get_builds(db, jobs_dir, metadata, threads, client_class):
    """
    Adds information about tests to a dictionary.

    Args:
        jobs_dir: the GCS path containing jobs.
        metadata: a dict of metadata about the jobs_dir.
        threads: how many threads to use to download build information.
        client_class: a constructor for a GCSClient (or a subclass).
    """
    gcs = client_class(jobs_dir, metadata)

    print('Loading builds from %s' % jobs_dir)
    sys.stdout.flush()

    builds_have = db.get_existing_builds(jobs_dir)
    print('already have %d builds' % len(builds_have))
    sys.stdout.flush()

    jobs_and_builds = gcs.get_builds(builds_have)
    pool = None
    if threads > 1:
        pool = multiprocessing.Pool(threads, mp_init_worker,
                                    (jobs_dir, metadata, client_class))
        builds_iterator = pool.imap_unordered(
            get_started_finished, jobs_and_builds)
    else:
        global WORKER_CLIENT  # pylint: disable=global-statement
        WORKER_CLIENT = gcs
        builds_iterator = (
            get_started_finished(job_build) for job_build in jobs_and_builds)

    try:
        for n, (build_dir, started, finished) in enumerate(builds_iterator):
            print(build_dir)
            if started or finished:
                db.insert_build(build_dir, started, finished)
            if n % 200 == 0:
                db.commit()
    except KeyboardInterrupt:
        if pool:
            pool.terminate()
        raise
    else:
        if pool:
            pool.close()
            pool.join()
    db.commit()
def ScopedPool(*args, **kwargs):
    if kwargs.pop('kind', None) == 'threads':
        pool = multiprocessing.pool.ThreadPool(*args, **kwargs)
    else:
        orig, orig_args = kwargs.get('initializer'), kwargs.get('initargs', ())
        kwargs['initializer'] = initer
        kwargs['initargs'] = orig, orig_args
        pool = multiprocessing.pool.Pool(*args, **kwargs)

    try:
        yield pool
        pool.close()
    except:
        pool.terminate()
        raise
    finally:
        pool.join()
def list_all_machines(cloud_ids, headers):
    "Given the cloud ids, run queries in parallel to get all machines"
    def list_one_cloud(cloud_id):
        cloud_machines = requests.get('https://mist.io/clouds/%s/machines' % cloud_id,
                                      headers=headers)
        if cloud_machines.status_code == 200:
            machines = cloud_machines.json()
            for machine in machines:
                machine['cloud'] = cloud_id
            return machines
        return []

    pool = multiprocessing.pool.ThreadPool(8)
    results = pool.map(list_one_cloud, cloud_ids)
    pool.terminate()

    machines = []
    for result in results:
        machines.extend(result)
    return machines
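# A hypothetical call of list_all_machines() above; the token and cloud ids are
# placeholders, and the mist.io endpoint is the one hard-coded in the function.
headers = {'Authorization': 'insert_api_token_here'}
machines = list_all_machines(['cloud-id-1', 'cloud-id-2'], headers)
print('%d machines found' % len(machines))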
def master_progress(mpu, num_processes, bucket, upload_list):
    x = 0
    print "proc = ?? " + str(num_processes)
    while True:
        try:
            if x != num_parts:
                # logger.error(str(src.name) + " start ")
                pool = NoDaemonProcessPool(processes=num_processes)
                value = pool.map_async(do_part_upload,
                                       gen_args(x, fold_last, upload_list)).get(99999999)
                print "when to finish??????"
                # print "dadadada " + str(value)
                que.put(value)
                src.close()
                mpu.complete_upload()
                logger.error(str(src.name) + " stop ")
                # proc = subprocess.Popen('date', stdout=subprocess.PIPE)
                # print stdout
                print "mpu.complete src name " + src.name
                # os.remove(src.name)
                # print "index in proc = " + str(FileList.index(uploadFileNames))
                lock.acquire()
                status_list[FileList.index(uploadFileNames)] = 'finish'
                print src.name + " finish " + str(status_list)
                critical_threadnum(Total_Threadnum, Threadnum, num_processes)
                print uploadFileNames + " add back now is " + str(Threadnum.value)
                lock.release()
                src.close()
                return value
                # pool.terminate()
            break
        except KeyboardInterrupt:
            logger.warn("Received KeyboardInterrupt, canceling upload")
            pool.terminate()
            mpu.cancel_upload()
            print "keyboarddddddddddddddddddddddddddddddd"
            break
        except IOError:
            break
        except Exception, err:
            logger.error("Encountered an error, canceling upload aaaaaaaaaaaa")
            print src.name
            logger.error(str(src.name) + str(err))
def _list_machines(self):
    self._machines = []
    backends = self.backends()

    # show only enabled backends
    # enabled_backends = [backend for backend in backends if backend.enabled]

    def _list_one(backend):
        machines = []
        try:
            machines = backend.machines()
        except:
            # could be a cloud with expired creds, so don't fail
            pass
        return machines

    pool = multiprocessing.pool.ThreadPool(10)
    results = pool.map(_list_one, backends)
    pool.terminate()

    for result in results:
        self._machines.extend(result)
def multiprocessing_pool(*args, **kwargs):
    """
    A context manager providing a multiprocessing pool.

    Other Parameters
    ----------------
    \\*args : list, optional
        Arguments.
    \\**kwargs : dict, optional
        Keywords arguments.

    Examples
    --------
    >>> from functools import partial
    >>> def _add(a, b):
    ...     return a + b
    >>> with multiprocessing_pool() as pool:
    ...     pool.map(partial(_add, b=2), range(10))
    ... # doctest: +SKIP
    [2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
    """

    class _DummyPool(object):
        """
        A dummy multiprocessing pool that does not perform multiprocessing.

        Other Parameters
        ----------------
        \\*args : list, optional
            Arguments.
        \\**kwargs : dict, optional
            Keywords arguments.
        """

        def __init__(self, *args, **kwargs):
            pass

        def map(self, func, iterable, chunksize=None):
            """
            Applies given function to each element of given iterable.
            """
            return [func(a) for a in iterable]

        def terminate(self):
            """
            Terminate the process.
            """
            pass

    kwargs['initializer'] = _initializer
    kwargs['initargs'] = ({'scale': get_domain_range_scale()},)

    if _MULTIPROCESSING_ENABLED:
        pool_factory = multiprocessing.Pool
    else:
        pool_factory = _DummyPool

    pool = pool_factory(*args, **kwargs)
    yield pool
    pool.terminate()
def build(self):
    ncpus = num_cpus()
    log.log("Building using " + str(ncpus) + " threads\n", log.BLUE)
    t = time.time()
    if ncpus > 1:
        import multiprocessing
        import multiprocessing.pool
        pool = multiprocessing.Pool(ncpus, initializer=_init_multiprocessing_helper)
        try:
            result = pool.map_async(builder, self.packages, chunksize=1)
            pool.close()
            while not result.ready():
                try:
                    result.get(1)  # seconds
                except multiprocessing.TimeoutError:
                    pass
        except KeyboardInterrupt:
            pool.terminate()
            raise
        pool.terminate()
        pool.join()
        results = result.get(1)

        # fix keyboard interrupt
        # from multiprocessing.pool import IMapIterator
        # def wrapper(func):
        #     def wrap(self, timeout=None):
        #         # Note: the timeout of 1 googol seconds introduces a rather subtle bug
        #         # for Python scripts intended to run many times the age of the universe.
        #         return func(self, timeout=timeout if timeout is not None else 1e100)
        #     return wrap
        # IMapIterator.next = wrapper(IMapIterator.next)
        # with multiprocessing.pool.Pool(ncpus) as pool:
        #     results = pool.map(builder, self.packages)
    else:
        results = []
        for path in self.packages:
            results.append(builder(path))

    log.log("TOTAL Time spent Compiling: %s Seconds\n" % (time.time() - t), log.BLUE)

    errors = [r for r in results if r]
    if errors:
        return ''.join(errors)
    else:
        log.log("There were no errors", log.GREEN)
        return False
def _spider(url, visited, root, depth, max_depth, raise_on_error):
    """Fetches URL and any pages it links to up to max_depth.

    depth should initially be zero, and max_depth is the max depth of
    links to follow from the root.

    Prints out a warning only if the root can't be fetched; it ignores
    errors with pages that the root links to.

    Returns a tuple of:
    - pages: dict of pages visited (URL) mapped to their full text.
    - links: set of links encountered while visiting the pages.
    """
    pages = {}     # dict from page URL -> text content.
    links = set()  # set of all links seen on visited pages.

    # root may end with index.html -- chop that off.
    if root.endswith('/index.html'):
        root = re.sub('/index.html$', '', root)

    try:
        context = None
        verify_ssl = spack.config.get('config:verify_ssl')
        pyver = sys.version_info
        if (pyver < (2, 7, 9) or (3,) < pyver < (3, 4, 3)):
            if verify_ssl:
                tty.warn("Spack will not check SSL certificates. You need to "
                         "update your Python to enable certificate "
                         "verification.")
        elif verify_ssl:
            # We explicitly create default context to avoid error described in
            # https://blog.sucuri.net/2016/03/beware-unverified-tls-certificates-php-python.html
            context = ssl.create_default_context()
        else:
            context = ssl._create_unverified_context()

        # Make a HEAD request first to check the content type.  This lets
        # us ignore tarballs and gigantic files.
        # It would be nice to do this with the HTTP Accept header to avoid
        # one round-trip.  However, most servers seem to ignore the header
        # if you ask for a tarball with Accept: text/html.
        req = Request(url)
        req.get_method = lambda: "HEAD"
        resp = _urlopen(req, timeout=_timeout, context=context)

        if "Content-type" not in resp.headers:
            tty.debug("ignoring page " + url)
            return pages, links

        if not resp.headers["Content-type"].startswith('text/html'):
            tty.debug("ignoring page " + url + " with content type " +
                      resp.headers["Content-type"])
            return pages, links

        # Do the real GET request when we know it's just HTML.
        req.get_method = lambda: "GET"
        response = _urlopen(req, timeout=_timeout, context=context)
        response_url = response.geturl()

        # Read the page and stick it in the map we'll return
        page = response.read().decode('utf-8')
        pages[response_url] = page

        # Parse out the links in the page
        link_parser = LinkParser()
        subcalls = []
        link_parser.feed(page)

        while link_parser.links:
            raw_link = link_parser.links.pop()
            abs_link = urljoin(response_url, raw_link.strip())
            links.add(abs_link)

            # Skip stuff that looks like an archive
            if any(raw_link.endswith(suf) for suf in ALLOWED_ARCHIVE_TYPES):
                continue

            # Skip things outside the root directory
            if not abs_link.startswith(root):
                continue

            # Skip already-visited links
            if abs_link in visited:
                continue

            # If we're not at max depth, follow links.
            if depth < max_depth:
                subcalls.append((abs_link, visited, root,
                                 depth + 1, max_depth, raise_on_error))
                visited.add(abs_link)

        if subcalls:
            pool = NonDaemonPool(processes=len(subcalls))
            try:
                results = pool.map(_spider_wrapper, subcalls)

                for sub_pages, sub_links in results:
                    pages.update(sub_pages)
                    links.update(sub_links)
            finally:
                pool.terminate()
                pool.join()

    except URLError as e:
        tty.debug(e)

        if hasattr(e, 'reason') and isinstance(e.reason, ssl.SSLError):
            tty.warn("Spack was unable to fetch url list due to a certificate "
                     "verification problem. You can try running spack -k, "
                     "which will not check SSL certificates. Use this at your "
                     "own risk.")

        if raise_on_error:
            raise NoNetworkConnectionError(str(e), url)

    except HTMLParseError as e:
        # This error indicates that Python's HTML parser sucks.
        msg = "Got an error parsing HTML."

        # Pre-2.7.3 Pythons in particular have rather prickly HTML parsing.
        if sys.version_info[:3] < (2, 7, 3):
            msg += " Use Python 2.7.3 or newer for better HTML parsing."

        tty.warn(msg, url, "HTMLParseError: " + str(e))

    except Exception as e:
        # Other types of errors are completely ignored, except in debug mode.
        tty.debug("Error in _spider: %s:%s" % (type(e), e),
                  traceback.format_exc())

    return pages, links
def eval(source, globals_={}, locals_={}):
    """Evaluate Pythonect code in the context of globals and locals.

    Args:
        source: A string representing Pythonect code or a networkx.DiGraph()
            as returned by parse()
        globals: A dictionary.
        locals: Any mapping.

    Returns:
        The return value is the result of the evaluated code.

    Raises:
        SyntaxError: An error occurred parsing the code.
    """
    return_value = None

    # Meaningful program?
    if source != "pass":
        logging.info('Program is meaningful')

        return_value = []
        return_values = []
        globals_values = []
        locals_values = []
        tasks = []
        reduces = {}

        logging.debug('Evaluating %s with globals_ = %s and locals_ %s' % (source, globals_, locals_))

        if not isinstance(source, networkx.DiGraph):
            logging.info('Parsing program...')
            graph = parse(source)
        else:
            logging.info('Program is already parsed! Using source AS IS')
            graph = source

        root_nodes = sorted([node for node, degree in graph.in_degree().items() if degree == 0])

        if not root_nodes:
            cycles = networkx.simple_cycles(graph)
            if cycles:
                logging.info('Found cycles: %s in graph, using nodes() 1st node (i.e. %s) as root node' % (cycles, graph.nodes()[0]))
                root_nodes = [graph.nodes()[0]]

        logging.info('There are %d root node(s)' % len(root_nodes))
        logging.debug('Root node(s) are: %s' % root_nodes)

        # Extend Python's __builtin__ with Pythonect's `lang`
        start_globals_ = __extend_builtins(globals_)
        logging.debug('Initial globals_:\n%s' % pprint.pformat(start_globals_))

        # Default input
        start_globals_['_'] = start_globals_.get('_', locals_.get('_', None))
        logging.info('_ equal %s', start_globals_['_'])

        # Execute Pythonect program
        pool = __create_pool(globals_, locals_)

        # N-1
        for root_node in root_nodes[1:]:
            if globals_.get('__IN_EVAL__', None) is None and not _is_referencing_underscore(graph, root_node):
                # Reset '_'
                globals_['_'] = locals_['_'] = None

            if globals_.get('__IN_EVAL__', None) is None:
                globals_['__IN_EVAL__'] = True

            temp_globals_ = copy.copy(globals_)
            temp_locals_ = copy.copy(locals_)

            task_result = pool.apply_async(_run, args=(graph, root_node, temp_globals_, temp_locals_, {}, None, False))
            tasks.append((task_result, temp_locals_, temp_globals_))

        # 1
        if globals_.get('__IN_EVAL__', None) is None and not _is_referencing_underscore(graph, root_nodes[0]):
            # Reset '_'
            globals_['_'] = locals_['_'] = None

        if globals_.get('__IN_EVAL__', None) is None:
            globals_['__IN_EVAL__'] = True

        result = _run(graph, root_nodes[0], globals_, locals_, {}, None, False)

        # 1
        for expr_return_value in result:
            globals_values.append(globals_)
            locals_values.append(locals_)
            return_values.append([expr_return_value])

        # N-1
        for (task_result, task_locals_, task_globals_) in tasks:
            return_values.append(task_result.get())
            locals_values.append(task_locals_)
            globals_values.append(task_globals_)

        # Reduce + _PythonectResult Grouping
        for item in return_values:
            # Is there _PythonectResult in item list?
            for sub_item in item:
                if isinstance(sub_item, _PythonectResult):
                    # 1st Time?
                    if sub_item.values['node'] not in reduces:
                        reduces[sub_item.values['node']] = []
                        # Add place holder to mark the position in the return value list
                        return_value.append(_PythonectLazyRunner(sub_item.values['node']))

                    reduces[sub_item.values['node']] = reduces[sub_item.values['node']] + [sub_item.values]
                else:
                    return_value.append(sub_item)

        # Any _PythonectLazyRunner's?
        if reduces:
            for return_item_idx in xrange(0, len(return_value)):
                if isinstance(return_value[return_item_idx], _PythonectLazyRunner):
                    # Swap list[X] with list[X.go(reduces)]
                    return_value[return_item_idx] = pool.apply_async(return_value[return_item_idx].go, args=(graph, reduces))

            return_value = __resolve_and_merge_results(return_value)

        # [...] ?
        if return_value:
            # Single return value? (e.g. [1])
            if len(return_value) == 1:
                return_value = return_value[0]

            # Update globals_ and locals_
            # globals_, locals_ = __merge_all_globals_and_locals(globals_, locals_, globals_values, {}, locals_values, {})

        # Set `return value` as `_`
        globals_['_'] = locals_['_'] = return_value

        if globals_.get('__IN_EVAL__', None) is not None:
            del globals_['__IN_EVAL__']

        pool.close()
        pool.join()
        pool.terminate()

    return return_value
def close_multi():
    '''Close multi stuff'''
    if pool is not None:
        pool.terminate()
def _run_next_virtual_nodes(graph, node, globals_, locals_, flags, pool, result):
    operator = graph.node[node].get('OPERATOR', None)

    return_value = []
    not_safe_to_iter = False
    is_head_result = True
    head_result = None

    # "Hello, world" or {...}
    if isinstance(result, (basestring, dict)) or not __isiter(result):
        not_safe_to_iter = True

    # [[1]]
    if isinstance(result, list) and len(result) == 1 and isinstance(result[0], list):
        result = result[0]
        not_safe_to_iter = True

    # More nodes ahead?
    if operator:
        if not_safe_to_iter:
            logging.debug('not_safe_to_iter is True for %s' % result)

            head_result = result

            tmp_globals = copy.copy(globals_)
            tmp_locals = copy.copy(locals_)
            tmp_globals['_'] = tmp_locals['_'] = head_result

            return_value = __resolve_and_merge_results(_run(graph, node, tmp_globals, tmp_locals, {}, None, True))
        else:
            # Originally this was implemented using result[0] and result[1:], but xrange() is not
            # slice-able, thus I have changed it to `for` with a buffer for the 1st result
            for res_value in result:
                logging.debug('Now at %s from %s' % (res_value, result))

                if is_head_result:
                    logging.debug('is_head_result is True for %s' % res_value)

                    is_head_result = False
                    head_result = res_value

                    tmp_globals = copy.copy(globals_)
                    tmp_locals = copy.copy(locals_)
                    tmp_globals['_'] = tmp_locals['_'] = head_result

                    return_value.insert(0, _run(graph, node, tmp_globals, tmp_locals, {}, None, True))
                    continue

                tmp_globals = copy.copy(globals_)
                tmp_locals = copy.copy(locals_)
                tmp_globals['_'] = tmp_locals['_'] = res_value

                # Synchronous
                if operator == '|':
                    return_value.append(pool.apply(_run, args=(graph, node, tmp_globals, tmp_locals, {}, None, True)))

                # Asynchronous
                if operator == '->':
                    return_value.append(pool.apply_async(_run, args=(graph, node, tmp_globals, tmp_locals, {}, None, True)))

            pool.close()
            pool.join()
            pool.terminate()

            logging.debug('return_value = %s' % return_value)

            return_value = __resolve_and_merge_results(return_value)

    # Loopback
    else:
        # AS IS
        if not_safe_to_iter:
            return_value = [result]
        # Iterate for all possible *return values*
        else:
            for res_value in result:
                return_value.append(res_value)

            # Unbox
            if len(return_value) == 1:
                return_value = return_value[0]

    return return_value
def _run_next_graph_nodes(graph, node, globals_, locals_, pool):
    operator = graph.node[node].get('OPERATOR', None)

    nodes_return_value = []
    return_value = None

    # False? Terminate Flow.
    if isinstance(locals_['_'], bool) and locals_['_'] is False:
        return False

    if operator:
        # --> (a)
        # -->  / | \
        #   (b) (c) (d)
        #      \ | /
        #       (e)

        next_nodes = sorted(graph.successors(node))

        # N-1
        for next_node in next_nodes[1:]:
            # Synchronous
            if operator == '|':
                nodes_return_value.append(pool.apply(_run, args=(graph, next_node, globals_, locals_, {}, None, False)))

            # Asynchronous
            if operator == '->':
                nodes_return_value.append(pool.apply_async(_run, args=(graph, next_node, globals_, locals_, {}, None, False)))

        # 1
        nodes_return_value.insert(0, _run(graph, next_nodes[0], globals_, locals_, {}, None, False))

        pool.close()
        pool.join()
        pool.terminate()

        return_value = __resolve_and_merge_results(nodes_return_value)

    else:
        #      (a)
        #     / | \
        #   (b) (c) (d)
        #     \ | /
        # -->  (e)

        return_value = locals_['_']

    return return_value