def check_org(path: Path):
    # TODO not sure about org?
    org_files = sorted(path.rglob('*.org'))
    from concurrent.futures import ProcessPoolExecutor as Pool
    with Pool() as pool:
        for f, res in zip(org_files, pool.map(check_aux, org_files)):
            for x in res:
                # TODO collect errors, report once?
                raise Failed(f, x)
def download(files, outdir=None, bucket='epionengs', threads=3):
    # Default to the working directory at call time, not at import time.
    if outdir is None:
        outdir = os.getcwd()
    if isinstance(files, str):
        with open(files) as fh:
            files = [x.strip() for x in fh]
    cmds = []
    for each in files:
        cmd = 'aws s3 cp --only-show-errors s3://{bucket}/{each} {outdir}'.format(
            bucket=bucket, each=each, outdir=outdir)
        cmds.append(cmd)
    with Pool(threads) as pool:
        pool.map(run_cmd, cmds)
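# Hypothetical usage sketch for download(): `files` may be a manifest path (one
# S3 key per line) or an iterable of keys. The manifest name is made up here;
# run_cmd and Pool come from the surrounding module.
if __name__ == '__main__':
    download('manifest.txt', outdir='/data/raw', threads=8)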
async def bootstrap_server(app, loop):  # pragma: no cover
    '''Preload stuff'''
    with open('VERSION', 'r') as f:
        app.VERSION = f.read()
    app.model = Model()
    app.executor = Pool(max_workers=10)
def simple_zip_output(self, jobspec, tmp_log):
    tmp_log.debug('start')
    self.zip_tmp_log = tmp_log
    self.zip_jobSpec = jobspec
    argDictList = []
    try:
        for fileSpec in jobspec.outFiles:
            if self.zipDir == "${SRCDIR}":
                # the same directory as src
                zipDir = os.path.dirname(next(iter(fileSpec.associatedFiles)).path)
            elif self.zipDir == "${WORKDIR}":
                # work dir
                workSpec = jobspec.get_workspec_list()[0]
                zipDir = workSpec.get_access_point()
            else:
                zipDir = self.zipDir
            zipPath = os.path.join(zipDir, fileSpec.lfn)
            argDict = dict()
            argDict['zipPath'] = zipPath
            argDict['associatedFiles'] = []
            for assFileSpec in fileSpec.associatedFiles:
                if os.path.exists(assFileSpec.path):
                    argDict['associatedFiles'].append(assFileSpec.path)
                else:
                    assFileSpec.status = 'failed'
            argDictList.append(argDict)
        # parallel execution
        try:
            nThreadsForZip = harvester_config.stager.nThreadsForZip
        except Exception:
            nThreadsForZip = multiprocessing.cpu_count()
        with Pool(max_workers=nThreadsForZip) as pool:
            retValList = pool.map(self.make_one_zip, argDictList)
            # check returns
            for fileSpec, retVal in zip(jobspec.outFiles, retValList):
                tmpRet, errMsg, fileInfo = retVal
                if tmpRet is True:
                    # set path
                    fileSpec.path = fileInfo['path']
                    fileSpec.fsize = fileInfo['fsize']
                    fileSpec.chksum = fileInfo['chksum']
                    msgStr = 'fileSpec.path - {0}, fileSpec.fsize - {1}, fileSpec.chksum(adler32) - {2}' \
                        .format(fileSpec.path, fileSpec.fsize, fileSpec.chksum)
                    tmp_log.debug(msgStr)
                else:
                    tmp_log.error('got {0} with {1} when zipping {2}'.format(
                        tmpRet, errMsg, fileSpec.lfn))
                    return tmpRet, 'failed to zip with {0}'.format(errMsg)
    except Exception:
        errMsg = core_utils.dump_error_message(tmp_log)
        return False, 'failed to zip with {0}'.format(errMsg)
    tmp_log.debug('done')
    return True, ''
def download(urls, dest_dir, jobs=PARALLEL_DOWNLOADS):
    with Pool(max_workers=jobs) as pool:
        futures = [pool.submit(download1, url, dest_dir) for url in urls]
        try:
            for future in as_completed(futures):
                wheel_name = future.result()
                yield wheel_name
        except KeyboardInterrupt:
            for future in futures:
                future.cancel()
            raise
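# Minimal usage sketch (hypothetical URL): download() is a generator, so it must
# be iterated for results to be consumed; on Ctrl-C, Future.cancel() only stops
# downloads that have not started running yet.
for wheel_name in download(['https://example.org/pkg-1.0-py3-none-any.whl'], 'wheels'):
    print('downloaded', wheel_name)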
def _get_pool(pool_type: Type, max_workers: int) -> Pool:
    if pool_type == ThreadPool:
        pool = ThreadPool(max_workers)
        setattr(pool, "imap", pool.map)
        return pool
    elif pool_type == Pool:
        pool = Pool(max_workers, mp_context=get_context("spawn"))
        setattr(pool, "imap", pool.map)
        return pool
    else:
        raise TypeError(f"Unknown pool type: {pool_type}")
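# Hypothetical usage sketch, assuming ThreadPool and Pool alias
# concurrent.futures.ThreadPoolExecutor and ProcessPoolExecutor (the mp_context=
# keyword above implies the latter). Not part of the original code.
with _get_pool(ThreadPool, max_workers=4) as pool:
    # imap is the alias patched onto the executor; it delegates to Executor.map.
    results = list(pool.imap(str.upper, ['a', 'b', 'c']))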
def update_one(self, yt_channel_id) -> None:
    """Check the given channel for new videos."""
    channels = self.database.get_channels_filter(yt_channel_id)  # for channel
    num_workers = unpack_optional(os.cpu_count(), lambda: 1) * 2
    with Pool(num_workers) as pool:
        videos = chain.from_iterable(pool.map(self._update_channel, channels))
    self.database.add_videos(videos)
def applybpe(codes, outdir, files):
    outdir = Path(outdir)
    bpe = create_subword_bpe(codes)
    with Pool() as pool:
        for path in files:
            path = Path(path)
            outfile = outdir.joinpath(path.name)
            with open(path, 'r') as fp:
                bped = pool.map(bpe.process_line, fp, chunksize=1024)
                outfile.write_text(''.join(bped))
def main():
    output_dict = collections.defaultdict(list)
    nproc = args.nproc
    with Pool(nproc) as pool:
        for i, output_data in enumerate(pool.map(calc_vpf, range(args.Nvpf))):
            if i % nproc == nproc - 1:
                print(i)
                print(datetime.now())
            output_dict['vpf'].append(output_data)
    with Pool(nproc) as pool:
        for i, output_data in enumerate(pool.map(calc_ds, range(args.Nds))):
            if i % nproc == nproc - 1:
                print(i)
                print(datetime.now())
            output_dict['deltasigma'].append(output_data)
    for name in output_names:
        output_dict[name] = np.array(output_dict[name])
    np.savez(args.outfile, **output_dict)
def _tokenize_mp(lines):
    length = len(lines)
    cpus = os.cpu_count()
    chunks = list(map('\n'.join, chunked(lines, 10000)))
    with Pool(cpus) as pool:
        output = pool.map(_tokenize, chunks)
    output = list(chain.from_iterable(i.split('\n') for i in output))
    assert length == len(output), 'Input: {} lines, output: {} lines'.format(
        length, len(output))
    return output
def feature_matrix(xs: Iterable[T], featurize: Callable[[T], np.ndarray],
                   ncpu: int = 0) -> np.ndarray:
    """Calculate the feature matrix of xs with the given featurization function."""
    if ncpu <= 1:
        X = [featurize(x) for x in tqdm(xs, desc='Featurizing', smoothing=0.)]
    else:
        with Pool(max_workers=ncpu) as pool:
            X = list(tqdm(pool.map(featurize, xs), desc='Featurizing'))
    return np.array(X)
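# Hypothetical usage sketch: with ncpu > 1 the featurize callable is shipped to
# worker processes, so it must be a picklable top-level function like this one.
def _toy_featurize(s: str) -> np.ndarray:
    return np.array([len(s), s.count('a')], dtype=float)

X = feature_matrix(['abc', 'aardvark', 'zebra'], _toy_featurize, ncpu=2)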
def run(self):
    """Run across multiple processes."""
    tasks = []
    ranges = self.cutRange(self.start, self.end, self.workers)
    start_time = time.time()
    with Pool(max_workers=self.workers) as executor:
        for start, end in ranges:
            print("processor start: %s, end: %s" % (start, end))
            tasks.append(executor.submit(self.singleProcess, start, end))
        for task in tasks:
            task.result()
    print("total time: %s" % (time.time() - start_time))
def run(self):
    """Runs emacsclient."""
    if self._lisp is not None:
        self._cmd.append('-e')
        self._cmd.append(self._lisp)
    elif len(self._files) > 0:
        for f in self._files:
            self._cmd.append(f)
    pool = Pool(max_workers=1)
    proc = pool.submit(subprocess.call, self._cmd, env=self._env)
    return proc
def main(model_gen_func, fiducial, output_fname):
    global model
    model = model_gen_func()
    global fid
    fid = np.array(fiducial)
    params = fid * np.ones((7 * args.Nparam, 7))
    dp_range = np.array((0.11, 0.05, 0.225, 0.9, 0.12))
    for i in range(5):
        params[args.Nparam * i:args.Nparam * (i + 1), i] += \
            (2. * np.random.random(args.Nparam) - 1) * min(dp_range[i], fid[i])
    params[args.Nparam * 5:args.Nparam * 6, 5] = 2. * np.random.random(args.Nparam) - 1
    params[args.Nparam * 6:args.Nparam * 7, 6] = 2. * np.random.random(args.Nparam) - 1

    output_dict = collections.defaultdict(list)
    nproc = args.nproc
    global halocat

    with Pool(nproc) as pool:
        if args.simname == 'consuelo20' and args.version == 'all':
            for box in consuelo20_box_list:
                halocat = CachedHaloCatalog(simname=args.simname, version_name=box,
                                            redshift=args.redshift,
                                            halo_finder=args.halofinder)
                model.populate_mock(halocat)
                for i, output_data in enumerate(pool.map(calc_all_observables, params)):
                    if i % nproc == nproc - 1:
                        print(i)
                        print(datetime.now())
                    for name, data in zip(output_names, output_data):
                        output_dict[name].append(data)
                print(box)
        else:
            halocat = CachedHaloCatalog(simname=args.simname, version_name=args.version,
                                        redshift=args.redshift,
                                        halo_finder=args.halofinder)
            model.populate_mock(halocat)
            for i, output_data in enumerate(pool.map(calc_all_observables, params)):
                if i % nproc == nproc - 1:
                    print(i)
                    print(datetime.now())
                for name, data in zip(output_names, output_data):
                    output_dict[name].append(data)

    for name in output_names:
        output_dict[name] = np.array(output_dict[name])
    np.savez(output_fname, **output_dict)
def evolve(self, n_pop, env, data=None):
    models = [(self.clone().noise(std=100), i) for i in range(n_pop)]
    with Pool(8) as p:
        rewards = p.map(lambda mdl: env(data, *mdl), models)
        rewards = np.array(list(rewards))
    goods = sum(rewards >= 1)
    total = sum(rewards)
    for m, r in zip(models, rewards):
        m = m[0]
        for tp, mp in zip(self.parameters(), m.parameters()):
            tp.add_(mp * 0.1 * (int(r) / int(total)))
    return goods
def main(max_workers):
    logger.info("begin")
    with Pool(max_workers=max_workers) as pool:
        for file in listdir('flv'):
            file = 'flv/' + file
            f = pool.submit(
                call,
                'ffmpeg -loglevel quiet -y -i {0} -vcodec copy -acodec copy {1}.mp4'
                .format(file, splitext(file)[0]),
                shell=True)
            f.add_done_callback(callback)
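# Hypothetical sketch of the `callback` used above: done-callbacks receive the
# finished Future, so the ffmpeg return code comes out of future.result().
def callback(future):
    print('ffmpeg exited with code', future.result())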
def work(self):
    try:
        products = self.get_products()
        with Pool(32) as pl:
            pl.map(self.get_data_by_id, products)
    except Exception as why:
        self.logger.error('fail to push wish template because of {}'.format(why))
        name = os.path.basename(__file__).split(".")[0]
        raise Exception(f'fail to finish task of {name}')
    finally:
        self.close()
def scrape_todos_links(index: Dict) -> Dict:
    index = deepcopy(index)

    def mapper(k: str, v: Dict) -> Tuple[str, Dict]:
        scraper = configuration['scrape_links'](k)  # type: ignore
        return k, scrape_links_topico(v, scraper=scraper)

    print('Scraping links from:\n\t' + "\n\t".join(index))
    with Pool(10) as executor:
        return dict(executor.map(mapper, *zip(*index.items())))
def main(model_gen_func, params_fname, params_usecols, output_fname):
    global model
    model = model_gen_func()
    median_w = np.median(np.loadtxt(params_fname, usecols=params_usecols), axis=0)
    params = median_w * np.ones((500 + 7 * 1000, 7))  # take medians
    dp_range = np.array((0.5, 0.5, 0.25, 0.5, 0.5, 1, 1))
    for i in params_usecols:
        params[1000 * i + 500:1000 * i + 1500, i] += \
            (2. * np.random.random(1000) - 1) * dp_range[i]

    output_dict = collections.defaultdict(list)
    nproc = args.nproc
    global halocat
    global c

    with Pool(nproc) as pool:
        if args.simname == 'consuelo20' and args.version == 'all':
            for box in consuelo20_box_list:
                halocat = CachedHaloCatalog(simname=args.simname, version_name=box,
                                            redshift=args.redshift,
                                            halo_finder=args.halofinder)
                model.populate_mock(halocat)
                c = Ngal_estimate(halocat, 150000)
                for i, output_data in enumerate(pool.map(calc_all_observables, params)):
                    if i % nproc == nproc - 1:
                        print(i)
                        print(datetime.now())
                    for name, data in zip(output_names, output_data):
                        output_dict[name].append(data)
                print(box)
        else:
            halocat = CachedHaloCatalog(simname=args.simname, version_name=args.version,
                                        redshift=args.redshift,
                                        halo_finder=args.halofinder)
            model.populate_mock(halocat)
            c = Ngal_estimate(halocat, 150000)
            for i, output_data in enumerate(pool.map(calc_all_observables, params)):
                if i % nproc == nproc - 1:
                    print(i)
                    print(datetime.now())
                for name, data in zip(output_names, output_data):
                    output_dict[name].append(data)

    for name in output_names:
        output_dict[name] = np.array(output_dict[name])
    np.savez(output_fname, **output_dict)
def map_par(self, run_num):
    '''
    Maps out which stim and score function pairs should be evaluated in
    parallel. First it finds the pairs with the highest weights, then maps
    them, and then adds up the score for each stim for every individual.

    Parameters
    --------------------
    run_num: the number of times neuroGPU has run for 8 stims

    Return
    --------------------
    2d list of scalar scores for each parameter set w/ shape (nindv, nstims)
    '''
    # 52 stim-sf combinations (stim#, sf#)
    fxnsNStims = utils.top_SFs(run_num, score_function_ordered_list, self.weights, nGpus)
    with Pool(nCpus) as p:  # parallel mapping
        res = p.map(self.eval_stim_sf_pair, fxnsNStims)
        # important: map returns results with shape (# of sf stim pairs, nindv)
        res = np.array(list(res))
    res = res[:, :]
    prev_sf_idx = 0
    # look at key of each stim score pair to see how many stims to sum
    # num_selected_stims = len(set([pair[0] for pair in fxnsNStims]))  # not always using 8 stims
    last_stim = (run_num + 1) * nGpus  # ie: 0th run last_stim = (0+1)*8 = 8
    first_stim = last_stim - nGpus  # on the last round this will be 24 - 8 = 16
    if last_stim > 18:
        last_stim = 18
    # print(last_stim, first_stim, "last and first")
    for i in range(first_stim, last_stim):  # iterate stims and sum
        # find how many sf indices for this stim
        num_sfs = sum([1 for pair in fxnsNStims if pair[0] == i])
        # print([pair for pair in fxnsNStims if pair[0] == i], "pairs from : ", run_num)
        # print(fxnsNStims[prev_sf_idx:prev_sf_idx + num_sfs], "Currently evaluating")
        if i % nGpus == 0:
            weighted_sums = np.reshape(
                np.sum(res[prev_sf_idx:prev_sf_idx + num_sfs, :], axis=0), (-1, 1))
        else:
            # print(prev_sf_idx, "stim start idx", num_sfs, "stim end idx")
            curr_stim_sum = np.sum(res[prev_sf_idx:prev_sf_idx + num_sfs, :], axis=0)
            curr_stim_sum = np.reshape(curr_stim_sum, (-1, 1))
            weighted_sums = np.append(weighted_sums, curr_stim_sum, axis=1)
            # print(curr_stim_sum.shape, " : cur stim sum SHAPE ", weighted_sums.shape, ": weighted sums shape")
        prev_sf_idx = prev_sf_idx + num_sfs  # update score function tracking index
    return weighted_sums
def process_chunk(wr, chunk, pbar):
    with Pool(int(cpu_count() * args.cpu)) as p:
        chunksize = len(chunk) // 10000 + 1
        d = list(set([x[0] for x in chunk]) | set([x[1] for x in chunk]))
        d = {k: v for k, v in zip(d, p.map(name_preprocessing, d, chunksize=chunksize))}
        a = [(x[0], d[x[0]]) for x in chunk]
        b = [(x[1], d[x[1]]) for x in chunk]
        for res in p.map(do, zip(a, b), chunksize=chunksize):
            pbar.update(1)
            if res:
                wr.writerow(res)
def update_all(self) -> None:
    """Check every channel for new videos."""
    channels = self.database.get_channels()
    num_workers = unpack_optional(os.cpu_count(), lambda: 1) * 2
    with Pool(num_workers) as pool:
        videos = chain.from_iterable(pool.map(self._update_channel, channels))
    try:
        self.database.add_videos(videos)
    except sqlalchemy.exc.OperationalError as original:
        raise DatabaseOperationalError() from original
def work(self):
    try:
        tokens = self.get_ebay_token()
        with Pool(2) as pl:
            pl.map(self.update_inventory, tokens)
    except Exception as why:
        self.logger.error('fail to update ebay inventory because of {}'.format(why))
        name = os.path.basename(__file__).split(".")[0]
        raise Exception(f'fail to finish task of {name}')
    finally:
        self.close()
def main():
    epilog = """
environment variables:
  GITHUB_API_TOKEN\tGitHub API token used when updating github packages
"""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter, epilog=epilog)
    parser.add_argument('package', type=str, nargs='+')
    parser.add_argument('--target', type=str, choices=SEMVER.keys(), default='major')
    parser.add_argument('--commit', action='store_true',
                        help='Create a commit for each package update')
    parser.add_argument(
        '--use-pkgs-prefix', action='store_true',
        help='Use python3Packages.${pname}: instead of python: ${pname}: when making commits')
    args = parser.parse_args()
    target = args.target
    packages = list(map(os.path.abspath, args.package))

    logging.info("Updating packages...")
    # Use threads to update packages concurrently
    with Pool() as p:
        results = list(filter(bool, p.map(lambda pkg: _update(pkg, target), packages)))
    logging.info("Finished updating packages.")

    commit_options = {}
    if args.use_pkgs_prefix:
        logging.info("Using python3Packages. prefix for commits")
        commit_options["pkgs_prefix"] = "python3Packages."

    # Commits are created sequentially.
    if args.commit:
        logging.info("Committing updates...")
        # list forces evaluation
        list(map(lambda x: _commit(**x, **commit_options), results))
        logging.info("Finished committing updates")

    count = len(results)
    logging.info("{} package(s) updated".format(count))
def build_extensions(self):
    """Function to monkey-patch distutils.command.build_ext.build_ext.build_extensions."""
    self.check_extensions_list(self.extensions)
    try:
        num_jobs = os.cpu_count()
    except AttributeError:
        num_jobs = multiprocessing.cpu_count()
    with Pool(num_jobs) as pool:
        pool.map(self.build_extension, self.extensions)
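# Hypothetical wiring sketch: the docstring says this monkey-patches distutils'
# build_ext, which in a setup.py would look roughly like the assignment below.
from distutils.command.build_ext import build_ext
build_ext.build_extensions = build_extensions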
def main(model_gen_func, params_fname, params_usecols, output_fname):
    global model
    model = model_gen_func()
    nparams = args.Nreal * 77
    median_w = np.median(np.loadtxt(params_fname, usecols=params_usecols), axis=0)
    params = median_w * np.ones((nparams, 7))  # take medians
    for i in params_usecols:
        for j in range(11):
            params[11 * args.Nreal * i + args.Nreal * j:
                   11 * args.Nreal * i + args.Nreal * j + args.Nreal, i] += \
                args.stepsize[i] * (j - 5)

    output_dict = collections.defaultdict(list)
    nproc = args.nproc
    global halocat

    with Pool(nproc) as pool:
        if args.simname == 'consuelo20' and args.version == 'all':
            for box in consuelo20_box_list:
                halocat = CachedHaloCatalog(simname=args.simname, version_name=box,
                                            redshift=args.redshift,
                                            halo_finder=args.halofinder)
                model.populate_mock(halocat)
                for i, output_data in enumerate(pool.map(calc_all_observables, params)):
                    if i % nproc == nproc - 1:
                        print(i)
                        print(datetime.now())
                    for name, data in zip(output_names, output_data):
                        output_dict[name].append(data)
                print(box)
        else:
            halocat = CachedHaloCatalog(simname=args.simname, version_name=args.version,
                                        redshift=args.redshift,
                                        halo_finder=args.halofinder)
            model.populate_mock(halocat)
            for i, output_data in enumerate(pool.map(calc_all_observables, params)):
                if i % nproc == nproc - 1:
                    print(i)
                    print(datetime.now())
                for name, data in zip(output_names, output_data):
                    output_dict[name].append(data)

    for name in output_names:
        output_dict[name] = np.array(output_dict[name])
    np.savez(output_fname, **output_dict)
def __init__(self, server, nickname, port, channel, password):
    self.socket = socket.socket()
    self.server = server
    self.nickname = nickname
    self.port = port
    self.channel = channel
    self.password = password
    self.config = Config()
    self.osu = OsuApi(self.config.osu_api_key)
    self.pool = Pool(8)
    self.engine = create_engine(self.config.engine_str, **self.config.engine_args)
    self.Session = scoped_session(sessionmaker(bind=self.engine))
    self.senders = {}
def fortran_execute():
    from concurrent.futures import ProcessPoolExecutor as Pool
    args = "sleep 2; echo complete"
    pool = Pool(max_workers=1)
    future = pool.submit(test)
    future.run_type = "run_type"
    future.jid = "jid"
    future.add_done_callback(fortran_callback)
    print("Fortran executed")
    return 111
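# Hypothetical sketch of fortran_callback: the attributes attached to the Future
# above (run_type, jid) are visible here because the done-callback receives that
# same Future object once it completes.
def fortran_callback(future):
    print(future.jid, future.run_type, 'result:', future.result())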
def QA_Fetcher(code, type_):
    with Pool(max_workers=40) as executor:
        future_tasks = [executor.submit(single_task, code, type_)]
        for f in future_tasks:
            if f.running():
                print('%s is running' % str(f))
        for f in as_completed(future_tasks):
            try:
                if f.done():
                    data = f.result()
                    return data
            except Exception as e:
                f.cancel()
                print(str(e))
def run_predictor(input_folder: str):
    pool = Pool(max_workers=1)
    f = pool.submit(
        subprocess.call,
        f"""python predict.py \
            --sensitive --Transformation TPS --FeatureExtraction ResNet \
            --SequenceModeling BiLSTM --Prediction Attn \
            --saved_model /app/TPS-ResNet-BiLSTM-Attn-case-sensitive.pth \
            --workers 1 \
            --image_folder {input_folder}""",
        cwd="/app/deep-text-recognition-benchmark",
        shell=True,
    )
    f.add_done_callback(callback)
    pool.shutdown(wait=True)