def main():
    """Trim every video described in the config file using a process pool.

    The config path, input/output folders and worker count come from
    ``parseArgs()``. ``Parser`` is iterated for ``(videoName, trimConfigs)``
    pairs and ``Trimmer.trim`` is mapped over each ``trimConfigs``.

    Raises:
        Exception: whatever ``trimmer.trim`` raised in a worker. Previously
            the lazy ``map`` results were never read, so failures were lost.
    """
    configFile, infolder, outfolder, max_workers = parseArgs()
    trimmer = Trimmer(infolder, outfolder)
    parser = Parser(configFile)
    # The context manager waits for outstanding work and releases the workers.
    with ProcessPoolExecutor(max_workers) as pool:
        # Queue the trims of every video first so the pool stays saturated...
        pending = [pool.map(trimmer.trim, trimConfigs)
                   for videoName, trimConfigs in parser]
        # ...then drain the iterators so that worker exceptions surface here.
        for results in pending:
            for _ in results:
                pass
def objective(x):
    """Run all test cases with parameter ``x`` and return the negated mean.

    Every file in ``./testcase/`` whose name contains ``.in`` is passed to
    ``run_experiment`` and then to ``parse_log``, both in a process pool.
    The values returned by ``parse_log`` are averaged.

    Args:
        x: value forwarded to each worker as the third task argument.

    Returns:
        ``-mean`` of the ``parse_log`` results.
    """
    logger.info('x: {}'.format(x))
    now = datetime.now()
    logdir = os.path.join('./log', now.strftime('%Y%m%d-%H%M%S'))
    os.makedirs(logdir, exist_ok=True)
    inpdir = './testcase/'
    args_list = [
        [os.path.join(inpdir, fname), logdir, x]
        for fname in sorted(os.listdir(inpdir))
        if fname.find('.in') != -1
    ]
    total = len(args_list)
    # ``with`` guarantees the workers are released even if a task fails.
    with ProcessPoolExecutor() as executor:
        # First pass: run the experiments (results are not needed, only
        # completion); iterating also re-raises any worker exception.
        for _ in tqdm(executor.map(run_experiment, args_list),
                      total=total, ncols=0):
            pass
        # Second pass: collect one value per test case from the logs.
        regrets = list(tqdm(executor.map(parse_log, args_list),
                            total=total, ncols=0))
    mean = np.mean(regrets)
    std = np.std(regrets)
    logger.info('mean: {}, std: {}'.format(mean, std))
    return -mean
def execCmd(args):
    """Run a ping or TCP scan over the requested IPs and save the results.

    Args:
        args: parsed options; reads ``ip``, ``fun`` ('ping' or 'tcp'),
            ``num`` (worker count) and ``save`` (passed to ``saveExec``).

    Returns:
        None. Returns early when no IPs resolve or ``fun`` is unknown.
    """
    ips = getIp(args.ip)
    if not ips:
        return
    # NOTE: feeding tasks through a Queue raised an error inside the process
    # pool, so the work items are passed to ``map`` directly instead.
    if args.fun == 'ping':
        worker, tasks = pingExec, ips
    elif args.fun == 'tcp':
        worker = tcpExec
        tasks = ((ip, port) for ip in ips for port in range(22, 33))
    else:
        print('-f opt must is ping or tcp!')
        return
    # The context manager shuts the pool down even if a task raises.
    with ProcessPoolExecutor(max_workers=args.num) as p:
        result = list(p.map(worker, tasks))
    saveExec(args.save, result)
def crawl_pic():
    """Scroll the image search page to the bottom and download every image.

    A headless Chrome session loads ``PIC_SEARCH_BASE_URL`` and scrolls until
    the page height stops growing. Image URLs are read from the ``src`` or
    ``data-src`` attribute of each ``<img>`` inside the element with id
    ``mmComponent_images_1`` and handed to ``save_pic`` in a process pool.

    Note:
        Exceptions raised by ``save_pic`` in the workers are not surfaced
        (unchanged from the previous behaviour).
    """
    chrome_options = ChromeOptions()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')
    driver = webdriver.Chrome(chrome_options=chrome_options)
    try:
        driver.get(PIC_SEARCH_BASE_URL)
        while True:
            bef_height = driver.execute_script(
                'return document.body.scrollHeight')
            driver.execute_script(
                "window.scrollTo(0,document.body.scrollHeight)")
            time.sleep(1)  # give lazily-loaded content time to arrive
            aft_height = driver.execute_script(
                'return document.body.scrollHeight')
            if aft_height == bef_height:
                break
        page_source = driver.page_source
    finally:
        # Always terminate the browser; it was previously left running.
        driver.quit()

    soup = BeautifulSoup(page_source, 'lxml')
    div = soup.find(id='mmComponent_images_1')
    # The container may be missing; treat that as "no images" rather than
    # failing with AttributeError on None.
    imgs = div.find_all('img') if div is not None else []
    img_urls = []
    for img in imgs:
        img_url = img.attrs.get('src', None) or img.attrs.get('data-src', None)
        if img_url:
            img_urls.append(img_url)
    print('image urls crawl finished,begin to download ...')
    # Leaving the ``with`` block waits for every download, so the final
    # message is printed only once the work is really done.
    with ProcessPoolExecutor(max_workers=cpu_count()) as pool:
        pool.map(save_pic, img_urls)
    print('download pictures finished ...')
def pmap_main(argv):
    """Run ping/TCP handlers over an integer range and report the results.

    ``arg_parse(argv)`` supplies, in order: worker count, handler kind
    ("tcp" selects ``handler_tcp``, anything else ``handler_ping``), output
    path, pool mode ("proc" or "thread"), a ``(start, stop)`` range and a
    verbosity flag.

    Raises:
        ValueError: if the pool mode is neither "proc" nor "thread"
            (previously this surfaced as an unbound-variable error).
    """
    t_start = time.time()
    arg_n, arg_f, arg_w, arg_m, arg_ip, arg_v = arg_parse(argv)
    if arg_m == "proc":
        pool_cls = ProcessPoolExecutor
    elif arg_m == "thread":
        pool_cls = ThreadPoolExecutor
    else:
        raise ValueError("unknown pool mode: %r" % (arg_m,))
    func_arg = list(range(arg_ip[0], arg_ip[1]))
    handler = handler_tcp if arg_f == "tcp" else handler_ping
    # The context manager waits for completion and always releases workers.
    with pool_cls(max_workers=arg_n) as pool:
        results = list(pool.map(handler, func_arg))
    json_result = {}
    for i in results:
        json_result[i[0]] = i[1]
        print("%s : %s" % (i[0], i[1]))
    if arg_w:
        try:
            with open(arg_w, 'w') as save:
                json.dump(json_result, save)
        except Exception as e:
            print("save result to %s error: %s" % (arg_w, e))
    else:
        print(arg_w)
    if arg_v:
        print("time use: %f" % (time.time() - t_start))
def run_experiments():
    """Run 100 seeded experiments in parallel and log score statistics.

    Tasks are ``[seed, logdir]`` for seeds 1..100. ``run_experiment`` is
    mapped over them first, then ``parse_log``; the mean, standard deviation
    and maximum of the ``parse_log`` results are logged together with the
    1-based index (seed) of the maximum.
    """
    now = datetime.now()
    logdir = os.path.join('./log', now.strftime('%Y%m%d-%H%M%S'))
    os.makedirs(logdir, exist_ok=True)
    args_list = [[i + 1, logdir] for i in range(100)]
    total = len(args_list)
    # ``with`` releases the worker processes even when a task fails.
    with ProcessPoolExecutor() as executor:
        # Iterating forces completion and re-raises worker exceptions.
        for _ in tqdm(executor.map(run_experiment, args_list),
                      total=total, ncols=0):
            pass
        scores = list(tqdm(executor.map(parse_log, args_list),
                           total=total, ncols=0))
    scores = np.array(scores)
    mean = np.mean(scores)
    std = np.std(scores)
    max_score = np.max(scores)
    max_score_idx = np.argmax(scores) + 1  # seeds start at 1
    logger.info('mean: {}, std: {}, max: {}(seed:{})'.format(
        mean, std, max_score, max_score_idx))
def tcp_scan(host):
    """Call ``scan`` on ``(host, port)`` for a fixed list of ports in parallel.

    Args:
        host: target host, paired with each port as the ``scan`` argument.

    Raises:
        Exception: whatever ``scan`` raised in a worker (previously the
            results were never read, so failures went unnoticed).
    """
    ports = [
        53, 80, 8080, 3128, 8081, 9080, 1080, 21, 23, 443, 69, 22, 25, 110,
        7001, 9090, 3389, 1521, 1158, 2100, 1433
    ]
    with ProcessPoolExecutor(max_workers=8) as p:
        # The default chunksize of 1 is used on purpose: the previous
        # chunksize=100 exceeded the 21 tasks, which bundled them all into a
        # single chunk executed by one worker, i.e. no parallelism at all.
        for _ in p.map(scan, [(host, port) for port in ports]):
            pass
def get_wuliaotu(url):
    """Download all images on one page and advance ``start_url``.

    The page is fetched with ``requests``; the link with class
    ``previous-comment-page`` becomes the next ``start_url``. Every
    ``span.img-hash`` element holds a base64 string that decodes to a
    protocol-relative image URL, which is passed to ``download_data``.

    Args:
        url: page to fetch.

    Returns:
        ``False`` (and sets the global ``Flag`` to ``False``) when the page
        cannot be fetched or has no previous-page link; otherwise ``None``.

    Note:
        Exceptions raised inside ``download_data`` workers are not surfaced.
    """
    global start_url, Flag
    try:
        resp = requests.get(url, headers=header)
        bs = BeautifulSoup(resp.text, 'html.parser')
        next_url = 'http:' + bs.find(
            'a', class_='previous-comment-page').get('href')
    except Exception as err:
        print(f'Error:{err}')
        Flag = False
        return Flag
    # Decode the bytes properly instead of slicing their repr(), which broke
    # on quotes, backslashes and non-ASCII characters.
    url_ls = {
        'http:' + base64.b64decode(item.string.encode('utf-8')).decode('utf-8')
        for item in bs.find_all('span', class_='img-hash')
    }
    # Leaving the block waits for the downloads and releases the workers.
    with ProcessPoolExecutor(max_workers=8) as pool:
        pool.map(download_data, url_ls)
    start_url = next_url
    time.sleep(random.randint(3, 6))  # pause between pages
def inference_all_questions(self, question_folder, cpu_num=1):
    """Run inference on every ``.wav`` file in ``question_folder``.

    The working directory is switched to ``question_folder`` for the
    duration of the call and restored afterwards, even on error.

    Args:
        question_folder: directory containing the ``.wav`` files.
        cpu_num: number of workers. With more than one, the files are split
            into chunks handled by ``self.inference_splitted_audios``;
            otherwise each file goes to ``self.inference_audio``.

    Returns:
        ``(question_folder, results)`` where ``results`` holds one entry per
        chunk (``cpu_num > 1``) or per file (otherwise).
    """
    base_dir = os.getcwd()
    print('base_dir: ', base_dir)
    os.chdir(question_folder)
    try:
        print('numcores: ', cpu_num)
        audios = [audio for audio in os.listdir() if audio.endswith('.wav')]
        if cpu_num > 1:
            audios_for_one_process = max(len(audios) // cpu_num, 1)
            print('audios count: ', len(audios))
            print('audio_for_one_process: ', audios_for_one_process)
            # Slice-based chunking; the previous hand-rolled loop silently
            # dropped the last file.
            splitted_audio = [
                audios[start:start + audios_for_one_process]
                for start in range(0, len(audios), audios_for_one_process)
            ]
        else:
            splitted_audio = audios
        print('splitted audios: {}'.format(len(splitted_audio)))
        if cpu_num > 1:
            with Executor(max_workers=cpu_num) as exe:
                futures = [
                    exe.submit(self.inference_splitted_audios, audio)
                    for audio in splitted_audio
                ]
                results = [future.result() for future in tqdm(futures)]
            print(results)
        else:
            # Collect the results here too; previously ``results`` was never
            # assigned on this path and the final return raised NameError.
            with Executor(max_workers=max(cpu_num, 1)) as exe:
                results = list(exe.map(self.inference_audio, splitted_audio))
    finally:
        os.chdir(base_dir)
    return question_folder, results
def main():
    """Fetch daily prices for every entry of ``stocksPool`` in parallel.

    ``get_StocksDailyPrice`` is mapped over ``stocksPool`` with three worker
    processes, and the elapsed wall-clock time is printed at the end.
    """
    t1 = time.time()
    with ProcessPoolExecutor(max_workers=3) as executor:
        try:
            # ``map`` is lazy: worker exceptions are only re-raised while the
            # results are iterated, so the iteration must sit inside the try.
            for _ in executor.map(get_StocksDailyPrice, stocksPool):
                pass
        except Exception as err:
            print('cannot', err)
    # Measured after the pool has finished so it covers the actual work
    # rather than just task submission.
    t2 = time.time()
    duration = t2 - t1
    print("aaa", duration)
def multiproc():
    """Look up every title of the sales CSV with ``metacriticgames``.

    Changes the working directory to ``D:\\OneDrive``, reads the first column
    of ``PSN SUMMER SALES.csv`` and maps ``metacriticgames`` over the titles
    with four worker processes.

    Returns:
        list of ``metacriticgames`` results, in title order (previously the
        results were computed and then discarded).
    """
    os.chdir("D:\\OneDrive")
    # Close the file deterministically; blank rows are skipped because they
    # have no first column.
    with open('PSN SUMMER SALES.csv', newline='') as csvfile:
        titles = [item[0] for item in csv.reader(csvfile) if item]
    with ProcessPoolExecutor(max_workers=4) as pool:
        results = list(pool.map(metacriticgames, titles))
    return results
def main():
    """Run ``double`` in worker processes while collecting their log records.

    A manager-backed queue is shared with the workers; a ``QueueListener``
    drains it into ``GeneralHandler``. Each task is an ``(x, queue)`` pair
    for ``x`` in 0..9, and the list of results is printed.
    """
    wrk = logging.getLogger('worker')
    wrk.setLevel(logging.DEBUG)
    sh = logging.StreamHandler()
    wrk.addHandler(sh)
    # The manager owns a server process; the context manager shuts it down.
    with Manager() as manager:
        q = manager.Queue()
        lp = logging.handlers.QueueListener(
            q, GeneralHandler(), respect_handler_level=False)
        lp.start()
        try:
            numbs = ((x, q) for x in range(10))
            with ProcessPoolExecutor(max_workers=2) as executor:
                vals = list(executor.map(double, numbs))
            print(vals)
        finally:
            # Stop the listener thread even if a worker failed.
            lp.stop()
def my_test_1():
    """Sweep 30 step sizes in parallel and dump the statistics to CSV.

    ``run_many_times`` is mapped over ``pi * linspace(0.001, 0.05, 30)`` with
    three workers. Each result is a ``(label, stats_dict)`` pair; the sorted
    results are printed and written to ``mydata2.csv``.
    """
    step_list = np.pi * np.linspace(0.001, 0.05, 30)
    # Multiprocessing replaces the former sequential list comprehension.
    with Pool(max_workers=3) as pool:
        infor_list = sorted(pool.map(run_many_times, step_list))
    print(infor_list)
    # Open the output once instead of re-opening it in append mode per row.
    with open('mydata2.csv', 'w') as f:
        f.write('{},{},{},{},{},{},{},{},{},{},{}\n'.format(
            'theta', 'avg', 'median', 'best', 'global_best', 'better_count',
            'first_better_generation', 'best_in_many_times',
            'best_x_in_many_times', 'best_y_in_many_times',
            'best_chrom_in_many_times'))
        for s, d in infor_list:
            for k, v in d.items():
                print(k, v)
            f.write("{},{},{},{},{},{},{},{},{},{},{}\n".format(
                s.replace('pi', ''), d['avg_fitness'], d['median_fitness'],
                d['best_fitness'], d['global_best_fitness'],
                d['better_fitness_count'],
                d['first_better_fitness_generation'],
                d['best_fitness_in_many_times'],
                d['best_fitness_solution_in_many_times'][0],
                d['best_fitness_solution_in_many_times'][1],
                d['best_fitness_chrom_in_many_times']))
def main():
    """Time ``my_module.gcd`` over ``NUMBERS`` using two worker processes."""
    start = time.time()
    # The context manager releases the worker processes when done.
    with ProcessPoolExecutor(max_workers=2) as pool:
        # list() forces the lazy map so the timing covers the real work.
        results = list(pool.map(my_module.gcd, NUMBERS))
    end = time.time()
    delta = end - start
    print(f'Took {delta:.3f} seconds')
def process_data():
    """Find entries sharing a ``publicKeyRaw`` value and dump them as JSON.

    ``DATA_FILE`` is loaded into a DataFrame and transposed. For each
    distinct ``publicKeyRaw`` value (first-seen order) ``find_duplicates`` is
    called with ``[value, frame]``; results with more than one entry are
    written to ``collisions.json``.
    """
    with open(DATA_FILE) as f:
        data = json.load(f)
    aux = pandas.DataFrame(data).transpose()
    # dict.fromkeys de-duplicates while preserving first-seen order.
    unique_keys = list(dict.fromkeys(aux['publicKeyRaw']))
    with ProcessPoolExecutor(max_workers=40) as pool:
        # Submit every key in one ``map`` call so the workers actually run in
        # parallel; mapping over a single-item list per key was serial. Each
        # task is pickled for its worker, so the worker already receives a
        # private copy of ``aux`` and no explicit ``.copy()`` is required.
        checks = pool.map(find_duplicates, ([v, aux] for v in unique_keys))
        collisions = [c for c in checks if len(c) > 1]
    with open("collisions.json", 'w') as outfile:
        outfile.write(json.dumps(
            [df.transpose().to_dict() for df in collisions]))
def compute_topographic_similarity_parallel(sentences, features, comprange=100,
                                            max_workers=32):
    """Correlate sentence edit distances with feature cosine similarities.

    For every index, one worker computes Levenshtein distances over
    ``sentences`` and another pass computes cosine similarities over
    ``features``, both restricted by ``comprange``. The two flattened value
    lists are compared with Spearman's rank correlation.

    Args:
        sentences: sequence passed to the Levenshtein worker.
        features: sequence passed to the cosine-similarity worker.
        comprange: comparison range forwarded to both workers.
        max_workers: size of the process pool.

    Returns:
        ``(-rho, p, levs, cossims)``.
    """
    levs = []
    cossims = []
    # One pool serves both passes and is always shut down afterwards.
    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        indices = list(range(len(sentences)))
        for idx1_levs in tqdm(
                executor.map(
                    compute_levenshtein_distance_for_idx_over_comprange,
                    itertools.repeat(sentences), indices,
                    itertools.repeat(comprange)),
                total=len(indices)):
            levs.extend(idx1_levs)
        indices = list(range(len(features)))
        for idx1_cossims in tqdm(
                executor.map(
                    compute_cosine_sim_for_idx_over_comprange,
                    itertools.repeat(features), indices,
                    itertools.repeat(comprange)),
                total=len(indices)):
            cossims.extend(idx1_cossims)
    rho, p = spearmanr(levs, cossims)
    return -rho, p, levs, cossims
def multi_processing_map(self, n_task=2000, depth=35):
    """Evaluate ``self.fib(depth)`` ``n_task`` times in a process pool.

    Args:
        n_task: number of identical tasks to submit.
        depth: argument passed to ``self.fib`` for every task.

    Returns:
        list with the ``n_task`` results, which is also printed.
    """
    # The context manager releases the worker processes when done.
    with ProcessPoolExecutor() as pp:
        result = list(pp.map(self.fib, [depth] * n_task))
    print("Multi Process Result:{}".format(result))
    return result
def parallel_tasks_run(task_function, task_data: list, num_prints: O[int] = None,
                       print_function=default_print_task_function, level=1,
                       task_name: str = 'unnamed-task', num_threads: int = 1,
                       use_process: bool = True) -> list:
    """Apply ``task_function`` to every item, optionally in a worker pool.

    Each item is wrapped as ``[index, total, print_function, payload]`` where
    ``payload`` is the item itself if it is a list, else ``[item]``.

    Args:
        task_function: callable receiving one wrapped item.
        task_data: items to process.
        num_prints: desired number of progress prints; ``None`` or ``0``
            passes ``step_=None`` to ``print_function``.
        print_function: progress callback, bound with ``step_``,
            ``task_name`` and ``level`` before being handed to the tasks.
        level: forwarded to ``print_function``.
        task_name: label used for logging and progress output.
        num_threads: pool size; below 2 (or fewer than 2 items) the tasks
            run sequentially in the caller.
        use_process: process pool if true, thread pool otherwise.

    Returns:
        list of ``task_function`` results in input order.
    """
    num_task = len(task_data)
    logging.info(':: start ({}) #data/#threads = {}/{}, use-process({})'
                 .format(task_name, num_task, num_threads, use_process))
    # A falsy num_prints (None, or 0 which used to divide by zero) disables
    # the step computation.
    step_print = int(np.ceil(num_task / num_prints)) if num_prints else None
    print_function = partial(print_function, step_=step_print,
                             task_name=task_name, level=level)
    task_data = [[xi, num_task, print_function,
                  x if isinstance(x, list) else [x]]
                 for xi, x in enumerate(task_data)]
    if (len(task_data) < 2) or (num_threads < 2):
        return [task_function(x) for x in task_data]
    pool_cls = ProcessPoolExecutor if use_process else ThreadPoolExecutor
    # ``with`` shuts the pool down even when a task raises.
    with pool_cls(max_workers=num_threads) as pool_:
        return list(pool_.map(task_function, task_data))
def para_data_allo_1(Theta, cpu_num, rng, d_struct, xi_n, Data_struct):
    """Evaluate ``para_fun_est`` per row in parallel and average the result.

    ``para_fun_est`` is partially applied with ``Theta``, ``rng``, ``xi_n``
    and ``d_struct['h']`` and mapped over tuples of (row index,
    ``bidder_state``, ``bidder_pos``, ``price_norm``, public columns).

    Returns:
        NaN-ignoring sum of the worker results divided by the number of
        rows, or ``10**5`` when a worker hits a singular matrix.

    Raises:
        SystemExit: on any other ``numpy.linalg.LinAlgError`` (exit code 1).
    """
    time.sleep(0.5)
    TT, _ = Data_struct.shape
    print("the length of the auction is {}".format(TT))
    try:
        func = partial(para_fun_est, Theta, rng, xi_n, d_struct['h'])
        # The context manager releases the workers on success and failure.
        with ProcessPoolExecutor(max_workers=cpu_num) as pool:
            results = pool.map(
                func,
                zip(range(0, TT), Data_struct['bidder_state'],
                    Data_struct['bidder_pos'], Data_struct['price_norm'],
                    Data_struct[Pub_col].values.tolist()))
            # Consuming the iterator re-raises worker exceptions here.
            MoM = np.nansum(list(results))
    except np.linalg.LinAlgError as err:
        if 'Singular matrix' in str(err):
            return 10**5
        print(err)
        # Equivalent to exit(1) without depending on the ``site`` builtin.
        raise SystemExit(1)
    return MoM / TT
def calculateCorrelationsToCentroidInParallel(voxelIndices, allVoxelTs,
                                              centroidIndices, nCPUs=5):
    """
    A wrapper function for calculating the correlation to ROI centroid in
    parallel across ROIs.

    Parameters:
    -----------
    voxelIndices: list of np.arrays, each array containing indices of voxels
                  of one ROI; these indices should refer to voxels' locations
                  in the file containing voxel time series; note that the
                  chunk must contain at least one voxel
    allVoxelTs: structured np.array with a field name 'roi_voxel_ts' (and
                possible additional fields), this field contains voxel time
                series
    centroidIndices: np.array of indices of ROI centroids in allVoxelTs
    nCPUs = int, number of CPUs to be used for the parallel computing
            (default = 5)

    Returns:
    --------
    correlationsToCentroid: list of doubles, correlations of the voxel ts
                            defined by voxelIndices to the ROICentroids
    """
    cfg = {'allVoxelTs': allVoxelTs}
    # One (cfg, params) task per ROI; every task shares the same cfg object.
    paramSpace = [(cfg, {
        'voxelIndices': voxelInd,
        'centroidIndex': centroidIndex
    }) for voxelInd, centroidIndex in zip(voxelIndices, centroidIndices)]
    # The context manager shuts the pool down once all ROIs are processed.
    with Pool(max_workers=nCPUs) as pool:
        correlationsToCentroid = list(
            pool.map(calculateCorrelationToCentroid, paramSpace, chunksize=1))
    return correlationsToCentroid
def main_concurrent_from_future():
    """Time ``gcd`` over ``NUMBERS`` using a two-worker process pool."""
    start = time.time()
    # The context manager releases the worker processes when done.
    with ProcessPoolExecutor(max_workers=2) as pool:
        # list() forces the lazy map so the timing covers the real work.
        results = list(pool.map(gcd, NUMBERS))
    end = time.time()
    delta = end - start
    print(f'Operation took {delta:.3f} seconds\n')
def test_3():
    """Print the ``gcd`` of every pair in ``numbers`` and the time taken."""
    start = time()
    # The context manager releases the worker processes when done.
    with ProcessPoolExecutor(max_workers=2) as pool:
        results = list(pool.map(gcd, numbers))
    print(results)
    end = time()
    print('Took %.3f seconds' % (end - start))
def _convert(args):
    """Convert every image under ``args.sourcedir`` to a ``.bmp`` target.

    Targets keep the source's relative path below ``args.targetdir`` with the
    extension replaced. Sources whose target already exists with a non-zero
    size are skipped; the rest are processed by ``_process_image`` in a
    process pool.

    Args:
        args: options providing ``sourcedir``, ``targetdir`` and
            ``target_smaller_size``.
    """
    sourcedir = Path(args.sourcedir)
    targetdir = Path(args.targetdir)
    os.makedirs(targetdir, exist_ok=True)
    jobs = []
    for source_path in find_images(args.sourcedir):
        source_name = source_path.relative_to(sourcedir)
        # with_suffix only touches the final extension; the old
        # str.replace(suffix, '.bmp') also rewrote matching text elsewhere
        # in the path and mangled names that had no suffix at all.
        target_path = targetdir / source_name.with_suffix('.bmp')
        if target_path.exists() and target_path.stat().st_size > 0:
            continue
        jobs.append(ConvertJob(
            source_path,
            target_path,
            args.target_smaller_size,
        ))
    print(f'{len(jobs)} files to process')
    with ProcessPoolExecutor() as executor:
        # Drain the iterator so worker exceptions are raised here.
        for _ in executor.map(_process_image, jobs):
            pass
    print('Done')
def main():
    """Time ``my_module.gcd`` over ``NUMBERS`` using two worker processes."""
    start = time.time()
    # Only this part differs from the thread-based variant; the context
    # manager additionally releases the worker processes when done.
    with ProcessPoolExecutor(max_workers=2) as pool:
        results = list(pool.map(my_module.gcd, NUMBERS))
    end = time.time()
    delta = end - start
    print(f'총 {delta:.3f} 초 걸림')
def maintain(infile, func):
    """Rewrite a ``|``-delimited file with ``func`` applied to every row.

    All rows are read, processed in a process pool, appended to
    ``infile + ".temp"`` via ``append_to_file`` and the temp file then
    replaces ``infile``.

    Args:
        infile: path of the file to rewrite in place.
        func: callable applied to each parsed row; run in worker processes.
    """
    outfile = infile + ".temp"
    # Always start from an empty temp file. Only creating it when missing
    # meant a leftover temp file from an interrupted run was appended to,
    # duplicating rows in the final output.
    open(outfile, "w", encoding="utf8").close()
    with open(infile, encoding="utf8") as csvfile:
        rows = list(csv.reader(csvfile, delimiter="|"))
    # The context manager releases the worker processes when done.
    with ProcessPoolExecutor(max_workers=60) as pool:
        processed = list(pool.map(func, rows))
    print(processed)
    for row in processed:
        append_to_file(outfile, row)
    os.replace(outfile, infile)
def threading():
    """Return ``gcd`` of four fixed number pairs, computed in two processes.

    Returns:
        list of ``gcd`` results in the order of the input pairs.
    """
    numbers = [(1963309, 2265973), (2030677, 3814172),
               (1551645, 2229620), (2039045, 2020802)]
    # The context manager releases the worker processes when done.
    with ProcessPoolExecutor(max_workers=2) as pool:
        return list(pool.map(gcd, numbers))
class ThreadPool(object):
    """Thread pool implementation paired with a process pool.

    Holds a ``_ThreadPoolExecutor`` for thread tasks, a
    ``ProcessPoolExecutor`` for process tasks and a bounded result queue.
    """

    def __init__(self, thread_num=1, process_num=1, q_size=2000, daemon=True):
        """Create both executors and the result queue.

        Args:
            thread_num: first argument of ``_ThreadPoolExecutor``.
            process_num: worker count of the process pool.
            q_size: maximum size of ``result_queue``.
            daemon: second argument of ``_ThreadPoolExecutor``.
        """
        self.thread_pool = _ThreadPoolExecutor(thread_num, daemon)
        self.process_pool = ProcessPoolExecutor(process_num)
        self.result_queue = Queue(q_size)

    # Defaults below are immutable tuples instead of shared mutable lists.
    def wait(self, threads=()):
        """Pass ``threads`` to ``thread_wait``."""
        thread_wait(threads)

    def add_thread(self, target, args=()):
        """Submit ``target(*args)`` to the thread pool; return its future."""
        return self.thread_pool.submit(target, *args)

    def add_process(self, target, args=()):
        """Submit ``target(*args)`` to the process pool; return its future."""
        return self.process_pool.submit(target, *args)

    def thread_map(self, target, args=()):
        """Submit ``target(arg)`` per item; return the list of futures."""
        return [self.thread_pool.submit(target, arg) for arg in args]

    def process_map(self, target, args=()):
        """Map ``target`` over ``args`` in the process pool (lazy iterator)."""
        return self.process_pool.map(target, args)

    def map(self, target, args=()):
        """Alias of :meth:`process_map`."""
        return self.process_map(target, args)
class ProcessPoolOpInvoker(ModelOpInvoker):
    """Model-op invoker that runs the operation in a process pool.

    The model is persisted (unless it already is a ``PersistedModel``) and
    its key, together with the pickled function, is handed to every worker
    through the pool initializer.
    """

    # Persisted-model key owned by this invoker; stays None when the caller
    # supplied an already-persisted model.
    _close_key = None

    def __init__(self, model, func, n_jobs, persist_method):
        """Persist the model if necessary and start the worker pool.

        Args:
            model: model object or an existing ``PersistedModel``.
            func: operation to run in the workers; pickled here.
            n_jobs: number of worker processes.
            persist_method: method forwarded to ``persist``.
        """
        if not isinstance(model, PersistedModel):
            _log.debug('persisting model with method %s', persist_method)
            model_key = persist(model, method=persist_method)
            # Remember the key so shutdown() can release it.
            self._close_key = model_key
        else:
            _log.debug('model already persisted')
            model_key = model

        _log.debug('persisting function')
        pickled_func = pickle.dumps(func)
        mp_context = LKContext.INSTANCE

        _log.info('setting up ProcessPoolExecutor w/ %d workers', n_jobs)
        os.environ['_LK_IN_MP'] = 'yes'
        child_threads = proc_count(level=1)
        init_args = (model_key, pickled_func, child_threads,
                     log_queue(), get_root_seed())
        self.executor = ProcessPoolExecutor(
            n_jobs, mp_context, _initialize_mp_worker, init_args)

    def map(self, *iterables):
        """Map the worker entry point over ``iterables`` in the pool."""
        return self.executor.map(_mp_invoke_worker, *iterables)

    def shutdown(self):
        """Stop the pool, clear the env marker and close the owned key."""
        self.executor.shutdown()
        os.environ.pop('_LK_IN_MP', 'yes')
        owned_key = self._close_key
        if owned_key is not None:
            owned_key.close()
            del self._close_key
def find_matching_pair(fastq_r1s: Iterable,
                       fastq_r2s: Iterable,
                       primer_seq_fwd: str,
                       primer_seq_rev: str,
                       pool: ProcessPoolExecutor = None) -> Tuple[str, str]:
    """Return the first FastQ pair that ``matches_fastq_pair`` accepts.

    Args:
        fastq_r1s: candidate R1 files, paired positionally with ``fastq_r2s``.
        fastq_r2s: candidate R2 files.
        primer_seq_fwd: forward primer sequence.
        primer_seq_rev: reverse primer sequence.
        pool: optional executor; when given, candidates are checked in
            parallel, otherwise sequentially.

    Returns:
        ``(str(r1), str(r2))`` of the first matching pair.

    Raises:
        ValueError: if no candidate pair matches.
    """
    # Materialise the inputs once: they are iterated up to three times below
    # (map, zip and the error message), which silently produced no matches
    # and a wrong count when one-shot iterators were passed in.
    fastq_r1s = list(fastq_r1s)
    fastq_r2s = list(fastq_r2s)
    if pool:
        bools = pool.map(
            partial(matches_fastq_pair, primer_seq_fwd, primer_seq_rev),
            fastq_r1s, fastq_r2s)
        matches = [(str(r1), str(r2))
                   for r1, r2, is_match in zip(fastq_r1s, fastq_r2s, bools)
                   if is_match]
    else:
        matches = [
            (str(r1), str(r2)) for r1, r2 in zip(fastq_r1s, fastq_r2s)
            if matches_fastq_pair(primer_seq_fwd, primer_seq_rev, r1, r2)
        ]
    if not matches:
        raise ValueError(
            'Cannot find match for primers {} in {} candidate FastQ file pairs'
            .format(
                (primer_seq_fwd, primer_seq_rev),
                len(fastq_r1s),
            ))
    if len(matches) > 1:
        logger.warning('More than one match: {}'.format(matches))
    # Return the first match on the assumption that inputs rows and files are
    # ordered similarly.
    # TODO (gdingle): deal with multi matches better
    return matches[0]
def convert_dataset(path, filemap, name, num_processes, max_num_support,
                    max_tokens, is_web=True):
    """Load a pickled dataset and convert it to instances via ``process``.

    Args:
        path: pickle file holding the dataset (a sliceable sequence).
        filemap, max_num_support, max_tokens, is_web: forwarded to
            ``process`` inside its argument tuple.
        name: stored as ``meta.source`` in the result.
        num_processes: 1 processes the whole dataset in the caller; any
            other value processes chunks of 1000 items in a process pool.

    Returns:
        ``{"meta": {"source": name}, "instances": [...]}``.
    """
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # use this with trusted dataset files.
    with open(path, 'rb') as f:
        dataset = pickle.load(f)

    if num_processes == 1:
        instances = process(
            (dataset, filemap, max_num_support, max_tokens, is_web), True)
    else:
        chunk_size = 1000
        total = len(dataset)
        # Stepping by chunk_size avoids submitting an empty trailing chunk
        # when the dataset size is a multiple of chunk_size.
        chunks = [(dataset[start:start + chunk_size], filemap,
                   max_num_support, max_tokens, is_web)
                  for start in range(0, total, chunk_size)]
        instances = []
        done = 0
        # The context manager releases the worker processes when done.
        with ProcessPoolExecutor(num_processes) as executor:
            for processed in executor.map(process, chunks):
                instances.extend(processed)
                done += chunk_size
                print("%d/%d done" % (min(total, done), total))

    return {"meta": {"source": name}, 'instances': instances}
def main():
    """Plot every row of each result group (from the sixth group on).

    Results are loaded from ``../results.txt``. For each group key the rows
    are paired with the output directory ``plots_<key>/`` and passed to
    ``plot_row`` in a 12-worker process pool.
    """
    results = load_model_results('../results.txt')
    # Iterate over all groups, save one PDF for group, sorted by confidence.
    for index, key in enumerate(results):
        if index < 5:  # the first five groups are skipped
            continue
        print(' **** Group: ', key, ' **** ')
        # Extract results for a single group.
        group_results = results[key]
        group_len = len(group_results)
        # Where to save the PDFs.
        output_directory = 'plots_' + str(key) + '/'
        in_tuple = zip(group_results, [output_directory] * group_len)
        # Multiprocessing!
        max_workers = 12
        print('Max workers: ', max_workers)
        start = time()
        with ProcessPoolExecutor(max_workers=max_workers) as pool:
            # Stored under a separate name: rebinding ``results`` here made
            # the ``results[key]`` lookup of the next group fail, because it
            # then indexed this list instead of the loaded mapping.
            plot_results = list(pool.map(plot_row, in_tuple))
        end = time()
        print('Took %.3f seconds' % (end - start))
def run_simulation(datasets, workers_num):
    """Run ``exec_fun`` for ``workers_num`` demo users in parallel.

    Each user is a ``TroiaWebDemoUser`` with its own client and a job id of
    the form ``TES_TROJ_JID_<i>``; all receive the same ``datasets``.
    ``exec_fun(user, ITERATIONS)`` is then executed once per user.

    Args:
        datasets: passed to ``set_datasets`` of every user.
        workers_num: number of users and of worker processes.
    """
    # range() works on both Python 2 and 3, unlike the former xrange().
    workers = [TroiaWebDemoUser(get_troia_client(), "TES_TROJ_JID_" + str(i))
               for i in range(workers_num)]
    for worker in workers:
        worker.set_datasets(datasets)
    with ProcessPoolExecutor(workers_num) as executor:
        # list() waits for every run and re-raises worker exceptions.
        list(executor.map(exec_fun, workers, repeat(ITERATIONS, workers_num)))
def main(chunk):
    """Count the numbers in 1..999 for which ``is_prime`` is truthy.

    Args:
        chunk: ``chunksize`` used when distributing work to the pool.

    Returns:
        number of truthy ``is_prime`` results.
    """
    nums = range(1, 1000)
    # The context manager releases the worker processes when done.
    with ProcessPoolExecutor() as pool:
        flags = pool.map(is_prime, nums, timeout=None, chunksize=chunk)
        return sum(1 for result in flags if result)
class GeneticSearcher:
    """Genetic search over networks for a given problem.

    Each iteration rates unseen networks in a process pool, keeps the best
    third, and refills the population with children, mutants and random
    networks.
    """

    def __init__(self, pop_size, problem):
        """Create a random population of ``pop_size`` networks."""
        self.problem = problem
        self.pop = [Network.random_network() for i in range(pop_size)]
        self.fitness_cache = {}  # network -> rating
        self.best = None  # (network, rating) of the last selection
        self.nt = NetTester(problem)
        self.pp = ProcessPoolExecutor(max_workers=4)
        self.ntf = NetworkTesterFactory(problem)
        self.pop_size = pop_size

    def recalculate_fitness(self):
        """Rate every network that is not in the cache yet (in parallel)."""
        nets_to_rate = [net for net in self.pop
                        if net not in self.fitness_cache]
        # rate_network results are unpacked as (network, rating) pairs.
        self.fitness_cache.update(
            self.pp.map(self.ntf.rate_network, nets_to_rate))

    def selection(self):
        """Record the best network and return the top third by rating."""
        population_fitness = sorted(
            ((net, self.fitness_cache[net]) for net in self.pop),
            reverse=True, key=lambda x: x[1])
        self.best = population_fitness[0]
        return [net for net, _ in population_fitness[:int(self.pop_size / 3)]]

    def crossing(self, parents):
        """Return ``pop_size / 3`` children of randomly paired parents.

        Requires at least two distinct parents.
        """
        # Materialise the distinct parents once: random.sample() no longer
        # accepts a set (TypeError since Python 3.11).
        candidates = list(set(parents))
        children = []
        while len(children) < self.pop_size / 3:
            # Use fresh names for the pair; rebinding ``parents`` restricted
            # every later draw to the first pair that was sampled.
            first, second = random.sample(candidates, 2)
            children.append(self.problem.crossing(first, second))
        return children

    def mutation(self, population):
        """Return ``0.3 * pop_size`` mutants of randomly chosen members."""
        mutants = []
        while len(mutants) < 0.3 * self.pop_size:
            mutants.append(self.problem.mutate(random.choice(population)))
        return mutants

    def iteration(self):
        """Advance one generation and return the best rating seen."""
        self.recalculate_fitness()
        old_survivors = self.selection()
        children = self.crossing(old_survivors)
        mutants = self.mutation(old_survivors)
        new_generation = old_survivors + children + mutants
        # Top up with random networks to keep the population size constant.
        while len(new_generation) < self.pop_size:
            new_generation.append(Network.random_network())
        self.pop = new_generation
        return self.best[1]

    def show_best(self):
        """Run the tester on the best network with rendering enabled."""
        self.nt.test(self.best[0], render=True)
def compute_pi(nr_tries=10000, pool_size=None, constructor=None):
    """Estimate pi by averaging ``partial_pi`` over ``pool_size`` workers.

    Args:
        nr_tries: total number of tries, split evenly across the workers.
        pool_size: number of workers and tasks; defaults to the CPU count.
        constructor: optional executor factory accepting ``max_workers``;
            defaults to ``ProcessPoolExecutor``. The executor it returns
            must support the context-manager protocol.

    Returns:
        mean of the ``partial_pi`` results.
    """
    # Resolve the default before it is used as a divisor; previously the
    # fallback ran only after ``nr_tries // pool_size`` had already failed.
    if not pool_size:
        pool_size = multiprocessing.cpu_count()
    make_executor = constructor if constructor else ProcessPoolExecutor
    args = [(nr_tries // pool_size, ) for _ in range(pool_size)]
    # The context manager releases the workers when done.
    with make_executor(max_workers=pool_size) as executor:
        return sum(executor.map(partial_pi, args)) / pool_size
def main():
    """Time ``gcd`` over four fixed number pairs using two processes."""
    numbers = [
        (1963309, 2265973), (2030677, 3814172),
        (1551645, 2229620), (2039045, 2020802)
    ]
    start = time()
    # The context manager releases the worker processes when done.
    with ProcessPoolExecutor(max_workers=2) as pool:
        # list() forces the lazy map so the timing covers the real work.
        results = list(pool.map(gcd, numbers))
    end = time()
    print('Took %.3f seconds' % (end - start))
def make_arch_db():
    """Compute architectures in parallel, save them and load them into MongoDB.

    ``process_range`` is run on consecutive ``(start, stop)`` windows of
    width 10000 covering 0..60000000. The merged result is written to
    ``pfam_sets.pkl.gz`` and inserted into the ``ArchDB_Pfam_071414``
    collection, one document per item with ``_id`` set to the smallest
    member.
    """
    by = 10000
    m = 60000000
    # Smaller values for quick runs: by = 2000, m = 10000
    # Leaving the block waits for all ranges and always releases the workers.
    with ProcessPoolExecutor(max_workers=8) as executor:
        e = executor.map(process_range,
                         zip(range(0, m, by), range(by, m + by, by)))
    print('done calculating architectures')
    pfam_sets = merge(e)
    print(len(pfam_sets))
    gsave(pfam_sets, 'pfam_sets.pkl.gz')
    # mongodb
    db = MongoClient('wl-cmadmin', 27017).ArchDB_Pfam_071414.ArchDB_Pfam_071414
    db.insert(map(lambda item: {'_id': min(item[1]),
                                'pID': list(item[1]),
                                'Pfam': item[0]},
                  pfam_sets.items()))
    db.ensure_index('pID')
    db.ensure_index('Pfam')
def main():
    """Check every offset of a binary file and print each new case once.

    The file named by ``sys.argv[1]`` is read and, in batches of
    ``CONCURRENCY`` offsets, the hex encoding of ``MAX_OPLEN`` bytes at each
    offset is passed to ``check_hexpairs`` in a process pool. A result is
    printed as JSON the first time its ``case`` key is seen, unless its
    meta or meta-meta case already exceeded the configured example limits.

    Returns:
        0 on success or usage error, 1 if the file is missing or too small.
    """
    cases = {}
    meta_cases = {}
    meta_meta_cases = {}

    if len(sys.argv) != 2:
        print("Usage: %s <path to binary file>" % sys.argv[0])
        return 0
    if not os.path.exists(sys.argv[1]):
        print("No such file %s" % sys.argv[1])
        return 1
    fsize = os.stat(sys.argv[1]).st_size
    if fsize < MAX_OPLEN:
        print("muy pequeño: %s" % sys.argv[1])
        return 1
    with open(sys.argv[1], "rb") as f:
        input_data = f.read()

    # The context manager releases the worker processes when done.
    with ProcessPoolExecutor(CONCURRENCY) as pool:
        for offset in range(0, fsize - 20, CONCURRENCY):
            inputs = [hexlify(input_data[o:o + MAX_OPLEN])
                      for o in range(offset, offset + CONCURRENCY)]
            for res in pool.map(check_hexpairs, inputs):
                if not res:
                    continue
                inskey = res['case']
                insmkey = res['metacase']
                insmmkey = res['metametacase']
                meta_meta_cases[insmmkey] = meta_meta_cases.get(insmmkey, 0) + 1
                meta_cases[insmkey] = meta_cases.get(insmkey, 0) + 1
                over_limit = (
                    meta_cases[insmkey] > MAX_METACASE_EXAMPLES
                    or meta_meta_cases[insmmkey] > MAX_META_META_CASE_EXAMPLES)
                if not over_limit and inskey not in cases:
                    cases[inskey] = 1
                    print("%s\n" % json.dumps(res, indent=4))
    return 0
def run():
    """Run one or all systematics and write the translated counts as YAML.

    With ``--systematic all`` every systematic from
    ``get_all_systematics(args.files)`` is run in its own worker with output
    silenced, and the per-systematic dictionaries are merged. Otherwise a
    single ``run_systematic`` call is made. The result is translated with
    ``fitinputs.translate_to_fit_inputs`` and dumped to ``args.output``.
    """
    args = get_config()
    dargs = vars(args)

    # launch a bunch of processes to look at all systematics
    if args.systematic == 'all':
        dargs['quiet'] = True  # multiprocessing makes a mess of the outputs
        systs = get_all_systematics(args.files)
        syst_args = {x: dargs.copy() for x in systs}
        for syst in systs:
            syst_args[syst]['systematic'] = syst
        counts_dict = {}
        # The context manager releases the workers when done.
        with Executor() as executor:
            for subdict in executor.map(run_systematic, syst_args.values()):
                counts_dict.update(subdict)
    # or just do one...
    else:
        counts_dict = run_systematic(dargs)
    with open(dargs['output'], 'w') as out_yml:
        translated = fitinputs.translate_to_fit_inputs(counts_dict)
        out_yml.write(yaml.dump(translated))
def main():
    """Command-line entry point: sort imports in the given files.

    Arguments come from ``create_parser()``. ``-`` as the only file sorts
    stdin to stdout. Otherwise the files (or, with none given, the current
    directory recursively) are sorted, in parallel when ``jobs`` is set.
    Exits with status 1 when ``check`` is requested and a file is
    incorrectly sorted; finally reports the number of skipped files.
    """
    arguments = create_parser()
    if arguments.get('show_version'):
        print(INTRO)
        return

    if 'settings_path' in arguments:
        sp = arguments['settings_path']
        arguments['settings_path'] = (
            os.path.abspath(sp) if os.path.isdir(sp)
            else os.path.dirname(os.path.abspath(sp)))
        if not os.path.isdir(arguments['settings_path']):
            print("WARNING: settings_path dir does not exist: {0}".format(arguments['settings_path']))

    if 'virtual_env' in arguments:
        venv = arguments['virtual_env']
        arguments['virtual_env'] = os.path.abspath(venv)
        if not os.path.isdir(arguments['virtual_env']):
            print("WARNING: virtual_env dir does not exist: {0}".format(arguments['virtual_env']))

    file_names = arguments.pop('files', [])
    if file_names == ['-']:
        SortImports(file_contents=sys.stdin.read(), write_to_stdout=True, **arguments)
    else:
        if not file_names:
            file_names = ['.']
            arguments['recursive'] = True
            if not arguments.get('apply', False):
                arguments['ask_to_apply'] = True
        config = from_path(os.path.abspath(file_names[0]) or os.getcwd()).copy()
        config.update(arguments)
        wrong_sorted_files = False
        skipped = []
        if arguments.get('recursive', False):
            file_names = iter_source_code(file_names, config, skipped)
        num_skipped = 0
        if config['verbose'] or config.get('show_logo', False):
            print(INTRO)

        jobs = arguments.get('jobs')
        if jobs:
            # The context manager releases the worker processes when done.
            with ProcessPoolExecutor(max_workers=jobs) as executor:
                for sort_attempt in executor.map(
                        functools.partial(sort_imports, **arguments),
                        file_names):
                    if not sort_attempt:
                        continue
                    incorrectly_sorted = sort_attempt.incorrectly_sorted
                    if arguments.get('check', False) and incorrectly_sorted:
                        wrong_sorted_files = True
                    if sort_attempt.skipped:
                        num_skipped += 1
        else:
            for file_name in file_names:
                try:
                    sort_attempt = SortImports(file_name, **arguments)
                    incorrectly_sorted = sort_attempt.incorrectly_sorted
                    if arguments.get('check', False) and incorrectly_sorted:
                        wrong_sorted_files = True
                    if sort_attempt.skipped:
                        num_skipped += 1
                except IOError as e:
                    print("WARNING: Unable to parse file {0} due to {1}".format(file_name, e))
        if wrong_sorted_files:
            # sys.exit is always available, unlike the site-provided exit().
            sys.exit(1)

        num_skipped += len(skipped)
        if num_skipped and not arguments.get('quiet', False):
            if config['verbose']:
                for was_skipped in skipped:
                    print("WARNING: {0} was skipped as it's listed in 'skip' setting"
                          " or matches a glob in 'skip_glob' setting".format(was_skipped))
            print("Skipped {0} files".format(num_skipped))
if not os.path.exists(data): os.mkdir(data) minute = os.path.join(data, 'minute') if not os.path.exists(minute): os.mkdir(minute) generate_stocks(freq=pd.Timedelta(seconds=120), start=pd.Timestamp('2010-01-01'), directory=minute) def convert_to_json(d): filenames = sorted(glob(os.path.join(d, '*')))[-365:] with open(d.replace('minute', 'json') + '.json', 'w') as f: for fn in filenames: df = pd.read_csv(fn) for rec in df.to_dict(orient='records'): json.dump(rec, f) f.write(os.linesep) print("Finished JSON: %s" % d) js = os.path.join(data, 'json') if not os.path.exists(js): os.mkdir(js) directories = sorted(glob(os.path.join(minute, '*'))) e = ProcessPoolExecutor() list(e.map(convert_to_json, directories))
numbers = [(1963309, 2265973), (2030677, 3814172), (1551645, 2229620), (2039045, 2020802)] start = time() results = list(map(gcd, numbers)) end = time() print('Took %.3f seconds' % (end - start)) print("using multiprocessing...") try: start = time() pool = ProcessPoolExecutor(max_workers=2) # The one change results = list(pool.map(gcd, numbers)) end = time() print('Took %.3f seconds' % (end - start)) except RuntimeError as e: print("another windows fail....") # Item 42 ... print("====ITEM 42: Define Function Decorators with functools.wraps ====") print("func tools!!") def my_dec(func): def wrapper(): print("in wrapper") return func()
def main_3():
    """Run ``gcd`` over ``numbers`` in eight processes and report the time.

    Returns:
        string of the form ``'Took <seconds> seconds'``.
    """
    start = time.time()
    # The context manager releases the worker processes when done.
    with ProcessPoolExecutor(max_workers=8) as pool:
        # list() forces the lazy map so the timing covers the real work.
        list(pool.map(gcd, numbers))
    end = time.time()
    return 'Took %.3f seconds' % (end - start)
from concurrent.futures import ProcessPoolExecutor


def is_odd_number(number):
    """Return ``number % 2`` (1 for odd integers, 0 for even ones)."""
    return number % 2


def main():
    """Check 1 and 2 in a process pool and print both results."""
    # The context manager releases the worker processes when done.
    with ProcessPoolExecutor() as executor:
        # timeout=1: next() raises TimeoutError if a result is not
        # available within one second of the map() call.
        it = executor.map(is_odd_number, [1, 2], timeout=1)
        print(next(it))
        print(next(it))


# Guard the entry point: with the "spawn" start method each worker
# re-imports this module, and unguarded pool code would run again there.
if __name__ == '__main__':
    main()
'AdjPval', 'Group1Name', 'AI', 'AI-pval', 'AI-null'] fname = 'more_phylip_BenjRes.tsv' benj_writer = csv.DictWriter(open(fname, 'w'), benj_fields, delimiter = '\t') benj_writer.writeheader() multi = True print 'Starting multiprocessing!' if multi: pool = ProcessPoolExecutor(max_workers = 30) results = pool.map(calculate_region, yield_regions(trop_dict)) else: results = imap(calculate_region, islice(yield_regions(trop_dict), 0,35)) for gname, sub, prot, win, start, benj_res in results: #print prot, start, win tdict = { 'Prot':prot, 'Start':start, 'WinSize':win, 'GroupName':gname, 'Subtype':sub, } if type(benj_res) is StringType: if (benj_res == 'Already Processed') or benj_res.startswith('Too few unique sequences'):
def do_multi_process():
    """Time ``gcd`` over ``numbers`` using a two-worker process pool."""
    start = time()
    # The context manager releases the worker processes when done.
    with ProcessPoolExecutor(max_workers=2) as pool:
        # list() forces the lazy map so the timing covers the real work.
        result = list(pool.map(gcd, numbers))
    end = time()
    print('Took %.3f seconds' % (end - start))
class ScoreProcessor:
    """Score revisions using a thread pool for IO and a process pool for CPU.

    Batches of revision ids are extracted on IO threads; the per-revision
    feature solving and scoring is then delegated to worker processes.
    Usable as a context manager, which shuts both pools down on exit.
    """

    # Default IO worker count is cpu_workers * multiplier, clamped to
    # [MIN_IO_WORKERS, MAX_IO_WORKERS].
    IO_WORKER_MULTIPLIER = 0.25
    MIN_IO_WORKERS = 2
    MAX_IO_WORKERS = 10

    def __init__(self, scoring_model, extractor, cpu_workers=None,
                 io_workers=None, batch_size=50):
        """Start both pools and resolve the model's root datasources.

        Args:
            scoring_model: model providing ``features`` and ``score()``.
            extractor: object providing ``extract()`` and ``solve()``.
            cpu_workers: process count; defaults to ``cpu_count()``.
            io_workers: thread count; defaults to the clamped fraction of
                ``cpu_workers`` described on the class constants.
            batch_size: number of revision ids per extraction batch.
        """
        self.scoring_model = scoring_model
        self.extractor = extractor
        self.cpu_workers = \
            int(cpu_workers) if cpu_workers is not None else cpu_count()
        self.batch_size = int(batch_size)

        if io_workers is not None:
            self.io_workers = int(io_workers)
        else:
            self.io_workers = max(self.MIN_IO_WORKERS,
                                  min(self.MAX_IO_WORKERS,
                                      int(self.cpu_workers *
                                          self.IO_WORKER_MULTIPLIER)))

        logger.info("Starting up IO thread pool with {0} workers"
                    .format(self.io_workers))
        self.scores_ex = ThreadPoolExecutor(max_workers=self.io_workers)
        logger.info("Starting up CPU thread pool with {0} workers"
                    .format(self.cpu_workers))
        self.process_ex = ProcessPoolExecutor(max_workers=self.cpu_workers)

        roots = dependencies.dig(self.scoring_model.features)
        self.root_datasources = [d for d in roots if isinstance(d, Datasource)]

    def __enter__(self):
        return self

    def __exit__(self, exc_type=None, exc_value=None, traceback=None):
        """Shut down both pools; exceptions from the body propagate.

        The context-manager protocol passes three exception arguments; the
        previous zero-argument signature raised TypeError on exit, and the
        body referenced attributes that do not exist on this class.
        """
        self.scores_ex.shutdown()
        self.process_ex.shutdown()

    def score(self, rev_ids, caches=None, cache=None):
        """Yield ``(rev_id, score)`` results for one or more revision ids.

        Args:
            rev_ids: a single int or an iterable of revision ids.
            caches: optional per-revision cache mapping.
            cache: optional cache applied to every revision.
        """
        if isinstance(rev_ids, int):
            rev_ids = [rev_ids]
        batches = batch_rev_caches(chunked(rev_ids, self.batch_size),
                                   caches, cache)
        for batch_scores in self.scores_ex.map(self._score_batch, batches):
            for score in batch_scores:
                yield score

    def _score_batch(self, batch_rev_cache):
        """Extract root values for one batch and score it in the CPU pool."""
        id_batch, caches, cache = batch_rev_cache
        logger.debug("running _score_batch() on {0} rev_ids"
                     .format(len(id_batch)))
        error_values = self.extractor.extract(
            id_batch, self.root_datasources, caches=caches, cache=cache)
        e_r_caches = self._group_error_root_caches(
            id_batch, error_values, caches, cache)
        rev_scores = self.process_ex.map(self._process_score, e_r_caches)
        return list(rev_scores)

    def _group_error_root_caches(self, id_batch, error_values, caches, cache):
        """Yield one work item per revision for ``_process_score``.

        Items are ``(rev_id, scoring_model, extractor, score_cache, error)``.
        When extraction failed, the model and extractor are omitted so they
        are not shipped to the worker needlessly.
        """
        for rev_id, (error, vals) in zip(id_batch, error_values):
            if error:
                score_cache = {}
                scoring_model = None
                extractor = None
            else:
                # Later updates win: shared cache, then the per-revision
                # cache, then the freshly extracted root values.
                score_cache = {}
                score_cache.update(cache or {})
                score_cache.update((caches or {}).get(rev_id, {}))
                score_cache.update(
                    {rd: rv for rd, rv in zip(self.root_datasources, vals)})
                scoring_model = self.scoring_model
                extractor = self.extractor
            yield (rev_id, scoring_model, extractor, score_cache, error)

    @classmethod
    def _process_score(cls, e_r_caches):
        """Solve features and score one revision.

        Returns:
            ``(rev_id, score)``, or ``(rev_id, error_score(error))`` when
            extraction had failed or scoring raises.

        Raises:
            Exception: errors raised while solving features are re-raised.
        """
        rev_id, scoring_model, extractor, cache, error = e_r_caches
        logger.debug("running _process_score() on {0}".format(rev_id))
        if error is not None:
            return rev_id, error_score(error)
        try:
            feature_values = list(extractor.solve(
                scoring_model.features, cache=cache))
        except Exception:
            logger.debug("An error occured during feature extraction")
            # Re-raise as before; the unreachable return that followed the
            # raise has been dropped.
            raise
        try:
            score = scoring_model.score(feature_values)
            return rev_id, score
        except Exception as error:
            logger.debug("An error occured during scoring")
            return rev_id, error_score(error)