def test_wrap():
    """Check that mapping ``addabs`` over the sample pairs yields the same
    list with the builtin ``map`` and with two pathos pool types."""
    pairs = [(-1, -2), (3, -4)]
    expected = [3, 1]

    # Serial baseline with the builtin map.
    assert list(map(addabs, pairs)) == expected

    # Imports stay local and in the same order: ProcessPool, then ParallelPool.
    from pathos.pools import ProcessPool
    assert ProcessPool().map(addabs, pairs) == expected

    from pathos.pools import ParallelPool
    assert ParallelPool().map(addabs, pairs) == expected
def find_transcript_mutations(self, paths, processes=1):
    """Create a `DataFrame` of mutation counts, where the row indices are
    cell names and the column indices are gene names.

    Each path becomes a ``(Path(path).stem, mutations)`` pair, where
    ``mutations`` is the result of ``find_cell_gene_aa_mutations(path=path)``.
    The collected pairs are passed to ``self._make_mutation_counts_df`` and
    its result is returned.

    With ``processes > 1`` the paths are mapped through a ``Pool`` of that
    size; otherwise they are processed one by one in the calling process.
    """

    def init_process(aa_mutation_finder):
        # Store the finder in a module-level global; `process_cell` reads it
        # from there instead of taking it as an argument.
        global current_process_aa_mutation_finder
        current_process_aa_mutation_finder = aa_mutation_finder

    def process_cell(path):
        cell_name = Path(path).stem
        aa_mutations = (
            current_process_aa_mutation_finder.find_cell_gene_aa_mutations(
                path=path))
        return cell_name, aa_mutations

    if processes > 1:
        worker_pool = Pool(processes,
                           initializer=init_process,
                           initargs=(self, ))
        with worker_pool as pool:
            progress = tqdm(pool.imap(process_cell, paths),
                            total=len(paths),
                            smoothing=0.01)
            cell_results = list(progress)
    else:
        # No pool initializer runs in the serial case, so set the global here.
        init_process(self)
        cell_results = list(map(process_cell, tqdm(paths)))

    return self._make_mutation_counts_df(cell_results)
def _meanfield_sgdstep_components(self, y, x, scores, prob, stepsize, nb_threads=4):
    """Call ``meanfield_sgdstep`` on every basis and model component.

    For component ``idx`` the scores passed on are
    ``[s[:, idx] for s in scores]``. The basis step receives
    ``(x, scores_idx, prob, stepsize)`` and the model step receives
    ``(y, x, scores_idx, prob, stepsize)``.

    With ``nb_threads == 1`` the components are visited in order by zipping
    ``self.basis`` with ``self.models``. Otherwise the indices
    ``range(self.likelihood.size)`` are mapped through
    ``Pool(threads=nb_threads)``.
    """

    def column_scores(idx):
        # Build a fresh list on every call, so each callee gets its own list.
        return [score[:, idx] for score in scores]

    if nb_threads == 1:
        for idx, (basis, model) in enumerate(zip(self.basis, self.models)):
            basis.meanfield_sgdstep(x, column_scores(idx), prob, stepsize)
            model.meanfield_sgdstep(y, x, column_scores(idx), prob, stepsize)
        return

    def _loop(idx):
        self.basis[idx].meanfield_sgdstep(
            x, column_scores(idx), prob, stepsize)
        self.models[idx].meanfield_sgdstep(
            y, x, column_scores(idx), prob, stepsize)

    with Pool(threads=nb_threads) as p:
        p.map(_loop, range(self.likelihood.size))
def run_count(keywords_list, url_path, use_cashed=True, process_num=8):
    """Count keyword occurrences for every PDF described in a JSON file.

    Args:
        keywords_list: Keywords to look for. Duplicates are dropped (the
            first occurrence wins) and each keyword is passed through
            ``clean_text``.
        url_path: Path of a JSON file. Its parsed content is fed to
            ``yield_tuples`` to obtain one work item per PDF.
        use_cashed: Not used by this function; kept so existing callers
            keep working.
        process_num: Size passed to ``Pool``.

    Returns:
        ``None`` when ``keywords_list`` is empty. Otherwise a list whose
        first element is the header ``['Name', 'Org', 'Year', *keywords]``,
        followed by every truthy worker result that does not contain the
        text ``'ERROR'``.

    Side effects:
        Worker results containing ``'ERROR'`` are written, one per line, to
        ``FIND ERR_<timestamp>.log`` in the current working directory.
    """
    if len(keywords_list) == 0:
        return None

    # dict.fromkeys removes duplicates in first-seen order in O(n); the
    # previous sorted(set(...), key=list.index) gave the same order in O(n^2).
    keywords = [clean_text(kw) for kw in dict.fromkeys(keywords_list)]
    res = [['Name', 'Org', 'Year', *keywords]]

    with open(url_path, 'r') as f:
        pdf_tuples_list = list(yield_tuples(json.load(f)))

    # One reference to the same keyword list per PDF.
    # NOTE(review): imap is called with one iterable per worker argument,
    # which presumably means a pathos-style pool -- confirm.
    keywords_per_pdf = [keywords] * len(pdf_tuples_list)

    errs = []
    with tqdm(total=len(pdf_tuples_list), ncols=80) as pbar:
        with Pool(process_num) as pool:
            pool_iter = pool.imap(count_keywords_in_one_pdf,
                                  pdf_tuples_list, keywords_per_pdf)
            for r in pool_iter:
                if 'ERROR' in str(r):
                    errs.append(r)
                elif r:  # skip None / empty results
                    res.append(r)
                pbar.update()

    if errs:
        time_now = time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime())
        with open(f'FIND ERR_{time_now}.log', 'w',
                  encoding='utf8') as err_file:
            err_file.writelines(f'{i}\n' for i in errs)
    return res
def test01_bgsave_stress(self):
    """Run create/delete/read/update workloads and a BGSAVE loop at the same
    time, then ping the connection to check it still responds."""
    n_nodes = 1000
    n_iterations = 10

    conn = self.env.getConnection()
    graphs[0].query("CREATE INDEX ON :Node(val)")

    pool = Pool(nodes=5)
    # The list literal is evaluated left to right, so tasks are submitted in
    # this order.
    pending = [
        pool.apipe(create_nodes, graphs[0], n_iterations, n_nodes),
        pool.apipe(delete_nodes, graphs[1], n_iterations, n_nodes / 2),
        pool.apipe(read_nodes, graphs[2], n_iterations),
        pool.apipe(update_nodes, graphs[3], n_iterations),
        pool.apipe(BGSAVE_loop, self.env, conn, 3),
    ]

    # wait for all tasks to finish, in submission order
    for task in pending:
        task.wait()

    # make sure we did not crash
    conn.ping()
    conn.close()
def find_mutation_counts(self, paths, processes=4):
    """Create a `DataFrame` of mutation counts, where the row indices are
    cell names and the column indices are gene names.

    Args:
        paths: Paths to process. ``Path(path).stem`` is used as the cell
            name. ``len(paths)`` is taken in the parallel branch.
        processes: With a value greater than 1 the paths are mapped through
            a ``Pool`` of that size; otherwise they are processed serially
            in the calling process.

    Returns:
        A 2-tuple ``(counts_df, filtered_counts_df)``. Each is built by
        ``self._make_mutation_counts_df`` from ``(cell_name, counts)``
        pairs, using the first and second element respectively of what
        ``find_cell_gene_mut_counts(path=path)`` returns.
    """

    def init_process(mutation_counter):
        # Store the counter in a module-level global; `process_cell` reads
        # it from there instead of taking it as an argument.
        global current_process_mutation_counter
        current_process_mutation_counter = mutation_counter

    def process_cell(path):
        cell_name = Path(path).stem
        return (cell_name,
                current_process_mutation_counter.find_cell_gene_mut_counts(
                    path=path))

    if processes > 1:
        with Pool(processes, initializer=init_process,
                  initargs=(self, )) as pool:
            results = list(
                tqdm(pool.imap(process_cell, paths),
                     total=len(paths),
                     smoothing=0.01))
    else:
        # No pool initializer runs in the serial case. Without this call
        # `process_cell` would read an unset (or stale) global.
        init_process(self)
        results = list(map(process_cell, tqdm(paths)))

    cell_genemuts_pairs, filtered_cell_genmuts_pairs = [], []
    for cell_name, (mutation_counts, filtered_mutation_counts) in results:
        cell_genemuts_pairs.append((cell_name, mutation_counts))
        filtered_cell_genmuts_pairs.append(
            (cell_name, filtered_mutation_counts))

    return (
        self._make_mutation_counts_df(cell_genemuts_pairs),
        self._make_mutation_counts_df(filtered_cell_genmuts_pairs),
    )
def clear_stack(cls):
    """Classify and price every queued entry, then empty the queue.

    Each ``(config, product, price)`` tuple in ``cls.STACK`` is submitted to
    a pool of ``cpu_count()`` workers. A worker calls
    ``Directory.classify_product_auto(config, product)``, stores the result
    on ``price.product`` and then calls ``Directory.set_price(price)``.

    The call returns only after the pool has been closed and joined, so all
    submitted tasks have finished before ``cls.STACK`` is reset to ``[]``.
    Results of the async calls are not collected, so an exception raised
    inside a worker task is not re-raised here.

    NOTE: a manual-classification fallback for products without a
    ``part_id`` (``Directory.classify_product_manual`` /
    ``Directory.set_product``) was sketched here as commented-out code and
    is still disabled.
    """
    pool = Pool(cpu_count())

    def classify_set(c, pd, pc):
        # Use the argument `c`. Reading the enclosing loop's `config` here
        # would pick up whichever entry the loop had reached by the time the
        # task actually runs.
        pd = Directory.classify_product_auto(c, pd)
        pc.product = pd
        Directory.set_price(pc)

    try:
        for config, product, price in cls.STACK:
            pool.apply_async(classify_set, args=(config, product, price))
    finally:
        # Release the workers explicitly; pools must not be left for the
        # garbage collector to clean up.
        pool.close()
        pool.join()

    cls.STACK = []
def test_calc_uncertainty_pool_pass(self):
    """Test parallel compute the uncertainty distribution for an impact"""
    exp_unc, impf_unc, _ = make_input_vars()
    unc_calc = CalcImpact(exp_unc, impf_unc, haz_dem())
    sample = unc_calc.make_sample(N=2)

    workers = Pool(nodes=2)
    try:
        unc_data = unc_calc.uncertainty(sample,
                                        calc_eai_exp=False,
                                        calc_at_event=False,
                                        pool=workers)
    finally:
        # Shut the pool down and drop it even if the computation raised.
        workers.close()
        workers.join()
        workers.clear()

    # Settings recorded on the calculator.
    self.assertEqual(unc_data.unit, exp_dem().value_unit)
    self.assertListEqual(unc_calc.rp, [5, 10, 20, 50, 100, 250])
    self.assertEqual(unc_calc.calc_eai_exp, False)
    self.assertEqual(unc_calc.calc_at_event, False)

    # Sizes of the computed distributions.
    self.assertEqual(unc_data.aai_agg_unc_df.size, unc_data.n_samples)
    self.assertEqual(unc_data.tot_value_unc_df.size, unc_data.n_samples)
    self.assertEqual(unc_data.freq_curve_unc_df.size,
                     unc_data.n_samples * len(unc_calc.rp))

    # The two switched-off outputs must be empty.
    self.assertTrue(unc_data.eai_exp_unc_df.empty)
    self.assertTrue(unc_data.at_event_unc_df.empty)
def test_set_one_file_pass(self):
    """Test set function set_from_tracks with one input.

    The pool is handed to both ``TCTracks`` and ``TropCyclone``. It is
    closed and joined in a ``finally`` block so worker processes are
    released even when one of the calls raises.
    """
    pool = Pool()
    try:
        tc_track = TCTracks(pool)
        tc_track.read_processed_ibtracs_csv(TEST_TRACK)
        tc_track.calc_random_walk()
        tc_track.equal_timestep()

        tc_haz = TropCyclone(pool)
        tc_haz.set_from_tracks(tc_track, CENTR_TEST_BRB)
        tc_haz.check()
    finally:
        pool.close()
        pool.join()

    self.assertEqual(tc_haz.tag.haz_type, 'TC')
    self.assertEqual(tc_haz.tag.description, '')
    self.assertEqual(tc_haz.units, 'm/s')
    self.assertEqual(tc_haz.centroids.size, 296)
    self.assertEqual(tc_haz.event_id.size, 10)
    # assertIsInstance reports the actual type on failure.
    self.assertIsInstance(tc_haz.intensity, sparse.csr.csr_matrix)
    self.assertIsInstance(tc_haz.fraction, sparse.csr.csr_matrix)
    self.assertEqual(tc_haz.intensity.shape, (10, 296))
    self.assertEqual(tc_haz.fraction.shape, (10, 296))
def UQ(start, end, lower, upper):
    """Run ``optimize`` once per index in ``start..end`` (inclusive) on a
    thread pool, print a summary and return the summed squared subdiameters.

    For each index ``i`` the bounds are ``lower + [lower[i]]`` and
    ``upper + [upper[i]]``, the cost comes from ``costFactory(i)``, and the
    bin list is a copy of the module-level ``nbins``.
    """
    # A ProcessPool can be swapped in here. If the cost functions then fail
    # to pickle, a helper such as pool_helper.func_pickle was the suggested
    # workaround.
    from pathos.pools import ThreadPool as Pool

    indices = range(start, end + 1)

    # run optimizer for each subdiameter
    lb = [lower + [lower[i]] for i in indices]
    ub = [upper + [upper[i]] for i in indices]
    nb = [nbins[:] for _ in indices]
    # The last bin of the k-th copy takes the value at position k of that
    # same copy (k is the position in `nb`, not the index from `indices`).
    for position, bins in enumerate(nb):
        bins[-1] = bins[position]
    cf = [costFactory(i) for i in indices]

    # construct cost function and run optimizer, one pool node per subproblem
    nnodes = len(lb)
    raw_results = Pool(nnodes).map(optimize, cf, lb, ub, nb)

    # Transpose [(diameter, evals), ...] into columns.
    columns = list(zip(*raw_results))
    diameters = list(columns[0])
    function_evaluations = list(columns[1])

    total_func_evals = sum(function_evaluations)
    total_diameter = sum(diameters)

    print("subdiameters (squared): %s" % diameters)
    print("diameter (squared): %s" % total_diameter)
    print("func_evals: %s => %s" % (function_evaluations, total_func_evals))

    return total_diameter
def optimize(cost,lb,ub):
    """Minimise ``cost`` within ``[lb, ub]`` using mystic's
    ``DifferentialEvolutionSolver2`` and return
    ``(diameter_squared, func_evals)``.

    ``diameter_squared`` is the negated best energy found. ``func_evals`` is
    the solver's evaluation count. Population size, iteration limits,
    tolerance and strategy parameters are read from module-level names
    (``npop``, ``maxiter``, ``maxfun``, ``convergence_tol``, ``crossover``,
    ``percent_change``).
    """
    from pathos.pools import ProcessPool as Pool
    from mystic.solvers import DifferentialEvolutionSolver2
    from mystic.termination import CandidateRelativeTolerance as CRT
    from mystic.strategy import Best1Exp
    from mystic.monitors import VerboseMonitor, Monitor
    from mystic.tools import random_seed

    # Fixed seed for the solver's random initial points.
    random_seed(123)

    # Swap in VerboseMonitor(100) for the generation monitor to get progress
    # output.
    generation_monitor = Monitor()
    evaluation_monitor = Monitor()

    ndim = len(lb)  # [(1 + RVend) - RVstart] + 1

    solver = DifferentialEvolutionSolver2(ndim, npop)
    solver.SetRandomInitialPoints(min=lb, max=ub)
    solver.SetStrictRanges(min=lb, max=ub)
    solver.SetEvaluationLimits(maxiter, maxfun)
    solver.SetEvaluationMonitor(evaluation_monitor)
    solver.SetGenerationMonitor(generation_monitor)
    solver.SetMapper(Pool().map)

    tol = convergence_tol
    solver.Solve(
        cost,
        termination=CRT(tol, tol),
        strategy=Best1Exp,
        CrossProbability=crossover,
        ScalingFactor=percent_change,
    )

    print("solved: %s" % solver.bestSolution)

    scale = 1.0  # XXX: scale != 0
    diameter_squared = -solver.bestEnergy / scale
    func_evals = solver.evaluations
    return diameter_squared, func_evals
def test_09_concurrent_multiple_readers_after_big_write(self):
    """Test issue #890: many concurrent readers right after one big write."""
    # One connection and Graph handle per client, all on the same graph key.
    redis_graphs = [
        Graph("G890", self.env.getConnection())
        for _ in range(0, CLIENT_COUNT)
    ]

    # Big write: 1000 relationships.
    redis_graphs[0].query(
        """UNWIND(range(0,999)) as x CREATE()-[:R]->()""")

    read_query = """MATCH (n)-[r:R]->(m) RETURN n, r, m"""
    queries = [read_query] * CLIENT_COUNT

    pool = Pool(nodes=CLIENT_COUNT)

    # invoke queries
    pending = pool.amap(thread_run_query, redis_graphs, queries)

    # wait for processes to return
    pending.wait()

    # get the results
    result = pending.get()

    for i in range(CLIENT_COUNT):
        outcome = result[i]
        if isinstance(outcome, str):
            # A string result fails the test; assertIsNone reports its text.
            self.env.assertIsNone(outcome)
        else:
            self.env.assertEquals(1000, len(outcome.result_set))
def test01_bgsave_stress(self):
    """Run create/delete/read/update workloads and a BGSAVE loop at the same
    time, then ping the connection to check it still responds."""
    n_reads = 50000
    n_creations = 50000
    n_updates = n_creations / 10
    n_deletions = n_creations / 2

    conn = self.env.getConnection()
    graphs[0].query("CREATE INDEX FOR (n:Node) ON (n.v)")

    pool = Pool(nodes=5)
    # The list literal is evaluated left to right, so tasks are submitted in
    # this order.
    pending = [
        pool.apipe(create_nodes, graphs[0], n_creations),
        pool.apipe(delete_nodes, graphs[1], n_deletions),
        pool.apipe(read_nodes, graphs[2], n_reads),
        pool.apipe(update_nodes, graphs[3], n_updates),
        pool.apipe(BGSAVE_loop, self.env, conn, 10000),
    ]

    # wait for all tasks to finish, in submission order
    for task in pending:
        task.wait()

    # make sure we did not crash
    conn.ping()
    conn.close()
def test_07_concurrent_write_rename(self):
    """Rename the graph key while a heavy write query runs against it."""
    # Test setup - validate that graph exists and possible results are None
    graphs[0].query("MATCH (n) RETURN n")

    pool = Pool(nodes=1)
    redis_con = self.env.getConnection()

    # Create new empty graph with id GRAPH_ID + "2"
    new_graph = GRAPH_ID + "2"
    redis_con.execute_command(
        "GRAPH.QUERY", new_graph, """MATCH (n) return n""", "--compact")

    # Start the heavy write in the background, rename the key, then wait.
    heavy_write_query = """UNWIND(range(0,999999)) as x CREATE(n)"""
    writer = pool.apipe(thread_run_query, graphs[0], heavy_write_query)
    redis_con.rename(GRAPH_ID, new_graph)
    writer.wait()

    # Possible scenarios:
    # 1. Rename is done before the query is sent. The name in the graph
    #    context is new_graph, so upon commit, when trying to open the
    #    new_graph key, it will encounter an empty key since new_graph is
    #    not a valid key.
    #    Note: As of https://github.com/RedisGraph/RedisGraph/pull/820 this
    #    may not be valid, since the rename event handler might actually
    #    rename the graph key before the query execution.
    # 2. Rename is done during query execution, so when committing and
    #    comparing the stored graph context name (GRAPH_ID) to the retrieved
    #    graph context name (new_graph), the identifiers are not the same,
    #    since the new_graph value is now stored at the GRAPH_ID value.
    possible_exceptions = [
        "Encountered different graph value when opened key " + GRAPH_ID,
        "Encountered an empty key when opened key " + new_graph,
    ]

    result = writer.get()
    if not isinstance(result, str):
        self.env.assertEquals(1000000, result.nodes_created)
    else:
        self.env.assertContains(result, possible_exceptions)
def test_pp():
    """Run the shared pool checks against a 4-node pathos ``ParallelPool``."""
    from pathos.pools import ParallelPool

    workers = ParallelPool(nodes=4)
    check_sanity(workers)
    check_maps(workers, items, delay)
    check_dill(workers)
    check_ready(workers, maxtries, delay, verbose=False)
def main(argv):
    """Run the full pipeline: exposures, tracks, hazards, impacts, figures.

    ``argv`` is accepted but not used. A single ``Pool`` is shared by the
    track and hazard steps. It is closed and joined in a ``finally`` block
    so worker processes are released even when a step raises.
    """
    print('Input/Output data folder: ', DATA_DIR)

    # set parallel processing
    pool = Pool()
    try:
        # exposures
        expo_dict = calc_exposure(DATA_DIR)

        # tracks
        sel_tr = calc_tracks(DATA_DIR, pool)

        # dictionary of tc per island
        tc_dict = calc_tc(expo_dict, sel_tr, DATA_DIR, pool)

        # damage per isl
        imp_dict = calc_imp(expo_dict, tc_dict, DATA_DIR)

        # damage irma
        get_irma_damage(imp_dict)

        # average annual impact
        aai_isl(imp_dict)

        # compute impact exceedance frequency
        get_efc_isl(imp_dict)

        # FIG03 and FIG04
        fig03_fig04(DATA_DIR, FIG_DIR)  # 5min

        # FIG 06
        fig06(DATA_DIR, FIG_DIR)
    finally:
        pool.close()
        pool.join()
def run_N(self, nb_execution=10, loop=100, grphq=False, pas=10, duration_gif=0.5):
    """Run the k-means algorithm N times and keep the centres that produce
    the smallest error.

    Each run starts from random initial centres, so the results differ from
    one run to the next. The centres of the best run are stored in
    ``self.means`` and ``self.calc_grp()`` is then called.

    The parameters are the same as for ``run``, with the addition of:

    nb_execution : integer, the number of k-means runs to perform.
    duration_gif : passed to ``self.grphq.create_gif`` when ``grphq`` is
        truthy.

    Returns the minimal error.
    """
    f = partial(self.__k_run, loop=loop, grphq=grphq, pas=pas)

    pool = Pool(self.cpu)
    try:
        # uimap yields results in completion order; each result is indexed
        # as (error, means) below.
        memory = list(pool.uimap(f, range(nb_execution)))
    finally:
        # Release the workers even if a run raised.
        pool.close()
        pool.join()
        # NOTE(review): pathos keeps pools cached by configuration; clear()
        # drops the closed one so a later Pool(self.cpu) starts fresh --
        # confirm Pool here is a pathos pool (uimap suggests so).
        pool.clear()

    # Index of the run with the smallest error (first one on ties).
    ind = np.argmin(np.array([m[0] for m in memory]))
    self.means = memory[ind][1]
    self.calc_grp()

    if grphq:
        self.grphq.create_gif(duration=duration_gif)

    return memory[ind][0]
def test_calc_uncertainty_pool_pass(self):
    """Test compute the uncertainty distribution for an impact"""
    ent_iv, _ = make_costben_iv()
    _, _, haz_iv = make_input_vars()
    unc_calc = CalcCostBenefit(haz_iv, ent_iv)
    sample = unc_calc.make_sample(N=2)

    # NOTE(review): the keyword here is `n`, not `nodes` -- confirm the Pool
    # in scope accepts it.
    workers = Pool(n=2)
    try:
        unc_data = unc_calc.uncertainty(sample, pool=workers)
    finally:
        # Shut the pool down and drop it even if the computation raised.
        workers.close()
        workers.join()
        workers.clear()

    self.assertEqual(unc_data.unit, ent_dem().exposures.value_unit)

    self.assertEqual(unc_data.tot_climate_risk_unc_df.size,
                     unc_data.n_samples)
    # 4 = number of measures
    self.assertEqual(unc_data.cost_ben_ratio_unc_df.size,
                     unc_data.n_samples * 4)
    self.assertEqual(unc_data.imp_meas_present_unc_df.size, 0)
    # 4 measures times 5 risks/benefits
    self.assertEqual(unc_data.imp_meas_future_unc_df.size,
                     unc_data.n_samples * 4 * 5)
def test_08_concurrent_write_replace(self):
    """Overwrite the graph key with a plain SET while a heavy write runs."""
    # Test setup - validate that graph exists and possible results are None
    self.graph.query("MATCH (n) RETURN n")

    pool = Pool(nodes=1)

    # Start the heavy write in the background, SET the key, then wait.
    heavy_write_query = """UNWIND(range(0,999999)) as x CREATE(n) RETURN count(n)"""
    writer = pool.apipe(thread_run_query, heavy_write_query, None)
    set_result = self.conn.set(GRAPH_ID, "1")
    writer.wait()

    possible_exceptions = [
        "Encountered a non-graph value type when opened key " + GRAPH_ID,
        "WRONGTYPE Operation against a key holding the wrong kind of value",
    ]

    result = writer.get()
    if not isinstance(result, str):
        # Both the CREATE query and the SET command should have succeeded.
        self.env.assertEquals(1000000, result.result_set[0][0])
        self.env.assertEquals(set_result, True)
    else:
        # If the SET command attempted to execute while the CREATE query
        # was running, an exception should have been issued.
        self.env.assertContains(result, possible_exceptions)

    # Delete the key
    self.conn.delete(GRAPH_ID)
def run_concurrent(env, queries, f):
    """Map ``f`` over ``(graph, query)`` pairs on a ``CLIENT_COUNT``-node
    pool and assert that every call returned a truthy value."""
    workers = Pool(nodes=CLIENT_COUNT)

    # invoke queries
    outcomes = workers.map(f, graphs, queries)

    # validate that every call returned a truthy value
    env.assertTrue(all(outcomes))
def test_04_concurrent_delete(self):
    """Delete the same graph from every client at once."""
    workers = Pool(nodes=CLIENT_COUNT)

    # invoke queries
    deleted_flags = workers.map(delete_graph, graphs)

    # Exactly one thread should have successfully deleted the graph.
    successful_deletes = deleted_flags.count(True)
    self.env.assertEquals(successful_deletes, 1)
def run_concurrent(queries, f):
    """Map ``f`` over ``(query, barrier)`` pairs on a ``CLIENT_COUNT``-node
    pool and return the list of results.

    Every call receives the same manager-backed ``Barrier`` sized to
    ``CLIENT_COUNT``.
    """
    workers = Pool(nodes=CLIENT_COUNT)
    shared_barrier = pathos_multiprocess.Manager().Barrier(CLIENT_COUNT)

    # invoke queries; the same barrier object is repeated once per client
    return workers.map(f, queries, [shared_barrier] * CLIENT_COUNT)
def map_parallel_progress(func, items: List[Any], processes: int = cpu_count()):
    """Apply ``func`` to every item through a ``Pool`` while advancing a
    tqdm progress bar, and return the results in input order.

    The default for ``processes`` is ``cpu_count()`` evaluated once, when
    the function is defined.
    """
    with Pool(processes=processes) as pool, tqdm.tqdm(total=len(items)) as pbar:
        collected = []
        for item_result in pool.imap(func, items):
            # Tick the bar once per finished item, then keep the result.
            pbar.update()
            collected.append(item_result)
    return collected
def test_10_write_starvation(self):
    """Check that a write query is not starved by many read queries."""
    # Make sure a write query does not starve when a large number of read
    # queries are issued alongside a single write query. We do not want the
    # write query to wait too long. Consider the sequence:
    #   R, W, R, R, R, R, R, R, R...
    # If the write is starved, it might have to wait for all queued read
    # queries to complete while holding the Redis global lock, which hurts
    # performance.
    #
    # This test issues a similar sequence of queries and validates that the
    # write query was not delayed too much.
    self.graph = Graph(self.conn, GRAPH_ID)
    pool = Pool(nodes=CLIENT_COUNT)

    read_query = "UNWIND range(0, 10000) AS x WITH x WHERE x = 9999 RETURN 'R', timestamp()"
    write_query = "UNWIND range(0, 1000) AS x WITH x WHERE x = 27 CREATE ({v:1}) RETURN 'W', timestamp()"
    slow_query = "UNWIND range(0, 100000) AS x WITH x WHERE (x % 73) = 0 RETURN count(1)"

    # Issue a number of slow queries asynchronously; this gives us time to
    # fill up the RedisGraph internal threadpool queue. The returned
    # iterator is deliberately not consumed.
    slow_queries = [slow_query] * CLIENT_COUNT * 5
    slow_nulls = [None] * CLIENT_COUNT * 5
    pool.imap(thread_run_query, slow_queries, slow_nulls)

    # Create a long sequence of read queries and inject a single write
    # query close to the beginning of the sequence.
    queries = [read_query] * CLIENT_COUNT * 10
    nulls = [None] * CLIENT_COUNT * 10
    queries[CLIENT_COUNT] = write_query

    # execute queries in parallel
    results = pool.map(thread_run_query, queries, nulls)

    # Count how many queries have a timestamp earlier than the write query's.
    write_ts = results[CLIENT_COUNT]["result_set"][0][1]
    count = sum(
        1 for result in results if result["result_set"][0][1] < write_ts)

    # make sure write query wasn't starved
    self.env.assertLessEqual(count, len(queries) * 0.3)

    # delete the key
    self.conn.delete(GRAPH_ID)
def test00_stress(self):
    """Run ``query_crud`` from ``self.client_count`` clients at once, then
    ping a fresh connection to check it still responds."""
    client_ids = range(self.client_count)
    workers = Pool(nodes=self.client_count)

    # invoke queries: one (graph, id) pair per call
    workers.map(query_crud, graphs, client_ids)

    # make sure we did not crash
    probe = self.env.getConnection()
    probe.ping()
    probe.close()
def main(query=DEFAULT_QUERY, number_of_processes=DEFAULT_PROCESSES, base_dir=DEFAULT_BASE_DIR, file=None):
    """Download documents into ``base_dir`` using a worker pool.

    The documents come from ``docs_from_file(file)`` when ``file`` is
    truthy, otherwise from ``docs_from_query(query)``. They are handed to
    ``download`` together with a ``Pool(number_of_processes)`` and a
    ``DocSaver(base_dir)``.
    """
    saver = DocSaver(base_dir)
    pool = Pool(number_of_processes)
    docs = docs_from_file(file) if file else docs_from_query(query)
    download(docs, pool, saver)
def test_read_raster_pool_pass(self):
    """Test from_raster constructor with pool.

    The pathos pool is released in a ``finally`` block so worker processes
    do not outlive a failing call or assertion.
    """
    from pathos.pools import ProcessPool as Pool
    pool = Pool()
    try:
        haz_fl = Hazard.from_raster([HAZ_DEMO_FL], haz_type='FL', pool=pool)
        haz_fl.check()

        self.assertEqual(haz_fl.intensity.shape, (1, 1032226))
        self.assertEqual(haz_fl.intensity.min(), -9999.0)
        self.assertAlmostEqual(haz_fl.intensity.max(), 4.662774085998535)
    finally:
        pool.close()
        pool.join()
        # pathos caches pool instances; clear() drops the closed one so a
        # later ProcessPool() does not pick it up again.
        pool.clear()
def liste_machines_allumees(machines):
    """Return the ``recherche`` results for the machines that are switched on.

    ``recherche`` is mapped over ``machines`` on a 10-node pool. Entries for
    which it returned ``None`` are dropped and the remaining results are
    returned in input order.
    """
    pool = Pool(nodes=10)
    resultats = pool.map(recherche, machines)

    # Keep only the machines that are switched on (non-None results).
    return [val for val in resultats if val is not None]
def parallel_ilr_inference(nb_jobs=50, **kwargs):
    """Run ``_job`` ``nb_jobs`` times through a ``Pool`` and return the list
    of results.

    Job ``n`` receives a copy of ``kwargs`` with ``'seed'`` set to ``n``.
    The pool size is ``min(nb_jobs, nb_cores)``. Each worker is initialised
    with the parent's tqdm lock via ``tqdm.set_lock``.
    """
    # One independent kwargs dict per job, differing only in the seed.
    kwargs_list = [{**kwargs, 'seed': seed} for seed in range(nb_jobs)]

    with Pool(processes=min(nb_jobs, nb_cores),
              initializer=tqdm.set_lock,
              initargs=(tqdm.get_lock(), )) as p:
        return p.map(_job, kwargs_list)
def test05_index_delete(self):
    """Repeatedly create a graph with an edge index and delete it, from
    many workers at once, each worker on its own graph key."""

    def create_drop_index(graph_id):
        # Each worker opens its own environment and connection.
        env = Env(decodeResponses=True)
        redis_con = env.getConnection()
        graph_key = f"x{graph_id}"

        for _ in range(1, 100):
            # Create data and the index in one pipeline, then drop the graph.
            pipe = redis_con.pipeline()
            pipe.execute_command(
                "GRAPH.QUERY", graph_key,
                "CREATE (a:L), (n:L), (n)-[:T]->(a)")
            pipe.execute_command(
                "GRAPH.QUERY", graph_key,
                "CREATE INDEX FOR ()-[n:T]-() ON (n.p)")
            pipe.execute()
            redis_con.execute_command("GRAPH.DELETE", graph_key)

    workers = Pool(nodes=10)
    workers.map(create_drop_index, range(1, 100))