def __init__(self, model, step_finish, args=None, split=0, buffer=6,
             recombine=None, recombine_args=None, verbose=False,
             boundary_pass=1):
    self.model = self.grid_adjust(model)
    # Add in function to finish steps
    self.step_finish = step_finish
    self.step_args = args
    # Get the number of CPUs unless the user specified one
    if split == 0:
        self.ncpus = cpu_count()
    else:
        self.ncpus = split
    # Create the pool of available processes
    self.pool = ProcessPool(nodes=self.ncpus)
    self.pipes = self.pipe_setup(self.ncpus)
    self.buffer = buffer
    self.multi_models = collections.OrderedDict()
    # Dictionary to track when all steps on each processor complete
    self.sync_status = collections.OrderedDict()
    # Add ability for user to deconflict
    self.boundary_pass = boundary_pass
    if recombine is None:
        self.recombine = self.recombine_default
    else:
        self.recombine = recombine
    self.recombine_args = recombine_args
    self.verbose = verbose
def load_data_from_files_raw(
    data_files: Iterable[Path],
    # TODO: this type signature is awkward; a dedicated interface would be nicer
    parse_callback: Callable[..., Tuple[str, int, Iterable[T_Single]]],  # type: ignore
    parallelize: bool,
    *args,
) -> Dict[str, Tuple[int, Iterable[T_Single]]]:
    tasks_as_args = [[data_file, *args] for data_file in data_files]

    if parallelize:
        pool = ProcessPool()

        # Wrapping the callback works around serialization issues with classes:
        # plain multiprocessing cannot pickle them, hence pathos (dill) is used.
        def cb(x):
            return parse_callback(*x)

        per_file_results = list(pool.map(cb, tasks_as_args))
    else:
        per_file_results = [parse_callback(*task_args) for task_args in tasks_as_args]  # type: ignore

    lang_samples_iter: Dict[str, Tuple[int, List[Iterable[T_Single]]]] = {}
    for (lang, lg, samples_iter) in per_file_results:
        if lang not in lang_samples_iter:
            lang_samples_iter[lang] = (0, [])
        (lg0, iters) = lang_samples_iter[lang]
        iters.append(samples_iter)
        lang_samples_iter[lang] = (lg0 + lg, iters)

    lang_samples: Dict[str, Tuple[int, Iterable[T_Single]]] = {}
    for (lang, (lg, iters)) in lang_samples_iter.items():
        lang_samples[lang] = (lg, itertools.chain(*iters))
    return lang_samples
def download(self, index_path, txt_dir):
    # Save to the text directory
    self.txt_dir = txt_dir
    if not os.path.exists(self.txt_dir):
        os.makedirs(self.txt_dir)

    # Count total URLs to process
    with open(index_path, 'r') as fin:
        num_urls = sum(1 for line in fin)

    def iter_path_generator(index_path):
        with open(index_path, 'r') as fin:
            reader = csv.reader(fin, delimiter=',', quotechar='\"',
                                quoting=csv.QUOTE_ALL)
            for url_idx, row in enumerate(reader, 1):
                form_type, company_name, cik, date_filed, filename = row
                url = os.path.join(SEC_GOV_URL, filename).replace("\\", "/")
                yield (url_idx, url)

    def download_job(obj):
        url_idx, url = obj

        fname = '_'.join(url.split('/')[-2:])
        fname, ext = os.path.splitext(fname)
        htmlname = fname + '.html'
        text_path = os.path.join(self.txt_dir, fname + '.txt')

        if os.path.exists(text_path):
            print("Already exists, skipping {}...".format(url))
            sys.stdout.write("\033[K")
        else:
            print("Total: {}, Downloading & Parsing: {}...".format(num_urls, url_idx))
            sys.stdout.write("\033[K")
            r = requests.get(url)
            try:
                # Parse HTML with Beautiful Soup
                soup = BeautifulSoup(r.content, "html.parser")
                text = soup.get_text("\n")
                # Process text
                text = self._process_text(text)
                text_path = os.path.join(self.txt_dir, fname + '.txt')
                # Write to file
                with codecs.open(text_path, 'w', encoding='utf-8') as fout:
                    fout.write(text)
            except BaseException as e:
                print("{} parsing failed: {}".format(url, e))

    ncpus = cpu_count() if cpu_count() <= 8 else 8
    pool = ProcessPool(ncpus)
    pool.map(download_job, iter_path_generator(index_path))
def run(self):
    """Run experiment."""
    num_drivers = np.arange(1000, 6500, 500)

    # Create a pool of processes
    num_processes = mp.cpu_count()
    self.logger.info("Processes: {}".format(num_processes))
    pool = ProcessPool(nodes=num_processes)

    configs = []
    count = 0
    for drivers in num_drivers:
        self.config['RL_parameters']['experiment'] = self.expt_name + "_" + str(count)
        self.config['RL_parameters']['city_states_filename'] = "city_states.dill"
        self.config['RL_parameters']['num_drivers'] = drivers
        self.config['RL_parameters']['num_strategic_drivers'] = drivers
        configs.append(deepcopy(self.config))
        count += 1

    self.logger.info("Starting expt_02")
    results = pool.amap(self.run_rl_training, configs).get()
    pool.close()
    pool.join()
    pool.clear()
    self.logger.info("Finished expt_02")

    # Export best episode
    self.data_exporter.export_episode(results, self.expt_name + ".dill")
def main(args):
    setup = experiment_setups.parse(args.setup)
    dirname = fileutil.run_dir(args.dest_dir, setup.name,
                               args.max_quantifier_length, args.model_size,
                               args.name)
    file_util = FileUtil(dirname)

    languages = language_loader.load_languages(file_util)

    if args.comp_strat == 'wordcount':
        complexity_measurer = WordCountComplexityMeasurer(args.max_words)
    elif args.comp_strat == 'wordcomplexity':
        complexity_measurer = SumComplexityMeasurer(args.max_words, 1)
    elif args.comp_strat == 'special':
        complexity_measurer = SpecialComplexityMeasurer(args.max_words)
    else:
        raise ValueError('{0} is not a valid complexity strategy.'.format(args.comp_strat))

    with ProcessPool(nodes=args.processes) as pool:
        complexity = pool.map(complexity_measurer, languages)

    file_util.dump_dill(complexity,
                        'complexity_{0}.dill'.format(args.comp_strat))

    print("measure_complexity.py finished.")
def run(self):
    """Run experiment."""
    num_drivers = np.arange(1000, 6500, 500)
    thresholds = np.arange(5, 55, 5)
    thresholds = np.insert(thresholds, 0, 2)
    combinations = list(itertools.product(num_drivers, thresholds))

    # Create a pool of processes
    num_processes = mp.cpu_count()
    self.logger.info("Processes: {}".format(num_processes))
    pool = ProcessPool(nodes=num_processes)

    configs = []
    count = 0
    for comb in combinations:
        self.config['RL_parameters']['experiment'] = self.expt_name + "_" + str(count)
        self.config['RL_parameters']['num_drivers'] = comb[0]
        self.config['RL_parameters']['imbalance_threshold'] = comb[1]
        configs.append(deepcopy(self.config))
        count += 1

    self.logger.info("Starting expt_04")
    results = pool.amap(self.run_rl_training, configs).get()
    pool.close()
    pool.join()
    pool.clear()
    self.logger.info("Finished expt_04")

    # Export best episode
    self.data_exporter.export_episode(results, self.expt_name + ".dill")
def gen_operator_data(self, space, Nx, M, num, representation):
    print("Generating operator data...", flush=True)
    features = space.random(num)

    # Generate outputs
    x = np.linspace(0, self.T, num=self.Nx)[:, None]
    sensor_values = self.random_process(space.eval_u(features, x, self.M))  # exp(b)
    p = ProcessPool(nodes=config.processes)
    s_values = np.array(p.map(self.eval_s, sensor_values))

    # Generate inputs
    sensors = np.linspace(0, self.T, num=Nx)[:, None]
    if representation == "samples":
        sensor_values = self.random_process(space.eval_u(features, sensors, M))
    elif representation == "KL":
        sensor_values = space.eval_KL_bases(features, sensors, M)
        # sensor_values = self.random_process(sensor_values)

    res = [
        make_triple(sensor_values[i], x, s_values[i], self.npoints_output)
        for i in range(num)
    ]
    res = np.vstack(res)
    m = sensor_values.shape[1]
    return [res[:, :m], res[:, m:-1]], res[:, -1:]
def update_qfunction(self):
    if self.TWIN_Q:
        self.i = (self.i + 1) % 2

    if self.theta_q is None:
        # Generate a critic network if none exists
        n = len(self.state_action_basis(self.state, self.action))
        m = 2 if self.TWIN_Q else 1  # generate 2 Q networks when twin Q is enabled
        self.theta_q = np.random.normal(0, 0.3, (n, m))

    self.q_predicted = self.theta_q[:, self.i] @ self.xu_k  # recorded for analysis
    self.q_observed = self.r + self.BETA * self.theta_q[:, self.i] @ self.xu_k1  # recorded for analysis

    if len(self.memory) > self.BATCH_SIZE:
        batch = random.sample(self.memory, self.BATCH_SIZE)
        pool = ProcessPool(nodes=self.config['simulation']['n_nodes'])
        batch_y = np.array(pool.map(self.process_exp, batch))
        batch_phi = np.array([
            self.state_action_basis(exp['state'], exp['action'])
            for exp in batch
        ])
        clf = Ridge(alpha=0.01)
        clf.fit(batch_phi, batch_y)
        temp_theta = clf.coef_
        self.theta_q[:, self.i] = (self.ALPHA_q * temp_theta
                                   + (1 - self.ALPHA_q) * self.theta_q.flatten())
def get_full_content(json_data, num_cores):

    def param_generator(json_data):
        for data in json_data:
            yield data['id'], data['url']

    def tag_and_write_job(param):
        num_x, news_url = param
        logging.info("Processing news #{}: {}".format(num_x, news_url))
        news_content = GetUrlContent(news_url)
        # Store all news content (might be used for word2vec)
        with open('OpinionAnalysis/data/news_corpus.txt', 'a') as fp:
            fp.write('*\n')
            fp.write(news_content)
        return {'id': num_x, 'content': news_content}

    pool = ProcessPool(num_cores)
    new_json_data = pool.map(tag_and_write_job, param_generator(json_data))

    df = pd.DataFrame(json_data)
    df_new = pd.DataFrame(new_json_data)
    ret_df = df.merge(df_new, left_on='id', right_on='id')
    ret_json = json.loads(ret_df.to_json(orient='records'))
    return ret_json
def main():
    processes = args.processes
    setup = experiment_setups.parse(args.setup)
    max_quantifier_length = args.max_quantifier_length
    model_size = args.model_size

    file_util = FileUtil(fileutil.base_dir(args.dest_dir, setup.name,
                                           max_quantifier_length, model_size))

    universe = setup.generate_models(model_size)

    folderName = "{0}/{1}_length={2}_size={3}".format(
        args.dest_dir, setup.name, max_quantifier_length, model_size)
    os.makedirs(folderName, exist_ok=True)

    processpool = ProcessPool(nodes=processes)
    expression_generator = ExpressionGenerator(setup, model_size, universe, processpool)
    (generated_expressions_dict, expressions_by_meaning) = \
        expression_generator.generate_all_expressions(max_quantifier_length)

    print("{0} expressions!".format(len(expressions_by_meaning[bool].values())))

    file_util.dump_dill(expressions_by_meaning[bool], 'generated_expressions.dill')
    file_util.dump_dill(list(expressions_by_meaning[bool].values()), 'expressions.dill')
    file_util.dump_dill(list(expressions_by_meaning[bool].keys()), 'meanings.dill')

    processpool.close()
    processpool.join()

    print('Expression generation finished.')
def main(args):
    setup = experiment_setups.parse(args.setup)
    processes = setup.processes
    max_quantifier_length = setup.max_quantifier_length
    model_size = setup.model_size

    file_util = FileUtil(fileutil.base_dir(setup.dest_dir, setup.name,
                                           max_quantifier_length, model_size))

    folderName = "{0}/{1}_length={2}_size={3}".format(
        setup.dest_dir, setup.name, max_quantifier_length, model_size)

    processpool = ProcessPool(nodes=processes)

    expressions = file_util.load_dill('expressions.dill')

    complexities = processpool.map(
        lambda ex: setup.measure_expression_complexity(ex, max_quantifier_length),
        expressions)

    file_util.dump_dill(complexities, 'expression_complexities.dill')

    processpool.close()
    processpool.join()

    print('Complexity Measuring finished.')
def main(args):
    setup = experiment_setups.parse(args.setup)
    dirname = fileutil.run_dir(args.dest_dir, setup.name,
                               args.max_quantifier_length, args.model_size,
                               args.name)
    file_util = FileUtil(dirname)

    languages = language_loader.load_languages(file_util)

    universe = generator.generate_simplified_models(args.model_size)

    if args.inf_strat == 'exact':
        informativeness_measurer = InformativenessMeasurer(len(universe))
    elif args.inf_strat == 'simmax':
        informativeness_measurer = SimMaxInformativenessMeasurer(universe)
    else:
        raise ValueError('{0} is not a valid informativeness strategy.'.format(args.inf_strat))

    with ProcessPool(nodes=args.processes) as pool:
        informativeness = pool.map(informativeness_measurer, languages)

    file_util.dump_dill(informativeness,
                        'informativeness_{0}.dill'.format(args.inf_strat))

    print("measure_informativeness.py finished.")
def pathos_mp_batch_evaluator(
    func,
    arguments,
    n_cores=N_CORES,
    error_handling="continue",
    unpack_symbol=None,
):
    """Batch evaluator based on pathos.multiprocess.ProcessPool.

    This uses a patched but older version of python multiprocessing that
    replaces pickling with dill and can thus handle decorated functions.

    Args:
        func (Callable): The function that is evaluated.
        arguments (Iterable): Arguments for the functions. Their interpretation
            depends on the unpack argument.
        n_cores (int): Number of cores used to evaluate the function in
            parallel. Values below one are interpreted as one. If only one
            core is used, the batch evaluator disables everything that could
            cause problems, i.e. in that case func and arguments are never
            pickled and func is executed in the main process.
        error_handling (str): Can take the values "raise" (raise the error and
            stop all tasks as soon as one task fails) and "continue" (catch
            exceptions and set the output of failed tasks to the exception
            object without raising it). KeyboardInterrupt and SystemExit are
            always raised.
        unpack_symbol (str or None): Can be "**", "*" or None. If None, func
            just takes one argument. If "*", the elements of arguments are
            positional arguments for func. If "**", the elements of arguments
            are keyword arguments for func.

    Returns:
        list: The function evaluations.

    """
    if not pathos_is_available:
        raise NotImplementedError(
            "To use the pathos_mp_batch_evaluator, install pathos with "
            "conda install -c conda-forge pathos."
        )

    _check_inputs(func, arguments, n_cores, error_handling, unpack_symbol)
    n_cores = int(n_cores)
    reraise = error_handling == "raise"

    @unpack(symbol=unpack_symbol)
    @catch(default="__traceback__", reraise=reraise)
    def internal_func(*args, **kwargs):
        return func(*args, **kwargs)

    if n_cores <= 1:
        res = [internal_func(arg) for arg in arguments]
    else:
        p = ProcessPool(nodes=n_cores)
        try:
            res = p.map(internal_func, arguments)
        except Exception as e:
            p.terminate()
            raise e

    return res
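# A minimal usage sketch for the batch evaluator above (it assumes the
# surrounding module provides N_CORES, _check_inputs and the unpack/catch
# decorators; the helper functions here are hypothetical).
def _square(x):
    return x ** 2

def _add(a, b):
    return a + b

squares = pathos_mp_batch_evaluator(_square, [1, 2, 3], n_cores=2)  # -> [1, 4, 9]
sums = pathos_mp_batch_evaluator(_add, [(1, 2), (3, 4)], n_cores=2,
                                 unpack_symbol="*")                 # -> [3, 7]
# With error_handling="continue", a failing task contributes its exception
# object to the result list instead of aborting the whole batch.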
def parcompute_example():
    dc = PMPExample()
    dc2 = PMPExample()
    dc3 = PMPExample()
    dc4 = PMPExample()

    n_datapoints = 100
    inp_data = range(n_datapoints)

    r1 = dc.threadcompute(inp_data)
    assert len(dc.cache) == n_datapoints

    r2 = dc2.processcompute(inp_data)
    assert len(dc2.cache) == 0
    assert r1 == r2

    r3 = ProcessPool(4).map(dc3.compute, inp_data)
    r4 = ThreadPool(4).map(dc4.compute, inp_data)
    assert r4 == r3 == r2
    assert len(dc3.cache) == 0
    assert len(dc4.cache) == n_datapoints

    log.info("Size of threadpooled class caches: {0}, {1}".format(
        len(dc.cache), len(dc4.cache)))
    log.info("Size of processpooled class caches: {0}, {1}".format(
        len(dc2.cache), len(dc3.cache)))
def count(self, name='e1', meta='count', nodes=None, debug=False, parallel=False):
    """Count the number of points in the neighborhood."""
    self.estimates[name] = {}
    self.estimates[name]['vname'] = None
    self.estimates[name][meta] = meta

    if nodes is None:
        nodes = self.nodes

    def f(i):
        # Update data selected around the target point
        self.search.update([self.x0[i], self.y0[i], self.z0[i]])
        if debug:
            return np.sum(self.search.test), self.search.row_id[self.search.test]
        else:
            return np.sum(self.search.test), None

    # Apply the estimator to each target
    if parallel:
        pool = ProcessPool()
        self.estimates[name]['estimate'] = np.array(pool.map(f, nodes))
    else:
        self.estimates[name]['estimate'] = np.array(list(map(f, nodes)))
def run(self):
    """Run experiment."""
    num_drivers = self.config['RL_parameters']['num_drivers']
    percent_strategic_drivers = np.arange(0, 1.1, 0.1)
    num_strategic_drivers = [int(x * num_drivers) for x in percent_strategic_drivers]

    # Create a pool of processes
    num_processes = mp.cpu_count()
    pool = ProcessPool(nodes=num_processes)

    configs = []
    count = 0
    for drivers in num_strategic_drivers:
        self.config['RL_parameters']['experiment'] = self.expt_name + "_" + str(count)
        self.config['RL_parameters']['num_strategic_drivers'] = drivers
        configs.append(deepcopy(self.config))
        count += 1

    self.logger.info("Starting expt_05")
    results = pool.amap(self.run_rl_training, configs).get()
    pool.close()
    pool.join()
    pool.clear()
    self.logger.info("Finished expt_05")

    # Export best episode
    self.data_exporter.export_episode(results, self.expt_name + ".dill")
def run(self):
    self.logger.info("Starting baselines")
    city_states = self.data_provider.read_city_states()
    baseline_list = self.config['baselines']['baseline_list']

    # Create a pool of processes
    num_processes = mp.cpu_count()
    self.logger.info("Processes: {}".format(num_processes))
    pool = ProcessPool(nodes=num_processes)

    configs = []
    for count in range(10):
        for name in baseline_list:
            configs.append({
                'name': name,
                'count': count,
                'config': self.config,
                'city_states': city_states
            })

    results = pool.amap(self.run_baseline, configs).get()
    pool.close()
    pool.join()
    pool.clear()

    episode_rewards = []
    for result in results:
        episode_rewards += result
    self.data_exporter.export_baseline_data(episode_rewards)
    self.logger.info("Finished baselines")
def land_routine(self):
    while self.took_off:
        pool = ProcessPool()
        r = pool.map(self.client_land, self.agent_names)
        rospy.loginfo('Landing responses:')
        rospy.loginfo(r)
        self.took_off = not all(r)
    return True
def map_reduce_multicore(
        f: tp.Callable[..., ResultType],
        reduction: tp.Callable[[ResultType, ResultType], ResultType],
        initial_value: tp.Optional[ResultType] = None,
        args_list: tp.Optional[tp.Sequence[tp.Sequence]] = None,
        kwargs_list: tp.Optional[tp.Sequence[tp.Dict[str, tp.Any]]] = None,
        number_of_batches: tp.Optional[int] = None,
        multiprocessing_pool_type: MultiprocessingPoolType = MultiprocessingPoolType.default()) \
        -> ResultType:

    if number_of_batches is None:
        if args_list is not None:
            number_of_batches = len(args_list)
        elif kwargs_list is not None:
            number_of_batches = len(kwargs_list)
        else:
            raise ValueError('number_of_batches must be defined if '
                             'both args_list and kwargs_list are empty')

    if args_list is None:
        args_list = number_of_batches * [list()]
    if kwargs_list is None:
        kwargs_list = number_of_batches * [dict()]

    result = initial_value

    if multiprocessing_pool_type == MultiprocessingPoolType.LOKY:
        from concurrent.futures import as_completed
        from loky import get_reusable_executor

        executor = get_reusable_executor(timeout=None, context='loky')

        futures = [
            executor.submit(f, *args, **kwargs)
            for args, kwargs in zip(args_list, kwargs_list)
        ]

        result_from_future = lambda x: x.result()
    elif multiprocessing_pool_type == MultiprocessingPoolType.PATHOS:
        from pathos.pools import ProcessPool
        pool = ProcessPool()

        futures = [
            pool.apipe(f, *args, **kwargs)
            for args, kwargs in zip(args_list, kwargs_list)
        ]

        result_from_future = lambda x: x.get()
    else:
        raise ValueError(
            f'Multiprocessing pool type {multiprocessing_pool_type} not supported')

    for future in futures:
        result = reduce_with_none(result, result_from_future(future), reduction)

    return result
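# A hedged usage sketch for the map-reduce helper above: summing squares across
# batches with the pathos backend. ResultType, MultiprocessingPoolType and
# reduce_with_none are assumed to come from the surrounding module; the batch
# contents are illustrative only.
def _sum_of_squares(values):
    return sum(v * v for v in values)

total = map_reduce_multicore(
    f=_sum_of_squares,
    reduction=lambda a, b: a + b,
    initial_value=0,
    args_list=[[[1, 2, 3]], [[4, 5]], [[6]]],
    multiprocessing_pool_type=MultiprocessingPoolType.PATHOS,
)
# total == (1 + 4 + 9) + (16 + 25) + 36 == 91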
def __init__(self, func, schema, ds, scheduler: str = "single", workers: int = 1, **kwargs):
    """| Transform applies a user defined function to each sample, using the chosen scheduler.

    Parameters
    ----------
    func: function
        user defined function func(x, **kwargs)
    schema: dict of dtypes
        the structure of the final dataset that will be created
    ds: Iterative
        input dataset or a list that can be iterated
    scheduler: str
        choice between "single", "threaded", "processed"
    workers: int
        how many threads or processes to use
    **kwargs:
        additional arguments that will be passed to func as static arguments for all samples
    """
    self._func = func
    self.schema = schema
    self._ds = ds
    self.kwargs = kwargs
    self.workers = workers

    if isinstance(self._ds, Transform):
        self.base_ds = self._ds.base_ds
        self._func = self._ds._func[:]
        self._func.append(func)
        self.kwargs = self._ds.kwargs[:]
        self.kwargs.append(kwargs)
    else:
        self.base_ds = ds
        self._func = [func]
        self.kwargs = [kwargs]

    if scheduler == "threaded" or (scheduler == "single" and workers > 1):
        self.map = ThreadPool(nodes=workers).map
    elif scheduler == "processed":
        self.map = ProcessPool(nodes=workers).map
    elif scheduler == "single":
        self.map = map
    elif scheduler == "ray":
        # Import lazily so ray is only required when this scheduler is requested
        from ray.util.multiprocessing import Pool as RayPool
        self.map = RayPool().map
    else:
        raise Exception(
            f"Scheduler {scheduler} not understood, please use 'single', 'threaded', 'processed'"
        )
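# A hedged usage sketch of the scheduler selection above; `double` and
# `my_schema` are illustrative placeholders, not names from the original code.
def double(sample):
    return {"value": sample * 2}

my_schema = {"value": "int64"}

# "threaded" backs self.map with a pathos ThreadPool (useful for I/O-bound funcs),
# "processed" backs it with a pathos ProcessPool (useful for CPU-bound funcs),
# and "single" falls back to the built-in map.
transform = Transform(double, my_schema, list(range(100)),
                      scheduler="processed", workers=4)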
def pool(self):
    """
    Returns:
        The computing pool used to run the queries.
    """
    if self.servers is None:
        return ProcessPool(nodes=self.workers)
    else:
        return ParallelPool(nodes=self.workers, servers=self.servers)
def mlp():
    if not os.path.exists(tar_dir):
        os.makedirs(tar_dir)
    iterator = glob(os.path.join(src_dir, '*.txt'))
    ncpus = cpu_count() if cpu_count() <= 8 else 8
    pool = ProcessPool(ncpus)
    pool.map(preprocess_job, iterator)
def run(self):
    """Run experiment."""
    days = [
        'Sunday_00_', 'Monday_00_', 'Tuesday_00_', 'Wednesday_00_',
        'Thursday_00_', 'Friday_00_', 'Saturday_00_', 'Sunday_01_',
        'Monday_01_', 'Tuesday_01_', 'Wednesday_01_', 'Thursday_01_',
        'Friday_01_', 'Saturday_01_', 'Sunday_02_', 'Monday_02_',
        'Tuesday_02_', 'Wednesday_02_', 'Thursday_02_', 'Friday_02_',
        'Saturday_02_', 'Sunday_03_', 'Monday_03_', 'Tuesday_03_',
        'Wednesday_03_', 'Thursday_03_', 'Friday_03_', 'Saturday_03_',
        'Sunday_04_', 'Monday_04_', 'Tuesday_04_', 'Wednesday_04_',
        'Thursday_04_', 'Friday_04_', 'Saturday_04_'
    ]

    num_drivers = [4000, 5000, 6000, 7000, 8000, 9000, 10000]
    imbalance_thresholds = [2]

    # Create a pool of processes
    num_processes = mp.cpu_count()
    self.logger.info("Processes: {}".format(num_processes))
    pool = ProcessPool(nodes=num_processes)

    configs = []
    count = 0
    for d in num_drivers:
        for threshold in imbalance_thresholds:
            for day in days:
                self.config['RL_parameters']['num_drivers'] = d
                self.config['RL_parameters']['num_strategic_drivers'] = d
                self.config['RL_parameters']['imbalance_threshold'] = threshold
                self.config['RL_parameters']['experiment'] = self.expt_name + "_" + str(count)
                if os.path.isfile(self.config['app']['DATA_DIR'] +
                                  'city_states/' + day + 'city_states.dill'):
                    self.config['RL_parameters']['city_states_filename'] = day + 'city_states.dill'
                    self.config['RL_parameters']['best_model_filename'] = (
                        day + str(d) + '_' + str(threshold) + '_model.dill')
                    configs.append(deepcopy(self.config))
                    count += 1

    self.logger.info("Starting expt_07")
    results = pool.amap(self.run_rl_training, configs).get()
    pool.close()
    pool.join()
    pool.clear()
    self.logger.info("Finished expt_07")
def opt_ind_params(perf_f, res, games, gids, role, pid, init_params, bounds):

    def min_f(params_test):
        return -perf_f(params_test, res, games, gids, role, pid)

    pool = ProcessPool(nodes=mp.cpu_count())
    opt = scp.optimize.differential_evolution(min_f, bounds, workers=pool.map)

    # Release the worker processes before returning the optimization result
    pool.close()
    pool.join()
    pool.clear()
    return opt
def eval_s_space(self, space, features, x):
    """For a list of functions in `space` represented by `features`
    and a list `x`, compute the corresponding list of outputs.
    """

    def f(feature, xi):
        return self.eval_s(lambda t: space.eval_u_one(feature, t), xi[0])

    p = ProcessPool(nodes=config.processes)
    res = p.map(f, features, x)
    return np.array(list(res))
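# Note on the call above: pathos' ProcessPool.map accepts several iterables and
# zips them, like the built-in map. A small standalone illustration (independent
# of the class above):
from pathos.pools import ProcessPool

_pool = ProcessPool(nodes=2)
print(_pool.map(lambda a, b: a + b, [1, 2, 3], [10, 20, 30]))  # [11, 22, 33]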
def extract(self):

    def text_gen(txt_dir):
        # Yields the name of each .txt file in the directory
        for fname in os.listdir(txt_dir):
            if not fname.endswith('.txt'):
                continue
            yield fname

    def parsing_job(fname):
        print("Parsing: {}".format(fname))
        # Read text
        filepath = os.path.join(self.txt_dir, fname)
        with codecs.open(filepath, 'rb', encoding='utf-8') as fin:
            text = fin.read()

        name, ext = os.path.splitext(fname)
        # Parse the MDA part
        msg = ""
        mda, end = self.parse_mda(text)
        # Parse a second time if the first parse results in an index
        if mda and len(mda.encode('utf-8')) < 1000:
            mda, _ = self.parse_mda(text, start=end)

        if mda:  # Has value
            msg = "SUCCESS"
            mda_path = os.path.join(self.mda_dir, name + '.mda')
            with codecs.open(mda_path, 'w', encoding='utf-8') as fout:
                fout.write(mda)
        else:
            msg = msg if mda else "MDA NOT FOUND"
        print("{},{}".format(name, msg))
        return name + '.txt', msg

    ncpus = cpu_count() if cpu_count() <= 8 else 8
    pool = ProcessPool(ncpus)

    _start = time.time()
    parsing_failed = pool.map(parsing_job, text_gen(self.txt_dir))
    _end = time.time()

    print("MDA parsing time taken: {} seconds.".format(_end - _start))

    # Write failed parsing list
    count = 0
    parsing_log = 'parsing.log'
    with open(parsing_log, 'w') as fout:
        print("Writing parsing results to {}".format(parsing_log))
        for name, msg in parsing_failed:
            fout.write('{},{}\n'.format(name, msg))
            if msg != "SUCCESS":
                count = count + 1

    print("Number of failed texts: {}".format(count))
def runGOanalysis(clusters, processes=10):
    df = pd.DataFrame()
    pool = ProcessPool(nodes=processes)
    newDf = pool.map(_runGOanalysis, clusters)
    pool.close()
    pool.join()
    df = pd.concat([df, newDf], axis=0)
    geneIndex = pd.read_excel(os.path.join(goaResultDir, 'EntrezIndex.xlsx'),
                              index_col=0)
    geneIndex = pd.concat([geneIndex, newEntrez], axis=0)
    geneIndex.to_excel(os.path.join(goaResultDir, 'EntrezIndex.xlsx'))
    return geneIndex
def reachability(model, from_state, goal, max_length=2000, on_start=None,
                 on_reach=None, max_repeat=10000, n_workers=1):
    if isinstance(model, pypint.Model):
        model = pypint_to_model(model)
    if isinstance(goal, list) or isinstance(goal, dict):
        goal = Goal(goal)
    if isinstance(from_state, list):
        if from_state:
            if isinstance(from_state[0], str):
                from_state = dict([(e, 1) for e in from_state])
            elif isinstance(from_state[0], tuple):
                from_state = dict(from_state)
    from_state = complete_state(from_state, model)

    trace = Trace(from_state)
    if on_start is not None:
        next_subgoal = goal.subgoals[0]
        on_start(model, trace, next_subgoal)

    if n_workers == 1:
        for n_repeat in range(max_repeat):
            reached, trace = _reach(copy.copy(model), from_state, goal,
                                    max_length, on_start, on_reach)
            if reached is True:
                return reached, trace
    else:
        pool = ProcessPool(n_workers)
        processes = set([])
        n_repeat = 0
        while n_repeat < max_repeat and n_repeat < n_workers:
            processes.add(
                pool.apipe(_reach, copy.copy(model), from_state, goal,
                           max_length, on_start, on_reach))
            n_repeat += 1
        reached = pypint.Inconc
        while reached is not True and n_repeat < max_repeat:
            for process in processes:
                if process.ready():
                    reached, trace = process.get()
                    processes.remove(process)
                    if reached is True:
                        return reached, trace
                    else:
                        processes.add(
                            pool.apipe(_reach, copy.copy(model), from_state,
                                       goal, max_length, on_start, on_reach))
                        n_repeat += 1
                    break

    return reached, trace
def multi_process(data_path, time_list):
    for time in time_list[:]:
        # print(time)
        base_path = arrow.get(time['ini']).format('YYYYMMDDHH')

        # Process the forecast data
        gefs_fcst = GEFSFcst(data_path['gefs_fcst'], time, base_path)
        p = ProcessPool(7)
        for n in range(21):
            # gefs_fcst.download(n)
            p.apipe(download, gefs_fcst, n)
        p.close()
        p.join()
        p.clear()
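# Hedged note on the loop above: apipe returns asynchronous result handles, so
# if the results (or raised exceptions) of the download jobs were needed
# in-process, the handles could be collected and awaited instead of being
# fired and forgotten, e.g. (names as in the function above):
#     jobs = [p.apipe(download, gefs_fcst, n) for n in range(21)]
#     results = [job.get() for job in jobs]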
def eval_KL_bases(self, ls, sensors, M):
    def helper(l):
        grf = GRF_KL(
            self.T,
            kernel=self.kernel,
            length_scale=l[0],
            num_eig=M,
            N=self.N,
            interp=self.interp,
        )
        return np.ravel(grf.bases(sensors))

    p = ProcessPool(nodes=config.processes)
    return np.vstack(p.map(helper, ls))