def test_set_one_file_pass(self):
    """ Test set function set_from_tracks with one input."""
    pool = Pool()

    tc_track = TCTracks(pool)
    tc_track.read_processed_ibtracs_csv(TEST_TRACK)
    tc_track.calc_random_walk()
    tc_track.equal_timestep()

    tc_haz = TropCyclone(pool)
    tc_haz.set_from_tracks(tc_track, CENTR_TEST_BRB)
    tc_haz.check()

    pool.close()
    pool.join()

    self.assertEqual(tc_haz.tag.haz_type, 'TC')
    self.assertEqual(tc_haz.tag.description, '')
    self.assertEqual(tc_haz.units, 'm/s')
    self.assertEqual(tc_haz.centroids.size, 296)
    self.assertEqual(tc_haz.event_id.size, 10)
    self.assertTrue(isinstance(tc_haz.intensity, sparse.csr.csr_matrix))
    self.assertTrue(isinstance(tc_haz.fraction, sparse.csr.csr_matrix))
    self.assertEqual(tc_haz.intensity.shape, (10, 296))
    self.assertEqual(tc_haz.fraction.shape, (10, 296))
def run_N(self, nb_execution=10, loop=100, grphq=False, pas=10, duration_gif=0.5):
    """Run N iterations of the k-means algorithm and keep the centers
    that produce the smallest error. Each iteration starts from random
    initial centers, so the results differ from run to run. Returns
    this minimal error.

    The input parameters are the same as for run, with the addition of:
    nb_execution : integer, the number of k-means runs to perform.
    """
    f = partial(self.__k_run, loop=loop, grphq=grphq, pas=pas)
    pool = Pool(self.cpu)
    # uimap yields results in completion order; the order does not matter
    # here because argmin is taken over the whole list afterwards.
    memory = list(pool.uimap(f, range(nb_execution)))
    pool.close()
    pool.join()
    ind = np.argmin(np.array([m[0] for m in memory]))
    self.means = memory[ind][1]
    self.calc_grp()
    if grphq:
        self.grphq.create_gif(duration=duration_gif)
    del pool
    return memory[ind][0]
def test_calc_uncertainty_pool_pass(self):
    """Test parallel computation of the uncertainty distribution for an impact"""
    exp_unc, impf_unc, _ = make_input_vars()
    haz = haz_dem()
    unc_calc = CalcImpact(exp_unc, impf_unc, haz)
    unc_data = unc_calc.make_sample(N=2)

    pool = Pool(nodes=2)
    try:
        unc_data = unc_calc.uncertainty(unc_data, calc_eai_exp=False,
                                        calc_at_event=False, pool=pool)
    finally:
        pool.close()
        pool.join()
        pool.clear()

    self.assertEqual(unc_data.unit, exp_dem().value_unit)
    self.assertListEqual(unc_calc.rp, [5, 10, 20, 50, 100, 250])
    self.assertEqual(unc_calc.calc_eai_exp, False)
    self.assertEqual(unc_calc.calc_at_event, False)

    self.assertEqual(unc_data.aai_agg_unc_df.size, unc_data.n_samples)
    self.assertEqual(unc_data.tot_value_unc_df.size, unc_data.n_samples)
    self.assertEqual(unc_data.freq_curve_unc_df.size,
                     unc_data.n_samples * len(unc_calc.rp))
    self.assertTrue(unc_data.eai_exp_unc_df.empty)
    self.assertTrue(unc_data.at_event_unc_df.empty)
def run(self):
    """ Run experiment """
    num_drivers = np.arange(1000, 6500, 500)

    # Create a pool of processes
    num_processes = mp.cpu_count()
    self.logger.info("Processes: {}".format(num_processes))
    pool = ProcessPool(nodes=num_processes)

    configs = []
    count = 0
    for drivers in num_drivers:
        self.config['RL_parameters']['experiment'] = self.expt_name + "_" + str(count)
        self.config['RL_parameters']['city_states_filename'] = "city_states.dill"
        self.config['RL_parameters']['num_drivers'] = drivers
        self.config['RL_parameters']['num_strategic_drivers'] = drivers
        configs.append(deepcopy(self.config))
        count += 1

    self.logger.info("Starting expt_02")
    results = pool.amap(self.run_rl_training, configs).get()
    pool.close()
    pool.join()
    pool.clear()
    self.logger.info("Finished expt_02")

    # Export best episode
    self.data_exporter.export_episode(results, self.expt_name + ".dill")
def run(self):
    self.logger.info("Starting baselines")
    city_states = self.data_provider.read_city_states()
    baseline_list = self.config['baselines']['baseline_list']

    # Create a pool of processes
    num_processes = mp.cpu_count()
    self.logger.info("Processes: {}".format(num_processes))
    pool = ProcessPool(nodes=num_processes)

    configs = []
    for count in range(10):
        for name in baseline_list:
            configs.append({
                'name': name,
                'count': count,
                'config': self.config,
                'city_states': city_states
            })

    results = pool.amap(self.run_baseline, configs).get()
    pool.close()
    pool.join()
    pool.clear()

    episode_rewards = []
    for result in results:
        episode_rewards += result

    self.data_exporter.export_baseline_data(episode_rewards)
    self.logger.info("Finished baselines")
def run(self):
    """ Run experiment """
    num_drivers = self.config['RL_parameters']['num_drivers']
    percent_strategic_drivers = np.arange(0, 1.1, 0.1)
    num_strategic_drivers = [int(x * num_drivers) for x in percent_strategic_drivers]

    # Create a pool of processes
    num_processes = mp.cpu_count()
    pool = ProcessPool(nodes=num_processes)

    configs = []
    count = 0
    for drivers in num_strategic_drivers:
        self.config['RL_parameters']['experiment'] = self.expt_name + "_" + str(count)
        self.config['RL_parameters']['num_strategic_drivers'] = drivers
        configs.append(deepcopy(self.config))
        count += 1

    self.logger.info("Starting expt_05")
    results = pool.amap(self.run_rl_training, configs).get()
    pool.close()
    pool.join()
    pool.clear()
    self.logger.info("Finished expt_05")

    # Export best episode
    self.data_exporter.export_episode(results, self.expt_name + ".dill")
def main(argv):
    print('Input/Output data folder: ', DATA_DIR)

    # set parallel processing
    pool = Pool()

    # exposures
    expo_dict = calc_exposure(DATA_DIR)

    # tracks
    sel_tr = calc_tracks(DATA_DIR, pool)

    # dictionary of tc per island
    tc_dict = calc_tc(expo_dict, sel_tr, DATA_DIR, pool)

    # damage per island
    imp_dict = calc_imp(expo_dict, tc_dict, DATA_DIR)

    # damage irma
    get_irma_damage(imp_dict)

    # average annual impact
    aai_isl(imp_dict)

    # compute impact exceedance frequency
    get_efc_isl(imp_dict)

    # FIG03 and FIG04 (5 min)
    fig03_fig04(DATA_DIR, FIG_DIR)

    # FIG 06
    fig06(DATA_DIR, FIG_DIR)

    pool.close()
    pool.join()
def main():
    processes = args.processes
    setup = experiment_setups.parse(args.setup)
    max_quantifier_length = args.max_quantifier_length
    model_size = args.model_size

    file_util = FileUtil(fileutil.base_dir(args.dest_dir, setup.name,
                                           max_quantifier_length, model_size))

    universe = setup.generate_models(model_size)

    folderName = "{0}/{1}_length={2}_size={3}".format(
        args.dest_dir, setup.name, max_quantifier_length, model_size)
    os.makedirs("{0}".format(folderName), exist_ok=True)

    processpool = ProcessPool(nodes=processes)
    expression_generator = ExpressionGenerator(setup, model_size, universe, processpool)
    (generated_expressions_dict, expressions_by_meaning) = \
        expression_generator.generate_all_expressions(max_quantifier_length)

    print("{0} expressions!".format(len(expressions_by_meaning[bool].values())))

    file_util.dump_dill(expressions_by_meaning[bool], 'generated_expressions.dill')
    file_util.dump_dill(list(expressions_by_meaning[bool].values()), 'expressions.dill')
    file_util.dump_dill(list(expressions_by_meaning[bool].keys()), 'meanings.dill')

    processpool.close()
    processpool.join()

    print('Expression generation finished.')
def retrieve_fields(self):
    ip, reqlink, reqtype, response, virtualm, keytemp = [], [], [], [], [], []
    bytes, avg_time, count, uniq_vis, total_vis = 0, 0, 0, 0, len(self.keys)
    for key in self.keys:
        keytemp.append(str(key))

    # creating an ordered dictionary containing log data retrieved from column_family
    log = self.cass_conn.multiget(keytemp)

    # starting a pool of 5 worker processes
    pool = Pool()
    pool.ncpus = 5

    for item in log.values():
        # appending lists with their respective values
        ip.append(item['host'])
        reqlink.append(item['request_link'])
        reqtype.append(item['request_type'])
        response.append(str(item['response_code']))
        virtualm.append(item['virtual_machine'])
        if item['byte_transfer'] != '-':
            bytes += item['byte_transfer']
        if item['response_time'] != '-':
            avg_time += item['response_time']
            count += 1

    # guard against division by zero when no response times were recorded
    if count:
        avg_time = avg_time / count

    # using the pool of workers to get results
    results = pool.map(self.unique_count, [ip, reqtype, reqlink, response, virtualm])
    pool.close()
    pool.join()
    uniq_vis = len(results[0][0])

    return (self.time,
            results[0][0], results[0][1],
            results[1][0], results[1][1],
            results[2][0], results[2][1],
            results[3][0], results[3][1],
            results[4][0], results[4][1],
            bytes, avg_time, uniq_vis, total_vis)
def main(args):
    setup = experiment_setups.parse(args.setup)
    processes = setup.processes
    max_quantifier_length = setup.max_quantifier_length
    model_size = setup.model_size

    file_util = FileUtil(fileutil.base_dir(setup.dest_dir, setup.name,
                                           max_quantifier_length, model_size))

    folderName = "{0}/{1}_length={2}_size={3}".format(
        setup.dest_dir, setup.name, max_quantifier_length, model_size)

    processpool = ProcessPool(nodes=processes)

    expressions = file_util.load_dill('expressions.dill')

    complexities = processpool.map(
        lambda ex: setup.measure_expression_complexity(ex, max_quantifier_length),
        expressions)

    file_util.dump_dill(complexities, 'expression_complexities.dill')

    processpool.close()
    processpool.join()

    print('Complexity Measuring finished.')
def run(self):
    """ Run experiment """
    num_drivers = np.arange(1000, 6500, 500)
    thresholds = np.arange(5, 55, 5)
    thresholds = np.insert(thresholds, 0, 2)
    combinations = list(itertools.product(num_drivers, thresholds))

    # Create a pool of processes
    num_processes = mp.cpu_count()
    self.logger.info("Processes: {}".format(num_processes))
    pool = ProcessPool(nodes=num_processes)

    configs = []
    count = 0
    for comb in combinations:
        self.config['RL_parameters']['experiment'] = self.expt_name + "_" + str(count)
        self.config['RL_parameters']['num_drivers'] = comb[0]
        self.config['RL_parameters']['imbalance_threshold'] = comb[1]
        configs.append(deepcopy(self.config))
        count += 1

    self.logger.info("Starting expt_04")
    results = pool.amap(self.run_rl_training, configs).get()
    pool.close()
    pool.join()
    pool.clear()
    self.logger.info("Finished expt_04")

    # Export best episode
    self.data_exporter.export_episode(results, self.expt_name + ".dill")
def test_calc_uncertainty_pool_pass(self):
    """Test parallel computation of the uncertainty distribution for a cost-benefit calculation"""
    ent_iv, _ = make_costben_iv()
    _, _, haz_iv = make_input_vars()
    unc_calc = CalcCostBenefit(haz_iv, ent_iv)
    unc_data = unc_calc.make_sample(N=2)

    # `nodes` is the keyword pathos pools accept for the pool size
    pool = Pool(nodes=2)
    try:
        unc_data = unc_calc.uncertainty(unc_data, pool=pool)
    finally:
        pool.close()
        pool.join()
        pool.clear()

    self.assertEqual(unc_data.unit, ent_dem().exposures.value_unit)
    self.assertEqual(unc_data.tot_climate_risk_unc_df.size, unc_data.n_samples)
    self.assertEqual(
        unc_data.cost_ben_ratio_unc_df.size,
        unc_data.n_samples * 4  # number of measures
    )
    self.assertEqual(unc_data.imp_meas_present_unc_df.size, 0)
    self.assertEqual(
        unc_data.imp_meas_future_unc_df.size,
        unc_data.n_samples * 4 * 5  # all 4 measures and 5 risks/benefits
    )
def run(self):
    """ Run experiment """
    days = [
        'Sunday_00_', 'Monday_00_', 'Tuesday_00_', 'Wednesday_00_',
        'Thursday_00_', 'Friday_00_', 'Saturday_00_',
        'Sunday_01_', 'Monday_01_', 'Tuesday_01_', 'Wednesday_01_',
        'Thursday_01_', 'Friday_01_', 'Saturday_01_',
        'Sunday_02_', 'Monday_02_', 'Tuesday_02_', 'Wednesday_02_',
        'Thursday_02_', 'Friday_02_', 'Saturday_02_',
        'Sunday_03_', 'Monday_03_', 'Tuesday_03_', 'Wednesday_03_',
        'Thursday_03_', 'Friday_03_', 'Saturday_03_',
        'Sunday_04_', 'Monday_04_', 'Tuesday_04_', 'Wednesday_04_',
        'Thursday_04_', 'Friday_04_', 'Saturday_04_'
    ]

    num_drivers = [4000, 5000, 6000, 7000, 8000, 9000, 10000]
    imbalance_thresholds = [2]

    # Create a pool of processes
    num_processes = mp.cpu_count()
    self.logger.info("Processes: {}".format(num_processes))
    pool = ProcessPool(nodes=num_processes)

    configs = []
    count = 0
    for d in num_drivers:
        for threshold in imbalance_thresholds:
            for day in days:
                self.config['RL_parameters']['num_drivers'] = d
                self.config['RL_parameters']['num_strategic_drivers'] = d
                self.config['RL_parameters']['imbalance_threshold'] = threshold
                self.config['RL_parameters']['experiment'] = self.expt_name + "_" + str(count)
                if os.path.isfile(self.config['app']['DATA_DIR'] +
                                  'city_states/' + day + 'city_states.dill'):
                    self.config['RL_parameters']['city_states_filename'] = day + 'city_states.dill'
                    self.config['RL_parameters']['best_model_filename'] = (
                        day + str(d) + '_' + str(threshold) + '_model.dill')
                    configs.append(deepcopy(self.config))
                    count += 1

    self.logger.info("Starting expt_07")
    results = pool.amap(self.run_rl_training, configs).get()
    pool.close()
    pool.join()
    pool.clear()
    self.logger.info("Finished expt_07")
def opt_ind_params(perf_f, res, games, gids, role, pid, init_params, bounds):
    def min_f(params_test):
        return -perf_f(params_test, res, games, gids, role, pid)

    pool = ProcessPool(nodes=mp.cpu_count())
    opt = scp.optimize.differential_evolution(min_f, bounds, workers=pool.map)
    # Clean up the pool before returning; in the original these calls came
    # after the return statement and were therefore unreachable.
    pool.close()
    pool.join()
    pool.clear()
    return opt
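# Note on the pattern above: since SciPy 1.2, differential_evolution accepts any
# map-like callable for `workers`, which is why passing a pathos pool's map
# parallelizes the population evaluation. A self-contained sketch of the same
# idea with a toy sphere objective and made-up bounds (not from the original code):
import numpy as np
from scipy.optimize import differential_evolution
from pathos.pools import ProcessPool

def sphere(x):
    # simple convex objective with its minimum at the origin
    return float(np.sum(x ** 2))

if __name__ == '__main__':
    pool = ProcessPool(nodes=2)
    opt = differential_evolution(sphere, bounds=[(-5, 5), (-5, 5)], workers=pool.map)
    pool.close()
    pool.join()
    pool.clear()
    print(opt.x)  # close to [0, 0]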
def runGOanalysis(clusters, processes=10):
    df = pd.DataFrame()
    pool = ProcessPool(nodes=processes)
    # pool.map returns a list of per-cluster DataFrames
    newDfs = pool.map(_runGOanalysis, clusters)
    pool.close()
    pool.join()
    df = pd.concat([df, *newDfs], axis=0)
    geneIndex = pd.read_excel(os.path.join(goaResultDir, 'EntrezIndex.xlsx'),
                              index_col=0)
    # newEntrez is assumed to be defined elsewhere in the original module
    geneIndex = pd.concat([geneIndex, newEntrez], axis=0)
    geneIndex.to_excel(os.path.join(goaResultDir, 'EntrezIndex.xlsx'))
    return geneIndex
def test_read_raster_pool_pass(self):
    """Test from_raster constructor with pool"""
    from pathos.pools import ProcessPool as Pool
    pool = Pool()
    haz_fl = Hazard.from_raster([HAZ_DEMO_FL], haz_type='FL', pool=pool)
    haz_fl.check()

    self.assertEqual(haz_fl.intensity.shape, (1, 1032226))
    self.assertEqual(haz_fl.intensity.min(), -9999.0)
    self.assertAlmostEqual(haz_fl.intensity.max(), 4.662774085998535)

    pool.close()
    pool.join()
def multi_process(data_path, time_list):
    for time in time_list[:]:
        # print(time)
        base_path = arrow.get(time['ini']).format('YYYYMMDDHH')
        # -- forecast data processing
        gefs_fcst = GEFSFcst(data_path['gefs_fcst'], time, base_path)
        p = ProcessPool(7)
        for n in range(21):
            # gefs_fcst.download(n)
            p.apipe(download, gefs_fcst, n)
        p.close()
        p.join()
        p.clear()
def main(args):
    processes = args.processes
    chunk_size = args.chunk_size
    setup = experiment_setups.parse(args.setup)
    max_quantifier_length = args.max_quantifier_length
    model_size = args.model_size

    folderName = "{0}/{1}_length={2}_size={3}".format(
        args.dest_dir, setup.name, max_quantifier_length, model_size)

    with open('{0}/generated_expressions.dill'.format(folderName), 'rb') as file:
        expressions_by_meaning = dill.load(file)

    processpool = ProcessPool(nodes=processes)
    merger = PresuppositionMerger(setup, processpool, chunk_size)
    quantifiers_by_meaning = merger.add_presuppositions(expressions_by_meaning)
    processpool.close()
    processpool.join()
    print("Quantifiers generated.")

    generated_quantifiers = list(quantifiers_by_meaning.values())
    generated_meanings = list(quantifiers_by_meaning.keys())
    print("Quantifiers listed.")

    print("Saving files....")
    with open('{0}/generated_meanings.dill'.format(folderName), 'wb') as file:
        dill.dump(generated_meanings, file)
    print("File 1 saved")
    with open('{0}/generated_quantifiers.dill'.format(folderName), 'wb') as file:
        dill.dump(generated_quantifiers, file)
    print("File 2 saved")
    with open('{0}/generated_quantifiers.txt'.format(folderName), 'w') as f:
        for quantifier in generated_quantifiers:
            f.write("{0}\n".format(quantifier))

    print('Generation finished')
def get_city_states(self):
    """Creates city states from start time to end time"""
    start_time = self.start_time
    end_time = self.end_time

    # Create array of time slice values between the start and end time
    business_days = self.config['city_state_creator']['business_days']
    business_hours_start = self.config['city_state_creator']['business_hours_start']
    business_hours_end = self.config['city_state_creator']['business_hours_end']
    index = pd.date_range(start=start_time, end=end_time,
                          freq=str(self.time_unit_duration) + 'min')

    # Filter only the required days and hours
    index = index[index.day_name().isin(business_days)]
    index = index[(index.hour >= business_hours_start)
                  & (index.hour <= business_hours_end)]

    time_slice_starts = index - timedelta(minutes=self.time_slice_duration / 2)
    time_slice_ends = index + timedelta(minutes=self.time_slice_duration / 2)

    # Create arguments dictionary for parallelization
    self.parallel_args = self.create_parallel_args(index, time_slice_starts,
                                                   time_slice_ends)

    # Create city states in a manager dict so worker processes can write to it
    manager = Manager()
    city_states = manager.dict()
    N = len(index.values)

    # Create parallel pool
    self.logger.info("Creating parallelization pool")
    pool = ProcessPool(nodes=25)
    pool.map(self.get_city_state, ([city_states, t] for t in range(N)))

    pool.close()
    pool.join()
    pool.clear()

    self.logger.info("Finished creating city states")
    return dict(city_states)
def extract_all(self, merge_file, locate, time):
    extract_path = self.data_path + 'extract_{}/'.format(time.format('YYYYMMDDHH'))
    try:
        os.makedirs(extract_path)
    except OSError:
        pass
    finally:
        p = ProcessPool(16)
        for lat, lon in locate[:]:
            p.apipe(self.extract_point, merge_file, extract_path, lat, lon)
        p.close()
        p.join()
        p.clear()
        os.remove(merge_file)
class MultiprocessingDistributor(DistributorBaseClass):
    """
    Distributor using a multiprocessing Pool to calculate the jobs in parallel
    on the local machine.
    """

    def __init__(self, n_workers, disable_progressbar=False,
                 progressbar_title="Feature Extraction", show_warnings=True):
        """
        Creates a new MultiprocessingDistributor instance

        :param n_workers: How many workers should the multiprocessing pool have?
        :type n_workers: int
        :param disable_progressbar: whether to show a progressbar or not.
        :type disable_progressbar: bool
        :param progressbar_title: the title of the progressbar
        :type progressbar_title: basestring
        :param show_warnings: whether to show warnings or not.
        :type show_warnings: bool
        """
        self.pool = Pool(nodes=n_workers)
        self.n_workers = n_workers
        self.disable_progressbar = disable_progressbar
        self.progressbar_title = progressbar_title

    def distribute(self, func, partitioned_chunks, kwargs):
        """
        Calculates the features in a parallel fashion by distributing the
        map command to the worker pool

        :param func: the function to send to each worker.
        :type func: callable
        :param partitioned_chunks: The list of data chunks - each element is
            again a list of chunks - and should be processed by one worker.
        :type partitioned_chunks: iterable
        :param kwargs: parameters for the map function
        :type kwargs: dict of string to parameter

        :return: The result of the calculation as a list - each item should
            be the result of the application of func to a single element.
        """
        return self.pool.imap(partial(func, **kwargs), partitioned_chunks)

    def close(self):
        """
        Collects the results from the workers and closes the process pool.
        """
        self.pool.close()
        self.pool.terminate()
        self.pool.join()
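# A minimal usage sketch for the distributor above. The `square` function and
# the toy chunks are hypothetical; real callers pass partitioned feature-extraction
# jobs. distribute() returns a lazy imap iterator, so it is drained with list().
if __name__ == '__main__':
    def square(chunk, offset=0):
        return [x * x + offset for x in chunk]

    distributor = MultiprocessingDistributor(n_workers=2)
    chunks = [[1, 2], [3, 4], [5, 6]]
    results = list(distributor.distribute(square, chunks, {'offset': 1}))
    distributor.close()
    print(results)  # [[2, 5], [10, 17], [26, 37]]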
def test_est_comp_time_pass(self):
    exp_unc, _, haz_unc = make_imp_uncs()
    unc = Uncertainty({'exp': exp_unc, 'haz': haz_unc})

    unc.make_sample(N=1, sampling_kwargs={'calc_second_order': False})
    est = unc.est_comp_time(0.12345)
    self.assertEqual(est, 1 * (2 + 2) * 0.123)  # N * (D + 2)

    pool = Pool(nodes=4)
    est = unc.est_comp_time(0.12345, pool)
    self.assertEqual(est, 1 * (2 + 2) * 0.123 / 4)  # N * (D + 2)
    pool.close()
    pool.join()
    pool.clear()
def test_pathos_pp_callable () :
    """Test parallel processing with pathos: ParallelPool
    """
    logger = getLogger("ostap.test_pathos_pp_callable")
    if not pathos :
        logger.error ( "pathos is not available" )
        return

    logger.info ('Test job submission with %s' % pathos )

    if DILL_PY3_issue :
        logger.warning ("test is disabled (DILL/ROOT/PY3 issue)")
        return

    ## logger.warning ("test is disabled for UNKNOWN REASON")
    ## return

    from pathos.helpers import cpu_count
    ncpus = cpu_count ()

    from pathos.pools import ParallelPool as Pool

    pool = Pool ( ncpus )
    logger.info ( "Pool is %s" % ( type ( pool ).__name__ ) )

    pool.restart ( True )

    mh   = MakeHisto()
    jobs = pool.uimap ( mh.process , [ ( i , n ) for ( i , n ) in enumerate ( inputs ) ] )

    result = None
    for h in progress_bar ( jobs , max_value = len ( inputs ) ) :
        if not result : result = h
        else          : result.Add ( h )

    pool.close ()
    pool.join  ()
    pool.clear ()

    logger.info ( "Histogram is %s" % result.dump ( 80 , 10 ) )
    logger.info ( "Entries %s/%s"   % ( result.GetEntries() , sum ( inputs ) ) )

    with wait ( 1 ) , use_canvas ( 'test_pathos_pp_callable' ) :
        result.draw ( )

    return result
def opt_pop_params(perf_f, res, games, gids, init_params, bounds):
    def min_f(params):
        n_p1 = len(res[gids[0]]["params"][0])
        n_p2 = len(res[gids[0]]["params"][1])
        ll = 0
        for pid in range(n_p1):
            ll += perf_f(params, res, games, gids, 0, pid)
        for pid in range(n_p2):
            ll += perf_f(params, res, games, gids, 1, pid)
        return -ll

    pool = ProcessPool()
    opt = scp.optimize.differential_evolution(min_f, bounds, workers=pool.imap)
    pool.close()
    pool.join()
    pool.clear()
    # opt = scp.optimize.differential_evolution(min_f, bounds)
    return opt
def run(self):
    """ Run experiment """
    days = [
        'Sunday_00_', 'Monday_00_', 'Tuesday_00_', 'Wednesday_00_',
        'Thursday_00_', 'Friday_00_', 'Saturday_00_',
        'Sunday_01_', 'Monday_01_', 'Tuesday_01_', 'Wednesday_01_',
        'Thursday_01_', 'Friday_01_', 'Saturday_01_',
        'Sunday_02_', 'Monday_02_', 'Tuesday_02_', 'Wednesday_02_',
        'Thursday_02_', 'Friday_02_', 'Saturday_02_',
        'Sunday_03_', 'Monday_03_', 'Tuesday_03_', 'Wednesday_03_',
        'Thursday_03_', 'Friday_03_', 'Saturday_03_'
    ]

    # Create a pool of processes
    num_processes = mp.cpu_count()
    pool = ProcessPool(nodes=num_processes)

    configs = []
    count = 0
    for day in days:
        self.config['RL_parameters']['experiment'] = self.expt_name + "_" + str(count)
        self.config['RL_parameters']['city_states_filename'] = day + 'city_states.dill'
        self.config['RL_parameters']['best_model_filename'] = day + 'model.dill'
        configs.append(deepcopy(self.config))
        count += 1

    self.logger.info("Starting expt_06")
    results = pool.amap(self.run_rl_training, configs).get()
    pool.close()
    pool.join()
    pool.clear()
    self.logger.info("Finished expt_06")

    # Export best episode
    self.data_exporter.export_episode(results, self.expt_name + ".dill")
def gap_stat(self, err_init, B=10):
    """Compute the statistics used to choose the optimal number of clusters.

    Input parameters:
    err_init : matrix of the form [number of clusters; classification error;
               relative classification error; intra-class variance]
    B : number of k-means runs on random samples.

    Output parameters:
    stat : matrix of the form [number of clusters; classification error;
           relative classification error; intra-class variance;
           log of the classification error;
           mean of the log errors over the random samples;
           difference between the obtained log errors and the mean of the
           log errors over the random samples;
           gap statistic | (difference for n clusters) - (difference for
           n+1 clusters * dispersion of the log errors over the random
           samples)]
    """
    pool = Pool(self.cpu)
    pool.close()
    pool.join()
    mini, maxi = np.min(self.data.data, axis=0), np.max(self.data.data, axis=0)
    shape = self.data.data.shape
    log = np.log10(err_init[1])
    mean_alea, var_alea = [], []
    for i in range(1, self.nb_cluster + 1):
        f = partial(self.__stat_i, mini=mini, maxi=maxi, shape=shape, i=i)
        pool.restart()
        err = list(pool.map(f, range(B)))
        pool.close()
        pool.join()
        err = np.log10(np.array(err))
        mean_alea.append(np.mean(err))
        var_alea.append(np.std(err))
    mean_alea = np.array(mean_alea)
    var_alea = np.array(var_alea) * np.sqrt(1 + (1 / float(B)))
    gap = mean_alea - log
    diff_gap = gap[0:-1] - (gap[1:] - var_alea[1:])
    diff_gap = np.hstack((diff_gap, 0))
    stat = np.vstack((err_init, log, mean_alea, gap, diff_gap))
    del pool
    return stat
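# For reference, gap_stat follows the gap statistic of Tibshirani, Walther and
# Hastie (2001): Gap(k) = (1/B) * sum_b log(W*_kb) - log(W_k), where the W*_kb
# are the within-cluster errors of k-means on B uniform reference samples.
# The recommended k is the smallest one with Gap(k) >= Gap(k+1) - s_{k+1},
# where s is the standard deviation of the reference log errors scaled by
# sqrt(1 + 1/B); `diff_gap` above is exactly Gap(k) - (Gap(k+1) - s_{k+1}).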
def main(args):
    processes = args.processes
    setup = experiment_setups.parse(args.setup)
    max_quantifier_length = args.max_quantifier_length
    model_size = args.model_size

    file_util = FileUtil(fileutil.base_dir(args.dest_dir, setup.name,
                                           max_quantifier_length, model_size))

    folderName = "{0}/{1}_length={2}_size={3}".format(
        args.dest_dir, setup.name, max_quantifier_length, model_size)

    processpool = ProcessPool(nodes=processes)

    meanings = file_util.load_dill('meanings.dill')

    costs = processpool.map(measurer.measure_communicative_cost, meanings)

    file_util.dump_dill(costs, 'expression_costs.dill')

    processpool.close()
    processpool.join()

    print('Informativeness Measuring finished.')
def run(self):
    """ Run experiment """
    ind_percent = np.arange(0., 1.1, 0.1)
    reb_percent = np.arange(0., 1.1, 0.1)
    ind_percent[0] = 0.01
    reb_percent[0] = 0.01
    combinations = list(itertools.product(ind_percent, reb_percent))
    num_episodes = self.config['RL_parameters']['num_episodes']

    # Create a pool of processes
    num_processes = mp.cpu_count()
    pool = ProcessPool(nodes=num_processes)

    configs = []
    count = 0
    for comb in combinations:
        self.config['RL_parameters']['experiment'] = self.expt_name + "_" + str(count)
        ind_episodes = int(comb[0] * num_episodes)
        reb_episodes = int(comb[1] * num_episodes)
        if (ind_episodes + reb_episodes) < num_episodes:
            self.config['RL_parameters']['ind_episodes'] = ind_episodes
            self.config['RL_parameters']['reb_episodes'] = reb_episodes
            configs.append(deepcopy(self.config))
            count += 1

    self.logger.info("Starting expt_01")
    results = pool.amap(self.run_rl_training, configs).get()
    pool.close()
    pool.join()
    pool.clear()
    self.logger.info("Finished expt_01")

    # Export best episode
    self.data_exporter.export_episode(results, self.expt_name + ".dill")
def run(self):
    """ Run experiment """
    num_drivers = np.arange(1000, 6500, 500)
    objectives = ['pickups', 'revenue']
    combinations = list(itertools.product(num_drivers, objectives))

    # Create a pool of processes
    num_processes = mp.cpu_count()
    pool = ProcessPool(nodes=num_processes)

    configs = []
    count = 0
    for comb in combinations:
        self.config['RL_parameters']['experiment'] = self.expt_name + "_" + str(count)
        self.config['RL_parameters']['city_states_filename'] = "city_states.dill"
        self.config['RL_parameters']['num_drivers'] = comb[0]
        self.config['RL_parameters']['num_strategic_drivers'] = comb[0]
        self.config['RL_parameters']['objective'] = comb[1]
        configs.append(deepcopy(self.config))
        count += 1

    self.logger.info("Starting expt_03")
    results = pool.amap(self.run_rl_training, configs).get()
    pool.close()
    pool.join()
    pool.clear()
    self.logger.info("Finished expt_03")

    # Export best episode
    self.data_exporter.export_episode(results, self.expt_name + ".dill")
def multiProcessLit(genes, directory, rettype="full", processes=13):
    """
    Runs NLP on literature with a Pathos parallel processing pool.

    parameters
    ----------
    genes: list of genes
    directory: directory containing literature for NLP
    rettype (optional): str, "full" for full texts or "abstract" for abstracts
    processes (optional): int, number of parallel processes (default 13)
    """
    resultDirs = [
        "Sentences",
        "Categories",
        "Functions",
        "Regions",
        "CellTypes",
        "NTs",
        "Physio",
    ]
    # clusterDirectory and cluster are assumed to be module-level names in the
    # original source
    for item in resultDirs:
        if not os.path.exists(
                os.path.join(clusterDirectory, item + "_" + cluster + "/")):
            os.mkdir(os.path.join(clusterDirectory, item + "_" + cluster + "/"))

    pathList = []
    for item in genes:
        if rettype == "full":
            fullpath = os.path.join(directory, "papers/" + item + "/")
        elif rettype == "abstract":
            fullpath = os.path.join(directory, "abstracts/" + item + "/")
        textFile = os.path.join(fullpath, "CombinedFullTexts.txt")
        pathList.append(textFile)

    pool = ProcessPool(nodes=processes)
    pool.map(multiProcessTextMinimal, pathList)
    pool.close()
    pool.join()
def run_multiple(func, *args, argsList=None, kwargsList=None, messageFn=None, serial=False):
    """
    Makes use of the pathos.multiprocessing module to run a function
    simultaneously multiple times.

    This is meant mainly to update multiple plots at the same time, which can
    accelerate significantly the process of visualizing data.

    All arguments passed to the function, except func, can be passed as
    specified in the arguments section of this documentation or as a list
    containing multiple instances of them. If a list is passed, each time the
    function needs to be run it will take the next item of the list. If a
    single item is passed instead, this item will be repeated for each
    function run. However, at least one argument must be a list, so that the
    number of times the function has to be run is defined.

    Arguments
    ----------
    func: function
        The function to be executed. It has to be prepared to receive the
        arguments as they are provided to it (zipped).

        See the applyMethod() function as an example.
    *args:
        Contains all the arguments that are specific to the individual
        function that we want to run. See each function separately to
        understand what you need to pass (you may not need this parameter).
    argsList: array-like
        An array of arguments that have to be passed to the executed function.

        Can also be a list of arrays (see this function's description).

        WARNING: Currently it only works properly for a list of arrays.
        Didn't fix this because of the lack of interest in argsList on Plot's
        methods (everything is passed as keyword arguments).
    kwargsList: dict
        A dictionary with the keyword arguments that have to be passed to the
        executed function.

        If the executed function is a Plot's method, these can be the
        settings, for example.

        Can also be a list of dicts (see this function's description).
    messageFn: function
        Function that receives the number of tasks and nodes and needs to
        return a string to display as a description of the progress bar.
    serial: bool
        If set to true, multiprocessing is not used.

        This seems to have little sense, but it is useful to switch easily
        between multiprocessing and serial with the same code.

    Returns
    ----------
    results: list
        A list with all the returned values or objects from each function
        execution. This list is ordered, so results[0] is the result of
        executing the function with argsList[0] and kwargsList[0].
    """
    # Prepare the arguments to be passed to the initSinglePlot function
    toZip = [*args, argsList, kwargsList]
    for i, arg in enumerate(toZip):
        if not isinstance(arg, (list, tuple, np.ndarray)):
            toZip[i] = itertools.repeat(arg)
        else:
            nTasks = len(arg)

    # Run things in serial mode in case it is demanded
    serial = serial or _MAX_NPROCS == 1 or nTasks == 1
    if serial:
        return [func(argsTuple) for argsTuple in zip(*toZip)]

    # Create a pool with the appropriate number of processes
    pool = Pool(min(nTasks, _MAX_NPROCS))
    # Define the results array to store what each function execution returns
    results = [None] * nTasks

    # Initialize the pool iterator and the progress bar that controls it
    progress = tqdm.tqdm(pool.imap(func, zip(*toZip)), total=nTasks)

    # Set a description for the progress bar
    if not callable(messageFn):
        message = "Updating {} plots in {} processes".format(nTasks, pool.nodes)
    else:
        message = messageFn(nTasks, pool.nodes)
    progress.set_description(message)

    # Run the processes and store each result in the results array
    for i, res in enumerate(progress):
        results[i] = res

    pool.close()
    pool.join()
    pool.clear()

    return results
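# A minimal, hypothetical usage sketch for run_multiple (the `scale` function
# and its argument values are made up). Because the arguments are zipped, func
# receives a single tuple; with no kwargsList given, its last item is None.
# serial=True exercises the serial fallback without spawning processes.
def scale(args):
    value, factor, _kwargs = args  # _kwargs is None since kwargsList was omitted
    return value * factor

results = run_multiple(scale, [1, 2, 3], argsList=[10, 20, 30], serial=True)
# results == [10, 40, 90]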