Example #1
    def test_set_one_file_pass(self):
        """ Test set function set_from_tracks with one input."""

        pool = Pool()

        tc_track = TCTracks(pool)

        tc_track.read_processed_ibtracs_csv(TEST_TRACK)
        tc_track.calc_random_walk()
        tc_track.equal_timestep()

        tc_haz = TropCyclone(pool)
        tc_haz.set_from_tracks(tc_track, CENTR_TEST_BRB)
        tc_haz.check()

        pool.close()
        pool.join()

        self.assertEqual(tc_haz.tag.haz_type, 'TC')
        self.assertEqual(tc_haz.tag.description, '')
        self.assertEqual(tc_haz.units, 'm/s')
        self.assertEqual(tc_haz.centroids.size, 296)
        self.assertEqual(tc_haz.event_id.size, 10)
        self.assertTrue(isinstance(tc_haz.intensity, sparse.csr.csr_matrix))
        self.assertTrue(isinstance(tc_haz.fraction, sparse.csr.csr_matrix))
        self.assertEqual(tc_haz.intensity.shape, (10, 296))
        self.assertEqual(tc_haz.fraction.shape, (10, 296))
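These CLIMADA tests hand a pathos pool to the library and then tear it down themselves. As a minimal sketch of that lifecycle (assuming only that pathos is installed; pool size is arbitrary):

from pathos.pools import ProcessPool as Pool

def square(x):
    return x * x

pool = Pool(nodes=2)
print(pool.map(square, range(4)))  # [0, 1, 4, 9]
pool.close()  # stop accepting new tasks
pool.join()   # wait for the workers to finish
pool.clear()  # drop the cached pool so a fresh one can be created later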
Example #2
 def run_N(self,
           nb_execution=10,
           loop=100,
           grphq=False,
           pas=10,
           duration_gif=0.5):
     """
     Exécute N itération de l'algorithme des k-means, et conserve les centres qui produisent le moins d'erreur.
     Chaque itération est produite à partir de centres initiaux aléatoires, donc les résultats sont différents à chaque fois.
     Retourne cette erreur minimale.
     Les paramètres d'entrée sont les même que pour run, avec l'ajout de :
         nb_execution : entier désignant le nombre de calcul de k-means à faire.
     """
     f = partial(self.__k_run, loop=loop, grphq=grphq, pas=pas)
     pool = Pool(self.cpu)
     memory = list(pool.uimap(f, range(nb_execution)))
     pool.close()
     pool.join()
     ind = np.argmin(np.array([m[0] for m in memory]))
     means = memory[ind][1]
     self.means = means
     self.calc_grp()
     if grphq: self.grphq.create_gif(duration=duration_gif)
     del pool
     return memory[ind][0]
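A note on the uimap call above: pathos's uimap is an unordered iterative map that yields results as workers finish, which is safe here because run_N selects the minimum-error entry afterwards. A toy sketch of the behavior (function and pool size are arbitrary):

from pathos.pools import ProcessPool as Pool

pool = Pool(2)
print(list(pool.uimap(abs, [-3, 1, -2])))  # e.g. [1, 3, 2]; completion order, not input order
pool.close(); pool.join(); pool.clear()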
Example #3
    def test_calc_uncertainty_pool_pass(self):
        """Test parallel compute the uncertainty distribution for an impact"""

        exp_unc, impf_unc, _ = make_input_vars()
        haz = haz_dem()
        unc_calc = CalcImpact(exp_unc, impf_unc, haz)
        unc_data = unc_calc.make_sample(N=2)

        pool = Pool(nodes=2)
        try:
            unc_data = unc_calc.uncertainty(unc_data,
                                            calc_eai_exp=False,
                                            calc_at_event=False,
                                            pool=pool)
        finally:
            pool.close()
            pool.join()
            pool.clear()

        self.assertEqual(unc_data.unit, exp_dem().value_unit)
        self.assertListEqual(unc_calc.rp, [5, 10, 20, 50, 100, 250])
        self.assertEqual(unc_calc.calc_eai_exp, False)
        self.assertEqual(unc_calc.calc_at_event, False)

        self.assertEqual(unc_data.aai_agg_unc_df.size, unc_data.n_samples)
        self.assertEqual(unc_data.tot_value_unc_df.size, unc_data.n_samples)

        self.assertEqual(unc_data.freq_curve_unc_df.size,
                         unc_data.n_samples * len(unc_calc.rp))
        self.assertTrue(unc_data.eai_exp_unc_df.empty)
        self.assertTrue(unc_data.at_event_unc_df.empty)
    def run(self):
        """
        Run experiment
        """
        num_drivers = np.arange(1000, 6500, 500)
        # Create a pool of processes
        num_processes = mp.cpu_count()
        self.logger.info("Processes: {}".format(num_processes))
        pool = ProcessPool(nodes=num_processes)

        configs = []
        count = 0
        for drivers in num_drivers:
            self.config['RL_parameters'][
                'experiment'] = self.expt_name + "_" + str(count)
            self.config['RL_parameters'][
                'city_states_filename'] = "city_states.dill"
            self.config['RL_parameters']['num_drivers'] = drivers
            self.config['RL_parameters']['num_strategic_drivers'] = drivers
            configs.append(deepcopy(self.config))
            count += 1

        self.logger.info("Starting expt_02")

        results = pool.amap(self.run_rl_training, configs).get()
        pool.close()
        pool.join()
        pool.clear()

        self.logger.info("Finished expt_02")

        # Export best episode
        self.data_exporter.export_episode(results, self.expt_name + ".dill")
    def run(self):
        self.logger.info("Starting baselines")
        city_states = self.data_provider.read_city_states()
        baseline_list = self.config['baselines']['baseline_list']

        # Create a pool of processes
        num_processes = mp.cpu_count()
        self.logger.info("Processes: {}".format(num_processes))
        pool = ProcessPool(nodes=num_processes)

        configs = []
        for count in range(10):
            for name in baseline_list:
                configs.append({
                    'name': name,
                    'count': count,
                    'config': self.config,
                    'city_states': city_states
                })

        results = pool.amap(self.run_baseline, configs).get()
        pool.close()
        pool.join()
        pool.clear()

        episode_rewards = []
        for result in results:
            episode_rewards += result

        self.data_exporter.export_baseline_data(episode_rewards)
        self.logger.info("Finished baselines")
    def run(self):
        """
        Run experiment
        """
        num_drivers = self.config['RL_parameters']['num_drivers']
        percent_strategic_drivers = np.arange(0, 1.1, 0.1)
        num_strategic_drivers = [int(x * num_drivers) for x in percent_strategic_drivers]

        # Create a pool of processes
        num_processes = mp.cpu_count()
        pool = ProcessPool(nodes=num_processes)

        configs = []
        count = 0
        for drivers in num_strategic_drivers:
            self.config['RL_parameters']['experiment'] = self.expt_name + "_" + str(count)
            self.config['RL_parameters']['num_strategic_drivers'] = drivers
            configs.append(deepcopy(self.config))
            count += 1

        self.logger.info("Starting expt_05")

        results = pool.amap(self.run_rl_training, configs).get()
        pool.close()
        pool.join()
        pool.clear()

        self.logger.info("Finished expt_05")

        # Export best episode
        self.data_exporter.export_episode(results, self.expt_name + ".dill")
def main(argv):
    print('Input/Output data folder: ', DATA_DIR)

    # set parallel processing
    pool = Pool()

    # exposures
    expo_dict = calc_exposure(DATA_DIR)

    # tracks
    sel_tr = calc_tracks(DATA_DIR, pool)

    # dictionary of tc per island
    tc_dict = calc_tc(expo_dict, sel_tr, DATA_DIR, pool)

    # damage per isl
    imp_dict = calc_imp(expo_dict, tc_dict, DATA_DIR)

    # damage irma
    get_irma_damage(imp_dict)

    # average annual impact
    aai_isl(imp_dict)

    # compute impact exceedance frequency
    get_efc_isl(imp_dict)

    # FIG03 and FIG04
    fig03_fig04(DATA_DIR, FIG_DIR)  # 5min

    # FIG 06
    fig06(DATA_DIR, FIG_DIR)

    pool.close()
    pool.join()
Example #8
def main():

    processes = args.processes
    setup = experiment_setups.parse(args.setup)
    max_quantifier_length = args.max_quantifier_length
    model_size = args.model_size
    
    file_util = FileUtil(fileutil.base_dir(args.dest_dir, setup.name, max_quantifier_length, model_size))
    
    
    universe = setup.generate_models(model_size)
    
    folderName = "{0}/{1}_length={2}_size={3}".format(args.dest_dir,setup.name,max_quantifier_length,model_size)
    os.makedirs("{0}".format(folderName), exist_ok=True)
    
    processpool = ProcessPool(nodes=processes)
    expression_generator = ExpressionGenerator(setup, model_size, universe, processpool)
    (generated_expressions_dict, expressions_by_meaning) = \
          expression_generator.generate_all_expressions(max_quantifier_length)
    
    print("{0} expressions!".format(len(expressions_by_meaning[bool].values())))
    
    file_util.dump_dill(expressions_by_meaning[bool], 'generated_expressions.dill')
    file_util.dump_dill(list(expressions_by_meaning[bool].values()), 'expressions.dill')
    file_util.dump_dill(list(expressions_by_meaning[bool].keys()), 'meanings.dill')
    
    processpool.close()
    processpool.join()
    
    print('Expression generation finished.')
    def retrieve_fields(self):

        ip, reqlink, reqtype, response, virtualm, keytemp = [], [], [], [], [], []
        bytes, avg_time, count, uniq_vis, total_vis = 0, 0, 0, 0, len(self.keys)
        for key in self.keys:
            keytemp.append(str(key))
        # creating an ordered dictionary containing log data retrieved from column_family
        log = self.cass_conn.multiget(keytemp)
        # starting a pool of 5 worker processes
        pool = Pool()
        pool.ncpus = 5
        for item in log.values():
            # append each field to its respective list
            ip.append(item['host'])
            reqlink.append(item['request_link'])
            reqtype.append(item['request_type'])
            response.append(str(item['response_code']))
            virtualm.append(item['virtual_machine'])
            if item['byte_transfer'] != '-':
                bytes += item['byte_transfer']
            if item['response_time'] != '-':
                avg_time += item['response_time']
                count += 1
        avg_time = avg_time / count if count else 0
        # using the pool of workers to get results
        results = pool.map(self.unique_count, [ip, reqtype, reqlink, response, virtualm])
        pool.close()
        pool.join()
        uniq_vis = len(results[0][0])
        return (self.time, results[0][0], results[0][1], results[1][0],
                results[1][1], results[2][0], results[2][1], results[3][0],
                results[3][1], results[4][0], results[4][1], bytes, avg_time,
                uniq_vis, total_vis)
Example #10
def main(args):

    setup = experiment_setups.parse(args.setup)
    processes = setup.processes
    max_quantifier_length = setup.max_quantifier_length
    model_size = setup.model_size

    file_util = FileUtil(
        fileutil.base_dir(setup.dest_dir, setup.name, max_quantifier_length,
                          model_size))

    folderName = "{0}/{1}_length={2}_size={3}".format(setup.dest_dir,
                                                      setup.name,
                                                      max_quantifier_length,
                                                      model_size)

    processpool = ProcessPool(nodes=processes)

    expressions = file_util.load_dill('expressions.dill')

    complexities = processpool.map(
        lambda ex: setup.measure_expression_complexity(
            ex, max_quantifier_length), expressions)

    file_util.dump_dill(complexities, 'expression_complexities.dill')

    processpool.close()
    processpool.join()

    print('Complexity Measuring finished.')
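The map above ships a lambda to the worker processes; this works because pathos serializes with dill rather than the standard pickle. A minimal illustration (hypothetical values):

from pathos.pools import ProcessPool

pool = ProcessPool(nodes=2)
print(pool.map(lambda x: 2 * x, [1, 2, 3]))  # [2, 4, 6]; stdlib multiprocessing.Pool would fail to pickle the lambda
pool.close(); pool.join(); pool.clear()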
    def run(self):
        """
        Run experiment
        """
        num_drivers = np.arange(1000, 6500, 500)
        thresholds = np.arange(5, 55, 5)
        thresholds = np.insert(thresholds, 0, 2)
        combinations = list(itertools.product(num_drivers, thresholds))

        # Create a pool of processes
        num_processes = mp.cpu_count()
        self.logger.info("Processes: {}".format(num_processes))
        pool = ProcessPool(nodes=num_processes)

        configs = []
        count = 0
        for comb in combinations:
            self.config['RL_parameters'][
                'experiment'] = self.expt_name + "_" + str(count)
            self.config['RL_parameters']['num_drivers'] = comb[0]
            self.config['RL_parameters']['imbalance_threshold'] = comb[1]
            configs.append(deepcopy(self.config))
            count += 1

        self.logger.info("Starting expt_04")

        results = pool.amap(self.run_rl_training, configs).get()
        pool.close()
        pool.join()
        pool.clear()

        self.logger.info("Finished expt_04")

        # Export best episode
        self.data_exporter.export_episode(results, self.expt_name + ".dill")
Example #12
    def test_calc_uncertainty_pool_pass(self):
        """Test compute the uncertainty distribution for an impact"""

        ent_iv, _ = make_costben_iv()
        _, _, haz_iv = make_input_vars()
        unc_calc = CalcCostBenefit(haz_iv, ent_iv)
        unc_data = unc_calc.make_sample(N=2)

        pool = Pool(nodes=2)
        try:
            unc_data = unc_calc.uncertainty(unc_data, pool=pool)
        finally:
            pool.close()
            pool.join()
            pool.clear()

        self.assertEqual(unc_data.unit, ent_dem().exposures.value_unit)

        self.assertEqual(unc_data.tot_climate_risk_unc_df.size,
                         unc_data.n_samples)
        self.assertEqual(
            unc_data.cost_ben_ratio_unc_df.size,
            unc_data.n_samples * 4  #number of measures
        )
        self.assertEqual(unc_data.imp_meas_present_unc_df.size, 0)
        self.assertEqual(
            unc_data.imp_meas_future_unc_df.size,
            unc_data.n_samples * 4 * 5  #All measures 4 and risks/benefits 5
        )
    def run(self):
        """
        Run experiment
        """
        days = [
            'Sunday_00_', 'Monday_00_', 'Tuesday_00_', 'Wednesday_00_',
            'Thursday_00_', 'Friday_00_', 'Saturday_00_', 'Sunday_01_',
            'Monday_01_', 'Tuesday_01_', 'Wednesday_01_', 'Thursday_01_',
            'Friday_01_', 'Saturday_01_', 'Sunday_02_', 'Monday_02_',
            'Tuesday_02_', 'Wednesday_02_', 'Thursday_02_', 'Friday_02_',
            'Saturday_02_', 'Sunday_03_', 'Monday_03_', 'Tuesday_03_',
            'Wednesday_03_', 'Thursday_03_', 'Friday_03_', 'Saturday_03_',
            'Sunday_04_', 'Monday_04_', 'Tuesday_04_', 'Wednesday_04_',
            'Thursday_04_', 'Friday_04_', 'Saturday_04_'
        ]

        num_drivers = [4000, 5000, 6000, 7000, 8000, 9000, 10000]

        imbalance_thresholds = [2]

        # Create a pool of processes
        num_processes = mp.cpu_count()
        self.logger.info("Processes: {}".format(num_processes))
        pool = ProcessPool(nodes=num_processes)

        configs = []
        count = 0

        for d in num_drivers:
            for threshold in imbalance_thresholds:
                for day in days:
                    self.config['RL_parameters']['num_drivers'] = d
                    self.config['RL_parameters']['num_strategic_drivers'] = d

                    self.config['RL_parameters'][
                        'imbalance_threshold'] = threshold
                    self.config['RL_parameters'][
                        'experiment'] = self.expt_name + "_" + str(count)
                    if os.path.isfile(self.config['app']['DATA_DIR'] +
                                      'city_states/' + day +
                                      'city_states.dill'):
                        self.config['RL_parameters'][
                            'city_states_filename'] = day + 'city_states.dill'
                        self.config['RL_parameters']['best_model_filename'] = (
                            day + str(d) + '_' + str(threshold) +
                            '_model.dill')
                        configs.append(deepcopy(self.config))
                        count += 1

        self.logger.info("Starting expt_07")

        results = pool.amap(self.run_rl_training, configs).get()
        pool.close()
        pool.join()
        pool.clear()

        self.logger.info("Finished expt_07")
Example #14
def opt_ind_params(perf_f, res, games, gids, role, pid, init_params, bounds):
    def min_f(params_test):
        return -perf_f(params_test, res, games, gids, role, pid)

    pool = ProcessPool(nodes=mp.cpu_count())
    opt = scp.optimize.differential_evolution(min_f, bounds, workers=pool.map)
    # release the worker pool before returning the optimizer result
    pool.close()
    pool.join()
    pool.clear()
    return opt
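For context, SciPy's differential_evolution accepts any map-like callable for its workers argument (SciPy >= 1.2), so passing pool.map parallelizes the population evaluation; pathos can ship the min_f closure to the workers because it serializes with dill. A sketch on a toy objective (hypothetical bounds and function):

import multiprocessing as mp
import scipy.optimize
from pathos.pools import ProcessPool

def sphere(x):
    return (x ** 2).sum()

pool = ProcessPool(nodes=mp.cpu_count())
opt = scipy.optimize.differential_evolution(sphere, [(-5, 5)] * 3, workers=pool.map)
pool.close(); pool.join(); pool.clear()
print(opt.x)  # near [0, 0, 0]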
def runGOanalysis(clusters, processes=10):
    pool = ProcessPool(nodes=processes)
    # _runGOanalysis returns one DataFrame per cluster
    newDf = pool.map(_runGOanalysis, clusters)
    pool.close()
    pool.join()
    df = pd.concat(newDf, axis=0)
    geneIndex = pd.read_excel(os.path.join(goaResultDir, 'EntrezIndex.xlsx'), index_col=0)
    # newEntrez is assumed to be built elsewhere in the module from the GO results
    geneIndex = pd.concat([geneIndex, newEntrez], axis=0)
    geneIndex.to_excel(os.path.join(goaResultDir, 'EntrezIndex.xlsx'))
    return geneIndex
    def test_read_raster_pool_pass(self):
        """Test from_raster constructor with pool"""
        from pathos.pools import ProcessPool as Pool
        pool = Pool()
        haz_fl = Hazard.from_raster([HAZ_DEMO_FL], haz_type='FL', pool=pool)
        haz_fl.check()

        self.assertEqual(haz_fl.intensity.shape, (1, 1032226))
        self.assertEqual(haz_fl.intensity.min(), -9999.0)
        self.assertAlmostEqual(haz_fl.intensity.max(), 4.662774085998535)
        pool.close()
        pool.join()
Example #17
def multi_process(data_path, time_list):
    for time in time_list[:]:
        # print(time)
        base_path = arrow.get(time['ini']).format('YYYYMMDDHH')
        # -- forecast data processing
        gefs_fcst = GEFSFcst(data_path['gefs_fcst'], time, base_path)
        p = ProcessPool(7)
        for n in range(21):
            # gefs_fcst.download(n)
            p.apipe(download, gefs_fcst, n)
        p.close()
        p.join()
        p.clear()
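Note that apipe submits a single call asynchronously and returns a handle immediately; the download results above are discarded, but they can be collected by keeping the handles and calling .get(). A minimal sketch (toy function):

from pathos.pools import ProcessPool

p = ProcessPool(2)
handles = [p.apipe(pow, n, 2) for n in range(4)]
p.close()
p.join()
print([h.get() for h in handles])  # [0, 1, 4, 9]
p.clear()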
Example #18
def main(args):

    processes = args.processes
    chunk_size = args.chunk_size
    setup = experiment_setups.parse(args.setup)
    max_quantifier_length = args.max_quantifier_length
    model_size = args.model_size

    folderName = "{0}/{1}_length={2}_size={3}".format(args.dest_dir,
                                                      setup.name,
                                                      max_quantifier_length,
                                                      model_size)

    with open('{0}/generated_expressions.dill'.format(folderName),
              'rb') as file:
        expressions_by_meaning = dill.load(file)

    processpool = ProcessPool(nodes=processes)
    merger = PresuppositionMerger(setup, processpool, chunk_size)

    quantifiers_by_meaning = merger.add_presuppositions(expressions_by_meaning)

    processpool.close()
    processpool.join()

    print("Quantifiers generated.")

    generated_quantifiers = list(quantifiers_by_meaning.values())
    generated_meanings = list(quantifiers_by_meaning.keys())

    print("Quantifiers listed.")

    print("Saving files....")

    with open('{0}/generated_meanings.dill'.format(folderName), 'wb') as file:
        dill.dump(generated_meanings, file)

    print("File 1 saved")

    with open('{0}/generated_quantifiers.dill'.format(folderName),
              'wb') as file:
        dill.dump(generated_quantifiers, file)

    print("File 2 saved")

    with open('{0}/generated_quantifiers.txt'.format(folderName), 'w') as f:
        for quantifier in generated_quantifiers:
            f.write("{0}\n".format(quantifier))

    print('Generation finished')
Example #19
    def get_city_states(self):
        """
        Creates city states from start time to end time
        :param:
        :return:
        """
        city_states = []
        start_time = self.start_time
        end_time = self.end_time

        # Create array of time slice values between the start and end time
        business_days = self.config['city_state_creator']['business_days']
        business_hours_start = self.config['city_state_creator'][
            'business_hours_start']
        business_hours_end = self.config['city_state_creator'][
            'business_hours_end']
        index = pd.date_range(start=start_time,
                              end=end_time,
                              freq=str(self.time_unit_duration) + 'min')

        # Filter only the required days and hours
        index = index[index.day_name().isin(business_days)]
        index = index[(index.hour >= business_hours_start)
                      & (index.hour <= business_hours_end)]
        time_slice_starts = index - timedelta(minutes=self.time_slice_duration / 2)
        time_slice_ends = index + timedelta(minutes=self.time_slice_duration / 2)

        # Create arguments dictionary for parallelization
        self.parallel_args = self.create_parallel_args(index,
                                                       time_slice_starts,
                                                       time_slice_ends)

        # Create city states
        manager = Manager()
        city_states = manager.dict()
        N = len(index.values)

        # Create parallel pool
        self.logger.info("Creating parallelization pool")
        pool = ProcessPool(nodes=25)
        pool.map(self.get_city_state, ([city_states, t] for t in range(N)))
        pool.close()
        pool.join()
        pool.clear()
        self.logger.info("Finished creating city states")

        return dict(city_states)
Example #20
 def extract_all(self, merge_file, locate, time):
     extract_path = self.data_path + 'extract_{}/'.format(
         time.format('YYYYMMDDHH'))
     try:
         os.makedirs(extract_path)
     except OSError:
         pass
     finally:
         p = ProcessPool(16)
         for lat, lon in locate[:]:
             p.apipe(self.extract_point, merge_file, extract_path, lat, lon)
         p.close()
         p.join()
         p.clear()
         os.remove(merge_file)
Example #21
class MultiprocessingDistributor(DistributorBaseClass):
    """
    Distributor using a multiprocessing Pool to calculate the jobs in parallel on the local machine.
    """

    def __init__(self, n_workers, disable_progressbar=False, progressbar_title="Feature Extraction",
                 show_warnings=True):
        """
        Creates a new MultiprocessingDistributor instance

        :param n_workers: How many workers should the multiprocessing pool have?
        :type n_workers: int
        :param disable_progressbar: whether to show a progressbar or not.
        :type disable_progressbar: bool
        :param progressbar_title: the title of the progressbar
        :type progressbar_title: basestring
        :param show_warnings: whether to show warnings or not.
        :type show_warnings: bool
        """
        self.pool = Pool(nodes=n_workers)
        self.n_workers = n_workers
        self.disable_progressbar = disable_progressbar
        self.progressbar_title = progressbar_title

    def distribute(self, func, partitioned_chunks, kwargs):
        """
        Calculates the features in parallel by distributing the map command to the process pool

        :param func: the function to send to each worker.
        :type func: callable
        :param partitioned_chunks: The list of data chunks - each element is again
            a list of chunks - and should be processed by one worker.
        :type partitioned_chunks: iterable
        :param kwargs: parameters for the map function
        :type kwargs: dict of string to parameter

        :return: The result of the calculation as a list - each item should be the result of the application of func
            to a single element.
        """
        return self.pool.imap(partial(func, **kwargs), partitioned_chunks)

    def close(self):
        """
        Collects the result from the workers and closes the process pool.
        """
        self.pool.close()
        self.pool.terminate()
        self.pool.join()
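A minimal usage sketch for this distributor (hypothetical chunk function; the kwargs dict is forwarded to every call via functools.partial):

def process_chunk(chunk, multiplier=1):
    return [x * multiplier for x in chunk]

distributor = MultiprocessingDistributor(n_workers=2)
results = list(distributor.distribute(process_chunk, [[1, 2], [3, 4]], {'multiplier': 10}))
distributor.close()
print(results)  # [[10, 20], [30, 40]]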
    def test_est_comp_time_pass(self):

        exp_unc, _, haz_unc = make_imp_uncs()

        unc = Uncertainty({'exp': exp_unc, 'haz': haz_unc})

        unc.make_sample(N=1, sampling_kwargs={'calc_second_order': False})
        est = unc.est_comp_time(0.12345)
        self.assertEqual(est, 1 * (2 + 2) * 0.123)  # N * (D + 2)

        pool = Pool(nodes=4)
        est = unc.est_comp_time(0.12345, pool)
        self.assertEqual(est, 1 * (2 + 2) * 0.123 / 4)  # N * (D + 2)
        pool.close()
        pool.join()
        pool.clear()
Example #23
def test_pathos_pp_callable () :
    """Test parallel processnig with pathos: ParallelPool  
    """
    logger = getLogger("ostap.test_pathos_pp_callable")         
    if not pathos :
        logger.error ( "pathos is not available" )
        return
    
    logger.info ('Test job submission with %s' %  pathos ) 
    
    if DILL_PY3_issue : 
        logger.warning ("test is disabled (DILL/ROOT/PY3 issue)" )
        return

    ## logger.warning ("test is disabled for UNKNOWN REASON")
    ## return

    from pathos.helpers import cpu_count
    ncpus = cpu_count  ()
    
    from pathos.pools import ParallelPool as Pool 

    pool = Pool ( ncpus )   
    logger.info ( "Pool is %s" %  ( type ( pool ).__name__ ) )

    pool.restart ( True ) 


    mh   = MakeHisto() 
    jobs = pool.uimap ( mh.process ,  [  ( i , n )  for  ( i , n ) in enumerate ( inputs ) ] )
    
    result = None 
    for h in progress_bar ( jobs , max_value = len ( inputs ) ) :
        if not result  : result = h
        else           : result.Add ( h )

    pool.close ()
    pool.join  ()
    pool.clear ()
    
    logger.info ( "Histogram is %s" % result.dump ( 80 , 10 )  )
    logger.info ( "Entries  %s/%s" % ( result.GetEntries() , sum ( inputs ) ) ) 
    
    with wait ( 1 ) , use_canvas ( 'test_pathos_pp_callable' ) : 
        result.draw (   ) 

    return result 
Example #24
def opt_pop_params(perf_f, res, games, gids, init_params, bounds):
    def min_f(params):
        n_p1 = len(res[gids[0]]["params"][0])
        n_p2 = len(res[gids[0]]["params"][1])
        ll = 0
        for pid in range(n_p1):
            ll += perf_f(params, res, games, gids, 0, pid)
        for pid in range(n_p2):
            ll += perf_f(params, res, games, gids, 1, pid)
        return -ll

    pool = ProcessPool()
    opt = scp.optimize.differential_evolution(min_f, bounds, workers=pool.imap)
    pool.close()
    pool.join()
    pool.clear()
    # opt = scp.optimize.differential_evolution(min_f, bounds)
    return (opt)
Example #25
    def run(self):
        """
        Run experiment
        """
        days = [
            'Sunday_00_', 'Monday_00_', 'Tuesday_00_', 'Wednesday_00_',
            'Thursday_00_', 'Friday_00_', 'Saturday_00_', 'Sunday_01_',
            'Monday_01_', 'Tuesday_01_', 'Wednesday_01_', 'Thursday_01_',
            'Friday_01_', 'Saturday_01_', 'Sunday_02_', 'Monday_02_',
            'Tuesday_02_', 'Wednesday_02_', 'Thursday_02_', 'Friday_02_',
            'Saturday_02_', 'Sunday_03_', 'Monday_03_', 'Tuesday_03_',
            'Wednesday_03_', 'Thursday_03_', 'Friday_03_', 'Saturday_03_'
        ]

        # Create a pool of processes
        num_processes = mp.cpu_count()
        pool = ProcessPool(nodes=num_processes)

        configs = []
        count = 0

        for day in days:
            self.config['RL_parameters'][
                'experiment'] = self.expt_name + "_" + str(count)
            self.config['RL_parameters'][
                'city_states_filename'] = day + 'city_states.dill'
            self.config['RL_parameters'][
                'best_model_filename'] = day + 'model.dill'
            configs.append(deepcopy(self.config))
            count += 1

        self.logger.info("Starting expt_06")

        results = pool.amap(self.run_rl_training, configs).get()
        pool.close()
        pool.join()
        pool.clear()

        self.logger.info("Finished expt_06")

        # Export best episode
        self.data_exporter.export_episode(results, self.expt_name + ".dill")
Example #26
 def gap_stat(self, err_init, B=10):
     """
     Calcule les statistiques utiles dans le choix du nombre optimal de cluster.
     
     Paramètres d'entrée :
         err_init : matrice de la forme [nb_cluster; erreur de classification; erreur relative de classification; variance intra-classe]
         B : Nombre d'itération de k-means avec échantillons aléatoires.
         
     Paramètres de sortie :
         stat : matrice de la forme [nb_cluster; erreur de classification; erreur relative de classification; variance intra-classe;
                                     logarithme de l'erreur de classification; moyenne des log des erreurs avec echantillons aléatoires;
                                     différence entre les logs des erreurs obtenues et la moyenne des log des erreurs avec echantillons aléatoires;
                                     gap statistical | (différence des échantillons n) - (différence des échantillons n+1 * variance des log des erreurs avec echantillons aléatoires)]
     """
      # pathos caches pools: build the pool once here, then restart() it for each batch in the loop below
      pool = Pool(self.cpu)
      pool.close()
      pool.join()
     mini, maxi = np.min(self.data.data, axis=0), np.max(self.data.data,
                                                         axis=0)
     shape = self.data.data.shape
     log = np.log10(err_init[1])
     mean_alea, var_alea = [], []
     for i in range(1, self.nb_cluster + 1):
         err = []
         f = partial(self.__stat_i, mini=mini, maxi=maxi, shape=shape, i=i)
         pool.restart()
         err = list(pool.map(f, range(B)))
         pool.close()
         pool.join()
         err = np.log10(np.array(err))
         mean_alea.append(np.mean(err))
         var_alea.append(np.std(err))
     mean_alea = np.array(mean_alea)
     var_alea = np.array(var_alea) * np.sqrt(1 + (1 / float(B)))
     gap = mean_alea - log
     diff_gap = gap[0:-1] - (gap[1:] - var_alea[1:])
     diff_gap = np.hstack((diff_gap, 0))
     stat = np.vstack((err_init, log, mean_alea, gap, diff_gap))
     del pool
     return stat
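For reference, gap_stat implements the gap statistic of Tibshirani, Walther and Hastie (2001): Gap(k) = (1/B) * sum_b log(W*_kb) - log(W_k), where W_k is the clustering error on the data and W*_kb the error on the b-th uniform reference sample (log10 in this implementation). The usual selection rule picks the smallest k with Gap(k) >= Gap(k+1) - s_{k+1}, where s is the reference-sample standard deviation scaled by sqrt(1 + 1/B); that is exactly the quantity stored in diff_gap, so the rule amounts to the first k with diff_gap >= 0.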
def main(args):
    processes = args.processes
    setup = experiment_setups.parse(args.setup)
    max_quantifier_length = args.max_quantifier_length
    model_size = args.model_size

    file_util = FileUtil(fileutil.base_dir(args.dest_dir, setup.name, max_quantifier_length, model_size))

    folderName = "{0}/{1}_length={2}_size={3}".format(args.dest_dir, setup.name, max_quantifier_length, model_size)

    processpool = ProcessPool(nodes=processes)

    meanings = file_util.load_dill('meanings.dill')

    costs = processpool.map(measurer.measure_communicative_cost, meanings)

    file_util.dump_dill(costs, 'expression_costs.dill')

    processpool.close()
    processpool.join()

    print('Informativeness Measuring finished.')
Example #28
    def run(self):
        """
        Run experiment
        """
        ind_percent = np.arange(0., 1.1, 0.1)
        reb_percent = np.arange(0., 1.1, 0.1)
        ind_percent[0] = 0.01
        reb_percent[0] = 0.01
        combinations = list(itertools.product(ind_percent, reb_percent))
        num_episodes = self.config['RL_parameters']['num_episodes']

        # Create a pool of processes
        num_processes = mp.cpu_count()
        pool = ProcessPool(nodes=num_processes)

        configs = []
        count = 0
        for comb in combinations:
            self.config['RL_parameters']['experiment'] = self.expt_name + "_" + str(count)
            ind_episodes = int(comb[0] * num_episodes)
            reb_episodes = int(comb[1] * num_episodes)
            if (ind_episodes + reb_episodes) < num_episodes:
                self.config['RL_parameters']['ind_episodes'] = ind_episodes
                self.config['RL_parameters']['reb_episodes'] = reb_episodes
                configs.append(deepcopy(self.config))
                count += 1

        self.logger.info("Starting expt_01")

        results = pool.amap(self.run_rl_training, configs).get()
        pool.close()
        pool.join()
        pool.clear()

        self.logger.info("Finished expt_01")

        # Export best episode
        self.data_exporter.export_episode(results, self.expt_name + ".dill")
    def run(self):
        """
        Run experiment
        """
        num_drivers = np.arange(1000, 6500, 500)
        objectives = ['pickups', 'revenue']
        combinations = list(itertools.product(num_drivers, objectives))

        # Create a pool of processes
        num_processes = mp.cpu_count()
        pool = ProcessPool(nodes=num_processes)

        configs = []
        count = 0
        for comb in combinations:
            self.config['RL_parameters'][
                'experiment'] = self.expt_name + "_" + str(count)
            self.config['RL_parameters'][
                'city_states_filename'] = "city_states.dill"
            self.config['RL_parameters']['num_drivers'] = comb[0]
            self.config['RL_parameters']['num_strategic_drivers'] = comb[0]
            self.config['RL_parameters']['objective'] = comb[1]
            configs.append(deepcopy(self.config))
            count += 1

        self.logger.info("Starting expt_03")

        results = pool.amap(self.run_rl_training, configs).get()
        pool.close()
        pool.join()
        pool.clear()

        self.logger.info("Finished expt_03")

        # Export best episode
        self.data_exporter.export_episode(results, self.expt_name + ".dill")
Example #30
def multiProcessLit(genes, directory, rettype="full", processes=13):
    """
    Runs NLP on literature with Pathos parallel processing pool.

    parameters
    ----------
    genes: list of genes
    directory: directory containing literature for NLP
    processes (optional): int, number of parallel processes (default 13)
    """
    resultDirs = [
        "Sentences",
        "Categories",
        "Functions",
        "Regions",
        "CellTypes",
        "NTs",
        "Physio",
    ]
    for item in resultDirs:
        if not os.path.exists(
                os.path.join(clusterDirectory, item + "_" + cluster + "/")):
            os.mkdir(os.path.join(clusterDirectory,
                                  item + "_" + cluster + "/"))
    pathList = []
    for item in genes:
        if rettype == "full":
            fullpath = os.path.join(directory, "papers/" + item + "/")
        elif rettype == "abstract":
            fullpath = os.path.join(directory, "abstracts/" + item + "/")
        textFile = os.path.join(fullpath, "CombinedFullTexts.txt")
        pathList.append(textFile)
    pool = ProcessPool(nodes=processes)
    pool.map(multiProcessTextMinimal, pathList)
    pool.close()
    pool.join()
Example #31
def run_multiple(func,
                 *args,
                 argsList=None,
                 kwargsList=None,
                 messageFn=None,
                 serial=False):
    """
    Makes use of the pathos.multiprocessing module to run a function simultaneously multiple times.
    This is meant mainly to update multiple plots at the same time, which can significantly accelerate the process of visualizing data.

    All arguments passed to the function, except func, can be passed as specified in the arguments section of this documentation
    or as a list containing multiple instances of them.
    If a list is passed, each time the function needs to be run it will take the next item of the list.
    If a single item is passed instead, this item will be repeated for each function run.
    However, at least one argument must be a list, so that the number of times the function has to be run is defined.

    Arguments
    ----------
    func: function
        The function to be executed. It has to be prepared to receive the arguments as they are provided to it (zipped).

        See the applyMethod() function as an example.
    *args:
        Contains all the arguments that are specific to the individual function that we want to run.
        See each function separately to understand what you need to pass (you may not need this parameter).
    argsList: array-like
        An array of arguments that have to be passed to the executed function.

        Can also be a list of arrays (see this function's description).

        WARNING: Currently it only works properly for a list of arrays. This was not fixed because argsList
        is of little interest for Plot's methods (everything is passed as keyword arguments).
    kwargsList: dict
        A dictionary with the keyword arguments that have to be passed to the executed function.

        If the executed function is a Plot's method, these can be the settings, for example.

        Can also be a list of dicts (see this function's description).

    messageFn: function
        Function that receives the number of tasks and nodes and needs to return a string to display as a description of the progress bar.
    serial: bool
        If set to true, multiprocessing is not used.

        This may seem pointless, but it is useful for switching easily between multiprocessing and serial execution with the same code.

    Returns
    ----------
    results: list
        A list with all the returned values or objects from each function execution.
        This list is ordered, so results[0] is the result of executing the function with argsList[0] and kwargsList[0].  
    """

    #Prepare the arguments to be passed to the initSinglePlot function
    toZip = [*args, argsList, kwargsList]
    for i, arg in enumerate(toZip):
        if not isinstance(arg, (list, tuple, np.ndarray)):
            toZip[i] = itertools.repeat(arg)
        else:
            nTasks = len(arg)

    # Run things in serial mode in case it is demanded
    serial = serial or _MAX_NPROCS == 1 or nTasks == 1
    if serial:
        return [func(argsTuple) for argsTuple in zip(*toZip)]

    #Create a pool with the appropriate number of processes
    pool = Pool(min(nTasks, _MAX_NPROCS))
    #Define the plots array to store all the plots that we initialize
    results = [None] * nTasks

    #Initialize the pool iterator and the progress bar that controls it
    progress = tqdm.tqdm(pool.imap(func, zip(*toZip)), total=nTasks)

    #Set a description for the progress bar
    if not callable(messageFn):
        message = "Updating {} plots in {} processes".format(
            nTasks, pool.nodes)
    else:
        message = messageFn(nTasks, pool.nodes)

    progress.set_description(message)

    #Run the processes and store each result in the plots array
    for i, res in enumerate(progress):
        results[i] = res

    pool.close()
    pool.join()
    pool.clear()

    return results
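A minimal usage sketch (hypothetical worker; each task receives one tuple from zipping *args, argsList and kwargsList):

def worker(task):
    base, args, kwargs = task  # mirrors the (*args, argsList, kwargsList) zip above
    return base + kwargs['offset']

results = run_multiple(worker, 10, kwargsList=[{'offset': i} for i in range(3)])
print(results)  # [10, 11, 12]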