Example #1
def plot_cats_qsims(dbs_dir, n_cpus=1):
    '''Plot discharge simulations for every catchment for every
    kfold using its prm_vecs.'''

    cats_dbs = glob(os.path.join(dbs_dir, 'cat_*.hdf5'))

    assert cats_dbs

    n_cats = len(cats_dbs)
    n_cpus = min(n_cats, n_cpus)

    plot_gen = (cat_db for cat_db in cats_dbs)

    if (n_cpus > 1) and (n_cats > 1):
        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        print(list(mp_pool.uimap(plot_cat_qsims, plot_gen)))

        mp_pool.clear()
        mp_pool.close()
        mp_pool.join()

    else:
        for plot_args in plot_gen:
            plot_cat_qsims(plot_args)

    return
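
Examples #1 through #6 repeat the same fan-out recipe: glob the per-catchment HDF5 databases, cap the worker count at the number of catchments, then either drain ProcessPool.uimap or fall back to a serial loop. As a minimal sketch, the shared skeleton could be factored into one helper; the plot_cat callable is a placeholder for any of the per-catchment plotting functions:

import os
from glob import glob

from pathos.multiprocessing import ProcessPool


def plot_cats_generic(dbs_dir, plot_cat, n_cpus=1):
    # Same skeleton as the surrounding examples: glob the databases,
    # cap n_cpus, then fan out across processes or run serially.
    cats_dbs = glob(os.path.join(dbs_dir, 'cat_*.hdf5'))
    assert cats_dbs

    n_cats = len(cats_dbs)
    n_cpus = min(n_cats, n_cpus)

    if (n_cpus > 1) and (n_cats > 1):
        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        # uimap is lazy; list() drains it so every plot gets drawn.
        print(list(mp_pool.uimap(plot_cat, cats_dbs)))

        mp_pool.clear()
        mp_pool.close()
        mp_pool.join()

    else:
        for cat_db in cats_dbs:
            plot_cat(cat_db)
    return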
Example #2
def plot_cats_vars_errors(dbs_dir, err_var_labs, n_cpus):

    cats_dbs = glob(os.path.join(dbs_dir, 'cat_*.hdf5'))

    assert cats_dbs

    n_cats = len(cats_dbs)
    n_cpus = min(n_cats, n_cpus)

    cats_paths_gen = ((cat_db, err_var_labs) for cat_db in cats_dbs)

    if (n_cpus > 1) and (n_cats > 1):
        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        print(list(mp_pool.uimap(plot_cat_vars_errors, cats_paths_gen)))

        mp_pool.clear()
        mp_pool.close()
        mp_pool.join()

    else:
        for cat_paths in cats_paths_gen:
            plot_cat_vars_errors(cat_paths)

    return
Example #3
def plot_cats_prm_vecs(dbs_dir, n_cpus):
    '''Plot the final parameter set from each kfold for every catchment,
    along with the objective function value distribution.
    '''

    cats_dbs = glob(os.path.join(dbs_dir, 'cat_*.hdf5'))

    assert cats_dbs

    n_cats = len(cats_dbs)
    n_cpus = min(n_cats, n_cpus)

    opt_res_gen = (cat_db for cat_db in cats_dbs)

    if (n_cpus > 1) and (n_cats > 1):
        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        print(list(mp_pool.uimap(plot_cat_prm_vecs, opt_res_gen)))

        mp_pool.clear()
        mp_pool.close()
        mp_pool.join()

    else:
        for opt_res in opt_res_gen:
            plot_cat_prm_vecs(opt_res)

    return
Example #4
def plot_cats_hbv_sim(dbs_dir,
                      water_bal_step_size,
                      full_flag=False,
                      wat_bal_flag=False,
                      show_warm_up_steps_flag=False,
                      n_cpus=1):
    '''Plot hbv simulations for every catchment for every kfold.'''

    cats_dbs = glob(os.path.join(dbs_dir, 'cat_*.hdf5'))

    assert cats_dbs

    n_cats = len(cats_dbs)
    n_cpus = min(n_cats, n_cpus)

    const_args = (water_bal_step_size, full_flag, wat_bal_flag,
                  show_warm_up_steps_flag)

    plot_gen = ((cat_db, const_args) for cat_db in cats_dbs)

    if (n_cpus > 1) and (n_cats > 1):
        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        print(list(mp_pool.uimap(plot_cat_hbv_sim, plot_gen)))

        mp_pool.clear()
        mp_pool.close()
        mp_pool.join()

    else:
        for plot_args in plot_gen:
            plot_cat_hbv_sim(plot_args)

    return
Example #5
def plot_cats_kfold_effs(dbs_dir, hgs_db_path, compare_ann_cyc_flag, n_cpus):
    '''Plot the k-fold efficiency results.'''

    cats_dbs = glob(os.path.join(dbs_dir, 'cat_*.hdf5'))

    assert cats_dbs

    n_cats = len(cats_dbs)
    n_cpus = min(n_cats, n_cpus)

    const_args = (compare_ann_cyc_flag, hgs_db_path)
    cats_paths_gen = ((cat_db, const_args) for cat_db in cats_dbs)

    if (n_cpus > 1) and (n_cats > 1):
        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        print(list(mp_pool.uimap(plot_cat_kfold_effs, cats_paths_gen)))

        mp_pool.clear()
        mp_pool.close()
        mp_pool.join()

    else:
        for cat_paths in cats_paths_gen:
            plot_cat_kfold_effs(cat_paths)

    return
Example #6
def plot_cats_best_prms_1d(dbs_dir, n_cpus):
    '''Plot every best kfold parameter set for all catchments.'''

    cats_dbs = glob(os.path.join(dbs_dir, 'cat_*.hdf5'))

    assert cats_dbs
    n_cats = len(cats_dbs)
    n_cpus = min(n_cats, n_cpus)

    cats_paths_gen = (cat_db for cat_db in cats_dbs)

    if (n_cpus > 1) and (n_cats > 1):
        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        print(list(mp_pool.uimap(plot_cat_best_prms_1d, cats_paths_gen)))

        mp_pool.clear()
        mp_pool.close()
        mp_pool.join()

    else:
        for cat_paths in cats_paths_gen:
            plot_cat_best_prms_1d(cat_paths)

    return
Example #7
def main():
    log_file = ('log\\' + GeneralConfig.ENV + '_' +
                GeneralConfig.TOPIC_PREFIX + '_' +
                GeneralConfig.ROWCOUNT_LOG_FILE)
    if os.path.exists(log_file):
        os.remove(log_file)

    pool = ProcessPool(nodes=cpu_count() - 1 or 1)

    # pool.amap(send_aftermarket_part, [AftermarketPartConfig.TOPIC], [AftermarketPartConfig.KAFKA_KEY])
    # pool.amap(send_description, [DescriptionConfig.TOPIC], [DescriptionConfig.KAFKA_KEY])
    # pool.amap(send_engineering_part, [EngineeringPartConfig.TOPIC], [EngineeringPartConfig.KAFKA_KEY])
    # pool.amap(send_engineering_part_function, [EngineeringPartFunctionConfig.TOPIC], [EngineeringPartFunctionConfig.KAFKA_KEY])
    # pool.amap(send_engineering_part_usage, [EngineeringPartUsageConfig.TOPIC], [EngineeringPartUsageConfig.KAFKA_KEY])
    # pool.amap(send_feature, [FeatureConfig.TOPIC], [FeatureConfig.KAFKA_KEY])
    # pool.amap(send_feature_family, [FeatureFamilyConfig.TOPIC], [FeatureFamilyConfig.KAFKA_KEY])
    pool.amap(send_hierarchy, [HierarchyConfig.TOPIC],
              [HierarchyConfig.KAFKA_KEY])
    # pool.amap(send_hierarchy_illustration, [HierarchyIllustrationConfig.TOPIC], [HierarchyIllustrationConfig.KAFKA_KEY])
    # pool.amap(send_hierarchy_usage, [HierarchyUsageConfig.TOPIC], [HierarchyUsageConfig.KAFKA_KEY])
    # pool.amap(send_section_callout, [SectionCalloutConfig.TOPIC], [SectionCalloutConfig.KAFKA_KEY])
    # pool.amap(send_section_part_usage, [SectionPartUsageConfig.TOPIC], [SectionPartUsageConfig.KAFKA_KEY])
    # pool.amap(send_supersession, [SupersessionConfig.TOPIC], [SupersessionConfig.KAFKA_KEY])
    # pool.amap(send_intray, [IntrayConfig.TOPIC], [IntrayConfig.KAFKA_KEY])
    # pool.amap(send_vin, [VinConfig.TOPIC], [VinConfig.KAFKA_KEY])

    pool.close()
    pool.join()
Example #8
    def _prep_anomaly_bjs_mp(anoms_arr, bjs_arr, n_cpus, fig_out_dir):

        assert anoms_arr.shape == bjs_arr.shape

        _idxs = ret_mp_idxs(anoms_arr.shape[1], n_cpus)
        _idxs_list = [_idxs[i:i + 2] for i in range(n_cpus)]

        _anoms_gen = ((anoms_arr[:, _idxs_list[i][0]:_idxs_list[i][1]])
                      for i in range(n_cpus))

        _bjs_gen = ((bjs_arr[:, _idxs_list[i][0]:_idxs_list[i][1]])
                    for i in range(n_cpus))

        mp_pool = ProcessPool(n_cpus)
        mp_pool.restart(True)

        try:
            print(
                list(
                    mp_pool.uimap(Anomaly._plot_anomaly_bjs_cdf, _idxs_list,
                                  _anoms_gen, _bjs_gen,
                                  [fig_out_dir] * n_cpus)))

        except Exception as msg:
            print('Error in _plot_anomaly_bjs_cdf:', msg)

        finally:
            # Always tear the pool down, in the close/join/clear order used
            # by the other examples in this section.
            mp_pool.close()
            mp_pool.join()
            mp_pool.clear()

        return
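
The ret_mp_idxs helper is not shown in this example; from its use it evidently returns n_cpus + 1 increasing column boundaries covering [0, n], which the pairs _idxs[i:i + 2] then slice into near-equal chunks. A hedged numpy sketch of such a helper (the name and exact rounding are assumptions, not the original implementation):

import numpy as np


def ret_mp_idxs(n, n_cpus):
    # n_cpus + 1 split points from 0 to n; consecutive pairs
    # (idxs[i], idxs[i + 1]) bound near-equal column chunks.
    return np.linspace(0, n, n_cpus + 1, dtype=np.int64)


print(ret_mp_idxs(10, 3))  # [ 0  3  6 10]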
Example #9
def main_jagcat():
    log_file = ('log\\' + GeneralConfig.ENV + '_' +
                GeneralConfig.TOPIC_PREFIX + '_' +
                GeneralConfig.ROWCOUNT_LOG_FILE)
    if os.path.exists(log_file):
        os.remove(log_file)

    pool = ProcessPool(nodes=cpu_count() - 1 or 1)

    pool.amap(send_part_meta, [PartMetaConfig.TOPIC],
              [PartMetaConfig.KAFKA_KEY])
    pool.amap(send_intray, [IntrayConfig.TOPIC], [IntrayConfig.KAFKA_KEY])
    pool.amap(send_description, [DescriptionConfig.TOPIC],
              [DescriptionConfig.KAFKA_KEY])
    pool.amap(send_feature, [FeatureConfig.TOPIC], [FeatureConfig.KAFKA_KEY])
    pool.amap(send_feature_family, [FeatureFamilyConfig.TOPIC],
              [FeatureFamilyConfig.KAFKA_KEY])
    pool.amap(send_hierarchy, [HierarchyConfig.TOPIC],
              [HierarchyConfig.KAFKA_KEY])
    pool.amap(send_hierarchy_illustration, [HierarchyIllustrationConfig.TOPIC],
              [HierarchyIllustrationConfig.KAFKA_KEY])
    pool.amap(send_hierarchy_usage, [HierarchyUsageConfig.TOPIC],
              [HierarchyUsageConfig.KAFKA_KEY])
    pool.amap(send_section_callout, [SectionCalloutConfig.TOPIC],
              [SectionCalloutConfig.KAFKA_KEY])
    pool.amap(send_section_part_usage, [SectionPartUsageConfig.TOPIC],
              [SectionPartUsageConfig.KAFKA_KEY])
    pool.amap(send_vin, [VinConfig.TOPIC], [VinConfig.KAFKA_KEY])

    pool.close()
    pool.join()
Example #10
def align_dataset(config_file: str):
    """Aligns an image dataset
    """
    config = AlignConfig(config_file)
    output_dir = os.path.expanduser(config.output_dir)
    os.makedirs(output_dir, exist_ok=True)

    dataset = get_dataset(config.input_dir)

    num_images = sum(len(i) for i in dataset)
    TIMER.max_value = num_images
    TIMER.start()

    num_processes = cast(int, config.num_processes)
    if num_processes == -1:
        num_processes = cast(int, os.cpu_count())
    num_processes = min(num_processes, cast(int, os.cpu_count()))
    if num_processes > 1:
        process_pool = ProcessPool(num_processes)
        process_pool.imap(align_person, zip(dataset, [config] * len(dataset)))
        process_pool.close()
        process_pool.join()
    else:
        for person in dataset:
            align_person((person, config))

    transform_to_lfw_format(output_dir, num_processes)

    TIMER.finish()
    print('Total number of images: %d' % int(NUM_IMAGES_TOTAL.value))
    print('Number of faces found and aligned: %d' % int(NUM_SUCESSFUL.value))
    print('Number of unsuccessful: %d' % int(NUM_UNSECESSFUL.value))
Example #11
    def normalize(self) -> None:
        """ Normalize ensemble """
        if not self.regenerate:
            try:
                self.load()
                return
            except FileNotFoundError:
                pass

        assert xor((self.normalizer_nld is not None
                    and self.normalizer_gsf is not None),
                   self.normalizer_simultan is not None), \
            "Either 'normalizer_nld' and 'normalizer_gsf' must be set, or " \
            "normalizer_simultan"

        gsfs = self.extractor.gsf
        nlds = self.extractor.nld

        self.LOG.info(f"Start normalization with {self.nprocesses} cpus")
        pool = ProcessPool(nodes=self.nprocesses)
        N = len(nlds)
        iterator = pool.imap(self.step, range(N), nlds, gsfs)
        self.res = list(tqdm(iterator, total=N))
        pool.close()
        pool.join()
        pool.clear()

        self.save()
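
The imap-plus-tqdm idiom above gives a live progress bar as results arrive lazily from the pool. A self-contained sketch of the same pattern, with a hypothetical stand-in for the step method:

from pathos.multiprocessing import ProcessPool
from tqdm import tqdm


def step(i):
    # stand-in worker; the real code maps self.step over nlds and gsfs
    return i * i


pool = ProcessPool(nodes=2)
results = list(tqdm(pool.imap(step, range(100)), total=100))
pool.close()
pool.join()
pool.clear()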
Example #12
def make_cache():
    from grid2viz.src.manager import (
        scenarios,
        agents,
        make_episode_without_decorate,
        n_cores,
        retrieve_episode_from_disk,
        save_in_ram_cache,
        cache_dir,
    )

    from pathos.multiprocessing import ProcessPool

    if not os.path.exists(cache_dir):
        print(
            "Starting Multiprocessing for reading the best agent of each scenario"
        )

    # TODO: not all agents have necessarily been run on exactly the same
    # scenarios. Avoid an error if an agent was not run on a given scenario.
    agent_scenario_list = [(agent, scenario) for agent in agents
                           for scenario in scenarios]

    agents_data = []
    if n_cores == 1:  # no multiprocessing; useful for debugging if needed
        for agent, scenario in agent_scenario_list:
            agents_data.append(
                make_episode_without_decorate(agent, scenario))
    else:
        pool = ProcessPool(n_cores)
        agents_data = list(
            pool.imap(
                make_episode_without_decorate,
                [agent_scenario[0]
                 for agent_scenario in agent_scenario_list],  # agents
                [agent_scenario[1] for agent_scenario in agent_scenario_list],
            )
        )  # scenarios #we go over all agents and all scenarios for each agent
        pool.close()
        print("Multiprocessing done")

    #####
    # saving data on disk
    for i, agent_episode in enumerate(agents_data):
        print(i)
        if agent_episode is not None:
            episode_data = retrieve_episode_from_disk(
                agent_episode.episode_name, agent_episode.agent)

            agent_episode.decorate(episode_data)
            save_in_ram_cache(agent_episode.episode_name, agent_episode.agent,
                              agent_episode)
Example #13
def correction_factor(p, number_of_runs, method, X, y, n_jobs=None):

    # Setup parallel job
    if n_jobs == -1:
        n_jobs = cpu_count()
    elif n_jobs is None:
        n_jobs = 1

    pool = Pool(n_jobs, maxtasksperchild=1000)

    def run(_):

        # Artificially falsify
        y_f = falsify(y, p, random_state=_)

        # Correct labels
        y_corrected = method.fit_transform(X, y_f)

        N = X.shape[0]
        return ((y == y_corrected).sum() - (1 - p) * N) / (p * N)

    factor = np.array(pool.map(run, range(number_of_runs)))

    # Close the pool again
    pool.close()
    pool.join()
    pool.clear()

    return np.mean(factor), np.std(factor)
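
The returned factor rescales raw agreement between y and the corrected labels: if the corrector leaves the p*N flipped labels untouched, agreement is about (1 - p)*N and the factor is ~0; if it repairs every label, the factor is exactly 1. A quick numpy check of those two endpoints (synthetic labels, no pool needed):

import numpy as np

N, p = 1000, 0.3
y = np.zeros(N, dtype=int)

y_perfect = y.copy()  # corrector recovered every label
print(((y == y_perfect).sum() - (1 - p) * N) / (p * N))  # 1.0

y_none = y.copy()
y_none[:int(p * N)] = 1  # flipped labels left uncorrected
print(((y == y_none).sum() - (1 - p) * N) / (p * N))  # 0.0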
Example #14
def pare_multi_process(urls_and_people):
    print("[INFO] Paring and downloading all image urls with {} processes".
          format(NUM_PROCESSES))
    urls, person = zip(*urls_and_people)
    pare_pool = ProcessPool(NUM_PROCESSES)
    pare_pool.imap(safe_pare_matches_and_download, urls, person)
    pare_pool.close()
    pare_pool.join()
    print("[INFO] Done paring and downlaoding all image urls")
Example #15
    def _prepare_eigen(self):
        """
        calculate eigen values and vectors for all kpts and save.
        Note that the convention 2 is used here, where the 
        phase factor is e^(ik.R), not e^(ik.(R+rj-ri))
        """
        nkpts = len(self.kpts)
        self.evals = np.zeros((nkpts, self.nbasis), dtype=float)
        self.nkpts = nkpts
        self.H0 = np.zeros((self.nbasis, self.nbasis), dtype=complex)
        self.evecs = np.zeros((nkpts, self.nbasis, self.nbasis), dtype=complex)
        H = np.zeros((nkpts, self.nbasis, self.nbasis), dtype=complex)
        if not self.is_orthogonal:
            self.S = np.zeros((nkpts, self.nbasis, self.nbasis), dtype=complex)
        else:
            self.S = None
        if self.nproc == 1:
            results = map(self.tbmodel.HSE_k, self.kpts)
        else:
            executor = ProcessPool(nodes=self.nproc)
            results = executor.map(self.tbmodel.HSE_k, self.kpts,
                                   [2] * len(self.kpts))
            executor.close()
            executor.join()
            executor.clear()

        for ik, result in enumerate(results):
            if self.is_orthogonal:
                H[ik], _, self.evals[ik], self.evecs[ik] = result
            else:
                H[ik], self.S[ik], self.evals[ik], self.evecs[ik] = result
            self.H0 += H[ik] / self.nkpts

        self.evals, self.evecs = self._reduce_eigens(self.evals,
                                                     self.evecs,
                                                     emin=self.efermi - 10.0,
                                                     emax=self.efermi + 10.1)
        if self._use_cache:
            evecs = self.evecs
            self.evecs_shape = self.evecs.shape
            self.evecs = np.memmap(os.path.join(self.cache_path, 'evecs.dat'),
                                   mode='w+',
                                   shape=self.evecs.shape,
                                   dtype=complex)
            self.evecs[:, :, :] = evecs[:, :, :]
            if self.is_orthogonal:
                self.S = None
            else:
                S = self.S
                self.S = np.memmap(os.path.join(self.cache_path, 'S.dat'),
                                   mode='w+',
                                   shape=(nkpts, self.nbasis, self.nbasis),
                                   dtype=complex)
                self.S[:] = S[:]
            del self.evecs
            if not self.is_orthogonal:
                del self.S
Example #16
def parallelize_simulations(simulation_execs: List[Callable],
                            var_dict_list: List[VarDictType],
                            states_lists: List[StatesListsType],
                            configs_structs: List[ConfigsType],
                            env_processes_list: List[EnvProcessesType],
                            Ts: List[range], SimIDs, Ns: List[int],
                            ExpIDs: List[int], SubsetIDs, SubsetWindows,
                            configured_n):

    print('Execution Mode: parallelized')
    params = list(
        zip(simulation_execs, var_dict_list, states_lists, configs_structs,
            env_processes_list, Ts, SimIDs, Ns, SubsetIDs, SubsetWindows))

    len_configs_structs = len(configs_structs)

    unique_runs = Counter(SimIDs)
    sim_count = max(unique_runs.values())
    highest_divisor = int(len_configs_structs / sim_count)

    new_configs_structs, new_params = [], []
    for count in range(sim_count):
        new_params.append(params[count * highest_divisor:(count + 1) *
                                 highest_divisor])
        new_configs_structs.append(
            configs_structs[count * highest_divisor:(count + 1) *
                            highest_divisor])

    def threaded_executor(params):
        tp = TPool()
        if len_configs_structs > 1:
            results = tp.map(
                lambda t: t[0](t[1], t[2], t[3], t[4], t[5], t[6], t[7], t[8],
                               t[9], configured_n), params)
        else:
            t = params[0]
            results = t[0](t[1], t[2], t[3], t[4], t[5], t[6], t[7], t[8],
                           t[9], configured_n)

        tp.close()
        return results

    pp = PPool()
    results = flatten(
        list(pp.map(lambda params: threaded_executor(params), new_params)))
    pp.close()
    pp.join()
    pp.clear()
    # pp.restart()

    return results
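
The chunking above partitions the flat params list into sim_count contiguous slices of highest_divisor entries each, one slice per simulation. A toy illustration of the same slice arithmetic (hypothetical values):

params = ['a', 'b', 'c', 'd', 'e', 'f']
sim_count = 2
highest_divisor = len(params) // sim_count  # 3

chunks = [params[c * highest_divisor:(c + 1) * highest_divisor]
          for c in range(sim_count)]
print(chunks)  # [['a', 'b', 'c'], ['d', 'e', 'f']]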
Example #17
def accuracy(p, number_of_runs, method, X, y, clf=None, n_jobs=None):
    '''
    Returns: (array{mean_acc_corrected, mean_acc_false},
              array{stddev_acc_corrected, stddev_acc_false})
    '''

    # Setup parallel job
    if n_jobs == -1:
        n_jobs = cpu_count()
    elif n_jobs is None:
        n_jobs = 1

    pool = Pool(n_jobs, maxtasksperchild=1000)

    if clf is None:
        clf = NNC(n_neighbors=1)

    def run(_):

        # Artificially falsify
        y_f = falsify(y, p, random_state=_)

        # Correct labels
        y_corrected = method.fit_transform(X, y_f)

        # Set up 10-fold-Cross validation
        train_corr, test_corr = kfold(10, X, y, y_corrected)
        train_f, test_f = kfold(10, X, y, y_f)

        score = np.zeros((2, 10))

        # Calc scores
        for fold in range(10):

            train_X, train_y = train_corr[fold]
            test_X, test_y = test_corr[fold]
            clf.fit(train_X, train_y)
            score[0, fold] = clf.score(test_X, test_y)

            train_X, train_y = train_f[fold]
            test_X, test_y = test_f[fold]
            clf.fit(train_X, train_y)
            score[1, fold] = clf.score(test_X, test_y)

        # Average
        return np.mean(score, axis=1).tolist()

    acc = np.array(pool.map(run, range(number_of_runs)))

    # Close the pool again
    pool.close()
    pool.join()
    pool.clear()

    return np.mean(acc, axis=0), np.std(acc, axis=0)
Example #18
@contextmanager  # requires: from contextlib import contextmanager
def MyProcessPool(nodes=None):
    if nodes is None or nodes > 1:
        p = ProcessPool(nodes)
        try:
            yield p
        finally:
            p.close()
            p.join()
            p.clear()
    else:
        #print("Using PseudoPool!")
        yield PseudoPool()
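
Used through a with statement, the helper hands back either a real ProcessPool or the serial PseudoPool fallback and tears the pool down on exit. A minimal usage sketch, assuming the @contextmanager fix above and a picklable worker (heavy is hypothetical):

def heavy(x):
    return x ** 2


with MyProcessPool(nodes=4) as pool:
    print(pool.map(heavy, range(8)))  # pool closed/joined/cleared on exit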
Example #19
    def calculate_all(self):
        """
        The top level.
        """
        print("Green's function Calculation started.")

        widgets = [
            ' [',
            progressbar.Timer(),
            '] ',
            progressbar.Bar(),
            ' (',
            progressbar.ETA(),
            ') ',
        ]

        bar = progressbar.ProgressBar(maxval=self.contour.npoints,
                                      widgets=widgets)
        bar.start()
        rhoRs = []
        GRs = []
        AijRs = {}
        if self.np > 1:
            executor = ProcessPool(nodes=self.np)
            results = executor.map(self.get_AijR_rhoR, self.contour.path)
        else:
            results = map(self.get_AijR_rhoR, self.contour.path)

        for i, result in enumerate(results):
            bar.update(i)
            for iR, R in enumerate(self.R_ijatom_dict):
                for (iatom, jatom) in self.R_ijatom_dict[R]:
                    if (R, iatom, jatom) in AijRs:
                        AijRs[(R, iatom, jatom)].append(result[0][R, iatom,
                                                                  jatom])
                    else:
                        AijRs[(R, iatom, jatom)] = []
                        AijRs[(R, iatom, jatom)].append(result[0][R, iatom,
                                                                  jatom])
            rhoRs.append(result[1])
        if self.np > 1:
            executor.close()
            executor.join()
            executor.clear()

        #self.save_AijRs(AijRs)
        self.integrate(rhoRs, AijRs)

        self.get_rho_atom()
        self.A_to_Jtensor()
        bar.finish()
Example #20
    def process_executor(params):
        if len_configs_structs > 1:
            pp = PPool(processes=len_configs_structs)
            results = pp.map(
                lambda t: t[0](t[1], t[2], t[3], t[4], t[5], t[6], t[7], t[8],
                               t[9], configured_n), params)
            pp.close()
            pp.join()
            pp.clear()
        else:
            t = params[0]
            results = t[0](t[1], t[2], t[3], t[4], t[5], t[6], t[7], t[8],
                           t[9], configured_n)
        return results
Example #21
    def threaded_contents_to_text(
        content_series,
        processes=None,
        none_content='raise',
    ):
        """Threaded version of content_to_text method

        It takes as input a series which index is the uid of the products,
        and the values are the content (in the form of bytes) of the
        documents.
        processes argument is the number of processes to launch. If omitted,
        it defaults to the number of cpu cores on the machine.
        none_content arg can be 'raise' (default) or to_empty
        """
        processer = partial(
            PDFDecoder.content_to_text,
            none_content=none_content,
        )
        processes = processes if processes else cpu_count()
        print(f'Launching {processes} processes.')
        in_ds = content_series.apply(BytesIO)

        # Using Pool as a context manager does not seem to work due to issue
        # 38501 of the standard python library. It hangs when running tests
        # through pytest; see: https://bugs.python.org/issue38501
        # The block below should be tested again whenever this issue is closed.
        #
        # with Pool(nodes=processes) as pool:
        #     tuples = (list(in_ds.index),
        #               pool.map(processer, in_ds))
        #
        # End of block

        # This temporary solution should be removed when tests mentioned above
        # are successful.
        # This just closes each pool after execution or exception.
        try:
            pool = Pool(nodes=processes)
            pool.restart(force=True)
            tuples = (list(in_ds.index), pool.map(processer, in_ds))
        except Exception:
            pool.close()
            raise
        pool.close()
        # End of block

        ds = pd.Series(tuples[1], index=tuples[0])
        return ds
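
The close-on-exception workaround above can be isolated into a small reusable pattern. A hedged sketch under the same assumption that the with-statement form of pathos' Pool hangs under pytest (bpo-38501); run_pool is a hypothetical name:

from pathos.multiprocessing import ProcessPool as Pool


def run_pool(worker, *iterables, nodes=None):
    # Manual lifecycle instead of `with Pool(...) as pool`, which can hang
    # under pytest until https://bugs.python.org/issue38501 is resolved.
    pool = Pool(nodes=nodes)
    pool.restart(force=True)
    try:
        return pool.map(worker, *iterables)
    finally:
        pool.close()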
Example #22
    def threaded_texts_to_blocks(text_series,
                                 processes=None,
                                 split_func=lambda x: x.split('\n\n'),
                                 return_type='along_index'):
        """Threaded version of text_to_blocks_series method

        It takes as input a series which index is the uid of the products,
        and the values are the content (in the form of bytes) of the
        documents..
        processes argument is the number of processes to launch. If omitted,
        it defaults to the number of cpu cores on the machine.
        As for text_to_blocks_series function, return_type can be 'along_axis'
        or 'list_like'.
        """
        processer = partial(PDFDecoder.text_to_blocks_series,
                            split_func=split_func,
                            return_type=return_type)
        processes = processes if processes else cpu_count()
        print(f'Launching {processes} processes.')

        # Using Pool as a context manager does not seem to work due to issue
        # 38501 of the standard python library. It hangs when running tests
        # through pytest; see: https://bugs.python.org/issue38501
        # The block below should be tested again whenever this issue is closed.
        #
        # with Pool(nodes=processes) as pool:
        #     ds_list = pool.map(processer, text_series, text_series.index)
        #
        # End of block

        # This temporary solution should be removed when tests mentioned above
        # are successful.
        # This just closes each pool after execution or exception.
        try:
            pool = Pool(nodes=processes)
            pool.restart(force=True)
            ds_list = pool.map(processer, text_series, text_series.index)
        except Exception:
            pool.close()
            raise
        pool.close()
        # End of block

        ds = pd.concat(ds_list, axis=0)
        return ds
Example #23
    def avaliacao(self, populacao):

        n = len(populacao)

        def steps(k):
            individuo = populacao[k, :]
            obj = self.funcao_objetivo(individuo)
            return obj

        ncpu = cpu_count()
        pool = ProcessPool(nodes=ncpu)
        pesos = array(pool.map(steps, range(n)))
        pool.close()
        pool.join()
        pool.clear()
        shutdown()

        return pesos
Example #24
    def apply_data(self, data: 'Data', method: Callable) -> 'Data':
        """Applies 'method' to 'data' across several cores.

        Args:
            data ('Data'): instance with a stored pandas DataFrame.
            method (Callable): callable method or function to apply to 'data'.

        Returns:
            'Data': with 'method' applied.

        """
        dfs = np.array_split(data.data, mp.cpu_count(), axis=0)
        pool = Pool()
        data.data = np.vstack(pool.map(method, dfs))
        pool.close()
        pool.join()
        pool.clear()
        return data
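
The split/map/stack pattern above shards a DataFrame row-wise, applies the method to each shard in a separate process, and reassembles the result. A self-contained sketch with a plain numpy array and a hypothetical module-level worker:

import numpy as np
from pathos.multiprocessing import ProcessPool as Pool


def double(chunk):
    return chunk * 2


arr = np.arange(12).reshape(6, 2)
pool = Pool()
out = np.vstack(pool.map(double, np.array_split(arr, 3, axis=0)))
pool.close()
pool.join()
pool.clear()
print(out.shape)  # (6, 2)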
Example #25
def main(args):
    log.info('----------start processing---------')
    clips_dir = os.path.join(args.source_dir, 'clips')
    if args.follow:
        datasets_file = glob(args.source_dir + '/*.tsv')
    else:
        validated_tsv = args.source_dir + '/validated.tsv'
        new_path = dataset_split(validated_tsv, args.source_dir)
        datasets_file = glob(new_path + '/*.tsv')
    if args.num_process == 1:
        for tsv in tqdm(datasets_file):
            each_tsv(tsv, clips_dir, args.target_dir)
    else:
        targets = [args.target_dir] * len(datasets_file)
        clips_dirs = [clips_dir] * len(datasets_file)
        pool = Pool(args.num_process)
        pool.map(each_tsv, datasets_file, clips_dirs, targets)
        pool.close()
        pool.join()
Example #26
	def run( self ):
		# from pathos.multiprocessing import Pool
		from pathos.multiprocessing import ProcessPool as Pool
		args = self._interpna_setup( )
		pool = Pool( processes=self.ncpus )
		out = pool.map( self._interpna, args[:400] )
		pool.close()
		lons = self._lonpc
		# stack em and roll-its axis so time is dim0
		dat = np.rollaxis( np.dstack( out ), -1 )
		if self._rotated:  # rotate it back
			dat, lons = self.rotate( dat, lons, to_pacific=False )
		# place back into a new xarray.Dataset object for further processing
		# function to make a new xarray.Dataset object with the mdata we need?
		# ds = self.ds
		# var = ds[ self.variable ]
		# setattr( var, 'data', dat )
		# self.ds = ds
		print( 'ds interpolated updated into self.ds' )
		return dat
Example #27
    def calculate_all(self):
        """
        The top level.
        """
        print("Green's function Calculation started.")

        widgets = [
            ' [',
            progressbar.Timer(),
            '] ',
            progressbar.Bar(),
            ' (',
            progressbar.ETA(),
            ') ',
        ]
        bar = progressbar.ProgressBar(maxval=len(self.contour.path),
                                      widgets=widgets)
        bar.start()
        if self.np == 1:
            results = map(self.get_AijR_rhoR, self.contour.path)
        else:
            pool = ProcessPool(nodes=self.np)
            results = pool.map(self.get_AijR_rhoR, self.contour.path)
        for i, result in enumerate(results):
            bar.update(i)
            rup, rdn, Jorb_list, JJ_list = result
            self.rho_up_list.append(rup)
            self.rho_dn_list.append(rdn)
            for iR, R in enumerate(self.R_ijatom_dict):
                for (iatom, jatom) in self.R_ijatom_dict[R]:
                    key = (R, iatom, jatom)
                    self.Jorb_list[key].append(Jorb_list[key])
                    self.JJ_list[key].append(JJ_list[key])
        if self.np > 1:
            pool.close()
            pool.join()
            pool.clear()
        self.integrate()
        self.get_rho_atom()
        self.A_to_Jtensor()
        bar.finish()
Example #28
def main(args):
    if len(args.input) < 2:
        print("Please name at least one STAR file and an output directory")
        return 1

    if args.apix is None:
        print("Using pixel size computed from STAR files")

    def do_job(star):
        try:
            mrc = os.path.join(args.output,
                               os.path.basename(star).replace(".star", ".mrc"))
            print("Starting reconstruction of %s" % star)
            do_reconstruct(star, mrc, args.apix, args.sym, args.ctf)
            print("Wrote %s reconstruction to %s" % (star, mrc))
            if args.mask is not None:
                masked_mrc = mrc.replace(".mrc", "_masked.mrc")
                do_mask(mrc, masked_mrc, args.mask)
                print("Wrote masked map %s" % masked_mrc)
            if args.mask is not None and args.delete_unmasked:
                delete_unmasked(mrc, masked_mrc)
                print("Overwrote %s with %s" % (mrc, masked_mrc))
        except Exception as e:
            print("Failed on %s: %s" % (star, e))
        return 0

    pool = Pool(nodes=args.nproc)

    #pool.apipe(do_job, args.input)
    results = pool.imap(do_job, args.input)
    codes = list(results)

    if pool is not None:
        pool.close()
        pool.join()
        pool.terminate()

    return 0
Example #29
def compute_seq_distances(sequences, affinity=sequence_distance,
                          nb_jobs=NB_THREADS):
    """ compute matrix of all distances

    :param [] sequences: list of all sequences
    :param func affinity: function specify the sample affinity
    :param int nb_jobs: number jobs running in parallel
    :return ndarray:

    >>> ss = [['a', 'b', 'a', 'c'], ['a', 'a', 'b', 'a'], ['b', None, 'b', 'a']]
    >>> compute_seq_distances(ss, affinity=sequence_distance)
    array([[0.  , 0.25, 0.5 ],
           [0.25, 0.  , 0.25],
           [0.5 , 0.25, 0.  ]])
    >>> ss = [['hi', 'there', 'how', 'are', 'you'],
    ...       ['hi', 'how', 'are', 'you'],
    ...       ['hi', 'are', 'you', 'there']]
    >>> compute_seq_distances(ss)
    array([[0. , 0.2, 0.6],
           [0.2, 0. , 0.5],
           [0.6, 0.5, 0. ]])
    """
    idxs = [(i, j) for i in range(len(sequences))
            for j in range(i, len(sequences))]
    idx_lt = (((i, j), (sequences[i], sequences[j])) for i, j in idxs)
    dists = np.zeros((len(sequences), len(sequences)))

    _wrap_dist = partial(wrap_distance, similar_distance=affinity)
    pool = ProcessPool(nb_jobs)

    for idx, d in pool.imap(_wrap_dist, idx_lt):
        dists[idx[0], idx[1]] = d
        dists[idx[1], idx[0]] = d

    pool.close()
    pool.join()
    pool.clear()
    return dists
Example #30
def multiprocess(func: Callable,
                 all_urls: List[List[str]],
                 people: List[str],
                 total_count: int,
                 info: str) -> Tuple[List[List[str]], List[str]]:
    print("[INFO] {} with {} processes".format(info, NUM_PROCESSES))
    global TIMER
    widgets_match = ['{}: '.format(info), pb.Percentage(), ' ',
                     pb.Bar(marker=pb.RotatingMarker()), ' ', pb.ETA()]
    TIMER = pb.ProgressBar(widgets=widgets_match, maxval=total_count).start()

    if NUM_PROCESSES > 1:
        process_pool = ProcessPool(NUM_PROCESSES)
        urls_and_people = process_pool.imap(func, all_urls, people)
        process_pool.close()
        process_pool.join()
        filtered_urls, people = zip(*urls_and_people)
    else:
        filtered_urls = []
        for urls, person in zip(all_urls, people):
            filtered, person = func(urls, person)
            filtered_urls.append(filtered)
    print("[INFO] Done {}".format(info))
    return filtered_urls, people