        acs_event_gen,
        n_surgery,
        background_surgery_gen,
        background_surgery_duration_gen,
        bg_scale,
        target_scale,
        simulation_time,
        use_queueing=queue,
    )
    sim_stats = dept_des.get_queue_statistics(sim_res)
    sim_stats["BG_SCALE"] = bg_scale
    sim_stats["TARGET_SCALE"] = target_scale
    sim_stats["N_SURG"] = n_surgery
    print(run_id, sim_stats)
    return sim_stats


if __name__ == "__main__":
    total_log = []
    for tg_s in [0.5, 1.0, 1.5, 2.0]:
        for bg_s in [0.5, 1.0, 1.5, 2.0]:
            for nps in [1, 2, 3, 4, 5]:
                run_res = joblib.Parallel(n_jobs=6)(
                    joblib.delayed(single_experiment_run)(tg_s, bg_s, nps, True, i_run)
                    for i_run in range(100))
                total_log.extend(run_res)
    total_log_df = pd.DataFrame(total_log, columns=total_log[0].keys())
    total_log_df.to_csv("logs" + ps + "queue-stats-" +
                        datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S") +
                        ".csv")
def get_X(paths):
    delayed_get_descriptors = \
        joblib.delayed(di.descriptors.cached_get_descriptors)
    X = joblib.Parallel(n_jobs=-1, verbose=10)(
        delayed_get_descriptors(path) for path in paths)
    return np.vstack(X)
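# A minimal, self-contained sketch of the same pattern as get_X above, with a
# stand-in for the project-specific `di.descriptors.cached_get_descriptors`
# (assumed here to return a 2-D feature array of fixed width per file):
import joblib
import numpy as np


def _fake_descriptors(path):
    # pretend every file yields a (10, 4) descriptor matrix
    return np.zeros((10, 4))


def get_X_demo(paths):
    delayed_desc = joblib.delayed(_fake_descriptors)
    X = joblib.Parallel(n_jobs=-1, verbose=0)(delayed_desc(p) for p in paths)
    return np.vstack(X)

# get_X_demo(["a.wav", "b.wav"]).shape  -> (20, 4)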
def create_archipelago(
    unknowns: list,
    optimizers: list,
    optimizers_kwargs: list,
    pg_problem: pygmo.problem,
    rel_pop_size: float,
    archipelago_kwargs: dict,
    log_each_nth_gen: int,
    report_level: int,
) -> PyfoombArchipelago:
    """
    Helper method for parallelized estimation using the generalized island model.
    Creates the archipelago object for running several rounds of evolutions.

    Arguments
    ---------
        unknowns : list
            The unknowns, sorted alphabetically and case-insensitively.
        optimizers : list
            A list of optimizers to be used on individual islands.
        optimizers_kwargs : list
            A list of corresponding kwargs.
        pg_problem : pygmo.problem
            A pygmo problem instance.
        rel_pop_size : float
            Population size of each island, relative to the number of unknowns.
        archipelago_kwargs : dict
            Additional kwargs for archipelago creation.
        log_each_nth_gen : int
            Specifies that the algorithm stores logs at every n-th generation.
        report_level : int
            Prints information on the archipelago creation for values >= 1.

    Returns
    -------
        archipelago : PyfoombArchipelago
    """

    _cpus = joblib.cpu_count()

    # There is one optimizer with a set of kwargs
    if len(optimizers) == 1 and len(optimizers_kwargs) == 1:
        optimizers = optimizers * _cpus
        optimizers_kwargs = optimizers_kwargs * _cpus
    # Several optimizers with the same kwargs
    elif len(optimizers) > 1 and len(optimizers_kwargs) == 1:
        optimizers_kwargs = optimizers_kwargs * len(optimizers)
    # Several kwargs for the same optimizer
    elif len(optimizers) == 1 and len(optimizers_kwargs) > 1:
        optimizers = optimizers * len(optimizers_kwargs)
    elif len(optimizers) != len(optimizers_kwargs):
        raise ValueError(
            'Number of optimizers does not match number of corresponding kwarg dicts'
        )

    # Get the optimizer instances
    algos = [
        PygmoOptimizers.get_optimizer_algo_instance(
            name=_optimizers, kwargs=_optimizers_kwargs)
        for _optimizers, _optimizers_kwargs in zip(optimizers, optimizers_kwargs)
    ]

    # Update number of islands
    n_islands = len(algos)

    if report_level >= 1:
        print(f'Creating archipelago with {n_islands} islands. May take some time...')

    pop_size = int(numpy.ceil(rel_pop_size * len(unknowns)))
    prop_create_args = (
        (pg_problem, pop_size, seed * numpy.random.randint(0, 1e4))
        for seed, pop_size in enumerate([pop_size] * n_islands)
    )

    try:
        parallel_verbose = 0 if report_level == 0 else 1
        with joblib.parallel_backend('loky', n_jobs=n_islands):
            pops = joblib.Parallel(verbose=parallel_verbose)(
                map(joblib.delayed(ArchipelagoHelpers.parallel_create_population),
                    prop_create_args))
    except Exception as ex:
        print(f'Parallelized archipelago creation failed, falling back to sequential\n{ex}')
        pops = (
            ArchipelagoHelpers.parallel_create_population(prop_create_arg)
            for prop_create_arg in prop_create_args
        )

    # Now create the empty archipelago
    if 't' not in archipelago_kwargs.keys():
        archipelago_kwargs['t'] = pygmo.fully_connected()
    archi = PyfoombArchipelago(**archipelago_kwargs)
    archi.set_migrant_handling(pygmo.migrant_handling.preserve)

    # Add the populations to the archipelago and wait for its construction
    with contextlib.redirect_stdout(io.StringIO()):
        for _pop, _algo in zip(pops, algos):
            if log_each_nth_gen is not None:
                _algo.set_verbosity(int(log_each_nth_gen))
            _island = pygmo.island(algo=_algo, pop=_pop, udi=pygmo.mp_island())
            archi.push_back(_island)
    archi.wait_check()

    return archi
    jl.dump((sampler_name, sampler), sampler_file, compress=4)
    return sampler_file


def filter_file_list(data_files):
    new_list = []
    for data_file in data_files:
        sampler_name = splitext(basename(data_file))[0]
        sampler_file = 'ramp_fits/sampler/{:s}.gz'.format(sampler_name)
        if exists(sampler_file):
            print('Skipping %s' % data_file)
        else:
            new_list.append(data_file)
    return new_list


if __name__ == "__main__":
    if len(sys.argv) == 1:
        from glob import glob
        data_files = glob('data/ramp_data/*.csv')
    else:
        data_files = sys.argv[1:]

    data_files = filter_file_list(data_files)
    print('Number of data files: %g' % len(data_files))

    with jl.Parallel(n_jobs=4, verbose=20) as par:
        par(jl.delayed(process_input_file)(data_file)
            for data_file in data_files)
def _train(self, dataset):
    pmeasure = ProxyMeasure(
        self.lrn,
        postproc=BinaryFxNode(self.errorfx, self.lrn.space),
        skip_train=not self.train_pmeasure  # do not train since fmeasure will
    )

    # First we need to replicate our RFE construct but this time
    # with pmeasure for the classifier
    rfe = RFE(
        self.fmeasure,
        pmeasure,
        Splitter('partitions'),
        fselector=self.fselector,
        bestdetector=None,
        train_pmeasure=self.train_pmeasure,
        stopping_criterion=None,  # full "track"
        update_sensitivity=self.update_sensitivity,
        enable_ca=['errors', 'nfeatures'])

    errors, nfeatures = [], []

    if __debug__:
        debug("RFEC", "Stage 1: initial nested CV/RFE for %s", (dataset, ))

    if self.nproc != 1 and externals.exists('joblib'):
        nested_results = jl.Parallel(self.nproc)(
            jl.delayed(_process_partition)(rfe, partition)
            for partition in self.partitioner.generate(dataset))
    else:
        nested_results = [
            _process_partition(rfe, partition)
            for partition in self.partitioner.generate(dataset)
        ]

    # unzip
    errors = [x[0] for x in nested_results]
    nfeatures = [x[1] for x in nested_results]

    self.ca.nested_nfeatures = nfeatures
    self.ca.nested_errors = errors

    # mean errors across splits and find optimal number
    errors_mean = np.mean(errors, axis=0)
    nfeatures_mean = np.mean(nfeatures, axis=0)
    # we will take the "mean location" of the min to stay
    # within the most 'stable' choice
    mins_idx = np.where(errors_mean == np.min(errors_mean))[0]
    min_idx = mins_idx[int(len(mins_idx) / 2)]
    min_error = errors_mean[min_idx]
    assert (min_error == np.min(errors_mean))
    nfeatures_min = nfeatures_mean[min_idx]

    if __debug__:
        debug("RFEC",
              "Choosing among %d choices to have %d features with "
              "mean error=%.2g (initial mean error %.2g)",
              (len(mins_idx), nfeatures_min, min_error, errors_mean[0]))

    self.nfeatures_min = nfeatures_min

    if __debug__:
        debug("RFEC",
              "Stage 2: running RFE on full training dataset to "
              "obtain the best %d features" % nfeatures_min)

    super(SplitRFE, self)._train(dataset)
            continue
        try:
            B, F, T = mlab.specgram(
                d[i, st:et],
                NFFT=128,
                Fs=500000,  # 500 kHz
                window=mlab.window_hanning,
                noverlap=126)
            # get B[2:34, :] --> [32, 8270]
            B = B[2:34, :]
            B_all.append(B)
        except:
            pass

    B_all = np.dstack(B_all)  # 3D array
    B_all /= 40000  # ad-hoc normalization
    print('current_loc:', current_loc, [B_all.max(), B_all.min()],
          [bat1x, bat1y, bat1z])
    np.save(path + "/trueXYZ_" + '{:09d}'.format(current_loc),
            np.array([bat1x, bat1y, bat1z]))
    np.save(path + "/specgram_" + '{:09d}'.format(current_loc), B_all)


results = joblib.Parallel(n_jobs=-1)([
    joblib.delayed(make_specgram)(current_loc)
    for current_loc in np.arange(1606200, 46546200, 1500)
])
def fit(self, train_data, test_data=None): """Fits `self.learners` using folds sampled from the provided data. Args: train_data (Table): table to sample train folds test_data (Optional[Table]): tap to sample test folds of None then `train_data` will be used """ test_data = test_data or train_data self.setup_indices(train_data, test_data) self.prepare_arrays(test_data) self._prepare_arrays(test_data) n_callbacks = len(self.learners) * len(self.indices) n_jobs = max(1, min(self.n_jobs, n_callbacks)) def _is_picklable(obj): try: return bool(pickle.dumps(obj)) except (AttributeError, TypeError, pickle.PicklingError): return False if n_jobs > 1 and not all( _is_picklable(learner) for learner in self.learners): n_jobs = 1 warnings.warn( "Not all arguments (learners) are picklable. " "Setting n_jobs=1", OrangeWarning) if n_jobs > 1 and mp.current_process().daemon: n_jobs = 1 warnings.warn( "Worker subprocesses cannot spawn new worker " "subprocesses (e.g. parameter tuning with internal " "cross-validation). Setting n_jobs=1", OrangeWarning) # Workaround for NumPy locking on Macintosh and Ubuntu 14.04 LTS # May be removed once offending libs and OSes are nowhere to be found. # https://pythonhosted.org/joblib/parallel.html#bad-interaction-of-multiprocessing-and-third-party-libraries mp_ctx = mp.get_context('forkserver' if sys.platform.startswith(( 'darwin', 'linux')) and n_jobs > 1 else None) if (n_jobs > 1 and mp_ctx.get_start_method() != 'fork' and train_data.X.size < self._MIN_NJOBS_X_SIZE): n_jobs = 1 warnings.warn( "Working with small-enough data; single-threaded " "sequential excecution will (probably) be faster. " "Setting n_jobs=1", OrangeWarning) try: # Use context-adapted Queue or just the regular Queue if no # multiprocessing (otherwise it shits itself at least on Windos) mp_queue = mp_ctx.Manager().Queue() if n_jobs > 1 else mp.Queue() except (EOFError, RuntimeError): mp_queue = mp.Queue() n_jobs = 1 warnings.warn( ''' Can't run multiprocessing code without a __main__ guard. Multiprocessing strategies 'forkserver' (used by Orange's evaluation methods by default on Mac OS X) and 'spawn' (default on Windos) require the main code entry point be guarded with: if __name__ == '__main__': import multiprocessing as mp mp.freeze_support() # Needed only on Windos ... # Rest of your code ... # See: https://docs.python.org/3/library/__main__.html Otherwise, as the module is re-imported in another process, infinite recursion ensues. Guard your executed code with above Python idiom, or pass n_jobs=1 to evaluation methods, i.e. {}(..., n_jobs=1). Setting n_jobs to 1. '''.format(self.__class__.__name__), OrangeWarning) data_splits = ((fold_i, self.preprocessor(train_data[train_i]), test_data[test_i]) for fold_i, (train_i, test_i) in enumerate(self.indices)) args_iter = ( (fold_i, train_data, test_data, learner_i, learner, self.store_models, mp_queue) # NOTE: If this nested for loop doesn't work, try # itertools.product for (fold_i, train_data, test_data) in data_splits for (learner_i, learner) in enumerate(self.learners)) def _callback_percent(n_steps, queue): """Block until one of the subprocesses completes, before signalling callback with percent""" for percent in np.linspace(.0, .99, n_steps + 1)[1:]: queue.get() try: self._callback(percent) except Exception: # Callback may error for whatever reason (e.g. 
PEBKAC) # In that case, rather gracefully continue computation # instead of failing pass results = [] with joblib.Parallel(n_jobs=n_jobs, backend=mp_ctx) as parallel: tasks = (joblib.delayed(_mp_worker)(*args) for args in args_iter) # Start the tasks from another thread ... thread = Thread(target=lambda: results.append(parallel(tasks))) thread.start() # ... so that we can update the GUI (callback) from the main thread _callback_percent(n_callbacks, mp_queue) thread.join() results = sorted(results[0]) ptr, prev_fold_i, prev_n_values = 0, 0, 0 for res in results: if res.fold_i != prev_fold_i: ptr += prev_n_values prev_fold_i = res.fold_i result_slice = slice(ptr, ptr + res.n_values) prev_n_values = res.n_values if res.failed: self.failed[res.learner_i] = res.failed continue if self.store_models: self.models[res.fold_i][res.learner_i] = res.model self.predicted[res.learner_i][result_slice] = res.values if train_data.domain.has_discrete_class: self.probabilities[res.learner_i][result_slice, :] = res.probs self._callback(1) return self
def tune(self, ncores=1, csvname=None, verbose=True):
    """
    This function starts the tuning process with a specified number of processors

    :param ncores: (int) number of parallel processors (see the **Notes** section below for an important note about parallel execution)
    :param csvname: (str) the name of the csv file name to save the tuning results
        (useful for expensive cases as the csv file is updated directly after the case is done)
    :param verbose: (bool) whether to print updates to the screen or not
    """
    self.ncores = ncores
    self.csvlogger = csvname
    self.verbose = verbose

    if self.verbose:
        print('***************************************************************')
        print('****************Bayesian Search is Running*********************')
        print('***************************************************************')

        if self.ncores > 1:
            print('--- Running in parallel with {} threads and {} cases per thread'.format(
                self.ncores, self.ncases))
            print('--- Total number of executed cases is {}*{}={} cases'.format(
                self.ncores, self.ncases, self.ncores * self.ncases))

    if self.ncores > 1:
        with joblib.Parallel(n_jobs=self.ncores) as parallel:
            x_vals, func_vals = zip(*parallel(
                joblib.delayed(self.worker)(core + 1)
                for core in range(self.ncores)))

            # flatten the x-lists for all cores
            x_vals_flatten = []
            for lists in x_vals:
                for item in lists:
                    x_vals_flatten.append(item)

            # flatten the y results from all cores
            func_vals_flatten = [item for sublist in func_vals for item in sublist]

            assert len(func_vals_flatten) == len(x_vals_flatten), \
                '--error: the length of func_vals_flatten and x_vals_flatten in parallel Bayesian search must be equal'

            self.bayesres = pd.DataFrame(x_vals_flatten, columns=self.func_args)
            self.bayesres['score'] = np.array(func_vals_flatten) \
                if self.mode == 'min' else -np.array(func_vals_flatten)
    else:
        if self.mode == 'min':
            @use_named_args(dimensions=self.dimensions)
            def fitness_wrapper(*args, **kwargs):
                return self.fit(*args, **kwargs)
        else:
            @use_named_args(dimensions=self.dimensions)
            def fitness_wrapper(*args, **kwargs):
                return -self.fit(*args, **kwargs)

        # Single core search
        self.search_result = gp_minimize(func=fitness_wrapper,
                                         dimensions=self.dimensions,
                                         acq_func='EI',  # Expected Improvement
                                         n_calls=self.ncases,
                                         random_state=self.seed)
        self.bayesres = pd.DataFrame(self.search_result.x_iters, columns=self.func_args)
        self.bayesres['score'] = self.search_result.func_vals \
            if self.mode == 'min' else -self.search_result.func_vals

    self.bayesres.index += 1

    if self.csvlogger:
        self.bayesres.index.name = 'id'
        self.bayesres.to_csv(self.csvlogger)

    return self.bayesres
def fit(self, X, y, check_input=True): """Fit model with coordinate descent. Parameters ----------- X : ndarray or scipy.sparse matrix, (n_samples, n_features) Data y : ndarray, shape (n_samples,) or (n_samples, n_targets) Target check_input : boolean, (default=True) Allow to bypass several input checking. Don't use this parameter unless you know what you do. Notes ----- Coordinate descent is an algorithm that considers each column of data at a time hence it will automatically convert the X input as a Fortran-contiguous numpy array if necessary. To avoid memory re-allocation it is advised to allocate the initial data in memory directly using that format. """ if self.alpha == 0: warnings.warn( "With alpha=0, this algorithm does not converge " "well. You are advised to use the LinearRegression " "estimator", stacklevel=2, ) if isinstance(self.precompute, six.string_types): raise ValueError("precompute should be one of True, False or" " array-like. Got %r" % self.precompute) # We expect X and y to be float64 or float32 Fortran ordered arrays # when bypassing checks if check_input: X, y = check_X_y( X, y, accept_sparse="csc", order="F", dtype=[np.float64, np.float32], copy=self.copy_X and self.fit_intercept, multi_output=True, y_numeric=True, ) y = check_array(y, order="F", copy=False, dtype=X.dtype.type, ensure_2d=False) X, y, X_offset, y_offset, X_scale, precompute, Xy = _pre_fit( X, y, None, self.precompute, self.normalize, self.fit_intercept, copy=False) if y.ndim == 1: y = y[:, None] if Xy is not None and Xy.ndim == 1: Xy = Xy[:, None] n_samples, n_features = X.shape n_targets = y.shape[1] if self.selection not in ["cyclic", "random"]: raise ValueError("selection should be either random or cyclic.") if not self.warm_start or self.coef_ is None: coef_ = np.zeros((n_targets, n_features), dtype=X.dtype, order="F") else: coef_ = self.coef_ if coef_.ndim == 1: coef_ = coef_[None, :] dual_gaps_ = np.zeros(n_targets, dtype=X.dtype) if self.n_jobs == 1: self.n_iter_ = [] history = [] for k in range(n_targets): if self.mode == "admm": this_coef, hist, this_iter = group_lasso_overlap( X, y[:, k], lamda=self.alpha, groups=self.groups, rho=self.rho, max_iter=self.max_iter, tol=self.tol, verbose=self.verbose, rtol=self.rtol, ) elif self.mode == "paspal-matlab": this_coef, hist, this_iter = group_lasso_overlap_paspal( X, y[:, k], lamda=self.alpha, groups=self.groups, rho=self.rho, max_iter=self.max_iter, tol=self.tol, verbose=self.verbose, rtol=self.rtol, matlab_engine=self.matlab_engine, ) elif self.mode == "paspal": # paspal wrapper this_coef, hist, this_iter = glopridu_algorithm( X, y[:, k], tau=self.alpha, blocks=self.groups, max_iter_ext=self.max_iter, tol_ext=self.tol, verbose=self.verbose, tol_int=self.rtol, ) else: raise ValueError(self.mode) coef_[k] = this_coef.ravel() history.append(hist) self.n_iter_.append(this_iter) else: import joblib as jl if self.mode == "admm": coef_, history, self.n_iter_ = zip(*jl.Parallel( n_jobs=self.n_jobs)(jl.delayed(group_lasso_overlap)( X, y[:, k], lamda=self.alpha, groups=self.groups, rho=self.rho, max_iter=self.max_iter, tol=self.tol, verbose=self.verbose, rtol=self.rtol, ) for k in range(n_targets))) elif self.mode == "paspal-matlab": # paspal wrapper coef_, history, self.n_iter_ = zip(*jl.Parallel( n_jobs=self.n_jobs)(jl.delayed(group_lasso_overlap_paspal)( X, y[:, k], lamda=self.alpha, groups=self.groups, rho=self.rho, max_iter=self.max_iter, tol=self.tol, verbose=self.verbose, rtol=self.rtol, matlab_engine=self.matlab_engine, ) for k in 
range(n_targets))) elif self.mode == "paspal": # paspal wrapper coef_, history, self.n_iter_ = zip(*jl.Parallel( n_jobs=self.n_jobs)(jl.delayed(glopridu_algorithm)( X, y[:, k], tau=self.alpha, blocks=self.groups, max_iter_ext=self.max_iter, tol_ext=self.tol, verbose=self.verbose, tol_int=self.rtol, ) for k in range(n_targets))) else: raise ValueError(self.mode) if n_targets == 1: self.n_iter_ = self.n_iter_[0] self.coef_, self.dual_gap_ = map(np.squeeze, [coef_, dual_gaps_]) self._set_intercept(X_offset, y_offset, X_scale) # workaround since _set_intercept will cast self.coef_ into float64 self.coef_ = np.asarray(self.coef_, dtype=X.dtype) self.history_ = history # return self for chaining fit and predict calls return self
def get_normalized_lupi_intervals(self, lupi_features, presetModel=None):
    # We define a list of all the features we want to compute relevance bounds for
    X, _ = self.data  # TODO: handle other data formats
    all_d = X.shape[1]
    normal_d = all_d - lupi_features

    # Compute relevance bounds and probes for normal features and LUPI
    with joblib.Parallel(n_jobs=self.n_jobs, verbose=self.verbose) as parallel:
        d_n = _get_necessary_dimensions(normal_d, presetModel)
        rb = self.compute_relevance_bounds(d_n, parallel=parallel)
        probe_upper = self.compute_probe_values(d_n, True, parallel=parallel)
        probe_lower = self.compute_probe_values(d_n, False, parallel=parallel)

        d_l = _get_necessary_dimensions(all_d, presetModel, start=normal_d)
        rb_l = self.compute_relevance_bounds(d_l, parallel=parallel)
        probe_priv_upper = self.compute_probe_values(d_l, True, parallel=parallel)
        probe_priv_lower = self.compute_probe_values(d_l, False, parallel=parallel)

    #
    # Postprocess
    #

    # Get Scaling Parameters
    l1 = self.init_constraints["w_l1"]
    l1_priv = self.init_constraints["w_priv_l1"]
    l1 = l1 + l1_priv

    # Normalize Normal and LUPI features
    rb_norm = self._postprocessing(l1, rb)
    rb_l_norm = self._postprocessing(l1, rb_l)
    interval_ = np.concatenate([rb_norm, rb_l_norm])

    # Normalize Probes
    probe_lower = self._postprocessing(l1, probe_lower)
    probe_upper = self._postprocessing(l1, probe_upper)
    probe_priv_lower = self._postprocessing(l1, probe_priv_lower)
    probe_priv_upper = self._postprocessing(l1, probe_priv_upper)

    #
    # Classify features
    #
    self.f_classifier = FeatureClassifier(probe_lower, probe_upper,
                                          verbose=self.verbose)
    feature_classes = self.f_classifier.classify(rb_norm)

    self.f_classifier_lupi = FeatureClassifier(probe_priv_lower, probe_priv_upper,
                                               verbose=self.verbose)
    feature_classes_lupi = self.f_classifier_lupi.classify(rb_l_norm)

    fc_both = np.concatenate([feature_classes, feature_classes_lupi])

    return interval_, fc_both
def __init__(self, n_jobs=1):
    self.n_jobs = n_jobs
    self.parallel = joblib.Parallel(n_jobs=self.n_jobs)
    self.reset()
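# A minimal sketch of how a stored Parallel instance like the one above can be
# reused across method calls (class and method names here are illustrative, not
# from the original code). Calling the same joblib.Parallel object repeatedly is
# supported; wrapping it in a `with` block additionally keeps the worker pool
# alive between calls.
import joblib


class BatchRunner:
    def __init__(self, n_jobs=1):
        self.n_jobs = n_jobs
        self.parallel = joblib.Parallel(n_jobs=self.n_jobs)

    def squares(self, values):
        return self.parallel(joblib.delayed(pow)(v, 2) for v in values)

    def cubes(self, values):
        return self.parallel(joblib.delayed(pow)(v, 3) for v in values)

# BatchRunner(n_jobs=2).squares(range(4))  -> [0, 1, 4, 9]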
def parallel(n):
    xy_combos = ((x, y) for x in range(1, n) for y in range(1, n))
    digit_sum = joblib.delayed(get_digit_sum)
    results = joblib.Parallel(joblib.cpu_count() * 2)(
        digit_sum(x, y) for x, y in xy_combos)
    print(max(results))
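# Self-contained sketch of the snippet above with a stand-in for the undefined
# `get_digit_sum` (assumed here to sum the decimal digits of x**y, which is what
# this kind of exercise usually asks for):
import joblib


def get_digit_sum(x, y):
    return sum(int(d) for d in str(x ** y))


def parallel_demo(n):
    xy_combos = ((x, y) for x in range(1, n) for y in range(1, n))
    digit_sum = joblib.delayed(get_digit_sum)
    results = joblib.Parallel(n_jobs=joblib.cpu_count() * 2)(
        digit_sum(x, y) for x, y in xy_combos)
    return max(results)

# parallel_demo(30)  -> largest digit sum found for x, y < 30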
    ('BZar.ttf', 'BZarBold.ttf', 'BZarItalic.ttf'),
    ('CourierNew.ttf', 'CourierNewBold.ttf', 'CourierNewItalic.ttf'),
    ('HelveticaNormal.ttf', 'HelveticaBold.ttf'),
    ('IRANSans.ttf', 'IRANSansBold.ttf', 'IRANSansItalic.ttf'),
    ('NotoNaskhArabic.ttf', 'NotoNaskhArabicBold.ttf', 'NotoNaskhArabicItalic.ttf'),
    ('Tahoma.ttf', 'TahomaBold.ttf'),
    ('TimesNewRoman.ttf', 'TimesNewRomanBold.ttf', 'TimesNewRomanItalic.ttf'),
]
fonts = [['fonts/' + name for name in item] for item in fonts]

random.shuffle(images)
texts, images, fonts = itertools.cycle(texts), itertools.cycle(images), itertools.cycle(fonts)

# create htmls
page_htmls = [
    create_page_html(texts, images, fonts, random.choice(['tabale', 'multi-col']))
    for i in range(5000)
]

joblib.Parallel(n_jobs=4, backend='multiprocessing')([
    joblib.delayed(render)(html,
                           address('resources/generated/{}/{}.png'.format(
                               hashed(html)[:2], hashed(html))))
    for html in page_htmls
])

# print json names
# print([os.path.abspath(filename) for filename in glob('resources/generated/*/*.json')])
def main():
    # Set directory locations for old and new raw/aggregate files
    raw_path = r"C:\Users\npbyers\Desktop\OTB\ChapNumFixes\chap_adjusted_raw_round2"
    manual_path = "./fix_mats/"
    rawfolder = "./chap_adjusted_raw_round2/"
    aggfolder = "./chap_adjusted_agg_round2/"
    rawfolder_new = "./chap_cleaned/raw/"
    aggfolder_new = "./chap_cleaned/agg/"

    # Initialize lists for each file type
    flag_rows_filelist = []
    vol_list = []
    raw_filelist = []

    # Create a list of flag_rows files and a list of volumes for
    # which flag_rows files exist
    for root, dirs, files in os.walk(manual_path):
        for file in files:
            if "flag_rows" in file:
                folder = file.replace("_flag_rows.csv", "")
                flag_rows_filelist.append(manual_path + "/" + folder + "/" + file)
                vol_list.append(folder)

    # Create a list of all old raw files
    all_raw = [f for f in os.listdir(raw_path) if f.endswith(".csv")]

    # Create a list of raw files for those volumes with corresponding flag_rows
    # files. These are the raw files that will be sent to the 'fix_integration'
    # function.
    # If a volume did not undergo any manual fixes (and for which there is thus
    # no corresponding flag_rows file), the existing raw/agg files for that
    # volume are simply copied and pasted to the new "chap_cleaned" destination
    # directory.
    for i in all_raw:
        base = i.replace("_output_chapadjusted_rd2.csv", "")
        raw_outname_new = i.replace("_output_chapadjusted_rd2.csv", "_cleaned.csv")
        agg_outname_new = i.replace("_output_chapadjusted_rd2.csv", "_aggregated_cleaned.csv")
        agg_inname_old = i.replace("_output_chapadjusted_rd2.csv", "_aggregated_chapadjusted_rd2.csv")
        if base in vol_list:
            raw_filelist.append(rawfolder + i)
        else:
            copyfile(rawfolder + i, rawfolder_new + raw_outname_new)
            copyfile(aggfolder + agg_inname_old, aggfolder_new + agg_outname_new)

    # Create a dataframe from the file in which all manual fixes and
    # transcriptions from the previous step (manual review) have been recorded
    fixfile = r"C:\Users\npbyers\Desktop\OTB\ChapNumFixes\fix_mats\Chap_Error_Fixes_for_script.csv"
    fix_df = pd.read_csv(fixfile, encoding='utf-8', low_memory=False)

    # Extract the rows in the fix file that pertain to the volume in question
    # and add them as a new dataframe to a dictionary which also contains
    # the filepath strings for the raw and flag_rows files for that volume
    raw_flag_fix_dicts = []
    for i in range(0, len(raw_filelist)):
        vol_fix_df = fix_df[fix_df['Volume'] == vol_list[i]].copy().reset_index()
        raw_flag_fix_dicts.append({
            'raw': raw_filelist[i],
            'flag_rows': flag_rows_filelist[i],
            'fixes': vol_fix_df
        })

    # Call the 'fix_integration' function using the dictionaries created above,
    # one for each volume with manual fixes to be integrated. This operation
    # is run in parallel to reduce compute time.
    with joblib.parallel_backend(n_jobs=7, backend='loky'):
        joblib.Parallel(verbose=5)(joblib.delayed(fix_integration)(fix_dict)
                                   for fix_dict in raw_flag_fix_dicts)
# Train Linear SVM classifier
print('Training the SVM classifier...')
lin_svm, std_scaler, pca = classification.train_linear_svm(
    vis_words, labels, C=1, dim_reduction=None)
print('Elapsed time: {:.2f} s'.format(time.time() - temp))
temp = time.time()

# Read the test set
test_images_filenames, test_labels = io.load_test_set()
print('Loaded {} test images.'.format(len(test_images_filenames)))

# Feature extraction with sift, prediction with SVM and aggregation to obtain final class
print('Predicting test data...')
test_results = joblib.Parallel(n_jobs=N_JOBS, backend='threading')(
    joblib.delayed(parallel_testing)(test_image, test_label, codebook,
                                     lin_svm, std_scaler, pca)
    for test_image, test_label in zip(test_images_filenames, test_labels))

pred_results = [x[0] for x in test_results]
pred_class = [x[1] for x in test_results]
pred_prob = [x[2] for x in test_results]

num_correct = np.count_nonzero(pred_results)
print('Elapsed time: {:.2f} s'.format(time.time() - temp))
temp = time.time()

# Compute accuracy
accuracy = num_correct * 100.0 / len(test_images_filenames)

# Show results and timing
print('\nACCURACY: {:.2f}'.format(accuracy))
def _compute_efficient(self, bw): """ Computes the bandwidth by estimating the scaling factor (c) in n_res resamples of size ``n_sub`` (in `randomize` case), or by dividing ``nobs`` into as many ``n_sub`` blocks as needed (if `randomize` is False). References ---------- See p.9 in socserv.mcmaster.ca/racine/np_faq.pdf """ if bw is None: self._bw_method = 'normal_reference' if isinstance(bw, string_types): self._bw_method = bw else: self._bw_method = "user-specified" return bw nobs = self.nobs n_sub = self.n_sub data = copy.deepcopy(self.data) n_cvars = self.data_type.count('c') co = 4 # 2*order of continuous kernel do = 4 # 2*order of discrete kernel _, ix_ord, ix_unord = _get_type_pos(self.data_type) # Define bounds for slicing the data if self.randomize: # randomize chooses blocks of size n_sub, independent of nobs bounds = [None] * self.n_res else: bounds = [(i * n_sub, (i + 1) * n_sub) for i in range(nobs // n_sub)] if nobs % n_sub > 0: bounds.append((nobs - nobs % n_sub, nobs)) n_blocks = self.n_res if self.randomize else len(bounds) sample_scale = np.empty((n_blocks, self.k_vars)) only_bw = np.empty((n_blocks, self.k_vars)) class_type, class_vars = self._get_class_vars_type() if has_joblib: # `res` is a list of tuples (sample_scale_sub, bw_sub) res = joblib.Parallel(n_jobs=self.n_jobs) \ (joblib.delayed(_compute_subset) \ (class_type, data, bw, co, do, n_cvars, ix_ord, ix_unord, \ n_sub, class_vars, self.randomize, bounds[i]) \ for i in range(n_blocks)) else: res = [] for i in range(n_blocks): res.append( _compute_subset(class_type, data, bw, co, do, n_cvars, ix_ord, ix_unord, n_sub, class_vars, self.randomize, bounds[i])) for i in range(n_blocks): sample_scale[i, :] = res[i][0] only_bw[i, :] = res[i][1] s = self._compute_dispersion(data) order_func = np.median if self.return_median else np.mean m_scale = order_func(sample_scale, axis=0) # TODO: Check if 1/5 is correct in line below! bw = m_scale * s * nobs**(-1. / (n_cvars + co)) bw[ix_ord] = m_scale[ix_ord] * nobs**(-2. / (n_cvars + do)) bw[ix_unord] = m_scale[ix_unord] * nobs**(-2. / (n_cvars + do)) if self.return_only_bw: bw = np.median(only_bw, axis=0) return bw
def parallel_map(f, items, cpus=PARALLEL_MAP_CPUS):
    return joblib.Parallel(n_jobs=cpus)(joblib.delayed(f)(item) for item in items)
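# Usage sketch for parallel_map above (assumes PARALLEL_MAP_CPUS is defined
# elsewhere in the module, e.g. PARALLEL_MAP_CPUS = joblib.cpu_count()):
#
#     parallel_map(abs, [-3, 1, -2], cpus=2)   # -> [3, 1, 2]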
def start_kernel(self):
    self.DTYPE = self._dtype

    # Get const values
    self.MASS_LEN = len(self)
    self.SIM_DIM = len(self._mass_list[0]._r)

    # Init multiprocessing pool
    self.CPU_LEN = self._threads
    self.cpu_pool = joblib.Parallel(
        n_jobs=self.CPU_LEN,
        prefer='processes'  # alternative: 'threads'
    )

    self.data_pool = []
    for _ in range(self.CPU_LEN):
        self.data_pool.append({
            # Allocate memory: Object parameters
            'mass_r_array': np.zeros((self.MASS_LEN, self.SIM_DIM), dtype=self.DTYPE),
            'mass_a_array': np.zeros((self.MASS_LEN, self.SIM_DIM), dtype=self.DTYPE),
            'mass_m_array': np.zeros((self.MASS_LEN,), dtype=self.DTYPE),
            # Allocate memory: Temporary variables
            'relative_r': np.zeros((self.MASS_LEN - 1, self.SIM_DIM), dtype=self.DTYPE),
            'distance_sq': np.zeros((self.MASS_LEN - 1,), dtype=self.DTYPE),
            'distance_sqv': np.zeros((self.MASS_LEN - 1, self.SIM_DIM), dtype=self.DTYPE),
            'distance_inv': np.zeros((self.MASS_LEN - 1,), dtype=self.DTYPE),
            'a_factor': np.zeros((self.MASS_LEN - 1,), dtype=self.DTYPE),
            'a1': np.zeros((self.MASS_LEN - 1,), dtype=self.DTYPE),
            'a1r': np.zeros((self.MASS_LEN - 1, self.SIM_DIM), dtype=self.DTYPE),
            'a1v': np.zeros((self.SIM_DIM,), dtype=self.DTYPE),
            'a2': np.zeros((self.MASS_LEN - 1,), dtype=self.DTYPE),
            'a2r': np.zeros((self.MASS_LEN - 1, self.SIM_DIM), dtype=self.DTYPE),
            'G': self._G,
        })

    # Copy const data into Numpy infrastructure
    for pm_index, pm in enumerate(self._mass_list):
        self.data_pool[0]['mass_m_array'][pm_index] = pm._m
    for data_set in self.data_pool[1:]:
        data_set['mass_m_array'][:] = self.data_pool[0]['mass_m_array'][:]

    # Compute line index tuples for evenly sized batches
    total_pairs = (self.MASS_LEN * (self.MASS_LEN - 1)) // 2
    batch_length = total_pairs // self.CPU_LEN
    self.index_pool = []
    pair_count = 0
    start_line = 0
    for line in range(1, self.MASS_LEN - 1):
        pair_count += (self.MASS_LEN - 1 - line)
        if pair_count < batch_length:
            continue
        pair_count = 0
        self.index_pool.append((start_line, line))
        start_line = line
    assert len(self.index_pool) in [(self.CPU_LEN - 1), self.CPU_LEN]
    if len(self.index_pool) == self.CPU_LEN - 1:
        self.index_pool.append((start_line, self.MASS_LEN - 1))
    assert self.index_pool[-1][1] == self.MASS_LEN - 1
    vpi_out_arr = np.array(vpi_out_lst)
    vpi_out_arr = 100 * vpi_out_arr
    vci_out_arr = np.array(vci_out_lst)

    # write VCI array to disc
    out_pth_vci = r'Y:\germany-drought\VCI_VPI\\' + tile + r'\\' + out_descr + '_VCI.tif'
    writeRasterInt(vci_out_arr, out_pth_vci, gt, pr, -32767)

    ## Write VPI array to disc
    out_pth_vpi = r'Y:\germany-drought\VCI_VPI\\' + tile + r'\\' + out_descr + '_VPI.tif'
    writeRasterInt(vpi_out_arr, out_pth_vpi, gt, pr, -32767)

    ## Optional: Writing statistic arrays to disc
    # out_pth_min = r'Y:\germany-drought\VCI_VPI\\' + tile + r'\\' + bl + '_NDVI_MIN.tif'
    # writeRasterInt(min_arr, out_pth_min, gt, pr, -32767)
    # out_pth_max = r'Y:\germany-drought\VCI_VPI\\' + tile + r'\\' + bl + '_NDVI_MAX.tif'
    # writeRasterInt(max_arr, out_pth_max, gt, pr, -32767)
    # out_pth_std = r'Y:\germany-drought\VCI_VPI\\' + tile + r'\\' + bl + '_NDVI_STD.tif'
    # writeRasterFloat(out_pth_std, out_pth, gt, pr, -32767)
    # out_pth_avg = r'Y:\germany-drought\VCI_VPI\\' + tile + r'\\' + bl + '_NDVI_AVG.tif'
    # writeRasterFloat(out_pth_avg, out_pth, gt, pr, -32767)

    print('Done: ' + tile)


if __name__ == '__main__':
    joblib.Parallel(n_jobs=40)(joblib.delayed(workFunc)(i) for i in job_lst)

    endtime = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime())
    print("Start time " + str(starttime))
    print("End time " + str(endtime))
import glob
import os

import joblib
from PIL import Image

dst_dir = 'data/temp/joblib/dst_img'
os.makedirs(dst_dir, exist_ok=True)

files = glob.glob('data/temp/joblib/src_img/*')

for f in files:
    try:
        img = Image.open(f)
        img_resize = img.resize((img.width // 2, img.height // 2))
        root, ext = os.path.splitext(f)
        basename = os.path.basename(root)
        img_resize.save(os.path.join(dst_dir, basename + '_half' + ext))
    except OSError as e:
        pass


def func(f):
    try:
        img = Image.open(f)
        img_resize = img.resize((img.width // 2, img.height // 2))
        root, ext = os.path.splitext(f)
        basename = os.path.basename(root)
        img_resize.save(os.path.join(dst_dir, basename + '_half' + ext))
    except OSError as e:
        pass


_ = joblib.Parallel(n_jobs=-1)(joblib.delayed(func)(f) for f in files)
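# Hedged timing sketch for comparing the two variants above (illustrative only;
# the parallel run wins only once per-image work outweighs worker start-up cost):
#
#     import time
#     t0 = time.time(); [func(f) for f in files]; print('sequential', time.time() - t0)
#     t0 = time.time(); joblib.Parallel(n_jobs=-1)(joblib.delayed(func)(f) for f in files); print('parallel', time.time() - t0)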
def cvglmnet(*, x, y, family='gaussian', ptype='default', nfolds=10, foldid=scipy.empty([0]), parallel=1, keep=False, grouped=True, **options): options = glmnetSet(options) if 0 < len(options['lambdau']) < 2: raise ValueError('Need more than one value of lambda for cv.glmnet') nobs = x.shape[0] # we should not really need this. user must supply the right shape # if y.shape[0] != nobs: # y = scipy.transpose(y) # convert 1d python array of size nobs to 2d python array of size nobs x 1 if len(y.shape) == 1: y = scipy.reshape(y, [y.size, 1]) # we should not really need this. user must supply the right shape # if (len(options['offset']) > 0) and (options['offset'].shape[0] != nobs): # options['offset'] = scipy.transpose(options['offset']) if len(options['weights']) == 0: options['weights'] = scipy.ones([nobs, 1], dtype=scipy.float64) # main call to glmnet glmfit = glmnet(x=x, y=y, family=family, **options) is_offset = glmfit['offset'] options['lambdau'] = glmfit['lambdau'] nz = glmnetPredict(glmfit, scipy.empty([0]), scipy.empty([0]), 'nonzero') if glmfit['class'] == 'multnet': nnz = scipy.zeros([len(options['lambdau']), len(nz)]) for i in range(len(nz)): nnz[:, i] = scipy.transpose(scipy.sum(nz[i], axis=0)) nz = scipy.ceil(scipy.median(nnz, axis=1)) elif glmfit['class'] == 'mrelnet': nz = scipy.transpose(scipy.sum(nz[0], axis=0)) else: nz = scipy.transpose(scipy.sum(nz, axis=0)) if len(foldid) == 0: ma = scipy.tile(scipy.arange(nfolds), [1, int(scipy.floor(nobs / nfolds))]) mb = scipy.arange(scipy.mod(nobs, nfolds)) mb = scipy.reshape(mb, [1, mb.size]) population = scipy.append(ma, mb, axis=1) mc = scipy.random.permutation(len(population)) mc = mc[0:nobs] foldid = population[mc] foldid = scipy.reshape(foldid, [ foldid.size, ]) else: nfolds = scipy.amax(foldid) + 1 if nfolds < 3: raise ValueError( 'nfolds must be bigger than 3; nfolds = 10 recommended') cpredmat = list() foldid = scipy.reshape(foldid, [ foldid.size, ]) if parallel != 1: if parallel == -1: num_cores = multiprocessing.cpu_count() else: num_cores = parallel sys.stderr.write("[status]\tParallel glmnet cv with " + str(num_cores) + " cores\n") cpredmat = joblib.Parallel(n_jobs=num_cores)(joblib.delayed(doCV)( i, x, y, family, foldid, nfolds, is_offset, **options) for i in range(nfolds)) else: for i in range(nfolds): newFit = doCV(i, x, y, family, foldid, nfolds, is_offset, **options) cpredmat.append(newFit) if cpredmat[0]['class'] == 'elnet': cvstuff = cvelnet(cpredmat, options['lambdau'], x, y, options['weights'], options['offset'], foldid, ptype, grouped, keep) elif cpredmat[0]['class'] == 'lognet': cvstuff = cvlognet(cpredmat, options['lambdau'], x, y, options['weights'], options['offset'], foldid, nfolds, ptype, grouped, keep) elif cpredmat[0]['class'] == 'multnet': cvstuff = cvmultnet(cpredmat, options['lambdau'], x, y, options['weights'], options['offset'], foldid, ptype, grouped, keep) elif cpredmat[0]['class'] == 'mrelnet': cvstuff = cvmrelnet(cpredmat, options['lambdau'], x, y, options['weights'], options['offset'], foldid, ptype, grouped, keep) elif cpredmat[0]['class'] == 'fishnet': cvstuff = cvfishnet(cpredmat, options['lambdau'], x, y, options['weights'], options['offset'], foldid, ptype, grouped, keep) elif cpredmat[0]['class'] == 'coxnet': raise NotImplementedError( 'Cross-validation for coxnet not implemented yet.') #cvstuff = cvcoxnet(cpredmat, options['lambdau'], x, y \ # , options['weights'], options['offset'] \ # , foldid, ptype, grouped, keep) cvm = cvstuff['cvm'] cvsd = cvstuff['cvsd'] cvname = cvstuff['name'] 
CVerr = dict() CVerr['lambdau'] = options['lambdau'] CVerr['cvm'] = scipy.transpose(cvm) CVerr['cvsd'] = scipy.transpose(cvsd) CVerr['cvup'] = scipy.transpose(cvm + cvsd) CVerr['cvlo'] = scipy.transpose(cvm - cvsd) CVerr['nzero'] = nz CVerr['name'] = cvname CVerr['glmnet_fit'] = glmfit if keep: CVerr['fit_preval'] = cvstuff['fit_preval'] CVerr['foldid'] = foldid if ptype == 'auc': cvm = -cvm aa = options['lambdau'][cvm <= scipy.amin(cvm)] if len(aa) > 0: CVerr['lambda_min'] = scipy.amax(aa).reshape([1]) idmin = options['lambdau'] == CVerr['lambda_min'] semin = cvm[idmin] + cvsd[idmin] CVerr['lambda_1se'] = scipy.amax( options['lambdau'][cvm <= semin]).reshape([1]) else: CVerr['lambda_min'] = [1000] CVerr['lambda_1se'] = [1000] CVerr['class'] = 'cvglmnet' return CVerr
def _maybe_parallelize_psi( event_annotation, reads2d, isoform1_junctions, isoform2_junctions, min_reads=MIN_READS, method='mean', uneven_coverage_multiplier=UNEVEN_COVERAGE_MULTIPLIER, n_jobs=-1): """If n_jobs!=1, run the parallelized version of psi Parameters ---------- event_annotation : pandas.DataFrame A table of all possible events, with event ids as the index (row names) and all junctions described, and contains the columns described by ``isoform1_junctions`` and ``isoform_junctions`` reads2d : pandas.DataFrame A (n_samples, n_total_junctions) table of the number of reads found in all samples' exon-exon, all junctions. Very very large, e.g. 1000 samples x 50,000 junctions = 50 million elements isoform1_junctions : list of str Junction numbers corresponding to isoform 1, e.g. ['junction13'] isoform2_junctions : list of str Junction numbers corresponding to isoform 2, e.g. ['junction12', 'junction23'] min_reads : int, optional Minimum number of reads for a junction to be viable. The rules governing compatibility of events are complex, and it is recommended to read the documentation for ``outrigger psi`` (default=10) method : "mean" | "min", optional Denotes the method by which to aggregate junctions from the same isoform - either use the mean (default) or the minimum. (default="mean") uneven_coverage_multiplier : int, optional Scale factor for the maximum amount bigger one side of a junction can be before rejecting the event, e.g. for an SE event with two junctions, junction12 and junction23, junction12=40 but junction23=500, then this event would be rejected because 500 > 40*10 (default=10) n_jobs : int, optional Number of subprocesses to create. Default is -1, which is to use as many processes/cores as possible Returns ------- summary : pandas.DataFrame A (n_samples * n_events, 7) shaped table with the sample id, junction reads, percent spliced-in (Psi), and notes on each event in each sample, that explains why or why not Psi was calculated """ # There are multiple rows with the same event id because the junctions # are the same, but the flanking exons may be a little wider or shorter, # but ultimately the event Psi is calculated only on the junctions so the # flanking exons don't matter for this. But, all the exons are in # exon\d.bed in the index! And you, the lovely user, can decide what you # want to do with them! grouped = event_annotation.groupby(level=0, axis=0) n_events = len(grouped.size()) if n_jobs == 1: # Do a separate branch because joblib doesn't do a good job of # managing the python debugger so use --n-jobs=1 (n_jobs=1) when # debugging progress('\tIterating over {} events ...\n'.format(n_events)) summaries = [] for event_id, event_df in grouped: summary = _single_event_psi( event_id, event_df, reads2d, isoform1_junctions, isoform2_junctions, min_reads=min_reads, uneven_coverage_multiplier=uneven_coverage_multiplier, method=method) summaries.append(summary) else: processors = n_jobs if n_jobs > 0 else joblib.cpu_count() progress("\tParallelizing {} events' Psi calculation across {} " "CPUs ...\n".format(n_events, processors)) summaries = joblib.Parallel(n_jobs=n_jobs)( joblib.delayed(_single_event_psi)( event_id, event_df, reads2d, isoform1_junctions, isoform2_junctions, min_reads=min_reads, uneven_coverage_multiplier=uneven_coverage_multiplier, method=method) for event_id, event_df in grouped) return summaries
def train(self, data, parallel=True):
    """
    Train CCA with cross-validation for a set of regularization
    coefficients and/or numbers of CCs

    Attributes:
        data (list): training data matrices
            (number of samples X number of features).
            Number of samples must match across datasets.
        parallel (bool): use joblib to train cross-validation folds
            in parallel
    """
    nT = data[0].shape[0]
    chunklen = 10 if nT > 50 else 1
    nchunks = int(0.2 * nT / chunklen)
    indchunks = zip(*[iter(range(nT))] * chunklen)
    corr_mat = np.zeros((len(self.regs), len(self.numCCs)))
    selection = max(int(self.select * min([d.shape[1] for d in data])), 1)
    for ri, reg in enumerate(self.regs):
        for ci, numCC in enumerate(self.numCCs):
            running_corr_mean_sum = 0.
            if parallel:
                fold_corr_means = joblib.Parallel(n_jobs=self.numCV)(
                    joblib.delayed(train_cvfold)(data=data,
                                                 reg=reg,
                                                 numCC=numCC,
                                                 kernelcca=self.kernelcca,
                                                 ktype=self.ktype,
                                                 gausigma=self.gausigma,
                                                 degree=self.degree,
                                                 cutoff=self.cutoff,
                                                 selection=selection)
                    for fold in range(self.numCV))
                running_corr_mean_sum += sum(fold_corr_means)
            else:
                for cvfold in range(self.numCV):
                    fold_corr_mean = train_cvfold(data=data,
                                                  reg=reg,
                                                  numCC=numCC,
                                                  kernelcca=self.kernelcca,
                                                  ktype=self.ktype,
                                                  gausigma=self.gausigma,
                                                  degree=self.degree,
                                                  cutoff=self.cutoff,
                                                  selection=selection)
                    running_corr_mean_sum += fold_corr_mean

            corr_mat[ri, ci] = running_corr_mean_sum / self.numCV

    best_ri, best_ci = np.where(corr_mat == corr_mat.max())
    self.best_reg = self.regs[best_ri[0]]
    self.best_numCC = self.numCCs[best_ci[0]]

    comps = kcca(data, self.best_reg, self.best_numCC,
                 kernelcca=self.kernelcca, ktype=self.ktype,
                 gausigma=self.gausigma, degree=self.degree)
    self.cancorrs, self.ws, self.comps = recon(data, comps,
                                               kernelcca=self.kernelcca)
    if len(data) == 2:
        self.cancorrs = self.cancorrs[np.nonzero(self.cancorrs)]
    return self
fns.sort()
for fn in fns[1:]:
    ncname = os.path.split(fn)[1]
    print "loading...", fn
    ncf = NCFile(fn, cache_s21_raw=True)
    print "accessing sweep..."
    swa = ncf.SweepArray0
    print "extracting data..."
    data = []
    for k in range(32):
        swp = swa.sweep(k)
        data.append((swp.frequency, swp.s21_point, swp.s21_point_error))
    print "starting parallel jobs..."
    pp = joblib.Parallel(n_jobs=16, verbose=5)
    results = pp([joblib.delayed(process_sweep)(*args) for args in data])
    results = list(itertools.chain.from_iterable(results))
    print "saving results..."
    print joblib.dump(results, ('/home/gjones/%s_resonators.pkl' % ncname), compress=True)
    print "plotting..."
    fig, ax = plt.subplots()
    for k in range(32):
        swp = swa.sweep(k)
        ax.plot(swp.frequency, 20 * np.log10(np.abs(swp.s21_point)))
    for params in results:
        f0 = params['f_0'].value
        if f0 > swp.frequency.min() and f0 < swp.frequency.max():
def permuted_ols(tested_vars, target_vars, confounding_vars=None, model_intercept=True, n_perm=10000, two_sided_test=True, random_state=None, n_jobs=1, verbose=0): """Massively univariate group analysis with permuted OLS. Tested variates are independently fitted to target variates descriptors (e.g. brain imaging signal) according to a linear model solved with an Ordinary Least Squares criterion. Confounding variates may be included in the model. Permutation testing is used to assess the significance of the relationship between the tested variates and the target variates [1]_, [2]_. A max-type procedure is used to obtain family-wise corrected p-values. The specific permutation scheme implemented here is the one of [3]_. Its has been demonstrated in [1]_ that this scheme conveys more sensitivity than alternative schemes. This holds for neuroimaging applications, as discussed in details in [2]_. Permutations are performed on parallel computing units. Each of them performs a fraction of permutations on the whole dataset. Thus, the max t-score amongst data descriptors can be computed directly, which avoids storing all the computed t-scores. The variates should be given C-contiguous. target_vars are fortran-ordered automatically to speed-up computations. Parameters ---------- tested_vars : array-like, shape=(n_samples, n_regressors) Explanatory variates, fitted and tested independently from each others. target_vars : array-like, shape=(n_samples, n_descriptors) fMRI data, trying to be explained by explanatory and confounding variates. confounding_vars : array-like, shape=(n_samples, n_covars), optional Confounding variates (covariates), fitted but not tested. If None, no confounding variate is added to the model (except maybe a constant column according to the value of `model_intercept`) model_intercept : bool, optional If True, a constant column is added to the confounding variates unless the tested variate is already the intercept. Default=True n_perm : int, optional Number of permutations to perform. Permutations are costly but the more are performed, the more precision one gets in the p-values estimation. Default=10000. two_sided_test : boolean, optional If True, performs an unsigned t-test. Both positive and negative effects are considered; the null hypothesis is that the effect is zero. If False, only positive effects are considered as relevant. The null hypothesis is that the effect is zero or negative. Default=True. random_state : int or None, optional Seed for random number generator, to have the same permutations in each computing units. n_jobs : int, optional Number of parallel workers. If 0 is provided, all CPUs are used. A negative number indicates that all the CPUs except (abs(n_jobs) - 1) ones will be used. Default=1. verbose : int, optional verbosity level (0 means no message). Default=0. Returns ------- pvals : array-like, shape=(n_regressors, n_descriptors) Negative log10 p-values associated with the significance test of the n_regressors explanatory variates against the n_descriptors target variates. Family-wise corrected p-values. score_orig_data : numpy.ndarray, shape=(n_regressors, n_descriptors) t-statistic associated with the significance test of the n_regressors explanatory variates against the n_descriptors target variates. The ranks of the scores into the h0 distribution correspond to the p-values. h0_fmax : array-like, shape=(n_perm, ) Distribution of the (max) t-statistic under the null hypothesis (obtained from the permutations). Array is sorted. 
References ---------- .. [1] Anderson, M. J. & Robinson, J. (2001). Permutation tests for linear models. Australian & New Zealand Journal of Statistics, 43(1), 75-88. .. [2] Winkler, A. M. et al. (2014). Permutation inference for the general linear model. Neuroimage. .. [3] Freedman, D. & Lane, D. (1983). A nonstochastic interpretation of reported significance levels. J. Bus. Econ. Stats., 1(4), 292-298 """ # initialize the seed of the random generator rng = check_random_state(random_state) # check n_jobs (number of CPUs) if n_jobs == 0: # invalid according to joblib's conventions raise ValueError("'n_jobs == 0' is not a valid choice. " "Please provide a positive number of CPUs, or -1 " "for all CPUs, or a negative number (-i) for " "'all but (i-1)' CPUs (joblib conventions).") elif n_jobs < 0: n_jobs = max(1, joblib.cpu_count() - int(n_jobs) + 1) else: n_jobs = min(n_jobs, joblib.cpu_count()) # make target_vars F-ordered to speed-up computation if target_vars.ndim != 2: raise ValueError( "'target_vars' should be a 2D array. " "An array with %d dimension%s was passed" % (target_vars.ndim, "s" if target_vars.ndim > 1 else "")) target_vars = np.asfortranarray(target_vars) # efficient for chunking n_descriptors = target_vars.shape[1] if np.any(np.all(target_vars == 0, axis=0)): warnings.warn( "Some descriptors in 'target_vars' have zeros across all " "samples. These descriptors will be ignored during null " "distribution generation.") # check explanatory variates dimensions if tested_vars.ndim == 1: tested_vars = np.atleast_2d(tested_vars).T n_samples, n_regressors = tested_vars.shape # check if explanatory variates is intercept (constant) or not if (n_regressors == 1 and np.unique(tested_vars).size == 1): intercept_test = True else: intercept_test = False # optionally add intercept if model_intercept and not intercept_test: if confounding_vars is not None: confounding_vars = np.hstack( (confounding_vars, np.ones((n_samples, 1)))) else: confounding_vars = np.ones((n_samples, 1)) ### OLS regression on original data if confounding_vars is not None: # step 1: extract effect of covars from target vars covars_orthonormalized = orthonormalize_matrix(confounding_vars) if not covars_orthonormalized.flags['C_CONTIGUOUS']: # useful to developer warnings.warn('Confounding variates not C_CONTIGUOUS.') covars_orthonormalized = np.ascontiguousarray( covars_orthonormalized) targetvars_normalized = normalize_matrix_on_axis( target_vars).T # faster with F-ordered target_vars_chunk if not targetvars_normalized.flags['C_CONTIGUOUS']: # useful to developer warnings.warn('Target variates not C_CONTIGUOUS.') targetvars_normalized = np.ascontiguousarray(targetvars_normalized) beta_targetvars_covars = np.dot(targetvars_normalized, covars_orthonormalized) targetvars_resid_covars = targetvars_normalized - np.dot( beta_targetvars_covars, covars_orthonormalized.T) targetvars_resid_covars = normalize_matrix_on_axis( targetvars_resid_covars, axis=1) # step 2: extract effect of covars from tested vars testedvars_normalized = normalize_matrix_on_axis(tested_vars.T, axis=1) beta_testedvars_covars = np.dot(testedvars_normalized, covars_orthonormalized) testedvars_resid_covars = testedvars_normalized - np.dot( beta_testedvars_covars, covars_orthonormalized.T) testedvars_resid_covars = normalize_matrix_on_axis( testedvars_resid_covars, axis=1).T.copy() else: targetvars_resid_covars = normalize_matrix_on_axis(target_vars).T testedvars_resid_covars = normalize_matrix_on_axis(tested_vars).copy() covars_orthonormalized = None # 
check arrays contiguousity (for the sake of code efficiency) if not targetvars_resid_covars.flags['C_CONTIGUOUS']: # useful to developer warnings.warn('Target variates not C_CONTIGUOUS.') targetvars_resid_covars = np.ascontiguousarray(targetvars_resid_covars) if not testedvars_resid_covars.flags['C_CONTIGUOUS']: # useful to developer warnings.warn('Tested variates not C_CONTIGUOUS.') testedvars_resid_covars = np.ascontiguousarray(testedvars_resid_covars) # step 3: original regression (= regression on residuals + adjust t-score) # compute t score for original data scores_original_data = _t_score_with_covars_and_normalized_design( testedvars_resid_covars, targetvars_resid_covars.T, covars_orthonormalized) if two_sided_test: sign_scores_original_data = np.sign(scores_original_data) scores_original_data = np.fabs(scores_original_data) ### Permutations # parallel computing units perform a reduced number of permutations each if n_perm > n_jobs: n_perm_chunks = np.asarray([n_perm / n_jobs] * n_jobs, dtype=int) n_perm_chunks[-1] += n_perm % n_jobs elif n_perm > 0: warnings.warn('The specified number of permutations is %d and ' 'the number of jobs to be performed in parallel has ' 'set to %s. This is incompatible so only %d jobs will ' 'be running. You may want to perform more permutations ' 'in order to take the most of the available computing ' 'ressources.' % (n_perm, n_jobs, n_perm)) n_perm_chunks = np.ones(n_perm, dtype=int) else: # 0 or negative number of permutations => original data scores only if two_sided_test: scores_original_data = (scores_original_data * sign_scores_original_data) return np.asarray([]), scores_original_data.T, np.asarray([]) # actual permutations, seeded from a random integer between 0 and maximum # value represented by np.int32 (to have a large entropy). ret = joblib.Parallel(n_jobs=n_jobs, verbose=verbose)( joblib.delayed(_permuted_ols_on_chunk)(scores_original_data, testedvars_resid_covars, targetvars_resid_covars.T, thread_id + 1, covars_orthonormalized, n_perm=n_perm, n_perm_chunk=n_perm_chunk, intercept_test=intercept_test, two_sided_test=two_sided_test, random_state=rng.randint( 1, np.iinfo(np.int32).max - 1), verbose=verbose) for thread_id, n_perm_chunk in enumerate(n_perm_chunks)) # reduce results scores_as_ranks_parts, h0_fmax_parts = zip(*ret) h0_fmax = np.hstack((h0_fmax_parts)) scores_as_ranks = np.zeros((n_regressors, n_descriptors)) for scores_as_ranks_part in scores_as_ranks_parts: scores_as_ranks += scores_as_ranks_part # convert ranks into p-values pvals = (n_perm + 1 - scores_as_ranks) / float(1 + n_perm) # put back sign on scores if it was removed in the case of a two-sided test # (useful to distinguish between positive and negative effects) if two_sided_test: scores_original_data = scores_original_data * sign_scores_original_data return -np.log10(pvals), scores_original_data.T, h0_fmax[0]
def _run_algos():
    name_config = ""
    for config_name, config in configs_getter.get_configs():
        name_config = config_name
    fpath = os.path.join(os.path.dirname(__file__), "../../output/metrics_draft",
                         f'{name_config}.csv')
    tmp_dirpath = f'{fpath}.tmp_results'
    os.makedirs(tmp_dirpath, exist_ok=True)
    atexit.register(shutil.rmtree, tmp_dirpath)
    tmp_files_idx = 0
    delayed_jobs = []

    nb_stocks_flag = [int(nb) for nb in FLAGS.nb_stocks or []]
    for config_name, config in configs_getter.get_configs():
        print(f'Config {config_name}', config)
        config.algos = [a for a in config.algos
                        if FLAGS.algos is None or a in FLAGS.algos]
        if nb_stocks_flag:
            config.nb_stocks = [a for a in config.nb_stocks if a in nb_stocks_flag]
        combinations = list(
            itertools.product(
                config.algos, config.dividends, config.maturities,
                config.nb_dates, config.nb_paths, config.nb_stocks,
                config.payoffs, config.drift, config.spots, config.stock_models,
                config.strikes, config.volatilities, config.mean, config.speed,
                config.correlation, config.hurst, config.nb_epochs,
                config.hidden_size, config.factors, config.ridge_coeff,
                config.train_ITM_only, config.use_path))
        # random.shuffle(combinations)
        for params in combinations:
            for i in range(config.nb_runs):
                tmp_file_path = os.path.join(tmp_dirpath, str(tmp_files_idx))
                tmp_files_idx += 1
                delayed_jobs.append(
                    joblib.delayed(_run_algo)(tmp_file_path, *params,
                                              fail_on_error=FLAGS.print_errors))

    print(f"Running {len(delayed_jobs)} tasks using "
          f"{FLAGS.nb_jobs}/{NUM_PROCESSORS} CPUs...")
    joblib.Parallel(n_jobs=FLAGS.nb_jobs)(delayed_jobs)

    print(f'Writing results to {fpath}...')
    with open(fpath, "w") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=_CSV_HEADERS)
        writer.writeheader()
        for idx in range(tmp_files_idx):
            tmp_file_path = os.path.join(tmp_dirpath, str(idx))
            try:
                with open(tmp_file_path, "r") as read_f:
                    csvfile.write(read_f.read())
            except FileNotFoundError:
                pass

    return fpath
Species_To_Store = np.array([False, False, False, False, True, True])

BatchNum = 400  # Number of replicate datasets

import joblib as jb
from functools import partial
ntasks = 40

for i in range(len(File_list)):
    input_file = File_list[i]

    # compute moments for BatchNum replicate datasets
    #Moms_time_data = []
    #for n in range(BatchNum):
    #    data = Load_moms_time(input_file, Moments, keep_species=Species_To_Store)
    #    Moms_time_data.append(data)

    Load_moms_time_p = partial(Load_moms_time,
                               input_filename=input_file,
                               Moments=Moments,
                               keep_species=Species_To_Store)

    Moms_time_data = jb.Parallel(n_jobs=ntasks)(jb.delayed(Load_moms_time_p)()
                                                for n in range(BatchNum))

    # perform GRN inference for each dataset in the Batch
    Batch_Inference(
        Moms_time_data,
        [DLab_m[i] + "(#%d)" % n for n in range(BatchNum)],
        DLab_m[i],
        shift=30,
        sub_sample=15,
        PDF_Save_dir='/nfs/datanumerik/people/araharin/Data_032021/PDF',
        GRN_Save_dir='/nfs/datanumerik/people/araharin/Data_032021/GRNs',
        indexes=indexes)
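# Hedged side note: functools.partial is one way to freeze the keyword
# arguments; joblib.delayed also accepts them directly, so an equivalent
# (untested-here) formulation of the call above would be:
#
#     Moms_time_data = jb.Parallel(n_jobs=ntasks)(
#         jb.delayed(Load_moms_time)(input_filename=input_file,
#                                    Moments=Moments,
#                                    keep_species=Species_To_Store)
#         for n in range(BatchNum))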
def resize(self, input_path, size=(1024, 768), output_dirpath=None, output_prefix='resized_image', n_jobs=-1): def __resize(img_path, size, output_dirpath, output_prefix): print(img_path) # open the original image img_original = skimage.io.imread(img_path)[:, :, :3] h, w, c = img_original.shape resize_ratio = None if max(h, w) == h: resize_ratio = size[1] / h else: resize_ratio = size[0] / w # resize the original image to target size img = skimage.transform.rescale(img_original, resize_ratio, mode='reflect', anti_aliasing=True, multichannel=True) # make background img_original_2x = skimage.transform.rescale(img_original, 1.5, mode='reflect', anti_aliasing=True, multichannel=True) bg_img = self.__augmentation_generate_background(img_original_2x) bh, bw, bc = bg_img.shape block_size = img.shape[ 0] if img.shape[0] > img.shape[1] else img.shape[1] x0 = int((bg_img.shape[0] - block_size) / 2) x1 = x0 + block_size y0 = int((bg_img.shape[1] - block_size) / 2) y1 = y0 + block_size bg_img = bg_img[x0:x1, y0:y1] # synthesis img = self.__get_padding(img) mask = skimage.color.rgb2gray(img) mask = np.pad( skimage.transform.resize( mask, (mask.shape[0] - 2, mask.shape[1] - 2), mode='constant'), 1, self.__zero_padding) img[mask < 0.001] = bg_img[mask < 0.001] # print out resized image if output_dirpath is None: output_dirpath = '' new_file_path = os.path.join( output_dirpath, output_prefix + '_' + os.path.basename(img_path)) skimage.io.imsave(new_file_path, img) # print out xml, if given xml_path = os.path.splitext(img_path)[0] + '.xml' new_file_path = os.path.join( output_dirpath, output_prefix + '_' + os.path.basename(xml_path)) print(new_file_path) if xml_path is not None: # shift if max(h, w) == h: shift_h = 0 shift_w = (h - w) * resize_ratio / 2 else: shift_w = 0 shift_h = (w - h) * resize_ratio / 2 re_width = re.compile(r'<width>([0-9]+)</width>') re_height = re.compile(r'<height>([0-9]+)</height>') re_xmin = re.compile(r'<xmin>([0-9]+)</xmin>') re_xmax = re.compile(r'<xmax>([0-9]+)</xmax>') re_ymin = re.compile(r'<ymin>([0-9]+)</ymin>') re_ymax = re.compile(r'<ymax>([0-9]+)</ymax>') with open(xml_path, 'r') as inxml, open(new_file_path, 'w') as outxml: for buf in inxml: v = None if '<width>' in buf: v = re_width.search(buf).group(1) buf = buf.replace(v, str(img.shape[1])) elif '<height>' in buf: v = re_height.search(buf).group(1) buf = buf.replace(v, str(img.shape[0])) elif 'xmin' in buf: v = re_xmin.search(buf).group(1) buf = buf.replace( v, str(int(int(v) * resize_ratio + shift_w))) elif 'xmax' in buf: v = re_xmax.search(buf).group(1) buf = buf.replace( v, str(int(int(v) * resize_ratio + shift_w))) elif 'ymin' in buf: v = re_ymin.search(buf).group(1) buf = buf.replace( v, str(int(int(v) * resize_ratio + shift_h))) elif 'ymax' in buf: v = re_ymax.search(buf).group(1) buf = buf.replace( v, str(int(int(v) * resize_ratio + shift_h))) outxml.write(buf) image_files = [] if os.path.isfile(input_path): image_files = [input_path] elif os.path.isdir(input_path): print(input_path) for f in os.listdir(input_path): if os.path.splitext(f)[1] in self.image_extension: image_files.append(os.path.join(input_path, f)) r = joblib.Parallel(n_jobs=n_jobs, verbose=0)([ joblib.delayed(__resize)(image_file, size, output_dirpath, output_prefix) for image_file in image_files ])
def Parallel(**kwargs: Any) -> Any:  # TODO: disable lint complaint
    """Adapter for joblib.Parallel so we could, if desired, centralize control"""
    # ATM just a straight invocation
    import joblib
    return joblib.Parallel(**kwargs)
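# Usage sketch for the adapter above (the call is identical to using
# joblib.Parallel directly; only the construction is routed through the wrapper):
#
#     from math import sqrt
#     import joblib
#     res = Parallel(n_jobs=2)(joblib.delayed(sqrt)(i) for i in range(4))
#     # res == [0.0, 1.0, 1.4142135623730951, 1.7320508075688772]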
def tune(self, ncores=1, csvname=None, verbose=True):
    """
    This function starts the tuning process with a specified number of processors

    :param ncores: (int) number of parallel processors (see the **Notes** section below for an important note about parallel execution)
    :param csvname: (str) the name of the csv file name to save the tuning results
        (useful for expensive cases as the csv file is updated directly after the case is done)
    :param verbose: (bool) whether to print updates to the screen or not
    """
    self.ncores = ncores
    self.csvlogger = csvname
    self.verbose = verbose

    if self.verbose:
        print('***************************************************************')
        print('****************Grid Search is Running*************************')
        print('***************************************************************')

        if self.ncores > 1:
            print('--- Running in parallel with {} cores'.format(self.ncores))

    if self.csvlogger:
        headers = ['id'] + self.param_names + ['score']
        with open(self.csvlogger, 'w') as csvfile:
            csvwriter = csv.writer(csvfile,
                                   delimiter=',',
                                   quoting=csv.QUOTE_MINIMAL,
                                   lineterminator='\n')
            csvwriter.writerow(headers)

    core_lst = []
    for i in range(len(self.hyperparameter_cases)):
        core_lst.append([i + 1, self.hyperparameter_cases[i]])

    if self.ncores > 1:
        #p=Pool(self.ncores)
        #results = p.map(self.worker, core_lst)
        #p.close()
        #p.join()
        with joblib.Parallel(n_jobs=self.ncores) as parallel:
            results = parallel(joblib.delayed(self.worker)(item)
                               for item in core_lst)
    else:
        results = []
        for item in core_lst:
            results.append(self.worker(item))

    gridres = pd.DataFrame(self.hyperparameter_cases, columns=self.param_names)
    gridres.index += 1
    gridres['score'] = results
    #gridres = gridres.sort_values(['score'], axis='index', ascending=False)

    return gridres