Example No. 1
# Imports assumed for this excerpt (not shown in the original); parallel_func as in statsmodels.tools.parallel.
import warnings
from math import sqrt
from numpy import arange, testing
from statsmodels.tools.parallel import parallel_func

def test_parallel():
    x = arange(10.)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        parallel, p_func, n_jobs = parallel_func(sqrt, n_jobs=-1, verbose=0)
        y = parallel(p_func(i**2) for i in range(10))
    testing.assert_equal(x, y)
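The helper returns a ready-to-call parallel executor, a wrapped version of the target function, and the job count actually used. A minimal sketch of that pattern built directly on joblib (the real helper also handles fallbacks that are omitted here; square is just an illustrative target) looks like this:

from joblib import Parallel, delayed

def parallel_func(func, n_jobs, verbose=0):
    # Return a Parallel executor, a delayed wrapper around func, and n_jobs.
    parallel = Parallel(n_jobs=n_jobs, verbose=verbose)
    p_func = delayed(func)
    return parallel, p_func, n_jobs

def square(x):
    return x * x

parallel, p_func, n_jobs = parallel_func(square, n_jobs=2)
print(parallel(p_func(i) for i in range(5)))  # [0, 1, 4, 9, 16]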
Example No. 3
    def fit_joblib(self,
                   data_generator,
                   fit_kwds,
                   parallel_backend,
                   init_kwds_generator=None):
        """Performs the distributed estimation in parallel using joblib

        Parameters
        ----------
        data_generator : generator
            A generator that produces a sequence of tuples where the first
            element in the tuple corresponds to an endog array and the
            second element corresponds to an exog array.
        fit_kwds : dict-like
            Keywords needed for the model fitting.
        parallel_backend : None or joblib parallel_backend object
            Used to allow support for more complicated backends,
            e.g. dask.distributed.
        init_kwds_generator : generator or None
            Additional keyword generator that produces model init_kwds
            that may vary based on data partition.  The current use case
            is for WLS and GLS.

        Returns
        -------
        join_method result.  For the default, _join_debiased, it returns a
        p-length array.
        """

        from statsmodels.tools.parallel import parallel_func

        par, f, n_jobs = parallel_func(_helper_fit_partition, self.partitions)

        if parallel_backend is None and init_kwds_generator is None:
            results_l = par(
                f(self, pnum, endog, exog, fit_kwds)
                for pnum, (endog, exog) in enumerate(data_generator))

        elif parallel_backend is not None and init_kwds_generator is None:
            with parallel_backend:
                results_l = par(
                    f(self, pnum, endog, exog, fit_kwds)
                    for pnum, (endog, exog) in enumerate(data_generator))

        elif parallel_backend is None and init_kwds_generator is not None:
            tup_gen = enumerate(zip(data_generator, init_kwds_generator))
            results_l = par(
                f(self, pnum, endog, exog, fit_kwds, init_kwds)
                for pnum, ((endog, exog), init_kwds) in tup_gen)

        elif parallel_backend is not None and init_kwds_generator is not None:
            tup_gen = enumerate(zip(data_generator, init_kwds_generator))
            with parallel_backend:
                results_l = par(
                    f(self, pnum, endog, exog, fit_kwds, init_kwds)
                    for pnum, ((endog, exog), init_kwds) in tup_gen)

        return results_l
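The data_generator convention described in the docstring can be reproduced with a small standalone sketch; the helper name split_endog_exog and the toy sizes below are illustrative, not part of statsmodels. Each yielded tuple carries the endog block first and the exog block second, one pair per partition.

import numpy as np

def split_endog_exog(endog, exog, n_parts):
    # Yield (endog, exog) blocks, one pair per data partition.
    for idx in np.array_split(np.arange(endog.shape[0]), n_parts):
        yield endog[idx], exog[idx]

rng = np.random.default_rng(0)
endog = rng.standard_normal(100)
exog = rng.standard_normal((100, 3))
parts = list(split_endog_exog(endog, exog, n_parts=4))
print(len(parts), parts[0][0].shape, parts[0][1].shape)  # 4 (25,) (25, 3)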
Example No. 4
    def _run_batch(
        self, input_data: t.Union["ext.NpNDArray", "ext.PdDataFrame"]
    ) -> t.Any:  # type: ignore[override] # noqa
        # TODO: type hint return type.
        parallel, p_func, _ = parallel_func(  # type: ignore[arg-type]
            self._predict_fn,
            n_jobs=self._num_threads,
            verbose=0,
        )
        return parallel(p_func(i) for i in input_data)[0]  # type: ignore
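The method above fans one predict call per batch element out across joblib workers and keeps only the first result. A hedged sketch of the same per-element pattern, using joblib directly with a stand-in predict_fn (the names and toy batch are illustrative):

import numpy as np
from joblib import Parallel, delayed

def predict_fn(row):
    # Stand-in for a real model's per-row predict.
    return float(np.sum(row))

batch = np.arange(12.0).reshape(4, 3)  # iterating an ndarray yields its rows
results = Parallel(n_jobs=2)(delayed(predict_fn)(row) for row in batch)
print(results)  # [3.0, 12.0, 21.0, 30.0]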
Example No. 5
    def fit_joblib(self, data_generator, fit_kwds, parallel_backend,
                   init_kwds_generator=None):
        """Performs the distributed estimation in parallel using joblib

        Parameters
        ----------
        data_generator : generator
            A generator that produces a sequence of tuples where the first
            element in the tuple corresponds to an endog array and the
            second element corresponds to an exog array.
        fit_kwds : dict-like
            Keywords needed for the model fitting.
        parallel_backend : None or joblib parallel_backend object
            Used to allow support for more complicated backends,
            e.g. dask.distributed.
        init_kwds_generator : generator or None
            Additional keyword generator that produces model init_kwds
            that may vary based on data partition.  The current use case
            is for WLS and GLS.

        Returns
        -------
        join_method result.  For the default, _join_debiased, it returns a
        p-length array.
        """

        from statsmodels.tools.parallel import parallel_func

        par, f, n_jobs = parallel_func(_helper_fit_partition, self.partitions)

        if parallel_backend is None and init_kwds_generator is None:
            results_l = par(f(self, pnum, endog, exog, fit_kwds)
                            for pnum, (endog, exog)
                            in enumerate(data_generator))

        elif parallel_backend is not None and init_kwds_generator is None:
            with parallel_backend:
                results_l = par(f(self, pnum, endog, exog, fit_kwds)
                                for pnum, (endog, exog)
                                in enumerate(data_generator))

        elif parallel_backend is None and init_kwds_generator is not None:
            tup_gen = enumerate(zip(data_generator, init_kwds_generator))
            results_l = par(f(self, pnum, endog, exog, fit_kwds, init_kwds)
                            for pnum, ((endog, exog), init_kwds)
                            in tup_gen)

        elif parallel_backend is not None and init_kwds_generator is not None:
            tup_gen = enumerate(zip(data_generator, init_kwds_generator))
            with parallel_backend:
                results_l = par(f(self, pnum, endog, exog, fit_kwds, init_kwds)
                                for pnum, ((endog, exog), init_kwds)
                                in tup_gen)

        return results_l
Example No. 6
    trends = ('c', 'ct')
    T = np.array((20, 25, 30, 35, 40, 45, 50, 60, 70, 80, 90, 100, 120, 140,
                  160, 180, 200, 250, 300, 350, 400, 450, 500, 600, 700, 800,
                  900, 1000, 1200, 1400, 2000))
    T = T[::-1]
    percentiles = list(np.arange(0.5, 100.0, 0.5))
    seeds = np.arange(0, 2**32, step=2**23)
    for tr in trends:
        results = np.zeros((len(percentiles), len(T), EX_NUM))

        for i in range(EX_NUM):
            print("Experiment Number {0} of {1} (trend {2})".format(
                i + 1, EX_NUM, tr))
            now = datetime.datetime.now()
            parallel, p_func, n_jobs = parallel_func(wrapper,
                                                     n_jobs=NUM_JOBS,
                                                     verbose=2)
            out = parallel(p_func(t, tr, EX_SIZE, seed=seeds[i]) for t in T)
            quantiles = [np.percentile(x, percentiles) for x in out]
            results[:, :, i] = np.array(quantiles).T
            print('Elapsed time {0} seconds'.format(datetime.datetime.now() -
                                                    now))

            if i % 50 == 0:
                np.savez('dfgls_' + tr + '.npz',
                         trend=tr,
                         results=results,
                         percentiles=percentiles,
                         T=T)

Example No. 7
if __name__ == '__main__':
    trends = ('nc', 'c', 'ct', 'ctt')
    T = array(
        (20, 25, 30, 35, 40, 45, 50, 60, 70, 80, 90, 100, 120, 140, 160,
         180, 200, 250, 300, 350, 400, 450, 500, 600, 700, 800, 900,
         1000, 1200, 1400, 2000))
    T = T[::-1]
    m = T.shape[0]
    percentiles = list(arange(0.5, 100.0, 0.5))
    seed(0)
    seeds = random_integers(0, 2 ** 31 - 2, size=EX_NUM)

    parallel, p_func, n_jobs = parallel_func(wrapper,
                                             n_jobs=NUM_JOBS,
                                             verbose=2)
    parallel.pre_dispatch = NUM_JOBS
    for tr in trends:
        results = zeros((len(percentiles), len(T), EX_NUM)) * nan
        filename = 'adf_z_' + tr + '.npz'

        for i in range(EX_NUM):
            print("Experiment Number {0} for Trend {1}".format(i + 1, tr))
            # Non parallel version
            # out = lmap(wrapper, T, [tr] * m, [EX_SIZE] * m, [seeds[i]] * m)
            now = datetime.datetime.now()
            out = parallel(p_func(t, tr, EX_SIZE, seed=seeds[i]) for t in T)
            quantiles = [percentile(x, percentiles) for x in out]
            results[:, :, i] = array(quantiles).T
            elapsed = datetime.datetime.now() - now
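Both simulation scripts above follow the same parallel Monte Carlo recipe: run one batch of replications per sample size in parallel, then reduce each batch to its percentiles. A condensed sketch under assumed names (simulate_stat stands in for wrapper, and the sizes are illustrative):

import numpy as np
from joblib import Parallel, delayed

def simulate_stat(nobs, n_rep, seed):
    # Stand-in for the real simulation: n_rep draws of some test statistic.
    rng = np.random.default_rng(seed)
    return rng.standard_normal(n_rep) / np.sqrt(nobs)

sample_sizes = (50, 100, 200)
percentiles = np.arange(0.5, 100.0, 0.5)
out = Parallel(n_jobs=2)(
    delayed(simulate_stat)(t, 1000, seed=0) for t in sample_sizes)
# One row per percentile, one column per sample size, as in results[:, :, i].
quantiles = np.array([np.percentile(x, percentiles) for x in out]).T
print(quantiles.shape)  # (199, 3)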
Example No. 8
def test_parallel():
    x = arange(10.)
    parallel, p_func, n_jobs = parallel_func(sqrt, n_jobs=-1, verbose=0)
    y = parallel(p_func(i**2) for i in range(10))
    testing.assert_equal(x, y)