import warnings
from math import sqrt

from numpy import arange, testing

from statsmodels.tools.parallel import parallel_func


def test_parallel():
    x = arange(10.)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        parallel, p_func, n_jobs = parallel_func(sqrt, n_jobs=-1, verbose=0)
        y = parallel(p_func(i**2) for i in range(10))
    testing.assert_equal(x, y)
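# For readers unfamiliar with the helper used throughout these snippets:
# parallel_func in statsmodels.tools.parallel is a thin wrapper around
# joblib. Below is a minimal sketch of its behavior, assuming joblib is
# installed (the real helper also falls back to serial execution, with
# parallel=list and my_func=func, when joblib is missing);
# parallel_func_sketch is a hypothetical name used for illustration.
from joblib import Parallel, delayed


def parallel_func_sketch(func, n_jobs, verbose=5):
    # Parallel is a callable that consumes an iterable of delayed calls
    # and returns their results as a list, preserving input order.
    parallel = Parallel(n_jobs=n_jobs, verbose=verbose)
    # delayed(func) makes p_func(args) record the call as a
    # (func, args, kwargs) tuple instead of executing it immediately.
    my_func = delayed(func)
    if n_jobs == -1:
        import multiprocessing
        n_jobs = multiprocessing.cpu_count()  # resolve -1 to the core count
    return parallel, my_func, n_jobs

# This is why the snippets write parallel(p_func(arg) for arg in ...):
# each p_func(arg) only records a call, and parallel executes the
# recorded calls across the worker pool.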
def fit_joblib(self, data_generator, fit_kwds, parallel_backend,
               init_kwds_generator=None):
    """Performs the distributed estimation in parallel using joblib

    Parameters
    ----------
    data_generator : generator
        A generator that produces a sequence of tuples where the first
        element in the tuple corresponds to an endog array and the
        second element corresponds to an exog array.
    fit_kwds : dict-like
        Keywords needed for the model fitting.
    parallel_backend : None or joblib parallel_backend object
        Used to allow support for more complicated backends,
        ex: dask.distributed.
    init_kwds_generator : generator or None
        Additional keyword generator that produces model init_kwds
        that may vary based on data partition.  The current use case
        is for WLS and GLS.

    Returns
    -------
    join_method result.  For the default, _join_debiased, it returns a
    p length array.
    """
    from statsmodels.tools.parallel import parallel_func

    par, f, n_jobs = parallel_func(_helper_fit_partition, self.partitions)

    # Dispatch one fit per data partition; enter the user-supplied
    # backend context manager when one is provided.
    if parallel_backend is None and init_kwds_generator is None:
        results_l = par(f(self, pnum, endog, exog, fit_kwds)
                        for pnum, (endog, exog)
                        in enumerate(data_generator))

    elif parallel_backend is not None and init_kwds_generator is None:
        with parallel_backend:
            results_l = par(f(self, pnum, endog, exog, fit_kwds)
                            for pnum, (endog, exog)
                            in enumerate(data_generator))

    elif parallel_backend is None and init_kwds_generator is not None:
        tup_gen = enumerate(zip(data_generator, init_kwds_generator))
        results_l = par(f(self, pnum, endog, exog, fit_kwds, init_kwds)
                        for pnum, ((endog, exog), init_kwds)
                        in tup_gen)

    elif parallel_backend is not None and init_kwds_generator is not None:
        tup_gen = enumerate(zip(data_generator, init_kwds_generator))
        with parallel_backend:
            results_l = par(f(self, pnum, endog, exog, fit_kwds, init_kwds)
                            for pnum, ((endog, exog), init_kwds)
                            in tup_gen)

    return results_l
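# A minimal sketch of how fit_joblib is typically reached from the
# public API, assuming statsmodels.base.distributed_estimation is
# available; the data sizes, the generator, and the alpha value are
# illustrative assumptions, not taken from the original source.
import numpy as np
from statsmodels.base.distributed_estimation import DistributedModel

rng = np.random.default_rng(0)
n_obs, n_part = 400, 4
X = rng.standard_normal((n_obs, 3))
y = X @ np.array([1.0, 0.5, 0.0]) + rng.standard_normal(n_obs)


def data_gen():
    # Yield one (endog, exog) tuple per partition, as fit_joblib expects.
    for endog, exog in zip(np.array_split(y, n_part),
                           np.array_split(X, n_part)):
        yield endog, exog


model = DistributedModel(n_part)
# The default estimation method is regularized, so fit_kwds must carry
# the penalty weight; parallel_method="joblib" routes through fit_joblib.
res = model.fit(data_gen(), fit_kwds={"alpha": 0.1},
                parallel_method="joblib")
print(res.params)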
def _run_batch(
    self, input_data: t.Union["ext.NpNDArray", "ext.PdDataFrame"]
) -> t.Any:  # type: ignore[override] # noqa
    # TODO: type hint return type.
    parallel, p_func, _ = parallel_func(  # type: ignore[arg-type]
        self._predict_fn,
        n_jobs=self._num_threads,
        verbose=0,
    )
    return parallel(p_func(i) for i in input_data)[0]  # type: ignore
# EX_NUM, EX_SIZE, NUM_JOBS, wrapper and the imports (numpy as np,
# datetime, parallel_func) are defined earlier in this script.
trends = ('c', 'ct')
T = np.array((20, 25, 30, 35, 40, 45, 50, 60, 70, 80, 90, 100,
              120, 140, 160, 180, 200, 250, 300, 350, 400, 450, 500,
              600, 700, 800, 900, 1000, 1200, 1400, 2000))
T = T[::-1]
percentiles = list(np.arange(0.5, 100.0, 0.5))
seeds = np.arange(0, 2 ** 32, step=2 ** 23)

for tr in trends:
    results = np.zeros((len(percentiles), len(T), EX_NUM))
    for i in range(EX_NUM):
        print("Experiment Number {0} of {1} (trend {2})".format(
            i + 1, EX_NUM, tr))
        now = datetime.datetime.now()
        parallel, p_func, n_jobs = parallel_func(wrapper, n_jobs=NUM_JOBS,
                                                 verbose=2)
        out = parallel(p_func(t, tr, EX_SIZE, seed=seeds[i]) for t in T)
        q = lambda x: np.percentile(x, percentiles)
        # list() is required on Python 3, where map returns an iterator
        # that np.array cannot convert to a 2-d array.
        quantiles = list(map(q, out))
        results[:, :, i] = np.array(quantiles).T
        print('Elapsed time {0} seconds'.format(
            datetime.datetime.now() - now))
        if i % 50 == 0:
            np.savez('dfgls_' + tr + '.npz', trend=tr, results=results,
                     percentiles=percentiles, T=T)
if __name__ == '__main__':
    # EX_NUM, EX_SIZE, NUM_JOBS, wrapper and the numpy names used below
    # (array, arange, zeros, nan, percentile, seed, random_integers) are
    # defined earlier in this script.
    trends = ('nc', 'c', 'ct', 'ctt')
    T = array((20, 25, 30, 35, 40, 45, 50, 60, 70, 80, 90, 100,
               120, 140, 160, 180, 200, 250, 300, 350, 400, 450, 500,
               600, 700, 800, 900, 1000, 1200, 1400, 2000))
    T = T[::-1]
    m = T.shape[0]
    percentiles = list(arange(0.5, 100.0, 0.5))
    seed(0)
    seeds = random_integers(0, 2 ** 31 - 2, size=EX_NUM)

    parallel, p_func, n_jobs = parallel_func(wrapper, n_jobs=NUM_JOBS,
                                             verbose=2)
    parallel.pre_dispatch = NUM_JOBS
    for tr in trends:
        results = zeros((len(percentiles), len(T), EX_NUM)) * nan
        filename = 'adf_z_' + tr + '.npz'
        for i in range(EX_NUM):
            print("Experiment Number {0} for Trend {1}".format(i + 1, tr))
            # Non-parallel version:
            # out = lmap(wrapper, T, [tr] * m, [EX_SIZE] * m, [seeds[i]] * m)
            now = datetime.datetime.now()
            out = parallel(p_func(t, tr, EX_SIZE, seed=seeds[i]) for t in T)
            # list() is required on Python 3, where map returns an iterator.
            quantiles = list(map(lambda x: percentile(x, percentiles), out))
            results[:, :, i] = array(quantiles).T
            elapsed = datetime.datetime.now() - now
def test_parallel():
    x = arange(10.)
    parallel, p_func, n_jobs = parallel_func(sqrt, n_jobs=-1, verbose=0)
    y = parallel(p_func(i**2) for i in range(10))
    testing.assert_equal(x, y)