def test_multiprocessing_speedup(self, verbose=0):
    """Compare serial and parallel ``cross_val_score`` run times.

    Does nothing unless ``n_cpus > 1``.  Otherwise a single-channel
    ``MultichannelPipeline`` wrapping a ``DummyClassifier`` is timed with
    ``n_processes=1`` and again with ``n_processes=n_cpus``.  When the
    parallel run is not faster, a warning is issued (the test itself does
    not fail).

    Parameters
    ----------
    verbose : int, default=0
        When greater than 0, print the mean time per run for both modes.
    """
    if n_cpus <= 1:
        return

    n_runs = 5
    setup_code = 'import pipecaster.cross_validation'
    stmt_template = ('pipecaster.cross_validation.cross_val_score('
                     'mclf, [X], y, cv = 5, n_processes = {})')

    # Shut off warnings because ray and redis generate massive numbers.
    # catch_warnings() restores the previous filters on exit, even when the
    # timed code raises, instead of wiping every filter with
    # resetwarnings().
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        parallel.start_if_needed(n_cpus=n_cpus)

        X, y = make_classification(n_classes=2, n_samples=500,
                                   n_features=40, n_informative=20,
                                   random_state=test_seed)
        self.X_cls, self.y_cls = X, y

        mclf = MultichannelPipeline(n_channels=1)
        mclf.add_layer(DummyClassifier(futile_cycles_fit=2000000,
                                       futile_cycles_pred=10))

        # The timed statements look up X, y and mclf by name, so the local
        # namespace is handed to timeit as its globals.
        t_serial = timeit.timeit(setup=setup_code,
                                 stmt=stmt_template.format(1),
                                 globals=locals(), number=n_runs)
        t_parallel = timeit.timeit(setup=setup_code,
                                   stmt=stmt_template.format(n_cpus),
                                   globals=locals(), number=n_runs)

    if verbose > 0:
        # timeit.timeit returns the total for all runs; divide to report
        # the mean that the message promises.
        print('serial run mean time = {} s'.format(t_serial / n_runs))
        print('parallel run mean time = {} s'.format(t_parallel / n_runs))

    if t_serial <= t_parallel:
        warnings.warn('multiple cpus detected, but parallel cross_val_score '
                      'not faster than serial, possible problem with '
                      'multiprocessing')
def test_multi_input_regression_parallel(self):
    """Check parallel ``cross_val_score`` on a regressor against controls.

    Does nothing unless ``n_cpus > 1``.  Otherwise ``self.rgr`` is wrapped
    in a single-channel ``MultichannelPipeline``, scored with
    ``explained_variance_score`` using ``n_processes=n_cpus``, and the
    scores must equal ``self.rgr_scores`` exactly.
    """
    if n_cpus <= 1:
        return

    # Suppress warnings only while the parallel backend is running.
    # catch_warnings() restores the previous filters on exit, so a failure
    # cannot leave warnings silenced for later tests.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        parallel.start_if_needed(n_cpus=n_cpus)

        mrgr = MultichannelPipeline(n_channels=1)
        mrgr.add_layer(self.rgr)
        pc_scores = pc_cross_validation.cross_val_score(
            mrgr, [self.X_rgr], self.y_rgr,
            scorer=explained_variance_score,
            cv=self.cv, n_processes=n_cpus)

    self.assertTrue(
        np.array_equal(self.rgr_scores, pc_scores),
        'regressor scores from pipecaster.cross_validation.cross_val_score '
        'did not match sklearn control (multi input predictor)')
def test_multi_input_classification_parallel(self):
    """Check parallel ``cross_val_score`` on a classifier against controls.

    Does nothing unless ``n_cpus > 1``.  Otherwise ``self.clf`` is wrapped
    in a single-channel ``MultichannelPipeline``, scored with
    ``roc_auc_score`` on ``predict_proba`` output using
    ``n_processes=n_cpus``, and the scores must equal ``self.cls_scores``
    exactly.

    NOTE(review): a cross_val_predict test elsewhere in this file has the
    same method name; if both are defined in the same class the later
    definition replaces the earlier one -- confirm they live in different
    classes.
    """
    if n_cpus <= 1:
        return

    # Suppress warnings only while the parallel backend is running.
    # catch_warnings() restores the previous filters on exit, so a failure
    # cannot leave warnings silenced for later tests.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        # Pass n_cpus explicitly, matching the other parallel tests.
        parallel.start_if_needed(n_cpus=n_cpus)

        mclf = MultichannelPipeline(n_channels=1)
        mclf.add_layer(self.clf)
        pc_scores = pc_cross_validation.cross_val_score(
            mclf, [self.X_cls], self.y_cls,
            score_method='predict_proba',
            scorer=roc_auc_score,
            cv=self.cv, n_processes=n_cpus)

    self.assertTrue(
        np.array_equal(self.cls_scores, pc_scores),
        'classifier scores from pipecaster.cross_validation.cross_val_score '
        'did not match sklearn control (multi input predictor)')
def test_multi_input_classification_parallel(self):
    """Check parallel ``cross_val_predict`` on a classifier against controls.

    Does nothing unless ``n_cpus > 1``.  Otherwise ``self.clf`` is wrapped
    in a single-channel ``MultichannelPipeline``, predictions are made with
    ``n_processes=n_cpus``, and ``['predict']['y_pred']`` of the result must
    equal ``self.cls_predictions`` exactly.

    NOTE(review): a cross_val_score test elsewhere in this file has the
    same method name; if both are defined in the same class the later
    definition replaces the earlier one -- confirm they live in different
    classes.
    """
    if n_cpus <= 1:
        return

    # Suppress warnings only while the parallel backend is running.
    # catch_warnings() restores the previous filters on exit, so a failure
    # cannot leave warnings silenced for later tests.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        parallel.start_if_needed(n_cpus=n_cpus)

        mclf = MultichannelPipeline(n_channels=1)
        mclf.add_layer(self.clf)
        pc_predictions = pc_cross_validation.cross_val_predict(
            mclf, [self.X_cls], self.y_cls,
            cv=self.cv, n_processes=n_cpus)

    self.assertTrue(
        np.array_equal(self.cls_predictions,
                       pc_predictions['predict']['y_pred']),
        'pipecaster predictions did not match sklearn control')
def test_throttled_multiprocessing_speedup(self, verbose=0):
    """Compare serial and throttled-parallel ``cross_val_predict`` times.

    The parallel run uses ``n_cpus - 1`` processes and ``n_cpus - 1`` folds.
    Does nothing unless ``n_cpus > 2``: with exactly two CPUs the
    "parallel" run would use one process (identical to the serial run) and
    a single fold.  When the parallel run is not faster, a warning is
    issued (the test itself does not fail).

    Parameters
    ----------
    verbose : int, default=0
        When greater than 0, print the CPU count and both durations.
    """
    if n_cpus <= 2:
        return

    n_workers = n_cpus - 1
    setup_code = 'import pipecaster.cross_validation'
    stmt_template = ('pipecaster.cross_validation.cross_val_predict('
                     'mclf, [X], y, cv = {}, n_processes = {})')

    # Suppress warnings only while the parallel backend is running.
    # catch_warnings() restores the previous filters on exit, even when the
    # timed code raises, instead of wiping every filter with
    # resetwarnings().
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        parallel.start_if_needed(n_cpus=n_cpus)

        X, y = make_classification(n_classes=2, n_samples=500,
                                   n_features=40, n_informative=20,
                                   random_state=test_seed)
        self.X_cls, self.y_cls = X, y

        mclf = MultichannelPipeline(n_channels=1)
        mclf.add_layer(DummyClassifier(futile_cycles_fit=2000000,
                                       futile_cycles_pred=10))

        # The timed statements look up X, y and mclf by name, so the local
        # namespace is handed to timeit as its globals.
        t_serial = timeit.timeit(setup=setup_code,
                                 stmt=stmt_template.format(n_workers, 1),
                                 globals=locals(), number=5)
        t_parallel = timeit.timeit(
            setup=setup_code,
            stmt=stmt_template.format(n_workers, n_workers),
            globals=locals(), number=5)

    if verbose > 0:
        print('number of CPUs detected and parallel jobs requested: {}'
              .format(n_cpus))
        print('duration of serial cross cross_val_predict task: {} s'
              .format(t_serial))
        print('duration of parallel cross cross_val_predict task '
              '(ray pool.starmap): {} s'.format(t_parallel))

    if t_serial <= t_parallel:
        warnings.warn(
            'multiple cpus detected, but parallel cross_val_predict not '
            'faster than serial using ray.multiprocessing.starmap(), '
            'possible problem with multiprocessing')
def test_multi_input_regression_parallel_starmap(self):
    """Check throttled-parallel ``cross_val_predict`` on a regressor.

    Does nothing unless ``n_cpus > 2``.  Otherwise ``self.rgr`` is wrapped
    in a single-channel ``MultichannelPipeline``, predictions are made with
    ``n_processes=n_cpus - 1``, and ``['predict']['y_pred']`` of the result
    must equal ``self.rgr_predictions`` exactly.
    """
    if n_cpus <= 2:
        return

    # Suppress warnings only while the parallel backend is running.
    # catch_warnings() restores the previous filters on exit, so a failure
    # cannot leave warnings silenced for later tests.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        parallel.start_if_needed(n_cpus=n_cpus)

        mrgr = MultichannelPipeline(n_channels=1)
        mrgr.add_layer(self.rgr)
        pc_predictions = pc_cross_validation.cross_val_predict(
            mrgr, [self.X_rgr], self.y_rgr,
            cv=self.cv, n_processes=n_cpus - 1)

    self.assertTrue(
        np.array_equal(self.rgr_predictions,
                       pc_predictions['predict']['y_pred']),
        'pipecaster predictions did not match sklearn '
        'control')