def test_copy_me(self):
    """Check that ``PhotonPipeline.copy_me`` reproduces a nested pipeline.

    The pipeline under test contains an imputer, a switch, a stack holding
    a branch, a callback element and a final voting classifier.
    """
    # switch with two scalers, the second one flagged as test_disabled
    scaler_switch = Switch("my_copy_switch")
    scaler_switch += PipelineElement("StandardScaler")
    scaler_switch += PipelineElement("RobustScaler", test_disabled=True)

    # stack holding a plain estimator and a two-step branch
    estimator_stack = Stack("RandomStack")
    estimator_stack += PipelineElement("SVC")
    nested_branch = Branch('Random_Branch')
    nested_branch += PipelineElement("PCA", hyperparameters={'n_components': [5, 10]})
    nested_branch += PipelineElement("DecisionTreeClassifier")
    estimator_stack += nested_branch

    steps = [
        ("SimpleImputer", PipelineElement("SimpleImputer")),
        ("my_copy_switch", scaler_switch),
        ('RandomStack', estimator_stack),
        ('Callback1', CallbackElement('tmp_callback', np.mean)),
        ("PhotonVotingClassifier", PipelineElement("PhotonVotingClassifier")),
    ]
    source_pipe = PhotonPipeline(steps)
    cloned_pipe = source_pipe.copy_me()

    self.assertEqual(source_pipe.random_state, cloned_pipe.random_state)
    self.assertTrue(len(cloned_pipe.elements) == 5)

    # third step is the stack; its second element is the branch
    cloned_stack = cloned_pipe.elements[2][1]
    self.assertTrue(cloned_stack.name == "RandomStack")
    self.assertTrue(cloned_pipe.named_steps["my_copy_switch"].elements[1].test_disabled)
    self.assertDictEqual(
        cloned_pipe.elements[2][1].elements[1].elements[0].hyperparameters,
        {"PCA__n_components": [5, 10]},
    )

    # the callback element must survive the copy together with its delegate
    self.assertTrue(isinstance(cloned_pipe.elements[3][1], CallbackElement))
    self.assertTrue(cloned_pipe.named_steps["tmp_callback"].delegate_function == np.mean)
def test_sanity_check_item_for_add(self):
    """Check which objects are accepted when building PHOTON pipes.

    A ``PipelineElement`` and a ``CallbackElement`` must pass the type
    check, while a raw ``StandardScaler()`` and a ``Preprocessing()``
    instance must be rejected with ``TypeError`` - both by the static
    sanity check and by ``add`` of every container class.
    """
    valid_type = PipelineElement('StandardScaler')
    valid_type2 = CallbackElement('my_callback', None)
    invalid_type = StandardScaler()
    invalid_type2 = Preprocessing()

    accepted_items = (valid_type, valid_type2)
    rejected_items = (invalid_type, invalid_type2)

    # direct use of the sanity check
    for item in accepted_items:
        PipelineElement.sanity_check_element_type_for_building_photon_pipes(
            item, PipelineElement)
    for item in rejected_items:
        with self.assertRaises(TypeError):
            PipelineElement.sanity_check_element_type_for_building_photon_pipes(
                item, PipelineElement)

    # the same rules have to hold for add() of each container class
    for photon_class in [Stack, Switch, Branch, Preprocessing]:
        # Preprocessing is constructed without a name, all others get a dummy name
        if photon_class is Preprocessing:
            container = photon_class()
        else:
            container = photon_class('tmp_instance')

        for item in accepted_items:
            container.add(item)
        for item in rejected_items:
            with self.assertRaises(TypeError):
                container.add(item)
def test_add(self):
    """Check ``Hyperpipe.add`` for regular, invalid, preprocessing and callback items.

    Relies on ``self.hyperpipe`` and the three pipe elements
    (``ss_pipe_element``, ``pca_pipe_element``, ``svc_pipe_element``)
    being created by the test fixture, which is not part of this method.
    """
    # assure pipeline has three elements: first the scaler, then the pca and last the svc
    self.assertEqual(len(self.hyperpipe.elements), 3)
    self.assertIs(self.hyperpipe.elements[0], self.ss_pipe_element)
    self.assertIs(self.hyperpipe.elements[1], self.pca_pipe_element)
    self.assertIs(self.hyperpipe.elements[2], self.svc_pipe_element)

    # todo : assure that no two elements can be added with the same name

    # test add method special cases
    with self.assertRaises(TypeError):
        self.hyperpipe.add(object())

    # assure that preprocessing is identified and set to the extra variable,
    # there is only one preprocessing item
    my_preproc = Preprocessing()
    self.hyperpipe.add(my_preproc)
    self.assertEqual(my_preproc, self.hyperpipe.preprocessing)

    # make sure the element does not end up in the main pipeline;
    # all() is required here - a bare non-empty list is always truthy and
    # would make this assertion pass unconditionally
    self.assertTrue(all(item is not my_preproc for item in self.hyperpipe.elements))

    def my_func(X, y, **kwargs):
        return True

    # test adding callback item
    my_call_back_item = CallbackElement("test_element", my_func, "predict")
    self.hyperpipe.add(my_call_back_item)
    self.assertIs(self.hyperpipe.elements[-1], my_call_back_item)
def test_sanity_check_pipe(self):
    """Expect a ``Warning`` when a branch ends with a callback element."""

    def callback_func(X, y, **kwargs):
        pass

    branch_under_test = Branch('my_test_branch')

    # NOTE(review): the extent of this block was reconstructed from a
    # collapsed source line. Statements following the one that raises the
    # Warning are never executed - confirm which call is meant to raise.
    with self.assertRaises(Warning):
        my_callback = CallbackElement('final_element_callback',
                                      delegate_function=callback_func)
        branch_under_test += my_callback
        prepared_pipe = branch_under_test.prepare_photon_pipe(
            branch_under_test.elements)
        branch_under_test.sanity_check_pipeline(prepared_pipe)
        self.assertFalse(prepared_pipe[-1] is not my_callback)
def test_estimator_type(self):
    """Check ``Branch._estimator_type`` for different branch contents."""

    def callback(X, y=None):
        pass

    only_transformers = Branch(
        'TransBranch', [PipelineElement('PCA'), PipelineElement('FastICA')])
    ends_with_classifier = Branch('ClassBranch', [PipelineElement('SVC')])
    ends_with_regressor = Branch('RegBranch', [PipelineElement('SVR')])
    ends_with_callback = Branch(
        'CallBranch',
        [PipelineElement('SVR'), CallbackElement('callback', callback)])

    # (branch, expected estimator type) - checked in this order
    expectations = (
        (only_transformers, None),
        (ends_with_classifier, 'classifier'),
        (ends_with_regressor, 'regressor'),
        (ends_with_callback, None),
    )
    for branch, expected_type in expectations:
        self.assertEqual(branch._estimator_type, expected_type)
def test_estimation_type(self):
    """Check ``Hyperpipe.estimation_type`` against the last pipeline element.

    A trailing PCA or callback element is expected to end in
    ``NotImplementedError``; SVC and SVR are expected to be reported as
    ``"classifier"`` and ``"regressor"``.
    """

    def callback(X, y=None, **kwargs):
        pass

    hyperpipe = Hyperpipe(
        "name",
        inner_cv=KFold(n_splits=2),
        best_config_metric="mean_squared_error",
    )

    # last element is a transformer
    with self.assertRaises(NotImplementedError):
        hyperpipe += PipelineElement("PCA")
        _ = hyperpipe.estimation_type

    # last element is a classifier
    hyperpipe += PipelineElement("SVC")
    self.assertEqual(hyperpipe.estimation_type, "classifier")

    # swap the last element for a regressor
    hyperpipe.elements[-1] = PipelineElement("SVR")
    self.assertEqual(hyperpipe.estimation_type, "regressor")

    # swap the last element for a callback
    with self.assertRaises(NotImplementedError):
        hyperpipe.elements[-1] = CallbackElement("MyCallback", callback)
        _ = hyperpipe.estimation_type
def test_estimation_type(self):
    """Verify the estimation type a Hyperpipe derives from its final element."""

    def callback(X, y=None, **kwargs):
        pass

    cv_splitter = KFold(n_splits=2)
    my_hyperpipe = Hyperpipe('name',
                             inner_cv=cv_splitter,
                             best_config_metric='mean_squared_error')

    # a pipeline ending in PCA has no estimation type
    with self.assertRaises(NotImplementedError):
        my_hyperpipe += PipelineElement('PCA')
        _ = my_hyperpipe.estimation_type

    # appending SVC makes it a classifier pipeline
    my_hyperpipe += PipelineElement('SVC')
    self.assertEqual(my_hyperpipe.estimation_type, 'classifier')

    # replacing the final element with SVR makes it a regressor pipeline
    my_hyperpipe.elements[-1] = PipelineElement('SVR')
    self.assertEqual(my_hyperpipe.estimation_type, 'regressor')

    # a pipeline ending in a callback has no estimation type either
    with self.assertRaises(NotImplementedError):
        my_hyperpipe.elements[-1] = CallbackElement('MyCallback', callback)
        _ = my_hyperpipe.estimation_type
def test_add(self):
    """Check adding elements to a ``Stack`` via constructor and via ``+=``.

    Covers the collected hyperparameters, mixed element types, rejection
    of an element that is already part of the stack, and the renaming of
    a second element with an already used name.
    """
    # elements handed over to the constructor
    built_stack = Stack('MyStack', [
        PipelineElement('PCA', {'n_components': [5]}),
        PipelineElement('FastICA'),
    ])
    self.assertEqual(len(built_stack.elements), 2)
    self.assertDictEqual(built_stack._hyperparameters,
                         {'MyStack__PCA__n_components': [5]})

    # the same elements added one by one
    grown_stack = Stack('MyStack')
    grown_stack += PipelineElement('PCA', {'n_components': [5]})
    grown_stack += PipelineElement('FastICA')
    self.assertEqual(len(grown_stack.elements), 2)
    self.assertDictEqual(grown_stack._hyperparameters,
                         {'MyStack__PCA__n_components': [5]})

    def callback(X, y=None):
        pass

    # a stack mixing plain elements, a callback, a switch and a branch
    inner_switch = Switch('MySwitch',
                          [PipelineElement('PCA'), PipelineElement('FastICA')])
    inner_branch = Branch('MyBranch', [PipelineElement('PCA')])
    stack = Stack('MyStack', [
        PipelineElement('PCA'),
        CallbackElement('MyCallback', callback),
        inner_switch,
        inner_branch,
    ])
    self.assertEqual(len(stack.elements), 4)

    # test doubled item
    with self.assertRaises(ValueError):
        stack += stack.elements[0]

    # a new element reusing the name 'PCA' is expected to be stored as 'PCA2'
    stack += PipelineElement('PCA', {'n_components': [10, 20]})
    self.assertEqual(stack.elements[-1].name, 'PCA2')

    expected_hyperparameters = {
        'MyStack__MySwitch__current_element': [(0, 0), (1, 0)],
        'MyStack__PCA2__n_components': [10, 20],
    }
    self.assertDictEqual(stack.hyperparameters, expected_hyperparameters)
def test_callback(self):
    """Check that a callback inside a ``NeuroBranch`` receives image objects.

    The callback stores the data it is handed in ``self.a``; after the
    transform the first stored item must be a ``Nifti1Image``.
    """
    self.a = None

    def my_monitor(X, y=None, **kwargs):
        # keep whatever the branch passes on so it can be inspected below
        self.a = X

    neuro_branch = NeuroBranch('neuro_branch')
    neuro_branch += PipelineElement('SmoothImages', fwhm=10)
    neuro_branch += PipelineElement('ResampleImages', voxel_size=5)
    neuro_branch += CallbackElement("monitor", my_monitor)

    # point the branch to the test cache folder and start from an empty cache
    neuro_branch.base_element.cache_folder = self.cache_folder_path
    CacheManager.clear_cache_files(neuro_branch.base_element.cache_folder, True)

    # set the config so that caching works
    config = {
        'SmoothImages__fwhm': 10,
        'ResampleImages__voxel_size': 5,
    }
    neuro_branch.set_params(**config)

    neuro_branch.transform(self.X[:1])
    self.assertIsInstance(self.a[0], Nifti1Image)
# load the data as an (X, y) tuple; return_X_y has to be passed by keyword,
# recent scikit-learn versions reject it as a positional argument
X, y = load_boston(return_X_y=True)

# DESIGN YOUR PIPELINE
settings = OutputSettings(project_folder='./tmp/')
my_pipe = Hyperpipe('basic_svm_pipe_no_performance',
                    optimizer='grid_search',
                    metrics=['mean_squared_error', 'pearson_correlation'],
                    best_config_metric='mean_squared_error',
                    outer_cv=KFold(n_splits=3),
                    inner_cv=KFold(n_splits=3),
                    verbosity=1,
                    output_settings=settings)

# ADD ELEMENTS TO YOUR PIPELINE
# first normalize all features
my_pipe += PipelineElement('StandardScaler')

# hand the scaled data to the monitoring callback (my_monitor is defined elsewhere)
my_pipe += CallbackElement("monitor", my_monitor)

# use a random forest regressor as the final estimator
my_pipe += PipelineElement('RandomForestRegressor',
                           hyperparameters={'n_estimators': [10]})

# NOW TRAIN YOUR PIPELINE
my_pipe.fit(X, y)

debug = True
"SmoothImages", {"fwhm": Categorical([6, 8])}, batch_size=20 ) # now, apply a brain atlas and extract 4 ROIs # set "extract_mode" to "vec" so that all voxels within these ROIs are vectorized and concatenated neuro_branch += PipelineElement( "BrainAtlas", hyperparameters={}, rois=["Hippocampus_L", "Hippocampus_R", "Amygdala_L", "Amygdala_R"], atlas_name="AAL", extract_mode="vec", batch_size=20, ) # finally, add your neuro branch to your hyperpipe neuro_branch += CallbackElement("NeuroCallback", my_monitor) my_pipe += neuro_branch # my_pipe += CallbackElement('NeuroCallback', my_monitor) # now, add standard ML algorithms to your liking feature_engineering = Branch("FeatureEngineering") feature_engineering += PipelineElement("StandardScaler") my_pipe += feature_engineering my_pipe += CallbackElement("FECallback", my_monitor) my_pipe += PipelineElement( "SVR", hyperparameters={"kernel": Categorical(["rbf", "linear"])}, gamma="scale" ) # NOW TRAIN YOUR PIPELINE
def setUp(self):
    """Load the breast cancer data and build the pipelines used by the tests.

    Creates pipelines with and without callback elements, on the top
    level as well as inside a branch, plus one pipeline that places two
    callbacks in front of a scaler and an SVR.
    """

    def callback(X, y=None, **kwargs):
        # data handed to the callback must keep the shape of the input data
        self.assertEqual(X.shape, (569, 30))
        print("Shape of transformed data: {}".format(X.shape))

    def predict_callback(X, y=None, **kwargs):
        # placed after the estimator, so a one-dimensional array is expected
        self.assertEqual(X.shape, (569, ))
        print('Shape of predictions: {}'.format(X.shape))

    def callback_test_equality(X, y=None, **kwargs):
        # the data must reach the callback unchanged
        self.assertTrue(np.array_equal(self.X, X))
        if y is not None:
            self.assertListEqual(self.y.tolist(), y.tolist())

    # return_X_y has to be passed by keyword, recent scikit-learn versions
    # reject it as a positional argument
    self.X, self.y = load_breast_cancer(return_X_y=True)

    self.clean_pipeline = PhotonPipeline(elements=[
        ('PCA', PipelineElement('PCA')),
        ('LogisticRegression', PipelineElement('LogisticRegression')),
    ])

    self.callback_pipeline = PhotonPipeline(elements=[
        ('First', CallbackElement('First', callback)),
        ('PCA', PipelineElement('PCA')),
        ('Second', CallbackElement('Second', callback)),
        ('LogisticRegression', PipelineElement('LogisticRegression')),
    ])

    self.clean_branch_pipeline = PhotonPipeline(elements=[
        ('MyBranch', Branch('MyBranch', [PipelineElement('PCA')])),
        ('LogisticRegression', PipelineElement('LogisticRegression')),
    ])

    self.callback_branch_pipeline = PhotonPipeline(elements=[
        ('First', CallbackElement('First', callback)),
        ('MyBranch', Branch('MyBranch', [
            CallbackElement('Second', callback),
            PipelineElement('PCA'),
        ])),
        ('Fourth', CallbackElement('Fourth', callback)),
        ('LogisticRegression', PipelineElement('LogisticRegression')),
    ])

    # same layout, but the branch ends with a callback and a further
    # callback follows the final estimator
    self.callback_branch_pipeline_error = PhotonPipeline(elements=[
        ('First', CallbackElement('First', callback)),
        ('MyBranch', Branch('MyBranch', [
            CallbackElement('Second', callback),
            PipelineElement('PCA'),
            CallbackElement('Third', callback),
        ])),
        ('Fourth', CallbackElement('Fourth', callback)),
        ('LogisticRegression', PipelineElement('LogisticRegression')),
        ('Fifth', CallbackElement('Fifth', predict_callback)),
    ])

    # test that data is unaffected from pipeline
    # Each step is its own (name, element) tuple; the SVR step used to be
    # nested inside the scaler tuple because of a misplaced parenthesis.
    # NOTE(review): the step name 'StandarcScaler' is misspelt but kept,
    # since other code may refer to the step by this key - confirm before renaming.
    self.callback_after_callback_pipeline = PhotonPipeline([
        ('Callback1', CallbackElement('Callback1', callback)),
        ('Callback2', CallbackElement('Callback2', callback_test_equality)),
        ('StandarcScaler', PipelineElement('StandardScaler')),
        ('SVR', PipelineElement('SVR')),
    ])
# resample images to a desired voxel size - this also works with voxel_size as hyperparameter
# it's also very reasonable to define a batch size for a large number of subjects
neuro_branch += PipelineElement(
    'ResampleImages',
    hyperparameters={'voxel_size': Categorical([3, 5])},
    batch_size=20,
)

# additionally, you can smooth the entire image
neuro_branch += PipelineElement(
    'SmoothImages',
    hyperparameters={'fwhm': Categorical([6, 8])},
    batch_size=20,
)

# now, apply a brain atlas and extract 4 ROIs
# set "extract_mode" to "vec" so that all voxels within these ROIs are vectorized and concatenated
neuro_branch += PipelineElement(
    'BrainAtlas',
    hyperparameters={},
    rois=['Hippocampus_L', 'Hippocampus_R', 'Amygdala_L', 'Amygdala_R'],
    atlas_name="AAL",
    extract_mode='vec',
    batch_size=20,
)

# let the monitoring callback see the output of the neuro branch
neuro_branch += CallbackElement('NeuroCallback', my_monitor)

# finally, add your neuro branch to your hyperpipe
my_pipe += neuro_branch
# my_pipe += CallbackElement('NeuroCallback', my_monitor)

# now, add standard ML algorithms to your liking
feature_engineering = Branch('FeatureEngineering')
feature_engineering += PipelineElement('StandardScaler')
my_pipe += feature_engineering
my_pipe += CallbackElement('FECallback', my_monitor)

my_pipe += PipelineElement(
    'SVR',
    hyperparameters={'kernel': Categorical(['rbf', 'linear'])},
    gamma='scale',
)

# NOW TRAIN YOUR PIPELINE
start_time = time.time()
my_pipe.fit(X, y)