class AtlasStacker(BaseEstimator):
    """Build one inner Hyperpipe per atlas ROI and combine them in a Stack.

    The ROI names are read from ``atlas_info_object.roi_names_runtime``.
    The inner pipes are created lazily on the first call to :meth:`fit`
    (or explicitly through :meth:`generate_hyperpipes`).
    """

    def __init__(self, atlas_info_object, hyperpipe_elements,
                 best_config_metric=None, metrics=None):
        """Store the configuration; no pipes are built here.

        Parameters
        ----------
        atlas_info_object:
            Object exposing ``atlas_name`` and ``roi_names_runtime``.
        hyperpipe_elements:
            Iterable of 3-item sequences ``(name, hyperparameters, kwargs)``
            that are forwarded to ``PipelineElement.create`` for every ROI.
        best_config_metric:
            Forwarded to each inner Hyperpipe. ``None`` (the default) is
            replaced by a fresh empty list.
        metrics:
            Forwarded to each inner Hyperpipe. ``None`` (the default) is
            replaced by a fresh empty list.
        """
        # TODO: the stacking step on top of the ROI pipes is not implemented.
        self.atlas_info_object = atlas_info_object
        self.atlas_name = self.atlas_info_object.atlas_name
        self.hyperpipe_elements = hyperpipe_elements
        self.pipeline_fusion = None
        # Fresh lists per instance: a literal ``[]`` default would be one
        # object shared (and possibly mutated) across all instances.
        self.best_config_metric = (
            [] if best_config_metric is None else best_config_metric)
        self.metrics = [] if metrics is None else metrics

    def generate_hyperpipes(self):
        """Create one Hyperpipe per ROI and store them as ``pipeline_fusion``.

        Does nothing when the atlas reports no ROI names.
        """
        roi_names = self.atlas_info_object.roi_names_runtime
        if not roi_names:
            # TODO: raise an error here instead of returning silently.
            return

        self.rois = roi_names
        inner_pipes = {}
        for roi_index, roi_name in enumerate(self.rois):
            inner_pipe = Hyperpipe(
                self.atlas_name + '_' + str(roi_name),
                optimizer='grid_search',
                inner_cv=ShuffleSplit(n_splits=1, test_size=0.2,
                                      random_state=3),
                eval_final_performance=False,
                verbose=logging.verbosity_level,
                best_config_metric=self.best_config_metric,
                metrics=self.metrics)

            # First restrict the pipe to its ROI ...
            inner_pipe.filter_element = RoiFilterElement(roi_index)

            # ... then append the user-supplied elements.
            for pipe_item in self.hyperpipe_elements:
                inner_pipe += PipelineElement.create(
                    pipe_item[0], pipe_item[1], **pipe_item[2])

            inner_pipes[roi_name] = inner_pipe

        # Hand over a real list rather than a dict view so the elements can
        # be indexed and extended by the Stack.
        self.pipeline_fusion = Stack('multiple_source_pipes',
                                     list(inner_pipes.values()),
                                     voting=False)

    def fit(self, X, y=None):
        """Build the ROI pipes if necessary, then fit them on ``X``/``y``.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If no pipes exist yet and the atlas reports no ROI names.
        """
        if not self.pipeline_fusion:
            if not self.atlas_info_object.roi_names_runtime:
                raise ValueError('No ROIs could be received from Brain Atlas')
            self.generate_hyperpipes()

        self.pipeline_fusion.fit(X, y)
        return self

    def transform(self, X, y=None):
        """Return ``pipeline_fusion.transform(X, y)``."""
        return self.pipeline_fusion.transform(X, y)
class StackTests(unittest.TestCase):
    """Unit tests for ``Stack`` built from plain elements and from branches."""

    def setUp(self):
        """Load the breast cancer data and build the stacks under test."""
        # ``return_X_y`` must be passed by keyword: recent scikit-learn
        # releases reject it as a positional argument.
        self.X, self.y = load_breast_cancer(return_X_y=True)

        self.pca = PipelineElement('PCA', {'n_components': [5, 10]})
        self.scaler = PipelineElement('StandardScaler', {'with_mean': [True]})
        self.svc = PipelineElement('SVC', {'C': [1, 2]})
        self.tree = PipelineElement('DecisionTreeClassifier',
                                    {'min_samples_leaf': [3, 5]})

        self.transformer_branch_1 = Branch('TransBranch1',
                                           [self.pca.copy_me()])
        self.transformer_branch_2 = Branch('TransBranch2',
                                           [self.scaler.copy_me()])
        self.estimator_branch_1 = Branch('EstBranch1', [self.svc.copy_me()])
        self.estimator_branch_2 = Branch('EstBranch2', [self.tree.copy_me()])

        self.transformer_stack = Stack(
            'TransformerStack', [self.pca.copy_me(), self.scaler.copy_me()])
        self.estimator_stack = Stack(
            'EstimatorStack', [self.svc.copy_me(), self.tree.copy_me()])
        self.transformer_branch_stack = Stack('TransBranchStack', [
            self.transformer_branch_1.copy_me(),
            self.transformer_branch_2.copy_me(),
        ])
        self.estimator_branch_stack = Stack('EstBranchStack', [
            self.estimator_branch_1.copy_me(),
            self.estimator_branch_2.copy_me(),
        ])

        # Pairs of (stand-alone elements, stack built from copies of them).
        self.stacks = [
            ([self.pca, self.scaler], self.transformer_stack),
            ([self.svc, self.tree], self.estimator_stack),
            ([self.transformer_branch_1, self.transformer_branch_2],
             self.transformer_branch_stack),
            ([self.estimator_branch_1, self.estimator_branch_2],
             self.estimator_branch_stack),
        ]

    def test_copy_me(self):
        """A copy has equal content but distinct element objects."""
        for _, stack in self.stacks:
            copy = stack.copy_me()
            self.assertEqual(stack.random_state, copy.random_state)
            self.assertFalse(
                stack.elements[0].__dict__ == copy.elements[0].__dict__)
            self.assertDictEqual(elements_to_dict(stack),
                                 elements_to_dict(copy))

    def test_horizontal_stacking(self):
        """The stack output has as many columns as both elements together."""
        for (element_1, element_2), stack in self.stacks:
            # fit elements
            Xt_1 = element_1.fit(self.X, self.y).transform(self.X, self.y)
            Xt_2 = element_2.fit(self.X, self.y).transform(self.X, self.y)

            Xt = stack.fit(self.X, self.y).transform(self.X, self.y)

            # output of transform() changes depending on whether it is an
            # estimator stack or a transformer stack
            if isinstance(Xt, tuple):
                Xt = Xt[0]
                Xt_1 = Xt_1[0]
                Xt_2 = Xt_2[0]

            # 1-D outputs count as a single column each.
            if len(Xt_1.shape) == 1:
                Xt_1 = np.reshape(Xt_1, (-1, 1))
                Xt_2 = np.reshape(Xt_2, (-1, 1))

            self.assertEqual(Xt.shape[1], Xt_1.shape[-1] + Xt_2.shape[-1])

    def recursive_assertion(self, element_a, element_b):
        """Assert that two (possibly nested) dicts hold equal values.

        Arrays are compared element-wise, nested dicts recursively, and
        everything else with ``assertEqual``. Only the keys of ``element_a``
        are checked.
        """
        for key, value_a in element_a.items():
            if isinstance(value_a, np.ndarray):
                np.testing.assert_array_equal(value_a, element_b[key])
            elif isinstance(value_a, dict):
                self.recursive_assertion(value_a, element_b[key])
            else:
                self.assertEqual(value_a, element_b[key])

    def test_fit(self):
        """Fitting the stack equals fitting each element on its own."""
        cases = [
            ([self.pca, self.scaler], self.transformer_stack),
            ([self.svc, self.tree], self.estimator_stack),
        ]
        for elements, stack in cases:
            np.random.seed(42)
            stack = stack.fit(self.X, self.y)
            np.random.seed(42)
            for i, element in enumerate(elements):
                element = element.fit(self.X, self.y)
                element_dict = elements_to_dict(element)
                stack_dict = elements_to_dict(stack.elements[i])
                self.recursive_assertion(element_dict, stack_dict)

    def test_transform(self):
        """Stack transform equals the horizontally stacked element outputs."""
        for elements, stack in self.stacks:
            np.random.seed(42)
            Xt_stack, _, _ = stack.fit(self.X, self.y).transform(self.X)
            np.random.seed(42)
            Xt_elements = None
            for element in elements:
                Xt_element, _, _ = element.fit(self.X,
                                               self.y).transform(self.X)
                Xt_elements = PhotonDataHelper.stack_data_horizontally(
                    Xt_elements, Xt_element)
            np.testing.assert_array_equal(Xt_stack, Xt_elements)

    def test_predict(self):
        """Stack predict equals the horizontally stacked predictions."""
        cases = [
            ([self.svc, self.tree], self.estimator_stack),
            ([self.estimator_branch_1, self.estimator_branch_2],
             self.estimator_branch_stack),
        ]
        for elements, stack in cases:
            np.random.seed(42)
            stack = stack.fit(self.X, self.y)
            yt_stack = stack.predict(self.X)
            np.random.seed(42)
            Xt_elements = None
            for element in elements:
                Xt_element = element.fit(self.X, self.y).predict(self.X)
                Xt_elements = PhotonDataHelper.stack_data_horizontally(
                    Xt_elements, Xt_element)
            np.testing.assert_array_equal(yt_stack, Xt_elements)

    def test_predict_proba(self):
        """Stack predict_proba equals the stacked element probabilities."""
        cases = [
            ([self.svc, self.tree], self.estimator_stack),
            ([self.estimator_branch_1, self.estimator_branch_2],
             self.estimator_branch_stack),
        ]
        for elements, stack in cases:
            np.random.seed(42)
            stack = stack.fit(self.X, self.y)
            yt_stack = stack.predict_proba(self.X)
            np.random.seed(42)
            Xt_elements = None
            for element in elements:
                Xt_element = element.fit(self.X,
                                         self.y).predict_proba(self.X)
                # Fall back to predict() when predict_proba() returns None.
                if Xt_element is None:
                    Xt_element = element.fit(self.X, self.y).predict(self.X)
                Xt_elements = PhotonDataHelper.stack_data_horizontally(
                    Xt_elements, Xt_element)
            np.testing.assert_array_equal(yt_stack, Xt_elements)

    def test_inverse_transform(self):
        """inverse_transform on a stack raises NotImplementedError."""
        with self.assertRaises(NotImplementedError):
            self.stacks[0][1].fit(self.X, self.y).inverse_transform(self.X)

    def test_set_params(self):
        """set_params reaches the base elements and rejects unknown keys."""
        trans_config = {
            'PCA__n_components': 2,
            'PCA__disabled': True,
            'StandardScaler__with_mean': True
        }
        est_config = {
            'SVC__C': 3,
            'DecisionTreeClassifier__min_samples_leaf': 1
        }

        # transformer stack
        self.transformer_stack.set_params(**trans_config)
        self.assertEqual(
            self.transformer_stack.elements[0].base_element.n_components, 2)
        self.assertEqual(self.transformer_stack.elements[0].disabled, True)
        self.assertEqual(
            self.transformer_stack.elements[1].base_element.with_mean, True)

        # estimator stack
        self.estimator_stack.set_params(**est_config)
        self.assertEqual(self.estimator_stack.elements[0].base_element.C, 3)
        self.assertEqual(
            self.estimator_stack.elements[1].base_element.min_samples_leaf, 1)

        with self.assertRaises(ValueError):
            self.estimator_stack.set_params(**{'any_weird_param': 1})

        with self.assertRaises(ValueError):
            self.transformer_stack.set_params(**{'any_weird_param': 1})

    def test_add(self):
        """Elements can be given at construction or appended with ``+=``."""
        stack = Stack('MyStack', [
            PipelineElement('PCA', {'n_components': [5]}),
            PipelineElement('FastICA')
        ])
        self.assertEqual(len(stack.elements), 2)
        self.assertDictEqual(stack._hyperparameters,
                             {'MyStack__PCA__n_components': [5]})

        stack = Stack('MyStack')
        stack += PipelineElement('PCA', {'n_components': [5]})
        stack += PipelineElement('FastICA')
        self.assertEqual(len(stack.elements), 2)
        self.assertDictEqual(stack._hyperparameters,
                             {'MyStack__PCA__n_components': [5]})

        def callback(X, y=None):
            pass

        stack = Stack('MyStack', [
            PipelineElement('PCA'),
            CallbackElement('MyCallback', callback),
            Switch('MySwitch',
                   [PipelineElement('PCA'),
                    PipelineElement('FastICA')]),
            Branch('MyBranch', [PipelineElement('PCA')])
        ])
        self.assertEqual(len(stack.elements), 4)

        # test doubled item
        with self.assertRaises(ValueError):
            stack += stack.elements[0]

        stack += PipelineElement('PCA', {'n_components': [10, 20]})
        self.assertEqual(stack.elements[-1].name, 'PCA2')
        self.assertDictEqual(
            stack.hyperparameters, {
                'MyStack__MySwitch__current_element': [(0, 0), (1, 0)],
                'MyStack__PCA2__n_components': [10, 20]
            })

    def test_feature_importances(self):
        """Estimator stacks report ``feature_importances_`` as None."""
        # single item
        self.estimator_stack.fit(self.X, self.y)
        self.assertIsNone(self.estimator_stack.feature_importances_)

        self.estimator_branch_stack.fit(self.X, self.y)
        self.assertIsNone(self.estimator_branch_stack.feature_importances_)

    def test_use_probabilities(self):
        """``use_probabilities`` switches predict() to probability output."""
        self.estimator_stack.use_probabilities = True
        self.estimator_stack.fit(self.X, self.y)
        probas = self.estimator_stack.predict(self.X)
        self.assertEqual(probas.shape[1], 3)

        self.estimator_stack.use_probabilities = False
        self.estimator_stack.fit(self.X, self.y)
        preds = self.estimator_stack.predict(self.X)
        self.assertEqual(preds.shape[1], 2)
        probas = self.estimator_stack.predict_proba(self.X)
        self.assertEqual(probas.shape[1], 3)