def test_copy_me(self):
    """Verify that ``Branch.copy_me`` produces an equivalent, independent copy."""
    original = Branch('MyBranch', [self.scaler, self.pca])

    # A fresh copy reports the same random state and element description.
    duplicate = original.copy_me()
    self.assertEqual(original.random_state, duplicate.random_state)
    self.assertDictEqual(elements_to_dict(duplicate),
                         elements_to_dict(original))

    # Changing a parameter on a copy must not show up on the source branch.
    duplicate = original.copy_me()
    duplicate.elements[1].base_element.n_components = 3
    copied_components = duplicate.elements[1].base_element.n_components
    source_components = original.elements[1].base_element.n_components
    self.assertNotEqual(copied_components, source_components)

    # A plain alias is the same object, so the change is visible on both names.
    alias = original
    alias.elements[1].base_element.n_components = 3
    alias_components = alias.elements[1].base_element.n_components
    source_components = original.elements[1].base_element.n_components
    self.assertEqual(alias_components, source_components)
class StackTests(unittest.TestCase):
    """Tests for ``Stack`` built from pipeline elements and from branches."""

    def setUp(self):
        """Load the breast cancer data and build the stacks under test."""
        # ``return_X_y`` is passed by keyword: recent scikit-learn releases
        # declare it keyword-only and reject the positional form.
        self.X, self.y = load_breast_cancer(return_X_y=True)

        self.pca = PipelineElement('PCA', {'n_components': [5, 10]})
        self.scaler = PipelineElement('StandardScaler', {'with_mean': [True]})
        self.svc = PipelineElement('SVC', {'C': [1, 2]})
        self.tree = PipelineElement('DecisionTreeClassifier',
                                    {'min_samples_leaf': [3, 5]})

        self.transformer_branch_1 = Branch('TransBranch1', [self.pca.copy_me()])
        self.transformer_branch_2 = Branch('TransBranch2',
                                           [self.scaler.copy_me()])
        self.estimator_branch_1 = Branch('EstBranch1', [self.svc.copy_me()])
        self.estimator_branch_2 = Branch('EstBranch2', [self.tree.copy_me()])

        self.transformer_stack = Stack(
            'TransformerStack', [self.pca.copy_me(), self.scaler.copy_me()])
        self.estimator_stack = Stack(
            'EstimatorStack', [self.svc.copy_me(), self.tree.copy_me()])
        self.transformer_branch_stack = Stack('TransBranchStack', [
            self.transformer_branch_1.copy_me(),
            self.transformer_branch_2.copy_me(),
        ])
        self.estimator_branch_stack = Stack('EstBranchStack', [
            self.estimator_branch_1.copy_me(),
            self.estimator_branch_2.copy_me(),
        ])

        # Each entry pairs the stand-alone elements with the stack that was
        # built from copies of them.
        self.stacks = [
            ([self.pca, self.scaler], self.transformer_stack),
            ([self.svc, self.tree], self.estimator_stack),
            ([self.transformer_branch_1, self.transformer_branch_2],
             self.transformer_branch_stack),
            ([self.estimator_branch_1, self.estimator_branch_2],
             self.estimator_branch_stack),
        ]

    def test_copy_me(self):
        """A copied stack has new element objects but the same description."""
        for _, stack in self.stacks:
            copy = stack.copy_me()
            self.assertEqual(stack.random_state, copy.random_state)
            self.assertNotEqual(stack.elements[0].__dict__,
                                copy.elements[0].__dict__)
            self.assertDictEqual(elements_to_dict(stack),
                                 elements_to_dict(copy))

    def test_horizontal_stacking(self):
        """Stack output width equals the summed widths of its two elements."""
        for (element_1, element_2), stack in self.stacks:
            # fit elements
            Xt_1 = element_1.fit(self.X, self.y).transform(self.X, self.y)
            Xt_2 = element_2.fit(self.X, self.y).transform(self.X, self.y)
            Xt = stack.fit(self.X, self.y).transform(self.X, self.y)

            # output of transform() changes depending on whether it is an
            # estimator stack or a transformer stack
            if isinstance(Xt, tuple):
                Xt = Xt[0]
                Xt_1 = Xt_1[0]
                Xt_2 = Xt_2[0]

            # One-dimensional outputs are reshaped into single columns so the
            # widths can be added.
            if len(Xt_1.shape) == 1:
                Xt_1 = np.reshape(Xt_1, (-1, 1))
                Xt_2 = np.reshape(Xt_2, (-1, 1))

            self.assertEqual(Xt.shape[1], Xt_1.shape[-1] + Xt_2.shape[-1])

    def recursive_assertion(self, element_a, element_b):
        """Assert that two (possibly nested) dicts agree on every key of ``element_a``.

        Arrays are compared element-wise, nested dicts recursively and
        everything else with ``assertEqual``.
        """
        for key in element_a:
            value_a = element_a[key]
            if isinstance(value_a, np.ndarray):
                np.testing.assert_array_equal(value_a, element_b[key])
            elif isinstance(value_a, dict):
                self.recursive_assertion(value_a, element_b[key])
            else:
                self.assertEqual(value_a, element_b[key])

    def test_fit(self):
        """Fitting a stack matches fitting its elements individually."""
        cases = [
            ([self.pca, self.scaler], self.transformer_stack),
            ([self.svc, self.tree], self.estimator_stack),
        ]
        for elements, stack in cases:
            np.random.seed(42)
            stack = stack.fit(self.X, self.y)

            np.random.seed(42)
            for i, element in enumerate(elements):
                element = element.fit(self.X, self.y)
                element_dict = elements_to_dict(element)
                stack_dict = elements_to_dict(stack.elements[i])
                self.recursive_assertion(element_dict, stack_dict)

    def test_transform(self):
        """Stack transform equals the horizontally stacked element outputs."""
        for elements, stack in self.stacks:
            np.random.seed(42)
            Xt_stack, _, _ = stack.fit(self.X, self.y).transform(self.X)

            np.random.seed(42)
            Xt_elements = None
            for element in elements:
                Xt_element, _, _ = element.fit(self.X,
                                               self.y).transform(self.X)
                Xt_elements = PhotonDataHelper.stack_data_horizontally(
                    Xt_elements, Xt_element)

            np.testing.assert_array_equal(Xt_stack, Xt_elements)

    def test_predict(self):
        """Stack predictions equal the horizontally stacked element predictions."""
        cases = [
            ([self.svc, self.tree], self.estimator_stack),
            ([self.estimator_branch_1, self.estimator_branch_2],
             self.estimator_branch_stack),
        ]
        for elements, stack in cases:
            np.random.seed(42)
            stack = stack.fit(self.X, self.y)
            yt_stack = stack.predict(self.X)

            np.random.seed(42)
            Xt_elements = None
            for element in elements:
                Xt_element = element.fit(self.X, self.y).predict(self.X)
                Xt_elements = PhotonDataHelper.stack_data_horizontally(
                    Xt_elements, Xt_element)

            np.testing.assert_array_equal(yt_stack, Xt_elements)

    def test_predict_proba(self):
        """Stack probabilities equal the stacked per-element probabilities."""
        cases = [
            ([self.svc, self.tree], self.estimator_stack),
            ([self.estimator_branch_1, self.estimator_branch_2],
             self.estimator_branch_stack),
        ]
        for elements, stack in cases:
            np.random.seed(42)
            stack = stack.fit(self.X, self.y)
            yt_stack = stack.predict_proba(self.X)

            np.random.seed(42)
            Xt_elements = None
            for element in elements:
                Xt_element = element.fit(self.X,
                                         self.y).predict_proba(self.X)
                # When predict_proba yields None, plain predictions are used
                # in its place.
                if Xt_element is None:
                    Xt_element = element.fit(self.X, self.y).predict(self.X)
                Xt_elements = PhotonDataHelper.stack_data_horizontally(
                    Xt_elements, Xt_element)

            np.testing.assert_array_equal(yt_stack, Xt_elements)

    def test_inverse_transform(self):
        """``inverse_transform`` on a stack raises ``NotImplementedError``."""
        with self.assertRaises(NotImplementedError):
            self.stacks[0][1].fit(self.X, self.y).inverse_transform(self.X)

    def test_set_params(self):
        """Parameters are routed to the stack's elements; unknown keys raise."""
        trans_config = {
            'PCA__n_components': 2,
            'PCA__disabled': True,
            'StandardScaler__with_mean': True
        }
        est_config = {
            'SVC__C': 3,
            'DecisionTreeClassifier__min_samples_leaf': 1
        }

        # transformer stack
        self.transformer_stack.set_params(**trans_config)
        pca_element = self.transformer_stack.elements[0]
        scaler_element = self.transformer_stack.elements[1]
        self.assertEqual(pca_element.base_element.n_components, 2)
        self.assertEqual(pca_element.disabled, True)
        self.assertEqual(scaler_element.base_element.with_mean, True)

        # estimator stack
        self.estimator_stack.set_params(**est_config)
        self.assertEqual(self.estimator_stack.elements[0].base_element.C, 3)
        self.assertEqual(
            self.estimator_stack.elements[1].base_element.min_samples_leaf, 1)

        with self.assertRaises(ValueError):
            self.estimator_stack.set_params(**{'any_weird_param': 1})

        with self.assertRaises(ValueError):
            self.transformer_stack.set_params(**{'any_weird_param': 1})

    def test_add(self):
        """Elements can be added via the constructor or ``+=``."""
        expected_hyperparameters = {'MyStack__PCA__n_components': [5]}

        stack = Stack('MyStack', [
            PipelineElement('PCA', {'n_components': [5]}),
            PipelineElement('FastICA')
        ])
        self.assertEqual(len(stack.elements), 2)
        self.assertDictEqual(stack._hyperparameters, expected_hyperparameters)

        stack = Stack('MyStack')
        stack += PipelineElement('PCA', {'n_components': [5]})
        stack += PipelineElement('FastICA')
        self.assertEqual(len(stack.elements), 2)
        self.assertDictEqual(stack._hyperparameters, expected_hyperparameters)

        def callback(X, y=None):
            pass

        stack = Stack('MyStack', [
            PipelineElement('PCA'),
            CallbackElement('MyCallback', callback),
            Switch('MySwitch',
                   [PipelineElement('PCA'),
                    PipelineElement('FastICA')]),
            Branch('MyBranch', [PipelineElement('PCA')])
        ])
        self.assertEqual(len(stack.elements), 4)

        # test doubled item
        with self.assertRaises(ValueError):
            stack += stack.elements[0]

        # A new element whose name is already taken gets a numbered name.
        stack += PipelineElement('PCA', {'n_components': [10, 20]})
        self.assertEqual(stack.elements[-1].name, 'PCA2')
        self.assertDictEqual(
            stack.hyperparameters, {
                'MyStack__MySwitch__current_element': [(0, 0), (1, 0)],
                'MyStack__PCA2__n_components': [10, 20]
            })

    def test_feature_importances(self):
        """Fitted estimator stacks report ``None`` as feature importances."""
        # single item
        self.estimator_stack.fit(self.X, self.y)
        self.assertIsNone(self.estimator_stack.feature_importances_)

        self.estimator_branch_stack.fit(self.X, self.y)
        self.assertIsNone(self.estimator_branch_stack.feature_importances_)

    def test_use_probabilities(self):
        """``use_probabilities`` switches ``predict`` between 3 and 2 columns."""
        self.estimator_stack.use_probabilities = True
        self.estimator_stack.fit(self.X, self.y)
        probas = self.estimator_stack.predict(self.X)
        self.assertEqual(probas.shape[1], 3)

        self.estimator_stack.use_probabilities = False
        self.estimator_stack.fit(self.X, self.y)
        preds = self.estimator_stack.predict(self.X)
        self.assertEqual(preds.shape[1], 2)
        probas = self.estimator_stack.predict_proba(self.X)
        self.assertEqual(probas.shape[1], 3)
class SwitchTests(unittest.TestCase):
    """Tests for ``Switch`` over estimators, transformers, branches and switches."""

    def setUp(self):
        """Load the breast cancer data and build the switches under test."""
        # ``return_X_y`` is passed by keyword: recent scikit-learn releases
        # declare it keyword-only and reject the positional form.
        self.X, self.y = load_breast_cancer(return_X_y=True)

        self.svc = PipelineElement('SVC', {
            'C': [0.1, 1],
            'kernel': ['rbf', 'sigmoid']
        })
        self.tree = PipelineElement('DecisionTreeClassifier',
                                    {'min_samples_split': [2, 3, 4]})
        self.gpc = PipelineElement('GaussianProcessClassifier')
        self.pca = PipelineElement('PCA')

        self.estimator_branch = Branch('estimator_branch',
                                       [self.tree.copy_me()])
        self.transformer_branch = Branch('transformer_branch',
                                         [self.pca.copy_me()])

        self.estimator_switch = Switch(
            'estimator_switch',
            [self.svc.copy_me(),
             self.tree.copy_me(),
             self.gpc.copy_me()])
        self.estimator_switch_with_branch = Switch(
            'estimator_switch_with_branch',
            [self.tree.copy_me(),
             self.estimator_branch.copy_me()])
        self.transformer_switch_with_branch = Switch(
            'transformer_switch_with_branch',
            [self.pca.copy_me(),
             self.transformer_branch.copy_me()])
        self.switch_in_switch = Switch('Switch_in_switch', [
            self.transformer_branch.copy_me(),
            self.transformer_switch_with_branch.copy_me()
        ])

    def test_init(self):
        """The switch keeps the name it was constructed with."""
        self.assertEqual(self.estimator_switch.name, 'estimator_switch')

    def test_hyperparams(self):
        """Each element contributes its configurations to the switch grid."""
        # assert number of different configs to test
        # each config combi for each element: 4 for SVC, 3 for the decision
        # tree and 1 for the Gaussian process classifier = 8
        configurations = self.estimator_switch.pipeline_element_configurations
        self.assertEqual(len(configurations), 3)
        self.assertEqual(len(configurations[0]), 4)
        self.assertEqual(len(configurations[1]), 3)

        expected_choices = [(0, 0), (0, 1), (0, 2), (0, 3),
                            (1, 0), (1, 1), (1, 2),
                            (2, 0)]

        # hyperparameters
        self.assertDictEqual(
            self.estimator_switch.hyperparameters,
            {'estimator_switch__current_element': expected_choices})

        # config grid
        self.assertListEqual(
            self.estimator_switch.generate_config_grid(),
            [{'estimator_switch__current_element': choice}
             for choice in expected_choices])

    def test_set_params(self):
        """Both grid-style and per-element parameter styles are accepted."""
        # test for grid search
        false_config = {'current_element': 1}
        with self.assertRaises(ValueError):
            self.estimator_switch.set_params(**false_config)

        correct_config = {'current_element': (0, 1)}
        self.estimator_switch.set_params(**correct_config)
        self.assertEqual(self.estimator_switch.base_element.base_element.C,
                         0.1)
        self.assertEqual(
            self.estimator_switch.base_element.base_element.kernel, 'sigmoid')

        # test for other optimizers
        smac_config = {'SVC__C': 2, 'SVC__kernel': 'rbf'}
        self.estimator_switch.set_params(**smac_config)
        self.assertEqual(self.estimator_switch.base_element.base_element.C, 2)
        self.assertEqual(
            self.estimator_switch.base_element.base_element.kernel, 'rbf')

    def test_fit(self):
        """Fitting through the switch matches fitting the selected tree directly."""
        np.random.seed(42)
        self.estimator_switch.set_params(**{'current_element': (1, 0)})
        self.estimator_switch.fit(self.X, self.y)

        np.random.seed(42)
        self.tree.set_params(**{'min_samples_split': 2})
        self.tree.fit(self.X, self.y)

        np.testing.assert_array_equal(
            self.tree.base_element.feature_importances_,
            self.estimator_switch.base_element.feature_importances_)

    def test_transform(self):
        """Transforming through the switch matches the stand-alone PCA."""
        self.transformer_switch_with_branch.set_params(
            **{'current_element': (0, 0)})
        self.transformer_switch_with_branch.fit(self.X, self.y)
        self.pca.fit(self.X, self.y)

        switch_Xt, _, _ = self.transformer_switch_with_branch.transform(self.X)
        pca_Xt, _, _ = self.pca.transform(self.X)
        self.assertTrue(np.array_equal(pca_Xt, switch_Xt))

    def test_predict(self):
        """Predicting through the switch matches the stand-alone tree."""
        self.estimator_switch.set_params(**{'current_element': (1, 0)})
        np.random.seed(42)
        self.estimator_switch.fit(self.X, self.y)

        self.tree.set_params(**{'min_samples_split': 2})
        np.random.seed(42)
        self.tree.fit(self.X, self.y)

        switch_preds = self.estimator_switch.predict(self.X)
        tree_preds = self.tree.predict(self.X)
        self.assertTrue(np.array_equal(switch_preds, tree_preds))

    def test_predict_proba(self):
        """Probabilities through the switch match the stand-alone classifier."""
        gpc = PipelineElement('GaussianProcessClassifier')
        svc = PipelineElement('SVC')
        switch = Switch('EstimatorSwitch', [gpc, svc])
        switch.set_params(**{'current_element': (0, 0)})

        np.random.seed(42)
        switch_probas = switch.fit(self.X, self.y).predict_proba(self.X)
        np.random.seed(42)
        gpr_probas = self.gpc.fit(self.X, self.y).predict_proba(self.X)
        self.assertTrue(np.array_equal(switch_probas, gpr_probas))

    def test_inverse_transform(self):
        """Inverse transform through the switch matches the stand-alone PCA."""
        self.transformer_switch_with_branch.set_params(
            **{'current_element': (0, 0)})
        self.transformer_switch_with_branch.fit(self.X, self.y)
        self.pca.fit(self.X, self.y)

        Xt_pca, _, _ = self.pca.transform(self.X)
        Xt_switch, _, _ = self.transformer_switch_with_branch.transform(self.X)
        X_pca, _, _ = self.pca.inverse_transform(Xt_pca)
        X_switch, _, _ = self.transformer_switch_with_branch.inverse_transform(
            Xt_switch)

        self.assertTrue(np.array_equal(Xt_pca, Xt_switch))
        self.assertTrue(np.array_equal(X_pca, X_switch))
        np.testing.assert_almost_equal(X_switch, self.X)

    def test_base_element(self):
        """``base_element`` is the very element object that was selected."""
        switch = Switch('switch', [self.svc, self.tree])
        switch.set_params(**{'current_element': (1, 1)})
        self.assertIs(switch.base_element, self.tree)
        self.assertIs(switch.base_element.base_element,
                      self.tree.base_element)

        # other optimizer
        switch.set_params(**{'DecisionTreeClassifier__min_samples_split': 2})
        self.assertIs(switch.base_element, self.tree)
        self.assertIs(switch.base_element.base_element,
                      self.tree.base_element)

    def test_copy_me(self):
        """Copies hold different element objects but the same description."""
        switches = [
            self.estimator_switch, self.estimator_switch_with_branch,
            self.transformer_switch_with_branch, self.switch_in_switch
        ]

        for switch in switches:
            copy = switch.copy_me()
            self.assertEqual(switch.random_state, copy.random_state)

            for i, copied_element in enumerate(copy.elements):
                self.assertNotEqual(copied_element, switch.elements[i])

            switch_dict = elements_to_dict(switch)
            copy_dict = elements_to_dict(copy)
            self.assertDictEqual(copy_dict, switch_dict)

    def test_estimator_type(self):
        """``_estimator_type`` reflects the elements; mixed types raise."""
        pca = PipelineElement('PCA')
        ica = PipelineElement('FastICA')
        svc = PipelineElement('SVC')
        svr = PipelineElement('SVR')
        tree_class = PipelineElement('DecisionTreeClassifier')
        tree_reg = PipelineElement('DecisionTreeRegressor')

        # Mixing a transformer with an estimator is rejected.
        switch = Switch('MySwitch', [pca, svr])
        with self.assertRaises(NotImplementedError):
            _ = switch._estimator_type

        # Mixing a classifier with a regressor is rejected.
        switch = Switch('MySwitch', [svc, svr])
        with self.assertRaises(NotImplementedError):
            _ = switch._estimator_type

        switch = Switch('MySwitch', [pca, ica])
        self.assertEqual(switch._estimator_type, None)

        switch = Switch('MySwitch', [tree_class, svc])
        self.assertEqual(switch._estimator_type, 'classifier')

        switch = Switch('MySwitch', [tree_reg, svr])
        self.assertEqual(switch._estimator_type, 'regressor')

        self.assertEqual(self.estimator_switch._estimator_type, 'classifier')
        self.assertEqual(self.estimator_switch_with_branch._estimator_type,
                         'classifier')
        self.assertEqual(self.transformer_switch_with_branch._estimator_type,
                         None)
        self.assertEqual(self.switch_in_switch._estimator_type, None)

    def test_add(self):
        """Elements can be added via the constructor or ``+=``."""
        self.assertEqual(len(self.estimator_switch.elements), 3)
        self.assertEqual(len(self.switch_in_switch.elements), 2)
        self.assertEqual(len(self.transformer_switch_with_branch.elements), 2)
        self.assertEqual(
            list(self.estimator_switch.elements_dict.keys()),
            ['SVC', 'DecisionTreeClassifier', 'GaussianProcessClassifier'])
        self.assertEqual(
            list(self.switch_in_switch.elements_dict.keys()),
            ['transformer_branch', 'transformer_switch_with_branch'])

        # Both construction styles must simply succeed.
        switch = Switch('MySwitch',
                        [PipelineElement('PCA'),
                         PipelineElement('FastICA')])
        switch = Switch('MySwitch2')
        switch += PipelineElement('PCA')
        switch += PipelineElement('FastICA')

        # test doubled names
        with self.assertRaises(ValueError):
            self.estimator_switch += self.estimator_switch.elements[0]

        self.estimator_switch += PipelineElement("SVC")
        self.assertEqual(self.estimator_switch.elements[-1].name, "SVC2")

        self.estimator_switch += PipelineElement(
            "SVC", hyperparameters={'kernel': ['polynomial', 'sigmoid']})
        self.assertEqual(self.estimator_switch.elements[-1].name, "SVC3")

        self.estimator_switch += PipelineElement("SVR")
        self.assertEqual(self.estimator_switch.elements[-1].name, "SVR")

        self.estimator_switch += PipelineElement("SVC")
        self.assertEqual(self.estimator_switch.elements[-1].name, "SVC4")

        # check that hyperparameters are renamed respectively
        renamed_config = (
            self.estimator_switch.pipeline_element_configurations[4][0])
        self.assertEqual(renamed_config["SVC3__kernel"], 'polynomial')

    def test_feature_importances(self):
        """Feature importances are exposed per feature or reported as ``None``."""
        n_features = self.X.shape[1]

        self.estimator_switch.set_params(**{'current_element': (1, 0)})
        self.estimator_switch.fit(self.X, self.y)
        self.assertEqual(len(self.estimator_switch.feature_importances_),
                         n_features)

        self.estimator_switch_with_branch.set_params(
            **{'current_element': (1, 0)})
        self.estimator_switch_with_branch.fit(self.X, self.y)
        self.assertEqual(
            len(self.estimator_switch_with_branch.feature_importances_),
            n_features)

        self.estimator_switch.set_params(**{'current_element': (2, 0)})
        self.estimator_switch.fit(self.X, self.y)
        # NOTE(review): this checks ``estimator_branch``, which was not fitted
        # above; possibly ``estimator_switch`` was intended -- confirm before
        # changing the assertion.
        self.assertIsNone(self.estimator_branch.feature_importances_)

        self.switch_in_switch.set_params(**{'current_element': (1, 0)})
        self.switch_in_switch.fit(self.X, self.y)
        self.assertIsNone(self.switch_in_switch.feature_importances_)

        self.estimator_switch.set_params(**{'current_element': (1, 0)})
        self.switch_in_switch.fit(self.X, self.y)
        self.assertIsNone(self.switch_in_switch.feature_importances_)