def setUp(self):
    self.outer_hyperpipe = Hyperpipe('outer_pipe', KFold(n_splits=2))
    # set up inner pipeline
    self.inner_hyperpipe = Hyperpipe('inner_pipe', KFold(n_splits=2),
                                     optimizer=self.outer_hyperpipe.optimizer,
                                     local_search=False)
    self.inner_pipeline_test_element = PipelineElement.create('test_wrapper')
    self.inner_hyperpipe += self.inner_pipeline_test_element
    self.pipeline_fusion = PipelineStacking('fusion_element',
                                            [self.inner_hyperpipe])
    # set up outer pipeline
    self.outer_pipeline_test_element = PipelineElement.create('test_wrapper')
    self.outer_hyperpipe += self.outer_pipeline_test_element
    self.outer_hyperpipe += self.pipeline_fusion
    self.X = np.arange(1, 101)
    self.y = np.ones((100,))
    self.inner_hyperpipe.debug_cv_mode = True
    self.outer_hyperpipe.debug_cv_mode = True
def setUp(self):
    self.pca_pipe_element = PipelineElement.create('pca',
                                                   {'n_components': [1, 2]},
                                                   test_disabled=True)
    self.svc_pipe_element = PipelineElement.create('svc',
                                                   {'C': [0.1, 1],
                                                    'kernel': ['rbf', 'sigmoid']})
def setUp(self):
    self.svc_pipe_element = PipelineElement.create('svc',
                                                   {'C': [0.1, 1],
                                                    'kernel': ['rbf', 'sigmoid']})
    self.lr_pipe_element = PipelineElement.create('logistic',
                                                  {'C': [0.1, 0.3, 1]})
    self.pipe_switch = PipelineSwitch('switch', [self.svc_pipe_element,
                                                 self.lr_pipe_element])
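# A minimal sketch (not part of the original suite) of exercising the switch
# built in setUp end-to-end: wire it into a Hyperpipe and fit on toy data, so
# the optimizer alternates between the SVC and the logistic element. The test
# name and toy data shapes are assumptions; the Hyperpipe/KFold usage mirrors
# the other tests in this file.
def test_switch_in_hyperpipe(self):
    switch_pipe = Hyperpipe('switch_pipe', KFold(n_splits=2))
    switch_pipe += self.pipe_switch
    X = np.random.randn(40, 5)   # hypothetical toy features
    y = np.tile([0, 1], 20)      # hypothetical binary targets
    switch_pipe.fit(X, y)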
def setUp(self):
    self.pca_pipe_element = PipelineElement.create('pca',
                                                   {'n_components': [1, 2]},
                                                   test_disabled=True)
    self.svc_pipe_element = PipelineElement.create('svc',
                                                   {'C': [0.1, 1],
                                                    'kernel': ['rbf', 'sigmoid']})
    self.cv_object = KFold(n_splits=3)
    self.hyperpipe = Hyperpipe('god', self.cv_object)
    self.hyperpipe += self.pca_pipe_element
    self.hyperpipe.add(self.svc_pipe_element)
def testCaseA(self):
    pca_n_components = [2, 5]
    svc_c = [.1, 1, 5]
    # svc_kernel = ['rbf']
    svc_kernel = ['rbf', 'linear']

    # SET UP HYPERPIPE
    my_pipe = Hyperpipe('primary_pipe', optimizer='grid_search',
                        optimizer_params={},
                        metrics=['accuracy', 'precision', 'f1_score'],
                        # shuffle=True added: current scikit-learn rejects
                        # random_state on an unshuffled KFold
                        inner_cv=KFold(n_splits=2, shuffle=True, random_state=3),
                        eval_final_performance=False)
    my_pipe += PipelineElement.create('standard_scaler')
    my_pipe += PipelineElement.create('pca', {'n_components': pca_n_components})
    my_pipe += PipelineElement.create('svc', {'C': svc_c, 'kernel': svc_kernel})

    # START HYPERPARAMETER SEARCH
    my_pipe.fit(self.__X, self.__y)

    print(my_pipe._test_performances)
    pipe_results = {'train': [], 'test': []}
    for i in range(len(my_pipe._performance_history_list)):
        pipe_results['train'].extend(
            my_pipe._performance_history_list[i]['accuracy_folds']['train'])
        pipe_results['test'].extend(
            my_pipe._performance_history_list[i]['accuracy_folds']['test'])

    print('\n\n')
    print('Running sklearn version...')
    # cv_outer = KFold(n_splits=2, shuffle=True, random_state=3)
    cv_inner_1 = KFold(n_splits=2, shuffle=True, random_state=3)

    sk_results = {'train': [], 'test': []}
    for n_comp in pca_n_components:
        for c in svc_c:
            for current_kernel in svc_kernel:
                tr_acc = []
                val_acc = []
                for train_2, val_1 in cv_inner_1.split(self.__X):
                    data_train_2 = self.__X[train_2]
                    print(data_train_2.shape)
                    data_val_1 = self.__X[val_1]
                    y_train_2 = self.__y[train_2]
                    y_val_1 = self.__y[val_1]

                    my_scaler = StandardScaler()
                    my_scaler.fit(data_train_2)
                    data_train_2 = my_scaler.transform(data_train_2)
                    data_val_1 = my_scaler.transform(data_val_1)

                    # Run PCA
                    my_pca = PCA(n_components=n_comp)
                    my_pca.fit(data_train_2)
                    data_tr_2_pca = my_pca.transform(data_train_2)
                    data_val_1_pca = my_pca.transform(data_val_1)

                    # Run SVC
                    my_svc = SVC(kernel=current_kernel, C=c)
                    my_svc.fit(data_tr_2_pca, y_train_2)

                    tr_acc.append(my_svc.score(data_tr_2_pca, y_train_2))
                    val_acc.append(my_svc.score(data_val_1_pca, y_val_1))
                    print('n_components: ', n_comp, 'kernel:', current_kernel, 'c:', c)
                    print('Training 2:', tr_acc[-1], 'validation 1:', val_acc[-1])
                sk_results['train'].extend(tr_acc)
                sk_results['test'].extend(val_acc)

    print('\nCompare results of last iteration (outer cv)...')
    print('SkL Train:', sk_results['train'])
    print('Pipe Train:', pipe_results['train'])
    print('SkL test: ', sk_results['test'])
    print('Pipe test: ', pipe_results['test'])
    self.assertEqual(sk_results['test'], pipe_results['test'])
    self.assertEqual(sk_results['train'], pipe_results['train'])
def testCaseA(self):
    pca_n_components = [2, 5]
    svc_c = [.1, 1]
    svc_kernel = ['rbf']
    # svc_kernel = ['rbf', 'linear']

    # SET UP HYPERPIPE
    my_pipe = Hyperpipe('primary_pipe', optimizer='grid_search',
                        optimizer_params={},
                        # shuffle=True added: current scikit-learn rejects
                        # random_state on an unshuffled KFold
                        inner_cv=KFold(n_splits=2, shuffle=True, random_state=3),
                        outer_cv=KFold(n_splits=2, shuffle=True, random_state=3),
                        verbose=2,
                        eval_final_performance=True)
    my_pipe += PipelineElement.create('standard_scaler')
    my_pipe += PipelineElement.create('pca', {'n_components': pca_n_components})
    my_pipe += PipelineElement.create('svc', {'C': svc_c, 'kernel': svc_kernel})

    # START HYPERPARAMETER SEARCH
    my_pipe.fit(self.__X, self.__y)

    from Framework import LogExtractor
    log_ex = LogExtractor.LogExtractor(my_pipe.result_tree)
    log_ex.extract_csv("test_case_A2.csv")

    # The assertions at the end of this test need pipe_results, so the
    # fold-wise accuracies are collected here as in the variant above.
    print(my_pipe._test_performances)
    pipe_results = {'train': [], 'test': []}
    for i in range(len(my_pipe._performance_history_list)):
        pipe_results['train'].extend(
            my_pipe._performance_history_list[i]['accuracy_folds']['train'])
        pipe_results['test'].extend(
            my_pipe._performance_history_list[i]['accuracy_folds']['test'])

    print('\n\n')
    print('Running sklearn version...')
    cv_outer = KFold(n_splits=2, shuffle=True, random_state=3)
    cv_inner_1 = KFold(n_splits=2, shuffle=True, random_state=3)

    for train_1, test in cv_outer.split(self.__X):
        data_train_1 = self.__X[train_1]
        data_test = self.__X[test]
        y_train_1 = self.__y[train_1]
        y_test = self.__y[test]
        sk_results = {'train': [], 'test': []}
        for n_comp in pca_n_components:
            for current_kernel in svc_kernel:
                for c in svc_c:
                    tr_acc = []
                    val_acc = []
                    for train_2, val_1 in cv_inner_1.split(data_train_1):
                        data_train_2 = data_train_1[train_2]
                        data_val_1 = data_train_1[val_1]
                        y_train_2 = y_train_1[train_2]
                        y_val_1 = y_train_1[val_1]

                        my_scaler = StandardScaler()
                        my_scaler.fit(data_train_2)
                        data_train_2 = my_scaler.transform(data_train_2)
                        data_val_1 = my_scaler.transform(data_val_1)

                        # Run PCA
                        my_pca = PCA(n_components=n_comp)
                        my_pca.fit(data_train_2)
                        data_tr_2_pca = my_pca.transform(data_train_2)
                        data_val_1_pca = my_pca.transform(data_val_1)

                        # Run SVC
                        my_svc = SVC(kernel=current_kernel, C=c)
                        my_svc.fit(data_tr_2_pca, y_train_2)

                        tr_acc.append(my_svc.score(data_tr_2_pca, y_train_2))
                        val_acc.append(my_svc.score(data_val_1_pca, y_val_1))
                        print('n_components: ', n_comp, 'kernel:', current_kernel, 'c:', c)
                        print('Training 2:', tr_acc[-1], 'validation 1:', val_acc[-1])
                    sk_results['train'].extend(tr_acc)
                    sk_results['test'].extend(val_acc)

    print('\nCompare results of last iteration (outer cv)...')
    print('SkL Train:', sk_results['train'])
    print('Pipe Train:', pipe_results['train'])
    print('SkL test: ', sk_results['test'])
    print('Pipe test: ', pipe_results['test'])
    self.assertEqual(sk_results['test'], pipe_results['test'])
    self.assertEqual(sk_results['train'], pipe_results['train'])
def testCaseB(self):
    pca_n_components = [7, 15, 10]
    svc_c = [.1, 1]
    # svc_kernel = ['rbf']
    svc_kernel = ['rbf', 'linear']

    cv_outer = ShuffleSplit(n_splits=1, test_size=0.2, random_state=3)
    cv_inner_1 = ShuffleSplit(n_splits=1, test_size=0.2, random_state=3)
    cv_inner_2 = ShuffleSplit(n_splits=1, test_size=0.2, random_state=3)

    # SET UP HYPERPIPE
    outer_pipe = Hyperpipe('outer_pipe', optimizer='grid_search',
                           metrics=['accuracy'],
                           inner_cv=cv_inner_1,
                           outer_cv=cv_outer,
                           eval_final_performance=True)
    inner_pipe = Hyperpipe('pca_pipe', optimizer='grid_search',
                           inner_cv=cv_inner_2,
                           eval_final_performance=False)
    inner_pipe.add(PipelineElement.create('standard_scaler'))
    inner_pipe.add(PipelineElement.create('ae_pca',
                                          {'n_components': pca_n_components}))
    pipeline_fusion = PipelineStacking('fusion_element', [inner_pipe])
    outer_pipe.add(pipeline_fusion)
    outer_pipe.add(PipelineElement.create('svc', {'C': svc_c,
                                                  'kernel': svc_kernel}))

    # START HYPERPARAMETER SEARCH
    outer_pipe.fit(self.__X, self.__y)

    print(outer_pipe._test_performances)
    pipe_results = {'train': [], 'test': []}
    for i in range(len(outer_pipe._performance_history_list)):
        pipe_results['train'].extend(
            outer_pipe._performance_history_list[i]['accuracy_folds']['train'])
        pipe_results['test'].extend(
            outer_pipe._performance_history_list[i]['accuracy_folds']['test'])
    print(outer_pipe._test_performances['accuracy'])

    print('\n\n')
    print('Running sklearn version...\n')
    opt_tr_acc = []
    opt_test_acc = []
    for train_1, test in cv_outer.split(self.__X):
        data_train_1 = self.__X[train_1]
        data_test = self.__X[test]
        y_train_1 = self.__y[train_1]
        y_test = self.__y[test]
        config_inner_1 = {'C': [], 'kernel': []}
        sk_results_inner1 = {'train_2': [], 'val_1': [],
                             'train_2_mean': [], 'val_1_mean': []}
        print('Outer Split')
        print('n train_1:', data_train_1.shape[0], '\n')
        for c in svc_c:
            for current_kernel in svc_kernel:
                config_inner_1['C'].extend([c])
                config_inner_1['kernel'].extend([current_kernel])
                print('C:', c, 'Kernel:', current_kernel, '\n')
                svc_score_tr = []
                svc_score_te = []
                fold_cnt = 1
                for train_2, val_1 in cv_inner_1.split(data_train_1):
                    print('\n\nSklearn Outer Pipe FoldMetrics', fold_cnt)
                    data_train_2 = data_train_1[train_2]
                    data_val_1 = data_train_1[val_1]
                    y_train_2 = y_train_1[train_2]
                    y_val_1 = y_train_1[val_1]
                    print('n train_2:', data_train_2.shape[0], '\n')
                    config_inner_2 = {'n_comp': []}
                    print('Sklearn PCA Pipe')
                    sk_results_inner2 = {'train_3': [], 'val_2': [],
                                         'train_3_mean': [], 'val_2_mean': []}
                    for n_comp in pca_n_components:
                        config_inner_2['n_comp'].extend([n_comp])
                        tr_acc = []
                        val_acc = []
                        # print('Some training data:', data_train_2[0:2, 0:2])
                        for train_3, val_2 in cv_inner_2.split(data_train_2):
                            data_train_3 = data_train_2[train_3]
                            data_val_2 = data_train_2[val_2]

                            my_scaler = StandardScaler()
                            my_scaler.fit(data_train_3)
                            data_train_3 = my_scaler.transform(data_train_3)
                            data_val_2 = my_scaler.transform(data_val_2)

                            # Run PCA
                            my_pca = PCA_AE_Wrapper(n_components=n_comp)
                            my_pca.fit(data_train_3)
                            mae_tr = my_pca.score(data_train_3)
                            mae_te = my_pca.score(data_val_2)
                            tr_acc.append(mae_tr)
                            val_acc.append(mae_te)
                        sk_results_inner2['train_3'].extend(tr_acc)
                        sk_results_inner2['val_2'].extend(val_acc)
                        sk_results_inner2['train_3_mean'].extend([np.mean(tr_acc)])
                        sk_results_inner2['val_2_mean'].extend([np.mean(val_acc)])
                        print('n_comp:', n_comp)
                        print('n train_3 fold 1:', data_train_3.shape[0])
                        print('Training 3 mean:', [np.mean(tr_acc)],
                              'validation 2 mean:', [np.mean(val_acc)])

                    # find best config for val 2
                    best_config_id = np.argmin(sk_results_inner2['val_2_mean'])
                    print('Best PCA config:',
                          config_inner_2['n_comp'][best_config_id], '\n')

                    # fit optimum pipe
                    my_scaler = StandardScaler()
                    my_scaler.fit(data_train_2)
                    data_train_2 = my_scaler.transform(data_train_2)
                    data_val_1 = my_scaler.transform(data_val_1)

                    # Run PCA
                    my_pca = PCA_AE_Wrapper(
                        n_components=config_inner_2['n_comp'][best_config_id])
                    my_pca.fit(data_train_2)
                    data_tr_2_pca = my_pca.transform(data_train_2)
                    data_val_1_pca = my_pca.transform(data_val_1)

                    # Run SVC
                    my_svc = SVC(kernel=current_kernel, C=c)
                    my_svc.fit(data_tr_2_pca, y_train_2)

                    svc_score_tr.append(my_svc.score(data_tr_2_pca, y_train_2))
                    svc_score_te.append(my_svc.score(data_val_1_pca, y_val_1))
                    print('Fit Optimum PCA Config and train with SVC')
                    print('n train 2:', data_train_2.shape[0])
                    print('n_comp:', config_inner_2['n_comp'][best_config_id])
                    print('SVC Train:', svc_score_tr[-1])
                    print('SVC test:', svc_score_te[-1], '\n\n')
                    sk_results_inner1['train_2'].append(svc_score_tr[-1])
                    sk_results_inner1['val_1'].append(svc_score_te[-1])
                    fold_cnt += 1
                sk_results_inner1['train_2_mean'].append(np.mean(svc_score_tr))
                sk_results_inner1['val_1_mean'].append(np.mean(svc_score_te))

        print('\nNow find best config for SVC...')
        best_config_id_inner_1 = np.argmax(sk_results_inner1['val_1_mean'])
        print('Some test data:')
        print(data_test.shape)
        print(data_test[0:2, 0:2])

        # fit optimum pipe
        my_scaler = StandardScaler()
        my_scaler.fit(data_train_1)
        data_train_1 = my_scaler.transform(data_train_1)
        data_test = my_scaler.transform(data_test)

        # Run PCA (note: best_config_id carries over from the last inner fold)
        my_pca = PCA_AE_Wrapper(
            n_components=config_inner_2['n_comp'][best_config_id])
        my_pca.fit(data_train_1)
        data_tr_1_pca = my_pca.transform(data_train_1)
        data_test_pca = my_pca.transform(data_test)

        # Run SVC
        my_svc = SVC(kernel=config_inner_1['kernel'][best_config_id_inner_1],
                     C=config_inner_1['C'][best_config_id_inner_1])
        print('Best overall config:...')
        print('C = ', config_inner_1['C'][best_config_id_inner_1])
        print('kernel=', config_inner_1['kernel'][best_config_id_inner_1])
        print('pca_n_comp=', config_inner_2['n_comp'][best_config_id])
        print('n train 1:', data_train_1.shape[0])
        my_svc.fit(data_tr_1_pca, y_train_1)
        opt_tr_acc.append(my_svc.score(data_tr_1_pca, y_train_1))
        opt_test_acc.append(my_svc.score(data_test_pca, y_test))
        print('Train Acc:', opt_tr_acc[-1])
        print('test Acc:', opt_test_acc[-1])

    print('\nCompare results of last iteration (outer cv)...')
    print('SkL Train:', sk_results_inner1['train_2'])
    print('Pipe Train:', pipe_results['train'])
    print('SkL test: ', sk_results_inner1['val_1'])
    print('Pipe test: ', pipe_results['test'])
    print('\nEval final performance:')
    print('Pipe final perf:', outer_pipe._test_performances['accuracy'])
    print('Sklearn final perf:', opt_test_acc)
    self.assertEqual(sk_results_inner1['train_2'], pipe_results['train'])
    self.assertEqual(sk_results_inner1['val_1'], pipe_results['test'])
    self.assertEqual(opt_test_acc, outer_pipe._test_performances['accuracy'])
def testCaseC2(self):
    pca_n_components = [5, 10]
    svc_c = [0.1]
    svc_c_2 = [1]
    # svc_kernel = ['rbf']
    svc_kernel = ['linear']

    # SET UP HYPERPIPE
    outer_pipe = Hyperpipe('outer_pipe', optimizer='grid_search',
                           metrics=['accuracy'],
                           inner_cv=ShuffleSplit(n_splits=1, test_size=0.2,
                                                 random_state=3),
                           outer_cv=ShuffleSplit(n_splits=1, test_size=0.2,
                                                 random_state=3),
                           eval_final_performance=True)

    # Create pipe for first data source
    pipe_source_1 = Hyperpipe('source_1', optimizer='grid_search',
                              inner_cv=ShuffleSplit(n_splits=1, test_size=0.2,
                                                    random_state=3),
                              eval_final_performance=False)
    pipe_source_1.add(PipelineElement.create('SourceSplitter',
                                             {'column_indices': [np.arange(0, 10)]}))
    pipe_source_1.add(PipelineElement.create('pca',
                                             {'n_components': pca_n_components}))
    pipe_source_1.add(PipelineElement.create('svc', {'C': svc_c,
                                                     'kernel': svc_kernel}))

    # Create pipe for second data source
    pipe_source_2 = Hyperpipe('source_2', optimizer='grid_search',
                              inner_cv=ShuffleSplit(n_splits=1, test_size=0.2,
                                                    random_state=3),
                              eval_final_performance=False)
    pipe_source_2.add(PipelineElement.create('SourceSplitter',
                                             {'column_indices': [np.arange(10, 20)]}))
    pipe_source_2.add(PipelineElement.create('pca',
                                             {'n_components': pca_n_components}))
    pipe_source_2.add(PipelineElement.create('svc', {'C': svc_c,
                                                     'kernel': svc_kernel}))

    # Create pipe for third data source
    pipe_source_3 = Hyperpipe('source_3', optimizer='grid_search',
                              inner_cv=ShuffleSplit(n_splits=1, test_size=0.2,
                                                    random_state=3),
                              eval_final_performance=False)
    pipe_source_3.add(PipelineElement.create('SourceSplitter',
                                             {'column_indices': [np.arange(20, 30)]}))
    pipe_source_3.add(PipelineElement.create('pca',
                                             {'n_components': pca_n_components}))
    pipe_source_3.add(PipelineElement.create('svc', {'C': svc_c,
                                                     'kernel': svc_kernel}))

    # pipeline_fusion = PipelineStacking('multiple_source_pipes',
    #     [pipe_source_1, pipe_source_2, pipe_source_3], voting=False)
    pipeline_fusion = PipelineStacking('multiple_source_pipes',
                                       [pipe_source_1, pipe_source_2,
                                        pipe_source_3])
    outer_pipe.add(pipeline_fusion)
    # outer_pipe.add(PipelineElement.create('svc', {'C': svc_c_2, 'kernel': svc_kernel}))
    # outer_pipe.add(PipelineElement.create('knn', {'n_neighbors': [15]}))
    outer_pipe.add(PipelineElement.create('kdnn', {'target_dimension': [2],
                                                   'nb_epoch': [10]}))

    # START HYPERPARAMETER SEARCH
    outer_pipe.fit(self.__X, self.__y)

    print(outer_pipe._test_performances)
    pipe_results = {'train': [], 'test': []}
    for i in range(int(len(outer_pipe._performance_history_list) / 2)):
        pipe_results['train'].extend(
            outer_pipe._performance_history_list[i]['accuracy_folds']['train'])
        pipe_results['test'].extend(
            outer_pipe._performance_history_list[i]['accuracy_folds']['test'])
    print(outer_pipe._test_performances['accuracy'])
def test_create_failure(self):
    with self.assertRaises(NameError):
        PipelineElement.create('dusihdaushdisuhdusiahd', {})
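# A hedged companion to the failure test above: the happy path, assuming the
# element names used elsewhere in this suite (here 'pca') are registered with
# PipelineElement.create. The test name is an assumption, not original code.
def test_create_success(self):
    pca_element = PipelineElement.create('pca', {'n_components': [1, 2]})
    self.assertIsNotNone(pca_element)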
def testCaseA(self):
    pca_n_components = 10
    svc_c = 1
    svc_kernel = "rbf"

    # SET UP HYPERPIPE
    my_pipe = Hyperpipe('primary_pipe', optimizer='grid_search',
                        optimizer_params={},
                        metrics=['accuracy', 'precision', 'f1_score'],
                        inner_cv=KFold(n_splits=3),
                        outer_cv=KFold(n_splits=3),
                        eval_final_performance=True)
    my_pipe += PipelineElement.create('standard_scaler')
    my_pipe += PipelineElement.create('pca', {'n_components': [pca_n_components]})
    my_pipe += PipelineElement.create('svc', {'C': [svc_c],
                                              'kernel': [svc_kernel]})

    # START HYPERPARAMETER SEARCH
    my_pipe.fit(self.__X, self.__y)

    print(my_pipe._test_performances)
    from Framework import LogExtractor
    log_ex = LogExtractor.LogExtractor(my_pipe.result_tree)
    log_ex.extract_csv("test_case_A.csv")

    # This still has to go! ToDo
    from sklearn.preprocessing import StandardScaler
    from sklearn.decomposition import PCA
    from sklearn.svm import SVC
    from sklearn.pipeline import Pipeline
    from sklearn.metrics import f1_score, accuracy_score, precision_score

    # Now we are using the native scikit-learn methods
    sk_pipeline = Pipeline([("standard_scaler", StandardScaler()),
                            ("pca", PCA(n_components=pca_n_components)),
                            ("svc", SVC(C=svc_c, kernel=svc_kernel))])

    my_pipe._generate_outer_cv_indices()
    tmp_counter = 0
    for train_idx_arr, test_idx_arr in my_pipe.data_test_cases:
        sk_results = {'accuracy': [], 'precision': [],
                      'f1_score': [], 'default': []}

        outer_train_X = self.__X[train_idx_arr]
        outer_train_y = self.__y[train_idx_arr]
        outer_test_X = self.__X[test_idx_arr]
        outer_test_y = self.__y[test_idx_arr]

        sk_config_cv = KFold(n_splits=3)
        # Todo: test other configs and select best!
        for sub_train_idx, sub_test_idx in sk_config_cv.split(outer_train_X,
                                                              outer_train_y):
            inner_train_X = self.__X[sub_train_idx]
            inner_train_y = self.__y[sub_train_idx]
            # test_X = self.__X[sub_test_idx]
            # test_y = self.__y[sub_test_idx]

            # sk_pipeline.fit(inner_train_X, inner_train_y)
            fit_and_predict_score = _fit_and_score(sk_pipeline, outer_train_X,
                                                   outer_train_y, self.score,
                                                   sub_train_idx, sub_test_idx,
                                                   verbose=0, parameters={},
                                                   fit_params={},
                                                   return_train_score=True,
                                                   return_n_test_samples=True,
                                                   return_times=True,
                                                   return_parameters=True,
                                                   error_score='raise')

        sk_pipeline.fit(outer_train_X, outer_train_y)
        sk_prediction = sk_pipeline.predict(outer_test_X)

        sk_results['default'].append(fit_and_predict_score[1])
        sk_results['accuracy'].append(accuracy_score(outer_test_y, sk_prediction))
        sk_results['precision'].append(precision_score(outer_test_y, sk_prediction))
        sk_results['f1_score'].append(f1_score(outer_test_y, sk_prediction))

        # bestItem = np.argmax(sk_results['default'])
        # print([str(k) + ':' + str(i[bestItem]) for k, i in sk_results.items()])

        self.assertEqual(sk_results['accuracy'],
                         my_pipe._test_performances['accuracy'][tmp_counter])
        self.assertEqual(sk_results['precision'],
                         my_pipe._test_performances['precision'][tmp_counter])
        self.assertEqual(sk_results['f1_score'],
                         my_pipe._test_performances['f1_score'][tmp_counter])
        tmp_counter += 1
""" Test Feature Selection """ from sklearn.datasets import load_breast_cancer from sklearn.model_selection import KFold from Framework.PhotonBase import Hyperpipe, PipelineElement dataset = load_breast_cancer() X = dataset.data y = dataset.target # create cross-validation object first cv_object = KFold(n_splits=3, shuffle=True, random_state=0) # create a hyperPipe manager = Hyperpipe('god', cv_object, optimizer='random_grid_search') manager += PipelineElement.create('f_classif_select_percentile', {'percentile': [10, 20, 30, 100]}, test_disabled=True) # SVMs (linear and rbf) manager += PipelineElement.create('svc', {}, kernel='linear') manager.fit(X, y)
# dataset = load_breast_cancer()
# dataset_files = dataset.data
# targets = dataset.target

print(BrainAtlas._getAtlasDict())

# setup photonai HP
my_pipe = Hyperpipe('primary_pipe', optimizer='grid_search',
                    optimizer_params={},
                    metrics=['mean_squared_error', 'mean_absolute_error'],
                    inner_cv=KFold(n_splits=2, shuffle=True, random_state=3),
                    outer_cv=KFold(n_splits=2, shuffle=True, random_state=3),
                    eval_final_performance=True)

my_pipe += PipelineElement.create('SmoothImgs',
                                  {'fwhr': [[8, 8, 8], [12, 12, 12]]})
my_pipe += PipelineElement.create('ResampleImgs', {'voxel_size': [[5, 5, 5]]})

atlas_info = AtlasInfo(atlas_name='mni_icbm152_t1_tal_nlin_sym_09a_mask',
                       mask_threshold=.5, roi_names='all',
                       extraction_mode='vec')
# atlas_info = AtlasInfo(atlas_name='AAL', roi_names='all', extraction_mode='box')
my_pipe += PipelineElement.create('BrainAtlas', {},
                                  atlas_info_object=atlas_info)

# my_pipe += PipelineElement('atlas_stacker',
#                            AtlasStacker(atlas_info, [['SVR', {'kernel': ['rbf', 'linear']}, {}]]),
#                            {})

my_pipe += PipelineElement.create('SVR', {'kernel': ['linear']})
print(np.sum(y) / len(y))

from pymodm import connect
connect("mongodb://localhost:27017/photon_db")

# BUILD PIPELINE
manager = Hyperpipe('test_manager',
                    optimizer='timeboxed_random_grid_search',
                    optimizer_params={'limit_in_minutes': 1},
                    outer_cv=ShuffleSplit(test_size=0.2, n_splits=3),
                    inner_cv=KFold(n_splits=10, shuffle=True),
                    best_config_metric='accuracy',
                    metrics=['accuracy', 'precision', 'recall', 'f1_score'],
                    logging=False,
                    eval_final_performance=True,
                    calculate_metrics_across_folds=True,
                    verbose=2)

manager.add(PipelineElement.create('standard_scaler', test_disabled=True))
manager += PipelineElement.create('pca',
                                  hyperparameters={'n_components': [None, 1, 10000]})
# tmp_lasso = Lasso()
# manager.add(PipelineElement.create('SelectModelWrapper', estimator_obj=tmp_lasso))
svm = PipelineElement.create('svc', hyperparameters={'C': [0.5, 1],
                                                     'kernel': ['linear']})
manager.add(svm)

manager.fit(X, y)

# -----------> Result Tree generated -------------------
# result_tree = manager.result_tree
# result_tree.write_to_db()

# THE END
debugging = True