예제 #1
0
    def setUp(self):
        """Build an outer hyperpipe that stacks an inner hyperpipe.

        The inner pipe reuses the outer pipe's optimizer, contains a single
        'test_wrapper' element and is wrapped in a PipelineStacking element.
        The outer pipe holds its own 'test_wrapper' element followed by the
        stacking element. Both pipes are switched to ``debug_cv_mode``.
        """
        # Fixture data: the integers 1..100 and an all-ones target vector.
        self.X = np.arange(1, 101)
        self.y = np.ones((100, ))

        outer = Hyperpipe('outer_pipe', KFold(n_splits=2))

        # Inner pipeline, sharing the optimizer of the outer one.
        inner = Hyperpipe('inner_pipe',
                          KFold(n_splits=2),
                          optimizer=outer.optimizer,
                          local_search=False)
        inner_element = PipelineElement.create('test_wrapper')
        inner += inner_element
        fusion = PipelineStacking('fusion_element', [inner])

        # Outer pipeline: own test element first, then the stacked inner pipe.
        outer_element = PipelineElement.create('test_wrapper')
        outer += outer_element
        outer += fusion

        inner.debug_cv_mode = True
        outer.debug_cv_mode = True

        # Publish everything the tests access through ``self``.
        self.outer_hyperpipe = outer
        self.inner_hyperpipe = inner
        self.inner_pipeline_test_element = inner_element
        self.pipeline_fusion = fusion
        self.outer_pipeline_test_element = outer_element
예제 #2
0
 def setUp(self):
     """Create the pca and svc elements, a 3-fold CV and the hyperpipe.

     The pca element is created with ``test_disabled=True``; it is attached
     to the hyperpipe via ``+=`` and the svc element via ``add``.
     """
     pca_grid = {'n_components': [1, 2]}
     svc_grid = {'C': [0.1, 1], 'kernel': ['rbf', 'sigmoid']}

     self.pca_pipe_element = PipelineElement.create(
         'pca', pca_grid, test_disabled=True)
     self.svc_pipe_element = PipelineElement.create('svc', svc_grid)

     self.cv_object = KFold(n_splits=3)
     self.hyperpipe = Hyperpipe('god', self.cv_object)

     # Both attachment styles are used on purpose: ``+=`` and ``add``.
     self.hyperpipe += self.pca_pipe_element
     self.hyperpipe.add(self.svc_pipe_element)
예제 #3
0
    def testCaseA(self):
        """Check Hyperpipe grid search against a hand-written sklearn loop.

        A ``standard_scaler -> pca -> svc`` Hyperpipe is fitted with
        ``grid_search`` and a 2-fold inner CV. The same hyperparameter grid
        is then evaluated manually with scikit-learn, and the per-fold train
        and test accuracies of both runs are asserted to be equal.
        """
        pca_n_components = [2, 5]
        svc_c = [.1, 1, 5]
        svc_kernel = ['rbf', 'linear']

        # SET UP HYPERPIPE
        # The KFold splitters are unshuffled, so no random_state is passed:
        # without shuffle=True the seed never influenced the splits, and
        # scikit-learn >= 0.24 raises a ValueError for that combination.
        my_pipe = Hyperpipe('primary_pipe',
                            optimizer='grid_search',
                            optimizer_params={},
                            metrics=['accuracy', 'precision', 'f1_score'],
                            inner_cv=KFold(n_splits=2),
                            eval_final_performance=False)

        my_pipe += PipelineElement.create('standard_scaler')
        my_pipe += PipelineElement.create('pca',
                                          {'n_components': pca_n_components})
        my_pipe += PipelineElement.create('svc', {
            'C': svc_c,
            'kernel': svc_kernel
        })

        # START HYPERPARAMETER SEARCH
        my_pipe.fit(self.__X, self.__y)
        print(my_pipe._test_performances)

        # Flatten the per-configuration fold accuracies recorded by the pipe.
        pipe_results = {'train': [], 'test': []}
        for history_entry in my_pipe._performance_history_list:
            accuracy_folds = history_entry['accuracy_folds']
            pipe_results['train'].extend(accuracy_folds['train'])
            pipe_results['test'].extend(accuracy_folds['test'])

        print('\n\n')
        print('Running sklearn version...')
        cv_inner_1 = KFold(n_splits=2)

        sk_results = {'train': [], 'test': []}

        # The loop nesting (n_components, C, kernel) fixes the order of the
        # collected scores; the final assertEqual only holds if the pipe
        # enumerates its grid in the same order.
        for n_comp in pca_n_components:
            for c in svc_c:
                for current_kernel in svc_kernel:
                    tr_acc = []
                    val_acc = []
                    for train_2, val_1 in cv_inner_1.split(self.__X):

                        data_train_2 = self.__X[train_2]
                        print(data_train_2.shape)
                        data_val_1 = self.__X[val_1]
                        y_train_2 = self.__y[train_2]
                        y_val_1 = self.__y[val_1]

                        # Scale with statistics from the training fold only.
                        my_scaler = StandardScaler()
                        my_scaler.fit(data_train_2)
                        data_train_2 = my_scaler.transform(data_train_2)
                        data_val_1 = my_scaler.transform(data_val_1)

                        # Run PCA
                        my_pca = PCA(n_components=n_comp)
                        my_pca.fit(data_train_2)
                        data_tr_2_pca = my_pca.transform(data_train_2)
                        data_val_1_pca = my_pca.transform(data_val_1)

                        # Run SVC
                        my_svc = SVC(kernel=current_kernel, C=c)
                        my_svc.fit(data_tr_2_pca, y_train_2)

                        tr_acc.append(my_svc.score(data_tr_2_pca, y_train_2))
                        val_acc.append(my_svc.score(data_val_1_pca, y_val_1))
                        print('n_components: ', n_comp, 'kernel:',
                              current_kernel, 'c:', c)
                        print('Training 2:', tr_acc[-1], 'validation 1:',
                              val_acc[-1])

                    sk_results['train'].extend(tr_acc)
                    sk_results['test'].extend(val_acc)

        print('\nCompare results of last iteration (outer cv)...')
        print('SkL  Train:', sk_results['train'])
        print('Pipe Train:', pipe_results['train'])
        print('SkL  test: ', sk_results['test'])
        print('Pipe test: ', pipe_results['test'])

        self.assertEqual(sk_results['test'], pipe_results['test'])
        self.assertEqual(sk_results['train'], pipe_results['train'])
예제 #4
0
    def testCaseA(self):
        """Run a nested-CV Hyperpipe and a matching hand-written sklearn loop.

        The Hyperpipe (``standard_scaler -> pca -> svc``) is fitted with a
        2-fold inner and a 2-fold outer CV, and its result tree is exported to
        "test_case_A2.csv". The same grid is then evaluated manually with
        scikit-learn; ``sk_results`` is reset for every outer fold, so it
        holds the inner-fold accuracies of the last outer fold only.
        """
        pca_n_components = [2, 5]
        svc_c = [.1, 1]
        svc_kernel = ['rbf']

        # SET UP HYPERPIPE
        # The KFold splitters are unshuffled, so no random_state is passed:
        # without shuffle=True the seed never influenced the splits, and
        # scikit-learn >= 0.24 raises a ValueError for that combination.
        my_pipe = Hyperpipe('primary_pipe', optimizer='grid_search',
                            optimizer_params={},
                            inner_cv=KFold(n_splits=2),
                            outer_cv=KFold(n_splits=2),
                            verbose=2, eval_final_performance=True)

        my_pipe += PipelineElement.create('standard_scaler')
        my_pipe += PipelineElement.create('pca', {'n_components': pca_n_components})
        my_pipe += PipelineElement.create('svc', {'C': svc_c, 'kernel': svc_kernel})

        # START HYPERPARAMETER SEARCH
        my_pipe.fit(self.__X, self.__y)
        from Framework import LogExtractor
        log_ex = LogExtractor.LogExtractor(my_pipe.result_tree)
        log_ex.extract_csv("test_case_A2.csv")

        # NOTE(review): ``pipe_results`` is never assigned in this method --
        # the extraction below is commented out -- so the comparison at the
        # end raises NameError. Restore it (confirming the attribute name and
        # that this pipe records 'accuracy_folds' without an explicit
        # ``metrics`` argument) or drop the final comparison.
        # print(my_pipe.test_performances)
        # pipe_results = {'train': [], 'test': []}
        # for i in range(len(my_pipe.performance_history_list)):
        #     pipe_results['train'].extend(
        #         my_pipe.performance_history_list[i]['accuracy_folds']['train'])
        #     pipe_results['test'].extend(
        #         my_pipe.performance_history_list[i]['accuracy_folds']['test'])

        print('\n\n')
        print('Running sklearn version...')
        cv_outer = KFold(n_splits=2)
        cv_inner_1 = KFold(n_splits=2)

        # Only the outer training part is used; the outer test indices are
        # not evaluated by this manual loop.
        for train_1, _ in cv_outer.split(self.__X):
            data_train_1 = self.__X[train_1]
            y_train_1 = self.__y[train_1]
            sk_results = {'train': [], 'test': []}

            for n_comp in pca_n_components:
                for current_kernel in svc_kernel:
                    for c in svc_c:
                        tr_acc = []
                        val_acc = []

                        for train_2, val_1 in cv_inner_1.split(
                                data_train_1):
                            data_train_2 = data_train_1[train_2]
                            data_val_1 = data_train_1[val_1]
                            y_train_2 = y_train_1[train_2]
                            y_val_1 = y_train_1[val_1]

                            # Scale with statistics from the training fold.
                            my_scaler = StandardScaler()
                            my_scaler.fit(data_train_2)
                            data_train_2 = my_scaler.transform(data_train_2)
                            data_val_1 = my_scaler.transform(data_val_1)

                            # Run PCA
                            my_pca = PCA(n_components=n_comp)
                            my_pca.fit(data_train_2)
                            data_tr_2_pca = my_pca.transform(data_train_2)
                            data_val_1_pca = my_pca.transform(data_val_1)

                            # Run SVC
                            my_svc = SVC(kernel=current_kernel, C=c)
                            my_svc.fit(data_tr_2_pca, y_train_2)

                            tr_acc.append(my_svc.score(data_tr_2_pca, y_train_2))
                            val_acc.append(my_svc.score(data_val_1_pca, y_val_1))
                            print('n_components: ', n_comp, 'kernel:',
                                  current_kernel, 'c:', c)
                            print('Training 2:', tr_acc[-1],
                                  'validation 1:', val_acc[-1])

                        sk_results['train'].extend(tr_acc)
                        sk_results['test'].extend(val_acc)

        print('\nCompare results of last iteration (outer cv)...')
        print('SkL  Train:', sk_results['train'])
        print('Pipe Train:', pipe_results['train'])
        print('SkL  test: ', sk_results['test'])
        print('Pipe test: ', pipe_results['test'])

        self.assertEqual(sk_results['test'], pipe_results['test'])
        self.assertEqual(sk_results['train'], pipe_results['train'])
예제 #5
0
    def testCaseB(self):
        """Compare a stacked (nested) Hyperpipe with a manual sklearn loop.

        An inner pipe (``standard_scaler -> ae_pca``) is wrapped in a
        PipelineStacking element and followed by an SVC in the outer pipe.
        The manual part below re-runs the same three CV levels (outer,
        inner 1, inner 2) with ShuffleSplit objects that are shared with the
        pipes, then asserts that the inner-1 fold accuracies and the final
        test accuracy match what the outer pipe recorded.
        """
        pca_n_components = [7, 15, 10]
        svc_c = [.1, 1]
        #svc_kernel = ['rbf']
        svc_kernel = ['rbf', 'linear']
        # The same splitter instances are passed to the pipes and reused in
        # the manual loops further down.
        cv_outer = ShuffleSplit(n_splits=1, test_size=0.2, random_state=3)
        cv_inner_1 = ShuffleSplit(n_splits=1, test_size=0.2, random_state=3)
        cv_inner_2 = ShuffleSplit(n_splits=1, test_size=0.2, random_state=3)

        # SET UP HYPERPIPE
        outer_pipe = Hyperpipe('outer_pipe',
                               optimizer='grid_search',
                               metrics=['accuracy'],
                               inner_cv=cv_inner_1,
                               outer_cv=cv_outer,
                               eval_final_performance=True)
        inner_pipe = Hyperpipe('pca_pipe',
                               optimizer='grid_search',
                               inner_cv=cv_inner_2,
                               eval_final_performance=False)

        inner_pipe.add(PipelineElement.create('standard_scaler'))
        inner_pipe.add(
            PipelineElement.create('ae_pca',
                                   {'n_components': pca_n_components}))

        pipeline_fusion = PipelineStacking('fusion_element', [inner_pipe])

        outer_pipe.add(pipeline_fusion)
        outer_pipe.add(
            PipelineElement.create('svc', {
                'C': svc_c,
                'kernel': svc_kernel
            }))

        # START HYPERPARAMETER SEARCH
        outer_pipe.fit(self.__X, self.__y)
        print(outer_pipe._test_performances)
        # Flatten the fold accuracies recorded for every tested configuration.
        pipe_results = {'train': [], 'test': []}
        for i in range(len(outer_pipe._performance_history_list)):
            pipe_results['train'].extend(
                outer_pipe._performance_history_list[i]['accuracy_folds']
                ['train'])
            pipe_results['test'].extend(outer_pipe._performance_history_list[i]
                                        ['accuracy_folds']['test'])

        print(outer_pipe._test_performances['accuracy'])

        print('\n\n')
        print('Running sklearn version...\n')
        # Train/test accuracy of the refitted best configuration, one entry
        # per outer split.
        opt_tr_acc = []
        opt_test_acc = []

        # Level 1: outer split into train_1 / test.
        for train_1, test in cv_outer.split(self.__X):
            data_train_1 = self.__X[train_1]
            data_test = self.__X[test]
            y_train_1 = self.__y[train_1]
            y_test = self.__y[test]
            # SVC configurations in the order they are evaluated; indexed
            # later by the argmax over 'val_1_mean'.
            config_inner_1 = {'C': [], 'kernel': []}
            sk_results_inner1 = {
                'train_2': [],
                'val_1': [],
                'train_2_mean': [],
                'val_1_mean': []
            }
            print('Outer Split')
            print('n train_1:', data_train_1.shape[0], '\n')

            for c in svc_c:
                for current_kernel in svc_kernel:
                    config_inner_1['C'].extend([c])
                    config_inner_1['kernel'].extend([current_kernel])

                    print('C:', c, 'Kernel:', current_kernel, '\n')
                    svc_score_tr = []
                    svc_score_te = []
                    fold_cnt = 1
                    # Level 2: split train_1 into train_2 / val_1.
                    for train_2, val_1 in cv_inner_1.split(data_train_1):
                        print('\n\nSklearn Outer Pipe FoldMetrics', fold_cnt)

                        data_train_2 = data_train_1[train_2]
                        data_val_1 = data_train_1[val_1]
                        y_train_2 = y_train_1[train_2]
                        y_val_1 = y_train_1[val_1]
                        print('n train_2:', data_train_2.shape[0], '\n')

                        # PCA configurations in evaluation order; indexed
                        # later by best_config_id.
                        config_inner_2 = {'n_comp': []}
                        print('Sklearn PCA Pipe')
                        sk_results_inner2 = {
                            'train_3': [],
                            'val_2': [],
                            'train_3_mean': [],
                            'val_2_mean': []
                        }
                        for n_comp in pca_n_components:
                            config_inner_2['n_comp'].extend([n_comp])

                            tr_acc = []
                            val_acc = []

                            # print('Some training data:',
                            #       data_train_2[0:2, 0:2])
                            # Level 3: split train_2 into train_3 / val_2.
                            for train_3, val_2 in cv_inner_2.split(
                                    data_train_2):

                                data_train_3 = data_train_2[train_3]
                                data_val_2 = data_train_2[val_2]

                                my_scaler = StandardScaler()
                                my_scaler.fit(data_train_3)
                                data_train_3 = my_scaler.transform(
                                    data_train_3)
                                data_val_2 = my_scaler.transform(data_val_2)

                                # Run PCA
                                my_pca = PCA_AE_Wrapper(n_components=n_comp)
                                my_pca.fit(data_train_3)

                                # presumably an error measure (the names
                                # suggest mean absolute error) -- TODO
                                # confirm against PCA_AE_Wrapper.score
                                mae_tr = my_pca.score(data_train_3)
                                mae_te = my_pca.score(data_val_2)

                                tr_acc.append(mae_tr)
                                val_acc.append(mae_te)

                            sk_results_inner2['train_3'].extend(tr_acc)
                            sk_results_inner2['val_2'].extend(val_acc)
                            sk_results_inner2['train_3_mean'].extend(
                                [np.mean(tr_acc)])
                            sk_results_inner2['val_2_mean'].extend(
                                [np.mean(val_acc)])

                            print('n_comp:', n_comp)
                            # data_train_3 still refers to the last level-3
                            # fold processed above.
                            print('n train_3 fold 1:', data_train_3.shape[0])
                            print('Training 3 mean:', [np.mean(tr_acc)],
                                  'validation 2 mean:', [np.mean(val_acc)])
                        # find best config for val 2
                        # argmin: the PCA config with the lowest mean
                        # validation score is selected.
                        best_config_id = np.argmin(
                            sk_results_inner2['val_2_mean'])
                        print('Best PCA config:',
                              config_inner_2['n_comp'][best_config_id], '\n')
                        # fit optimum pipe

                        my_scaler = StandardScaler()
                        my_scaler.fit(data_train_2)
                        data_train_2 = my_scaler.transform(data_train_2)
                        data_val_1 = my_scaler.transform(data_val_1)

                        # Run PCA
                        my_pca = PCA_AE_Wrapper(
                            n_components=config_inner_2['n_comp']
                            [best_config_id])
                        my_pca.fit(data_train_2)
                        data_tr_2_pca = my_pca.transform(data_train_2)
                        data_val_1_pca = my_pca.transform(data_val_1)

                        # Run SVC
                        my_svc = SVC(kernel=current_kernel, C=c)
                        my_svc.fit(data_tr_2_pca, y_train_2)
                        svc_score_tr.append(
                            my_svc.score(data_tr_2_pca, y_train_2))
                        svc_score_te.append(
                            my_svc.score(data_val_1_pca, y_val_1))
                        print('Fit Optimum PCA Config and train with SVC')
                        print('n train 2:', data_train_2.shape[0])
                        print('n_comp:',
                              config_inner_2['n_comp'][best_config_id])
                        print('SVC Train:', svc_score_tr[-1])
                        print('SVC test:', svc_score_te[-1], '\n\n')
                        sk_results_inner1['train_2'].append(svc_score_tr[-1])
                        sk_results_inner1['val_1'].append(svc_score_te[-1])
                        fold_cnt += 1
                    sk_results_inner1['train_2_mean'].append(
                        np.mean(svc_score_tr))
                    sk_results_inner1['val_1_mean'].append(
                        np.mean(svc_score_te))

            print('\nNow find best config for SVC...')
            # argmax: the SVC config with the highest mean validation
            # accuracy is selected.
            best_config_id_inner_1 = np.argmax(sk_results_inner1['val_1_mean'])
            print('Some test data:')
            print(data_test.shape)
            print(data_test[0:2, 0:2])

            # fit optimum pipe
            my_scaler = StandardScaler()
            my_scaler.fit(data_train_1)
            data_train_1 = my_scaler.transform(data_train_1)
            data_test = my_scaler.transform(data_test)

            # Run PCA
            # NOTE(review): best_config_id and config_inner_2 are the values
            # left over from the last level-2 fold of the last (C, kernel)
            # combination, not a PCA choice made on train_1 -- confirm that
            # reusing them here is intended.
            my_pca = PCA_AE_Wrapper(
                n_components=config_inner_2['n_comp'][best_config_id])
            my_pca.fit(data_train_1)
            data_tr_1_pca = my_pca.transform(data_train_1)
            data_test_pca = my_pca.transform(data_test)

            # Run SVC
            my_svc = SVC(
                kernel=config_inner_1['kernel'][best_config_id_inner_1],
                C=config_inner_1['C'][best_config_id_inner_1])
            print('Best overall config:...')
            print('C = ', config_inner_1['C'][best_config_id_inner_1])
            print('kernel=', config_inner_1['kernel'][best_config_id_inner_1])
            print('pca_n_comp=', config_inner_2['n_comp'][best_config_id])
            print('n train 1:', data_train_1.shape[0])
            my_svc.fit(data_tr_1_pca, y_train_1)

            opt_tr_acc.append(my_svc.score(data_tr_1_pca, y_train_1))
            opt_test_acc.append(my_svc.score(data_test_pca, y_test))
            print('Train Acc:', opt_tr_acc[-1])
            print('test Acc:', opt_test_acc[-1])

        # sk_results_inner1 is rebuilt per outer split, so at this point it
        # holds the results of the last outer split only.
        print('\nCompare results of last iteration (outer cv)...')
        print('SkL  Train:', sk_results_inner1['train_2'])
        print('Pipe Train:', pipe_results['train'])
        print('SkL  test: ', sk_results_inner1['val_1'])
        print('Pipe test: ', pipe_results['test'])
        print('\nEval final performance:')
        print('Pipe final perf:', outer_pipe._test_performances['accuracy'])
        print('Sklearn final perf:', opt_test_acc)
        self.assertEqual(sk_results_inner1['train_2'], pipe_results['train'])
        self.assertEqual(sk_results_inner1['val_1'], pipe_results['test'])
        self.assertEqual(opt_test_acc,
                         outer_pipe._test_performances['accuracy'])
예제 #6
0
    def testCaseC2(self):
        """Fit an outer pipe that stacks three per-source pipes before a kdnn.

        Each source pipe selects a block of ten columns via 'SourceSplitter'
        (0-9, 10-19, 20-29) and runs ``pca -> svc`` on it. The three pipes are
        combined in a PipelineStacking element, followed by a 'kdnn' element
        in the outer pipe. After fitting, the fold accuracies of the first
        half of the performance history are collected and the test accuracy
        is printed; nothing is asserted.
        """
        pca_n_components = [5, 10]
        svc_c = [0.1]
        svc_c_2 = [1]  # only referenced by a disabled outer-svc variant
        svc_kernel = ['linear']

        def single_shuffle_split():
            # Every CV in this test is a fresh one-split 80/20 ShuffleSplit.
            return ShuffleSplit(n_splits=1, test_size=0.2, random_state=3)

        # SET UP HYPERPIPE
        outer_pipe = Hyperpipe('outer_pipe',
                               optimizer='grid_search',
                               metrics=['accuracy'],
                               inner_cv=single_shuffle_split(),
                               outer_cv=single_shuffle_split(),
                               eval_final_performance=True)

        # One pipe per data source: (pipe name, first column, stop column).
        source_layout = [
            ('source_1', 0, 10),
            ('source_2', 10, 20),
            ('source_3', 20, 30),
        ]
        source_pipes = []
        for pipe_name, col_start, col_stop in source_layout:
            source_pipe = Hyperpipe(pipe_name,
                                    optimizer='grid_search',
                                    inner_cv=single_shuffle_split(),
                                    eval_final_performance=False)
            splitter_grid = {
                'column_indices': [np.arange(col_start, col_stop)]
            }
            source_pipe.add(
                PipelineElement.create('SourceSplitter', splitter_grid))
            source_pipe.add(
                PipelineElement.create('pca',
                                       {'n_components': pca_n_components}))
            source_pipe.add(
                PipelineElement.create('svc', {
                    'C': svc_c,
                    'kernel': svc_kernel
                }))
            source_pipes.append(source_pipe)

        pipeline_fusion = PipelineStacking('multiple_source_pipes',
                                           source_pipes)
        outer_pipe.add(pipeline_fusion)

        kdnn_grid = {'target_dimension': [2], 'nb_epoch': [10]}
        outer_pipe.add(PipelineElement.create('kdnn', kdnn_grid))

        # START HYPERPARAMETER SEARCH
        outer_pipe.fit(self.__X, self.__y)
        print(outer_pipe._test_performances)

        # Only the first half of the recorded history is collected.
        pipe_results = {'train': [], 'test': []}
        n_entries = int(len(outer_pipe._performance_history_list) / 2)
        for i in range(n_entries):
            fold_scores = (
                outer_pipe._performance_history_list[i]['accuracy_folds'])
            pipe_results['train'].extend(fold_scores['train'])
            pipe_results['test'].extend(fold_scores['test'])

        print(outer_pipe._test_performances['accuracy'])
예제 #7
0
    def testCaseA(self):
        """Compare Hyperpipe outer-fold metrics with a plain sklearn Pipeline.

        A single-configuration Hyperpipe (``standard_scaler -> pca -> svc``)
        is fitted with 3-fold inner and outer CV and its result tree is
        exported to "test_case_A.csv". For every outer split produced by the
        pipe, an equivalent scikit-learn Pipeline is fitted on the outer
        training data and its accuracy, precision and f1 score on the outer
        test data are compared with the pipe's recorded test performances.
        """
        pca_n_components = 10
        svc_c = 1
        svc_kernel = "rbf"
        # SET UP HYPERPIPE
        my_pipe = Hyperpipe('primary_pipe', optimizer='grid_search', optimizer_params={},
                            metrics=['accuracy', 'precision', 'f1_score'],
                            inner_cv=KFold(n_splits=3),
                            outer_cv=KFold(n_splits=3),
                            eval_final_performance=True)

        my_pipe += PipelineElement.create('standard_scaler')
        my_pipe += PipelineElement.create('pca', {'n_components': [pca_n_components]})
        my_pipe += PipelineElement.create('svc', {'C': [svc_c], 'kernel': [svc_kernel]})

        # START HYPERPARAMETER SEARCH
        my_pipe.fit(self.__X, self.__y)
        print(my_pipe._test_performances)
        from Framework import LogExtractor
        log_ex = LogExtractor.LogExtractor(my_pipe.result_tree)
        log_ex.extract_csv("test_case_A.csv")

        # TODO: these function-level imports still have to be removed.
        from sklearn.preprocessing import StandardScaler
        from sklearn.decomposition import PCA
        from sklearn.svm import SVC
        from sklearn.pipeline import Pipeline
        from sklearn.metrics import f1_score, accuracy_score, precision_score

        # Now we are using the native Scikit-learn methods
        sk_pipeline = Pipeline([("standard_scaler", StandardScaler()), ("pca", PCA(n_components=pca_n_components)),
                               ("svc", SVC(C=svc_c, kernel=svc_kernel))])

        # Reuse the outer splits of the pipe so both sides see the same data.
        my_pipe._generate_outer_cv_indices()
        tmp_counter = 0
        for train_idx_arr, test_idx_arr in my_pipe.data_test_cases:

            sk_results = {'accuracy': [], 'precision': [], 'f1_score': [], 'default': []}

            outer_train_X = self.__X[train_idx_arr]
            outer_train_y = self.__y[train_idx_arr]
            outer_test_X = self.__X[test_idx_arr]
            outer_test_y = self.__y[test_idx_arr]

            sk_config_cv = KFold(n_splits=3)
            # Todo: test other configs and select best!
            # The inner indices are relative to outer_train_X and are handed
            # straight to _fit_and_score; only the result of the last inner
            # split survives the loop.
            # NOTE(review): _fit_and_score is a private scikit-learn helper
            # whose keyword arguments and return type depend on the installed
            # version -- confirm before upgrading scikit-learn.
            for sub_train_idx, sub_test_idx in sk_config_cv.split(outer_train_X, outer_train_y):
                fit_and_predict_score = _fit_and_score(sk_pipeline, outer_train_X, outer_train_y, self.score,
                                                       sub_train_idx, sub_test_idx, verbose=0, parameters={},
                                                       fit_params={},
                                                       return_train_score=True,
                                                       return_n_test_samples=True,
                                                       return_times=True, return_parameters=True,
                                                       error_score='raise')

            # Refit on the full outer training data, predict the outer test.
            sk_pipeline.fit(outer_train_X, outer_train_y)
            sk_prediction = sk_pipeline.predict(outer_test_X)

            sk_results['default'].append(fit_and_predict_score[1])
            sk_results['accuracy'].append(accuracy_score(outer_test_y, sk_prediction))
            sk_results['precision'].append(precision_score(outer_test_y, sk_prediction))
            sk_results['f1_score'].append(f1_score(outer_test_y, sk_prediction))

            # Each sk_results entry is a one-element list; presumably the
            # per-fold entries of _test_performances have the same shape --
            # TODO confirm.
            self.assertEqual(sk_results['accuracy'], my_pipe._test_performances['accuracy'][tmp_counter])
            self.assertEqual(sk_results['precision'], my_pipe._test_performances['precision'][tmp_counter])
            self.assertEqual(sk_results['f1_score'], my_pipe._test_performances['f1_score'][tmp_counter])

            tmp_counter += 1
예제 #8
0
"""
Test Feature Selection
"""

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import KFold

from Framework.PhotonBase import Hyperpipe, PipelineElement

# Load the breast cancer data set and split it into features and target.
dataset = load_breast_cancer()
X, y = dataset.data, dataset.target

# Cross-validation object: three shuffled folds with a fixed seed.
cv_object = KFold(n_splits=3, shuffle=True, random_state=0)

# Hyperpipe optimised with a random grid search.
manager = Hyperpipe('god', cv_object, optimizer='random_grid_search')

# Feature selection step; created with test_disabled=True.
percentile_grid = {'percentile': [10, 20, 30, 100]}
manager += PipelineElement.create(
    'f_classif_select_percentile', percentile_grid, test_disabled=True)

# SVC with a fixed linear kernel and an empty hyperparameter grid.
manager += PipelineElement.create('svc', {}, kernel='linear')

manager.fit(X, y)
예제 #9
0
# Inputs: gray matter maps; target: the 'age' variable cast to float.
dataset_files = oasis_dataset.gray_matter_maps
targets = oasis_dataset.ext_vars['age'].astype(float)

# (The sklearn breast cancer data set was used here as an alternative input
# during development.)

print(BrainAtlas._getAtlasDict())

# Set up the Hyperpipe: grid search with 2-fold shuffled inner and outer CV.
hyperpipe_settings = dict(
    optimizer='grid_search',
    optimizer_params={},
    metrics=['mean_squared_error', 'mean_absolute_error'],
    inner_cv=KFold(n_splits=2, shuffle=True, random_state=3),
    outer_cv=KFold(n_splits=2, shuffle=True, random_state=3),
    eval_final_performance=True,
)
my_pipe = Hyperpipe('primary_pipe', **hyperpipe_settings)

# Image preprocessing steps and their hyperparameter grids.
smoothing_grid = {'fwhr': [[8, 8, 8], [12, 12, 12]]}
resampling_grid = {'voxel_size': [[5, 5, 5]]}
my_pipe += PipelineElement.create('SmoothImgs', smoothing_grid)
my_pipe += PipelineElement.create('ResampleImgs', resampling_grid)

# Atlas-based extraction; an 'AAL' atlas with extraction_mode='box' was the
# alternative configuration tried here.
atlas_info = AtlasInfo(
    atlas_name='mni_icbm152_t1_tal_nlin_sym_09a_mask',
    mask_threshold=.5,
    roi_names='all',
    extraction_mode='vec',
)
my_pipe += PipelineElement.create(
    'BrainAtlas', {}, atlas_info_object=atlas_info)
예제 #10
0
#  -----------> calculate something ------------------- #

# LOAD DATA
dataset = load_breast_cancer()
X, y = dataset.data, dataset.target
# Sum of the targets divided by the number of samples.
print(np.sum(y)/len(y))

# Connect to the local MongoDB instance before the pipeline is built.
from pymodm import connect
connect("mongodb://localhost:27017/photon_db")

# BUILD PIPELINE
manager_settings = dict(
    optimizer='timeboxed_random_grid_search',
    optimizer_params={'limit_in_minutes': 1},
    outer_cv=ShuffleSplit(test_size=0.2, n_splits=3),
    inner_cv=KFold(n_splits=10, shuffle=True),
    best_config_metric='accuracy',
    metrics=['accuracy', 'precision', 'recall', "f1_score"],
    logging=False,
    eval_final_performance=True,
    calculate_metrics_across_folds=True,
    verbose=2,
)
manager = Hyperpipe('test_manager', **manager_settings)

manager.add(PipelineElement.create('standard_scaler', test_disabled=True))

pca_grid = {'n_components': [None, 1, 10000]}
manager += PipelineElement.create('pca', hyperparameters=pca_grid)
# A Lasso-based 'SelectModelWrapper' step was tried here and is disabled.

svc_grid = {'C': [0.5, 1], 'kernel': ['linear']}
svm = PipelineElement.create('svc', hyperparameters=svc_grid)
manager.add(svm)
manager.fit(X, y)

#  -----------> Result Tree generated ------------------- #
result_tree = manager.result_tree