예제 #1
0
    def test_adjusted_delegate_call_transformer(self):
        """Exercise transform() on base elements with different input needs.

        Covers a plain transformer, one that needs y, one that needs
        covariates, and one that needs both covariates and y.
        """
        # plain transformer: only X should come back transformed
        element = PipelineElement.create('Transformer',
                                         base_element=DummyTransformer(),
                                         hyperparameters={})
        X_out, y_out, kwargs_out = element.transform(self.X, self.y,
                                                     **self.kwargs)
        self.assertTrue(np.array_equal(X_out, self.Xt))
        self.assertTrue(np.array_equal(y_out, self.y))
        self.assertDictEqual(kwargs_out, self.kwargs)

        # transformer that needs y: first with kwargs, then without any;
        # in both cases the kwargs must come back exactly as passed in
        for call_kwargs in (self.kwargs, {}):
            element = PipelineElement.create(
                'NeedsYTransformer',
                base_element=DummyNeedsYTransformer(),
                hyperparameters={})
            X_out, y_out, kwargs_out = element.transform(self.X, self.y,
                                                         **call_kwargs)
            self.assertTrue(np.array_equal(X_out, self.Xt))
            self.assertTrue(np.array_equal(y_out, self.yt))
            self.assertDictEqual(kwargs_out, call_kwargs)

        # transformer that needs covariates: called without y, so y stays None
        element = PipelineElement.create(
            'NeedsCovariatesTransformer',
            base_element=DummyNeedsCovariatesTransformer(),
            hyperparameters={})
        X_out, y_out, kwargs_out = element.transform(self.X, **self.kwargs)
        self.assertTrue(np.array_equal(X_out, self.Xt))
        self.assertTrue(
            np.array_equal(kwargs_out['covariates'],
                           self.kwargst['covariates']))
        self.assertEqual(y_out, None)

        # transformer that needs covariates and y: everything is transformed
        element = PipelineElement.create(
            'NeedsCovariatesAndYTransformer',
            base_element=DummyNeedsCovariatesAndYTransformer(),
            hyperparameters={})
        X_out, y_out, kwargs_out = element.transform(self.X, self.y,
                                                     **self.kwargs)
        self.assertTrue(np.array_equal(X_out, self.Xt))
        self.assertTrue(np.array_equal(y_out, self.yt))
        self.assertTrue(
            np.array_equal(kwargs_out['covariates'],
                           self.kwargst['covariates']))
예제 #2
0
    def test_no_y_transformers(self):
        """Adding a covariates-and-y transformer to a Stack must raise.

        The test expects NotImplementedError when an element wrapping
        DummyNeedsCovariatesAndYTransformer is added to a Stack via ``+=``.
        """
        stack = Stack("forbidden_stack")
        forbidden_element = PipelineElement.create(
            "dummy", DummyNeedsCovariatesAndYTransformer(), {})

        with self.assertRaises(NotImplementedError):
            stack += forbidden_element
예제 #3
0
    def test_neuro_hyperpipe_parallelized_batched_caching(self):
        """Fit a cached hyperpipe with a batched ParallelBranch.

        After fitting, the hyperpipe's cache folder is expected to contain
        no ``*.p`` files.
        """
        self.hyperpipe = Hyperpipe('complex_case',
                                   inner_cv=KFold(n_splits=5),
                                   outer_cv=KFold(n_splits=3),
                                   optimizer='grid_search',
                                   cache_folder=self.cache_folder_path,
                                   metrics=['mean_squared_error'],
                                   best_config_metric='mean_squared_error',
                                   output_settings=OutputSettings(
                                       project_folder=self.tmp_folder_path))

        # batched element inside a parallel branch
        nb = ParallelBranch("SubjectCaching", nr_of_processes=1)
        nb += PipelineElement.create("ResampleImages",
                                     StupidAdditionTransformer(),
                                     {'voxel_size': [3, 5, 10]},
                                     batch_size=4)
        self.hyperpipe += nb

        self.hyperpipe += PipelineElement("StandardScaler", {})
        self.hyperpipe += PipelineElement("PCA", {'n_components': [3, 4]})
        self.hyperpipe += PipelineElement("SVR", {'kernel': ['rbf', 'linear']})

        self.hyperpipe.fit(self.X, self.y)

        # assert cache is empty again; comparing the list itself (instead of
        # printing a count) makes a failure report name the leftover files
        leftover_pickles = glob.glob(
            os.path.join(self.hyperpipe.cache_folder, "*.p"))
        self.assertEqual(leftover_pickles, [])
예제 #4
0
    def generate_hyperpipes(self):
        """Build one inner Hyperpipe per ROI and bundle them in a Stack.

        Does nothing when ``self.atlas_info_object.roi_names_runtime`` is
        falsy. Otherwise the ROI names are stored in ``self.rois``; for every
        ROI a Hyperpipe is created, given a ``RoiFilterElement`` built from
        the ROI's index, and filled with one element per entry of
        ``self.hyperpipe_elements``. The resulting pipes are stored as a
        ``Stack`` in ``self.pipeline_fusion``.
        """
        if not self.atlas_info_object.roi_names_runtime:
            return

        self.rois = self.atlas_info_object.roi_names_runtime

        # keyed by ROI name; insertion order follows self.rois
        pipes_by_roi = {}
        for roi_index, roi_name in enumerate(self.rois):
            roi_pipe = Hyperpipe(
                self.atlas_name + '_' + str(roi_name),
                optimizer='grid_search',
                inner_cv=ShuffleSplit(n_splits=1, test_size=0.2, random_state=3),
                eval_final_performance=False,
                verbose=logging.verbosity_level,
                best_config_metric=self.best_config_metric,
                metrics=self.metrics)

            # the filter element goes first ...
            roi_pipe.filter_element = RoiFilterElement(roi_index)

            # ... then every configured element is appended in order
            for pipe_item in self.hyperpipe_elements:
                roi_pipe += PipelineElement.create(pipe_item[0], pipe_item[1],
                                                   **pipe_item[2])

            pipes_by_roi[roi_name] = roi_pipe

        self.pipeline_fusion = Stack('multiple_source_pipes',
                                     pipes_by_roi.values(),
                                     voting=False)
예제 #5
0
    def test_adjusted_delegate_call_estimator(self):
        """Exercise predict() on a plain estimator and one taking covariates."""
        # plain estimator: DummyEstimator returns X as its y predictions
        plain_estimator = PipelineElement.create(
            'Estimator',
            base_element=DummyEstimator(),
            hyperparameters={})
        predictions = plain_estimator.predict(self.X)
        self.assertTrue(np.array_equal(predictions, self.Xt))

        # estimator that needs covariates: kwargs are passed along to predict
        covariate_estimator = PipelineElement.create(
            'Estimator',
            base_element=DummyNeedsCovariatesEstimator(),
            hyperparameters={})
        predictions = covariate_estimator.predict(self.X, **self.kwargs)
        self.assertTrue(np.array_equal(predictions, self.Xt))
예제 #6
0
    def test_copy_me(self):
        """Check copy_me() on configured, fitted, fresh and custom elements."""
        original = PipelineElement('SVC', {
            'C': [0.1, 1],
            'kernel': ['rbf', 'sigmoid']
        })
        original.set_params(C=0.1, kernel='sigmoid')
        duplicate = original.copy_me()

        # same configuration, but a distinct base element object
        self.assertEqual(original.random_state, duplicate.random_state)
        self.assertNotEqual(duplicate.base_element, original.base_element)
        self.assertDictEqual(elements_to_dict(duplicate),
                             elements_to_dict(original))
        self.assertEqual(duplicate.base_element.C, original.base_element.C)

        # a copy taken from a fitted element must still match the earlier copy
        fitted = original.fit(self.X, self.y)
        copy_after_fit = fitted.copy_me()
        self.assertDictEqual(elements_to_dict(duplicate),
                             elements_to_dict(copy_after_fit))

        # fresh element: the hyperparameter grid is carried over with prefixed
        # keys, and changing the copy leaves the source untouched
        original = PipelineElement('SVC', {
            'C': [0.1, 1],
            'kernel': ['rbf', 'sigmoid']
        })
        duplicate = original.copy_me()
        expected_grid = {
            'SVC__C': [0.1, 1],
            'SVC__kernel': ['rbf', 'sigmoid']
        }
        self.assertDictEqual(duplicate.hyperparameters, expected_grid)
        duplicate.base_element.C = 3
        self.assertNotEqual(original.base_element.C, duplicate.base_element.C)

        # custom base element
        custom_element = PipelineElement.create(
            'CustomElement',
            base_element=DummyNeedsCovariatesEstimator(),
            hyperparameters={})
        duplicate = custom_element.copy_me()
        self.assertDictEqual(elements_to_dict(custom_element),
                             elements_to_dict(duplicate))

        # copying an element wrapping a GridSearchOptimizer is expected to fail
        uncopyable_element = PipelineElement.create(
            'MyUnDeepcopyableObject',
            base_element=GridSearchOptimizer(),
            hyperparameters={})
        with self.assertRaises(Exception):
            uncopyable_element.copy_me()
예제 #7
0
    def test_preprocessing(self):
        """Fit a hyperpipe with a Preprocessing step and check the stored y.

        After fitting, ``self.hyperpipe.data.y`` is expected to equal the
        original targets plus one.
        """
        preprocessing = Preprocessing()
        dummy_step = PipelineElement.create(
            "dummy", DummyYAndCovariatesTransformer(), {}
        )
        preprocessing += dummy_step

        self.hyperpipe += preprocessing
        self.hyperpipe.fit(self.__X, self.__y)

        expected_y = self.__y + 1
        self.assertTrue(np.array_equal(expected_y, self.hyperpipe.data.y))
예제 #8
0
    def test_predict_when_no_transform(self):
        """Call transform() on elements that wrap estimators.

        The returned X is compared against ``self.Xt`` and y is expected to
        stay None; covariates are expected to come back unchanged.
        """
        # plain estimator: DummyEstimator returns X as its y predictions
        plain_estimator = PipelineElement.create(
            'Estimator',
            base_element=DummyEstimator(),
            hyperparameters={})
        X_out, y_out, kwargs_out = plain_estimator.transform(self.X)
        self.assertTrue(np.array_equal(X_out, self.Xt))
        self.assertEqual(y_out, None)

        # estimator that needs covariates
        covariate_estimator = PipelineElement.create(
            'Estimator',
            base_element=DummyNeedsCovariatesEstimator(),
            hyperparameters={})
        X_out, y_out, kwargs_out = covariate_estimator.transform(
            self.X, **self.kwargs)
        self.assertTrue(np.array_equal(X_out, self.Xt))
        self.assertTrue(
            np.array_equal(kwargs_out['covariates'],
                           self.kwargs['covariates']))
        self.assertEqual(y_out, None)
예제 #9
0
    def test_estimator_type(self):
        """Check ``_estimator_type`` for registered and custom base elements.

        'SVC' must report 'classifier', 'SVR' 'regressor' and 'PCA' None.
        For the three dummy base elements below, reading the attribute is
        expected to raise NotImplementedError.
        """
        self.assertEqual(PipelineElement('SVC')._estimator_type, 'classifier')
        self.assertEqual(PipelineElement('SVR')._estimator_type, 'regressor')
        self.assertIsNone(PipelineElement('PCA')._estimator_type)

        unsupported_base_elements = (
            DummyEstimatorWrongType,
            DummyTransformerWithPredict,
            DummyEstimatorNoPredict,
        )
        for base_cls in unsupported_base_elements:
            # subTest reports every failing dummy instead of stopping at the first
            with self.subTest(base_element=base_cls.__name__):
                estimator = PipelineElement.create('Dummy', base_cls(), {})
                with self.assertRaises(NotImplementedError):
                    # only the attribute access matters; the value is discarded
                    _ = estimator._estimator_type
예제 #10
0
    def setUp(self):
        """Load the breast cancer data and build matching PHOTON and sklearn elements."""
        super(PipelineTests, self).setUp()
        dataset = load_breast_cancer(return_X_y=True)
        self.X, self.y = dataset

        # PHOTON pipeline elements
        self.p_pca = PipelineElement("PCA", {}, random_state=3)
        self.p_svm = PipelineElement("SVC", {}, random_state=3)
        self.p_ss = PipelineElement("StandardScaler", {})
        self.p_dt = PipelineElement("DecisionTreeClassifier", random_state=3)

        # custom element wrapping a dummy transformer
        self.dummy_photon_element = PipelineElement.create(
            "DummyTransformer", DummyYAndCovariatesTransformer(), {})

        # plain scikit-learn counterparts, configured with the same random_state
        self.sk_pca = PCA(random_state=3)
        self.sk_svc = SVC(random_state=3)
        self.sk_ss = StandardScaler()
        self.sk_dt = DecisionTreeClassifier(random_state=3)
예제 #11
0
    def test_single_subject_caching(self):
        """Check how many cache files a ParallelBranch writes per configuration.

        A new configuration is expected to add
        ``self.nr_of_expected_pickles_per_config`` ``*.p`` files to the cache
        folder, while repeating a known configuration is expected to add none.
        Cache files are removed afterwards even when an assertion fails.
        """
        nb = ParallelBranch("subject_caching_test")
        # increase complexity by adding batching
        nb += PipelineElement.create("ResampleImages",
                                     StupidAdditionTransformer(), {},
                                     batch_size=4)

        cache_folder = os.path.join(self.cache_folder_path,
                                    'subject_caching_test')
        nb.base_element.cache_folder = cache_folder

        def transform_and_check_folder(config, expected_nr_of_files):
            """Transform with ``config`` and count the ``*.p`` files in the cache."""
            nb.set_params(**config)
            nb.transform(self.X, self.y)
            cache_files = glob.glob(os.path.join(cache_folder, "*.p"))
            # assertEqual reports both numbers when the check fails
            self.assertEqual(len(cache_files), expected_nr_of_files)

        try:
            # first config: expect one batch of cache files
            transform_and_check_folder(
                {'ResampleImages__voxel_size': 5},
                self.nr_of_expected_pickles_per_config)

            # second config: expect twice that number in the cache
            transform_and_check_folder(
                {'ResampleImages__voxel_size': 10},
                2 * self.nr_of_expected_pickles_per_config)

            # first config again: nothing new should be generated, because
            # the cache files already exist
            transform_and_check_folder(
                {'ResampleImages__voxel_size': 5},
                2 * self.nr_of_expected_pickles_per_config)
        finally:
            # clean up even after a failed assertion, so no cache files are
            # left behind for later runs
            CacheManager.clear_cache_files(cache_folder)
            CacheManager.clear_cache_files(self.tmp_folder_path,
                                           force_all=True)
예제 #12
0
    def test_save_optimum_pipe(self):
        """Fit a stacked hyperpipe, copy the saved best model and reload it.

        Checks that the model file is written to the results folder, that a
        copy of it can be loaded from a different folder with its meta
        information, and that the loaded pipe predicts exactly like the
        in-memory optimum pipe.
        """
        # todo: test .save() of custom model
        tmp_path = os.path.join(self.tmp_folder_path, "optimum_pipypipe")
        settings = OutputSettings(project_folder=tmp_path, overwrite_results=True)

        my_pipe = Hyperpipe(
            "hyperpipe",
            optimizer="random_grid_search",
            optimizer_params={"n_configurations": 3},
            metrics=["accuracy", "precision", "recall"],
            best_config_metric="f1_score",
            outer_cv=KFold(n_splits=2),
            inner_cv=KFold(n_splits=2),
            verbosity=1,
            output_settings=settings,
        )

        preproc = Preprocessing()
        preproc += PipelineElement("StandardScaler")

        # branch with QuantileTransformer and DecisionTreeClassifier
        tree_qua_branch = Branch("tree_branch")
        tree_qua_branch += PipelineElement("QuantileTransformer")
        tree_qua_branch += PipelineElement(
            "DecisionTreeClassifier",
            {"min_samples_split": IntegerRange(2, 4)},
            criterion="gini",
        )

        # branch with MinMaxScaler and SVC
        svm_mima_branch = Branch("svm_branch")
        svm_mima_branch += PipelineElement("MinMaxScaler")
        svm_mima_branch += PipelineElement(
            "SVC", {"kernel": Categorical(["rbf", "linear"]), "C": 2.0}, gamma="auto"
        )

        # branch with a custom DummyTransformer and KNeighborsClassifier
        knn_sta_branch = Branch("neighbour_branch")
        knn_sta_branch += PipelineElement.create("dummy", DummyTransformer(), {})
        knn_sta_branch += PipelineElement("KNeighborsClassifier")

        my_pipe += preproc
        # all three branches go into one stack
        my_pipe += Stack(
            "final_stack", [tree_qua_branch, svm_mima_branch, knn_sta_branch]
        )

        my_pipe += PipelineElement("LogisticRegression", solver="lbfgs")

        my_pipe.fit(self.__X, self.__y)
        results_folder = my_pipe.output_settings.results_folder
        model_path = os.path.join(results_folder, "photon_best_model.photon")
        self.assertTrue(os.path.exists(model_path))

        # now copy the optimum pipe to a new folder
        test_folder = os.path.join(results_folder, "new_test_folder")
        new_model_path = os.path.join(test_folder, "photon_best_model.photon")
        # exist_ok: a folder left over from an earlier run must not abort the test
        os.makedirs(test_folder, exist_ok=True)
        shutil.copyfile(model_path, new_model_path)

        # check that load_optimum_pipe works on the copy
        # and that the meta information is recovered
        loaded_optimum_pipe = Hyperpipe.load_optimum_pipe(new_model_path)
        self.assertIsNotNone(loaded_optimum_pipe._meta_information)
        self.assertIsNotNone(loaded_optimum_pipe._meta_information["photon_version"])

        # check that predictions reliably stay the same
        y_pred_loaded = loaded_optimum_pipe.predict(self.__X)
        y_pred = my_pipe.optimum_pipe.predict(self.__X)
        np.testing.assert_array_equal(y_pred_loaded, y_pred)
예제 #13
0
    def test_combi_from_single_and_group_caching(self):
        """Check cache file counts when a cached pipeline contains a cached branch.

        The pipeline caches into ``self.cache_folder_path`` and the
        ParallelBranch into a ``subject_caching_test`` sub folder. After each
        fit the number of ``*.p`` files in both folders is compared against
        the expected counts. Both folders are cleared before the test and
        again afterwards, even when an assertion fails.
        """
        # 1. specify cache directories and start from a clean state
        cache_folder_base = self.cache_folder_path
        cache_folder_neuro = os.path.join(cache_folder_base,
                                          'subject_caching_test')

        CacheManager.clear_cache_files(cache_folder_base)
        CacheManager.clear_cache_files(cache_folder_neuro)

        try:
            # 2. set up the branch; batching increases the complexity
            nb = ParallelBranch("SubjectCaching", nr_of_processes=3)
            nb += PipelineElement.create("ResampleImages",
                                         StupidAdditionTransformer(), {},
                                         batch_size=4)
            nb.base_element.cache_folder = cache_folder_neuro

            # 3. set up the usual pipeline
            ss = PipelineElement("StandardScaler", {})
            pca = PipelineElement("PCA", {'n_components': [3, 10, 50]})
            svm = PipelineElement("SVR", {'kernel': ['rbf', 'linear']})

            pipe = PhotonPipeline([('NeuroBranch', nb),
                                   ('StandardScaler', ss),
                                   ('PCA', pca), ('SVR', svm)])

            pipe.caching = True
            pipe.fold_id = "12345643463434"
            pipe.cache_folder = cache_folder_base

            def transform_and_check_folder(config, expected_nr_of_files_group,
                                           expected_nr_subject):
                """Fit with ``config`` and count the ``*.p`` files in both folders."""
                pipe.set_params(**config)
                pipe.fit(self.X, self.y)

                group_files = glob.glob(
                    os.path.join(cache_folder_base, "*.p"))
                # assertEqual reports both numbers when the check fails
                self.assertEqual(len(group_files),
                                 expected_nr_of_files_group)

                subject_files = glob.glob(
                    os.path.join(cache_folder_neuro, "*.p"))
                self.assertEqual(len(subject_files), expected_nr_subject)

            config1 = {
                'NeuroBranch__ResampleImages__voxel_size': 5,
                'PCA__n_components': 7,
                'SVR__C': 2
            }
            config2 = {
                'NeuroBranch__ResampleImages__voxel_size': 3,
                'PCA__n_components': 4,
                'SVR__C': 5
            }

            # first config: two files in the pipeline cache (standard scaler
            # and pca) and one batch of files in the branch cache
            transform_and_check_folder(
                config1, 2, self.nr_of_expected_pickles_per_config)

            # second config: the pipeline cache doubles (the scaler sees new
            # input and the pca has a new config) and so does the branch cache
            transform_and_check_folder(
                config2, 4, 2 * self.nr_of_expected_pickles_per_config)

            # fitting with the first config again must not add any cache file
            transform_and_check_folder(
                config1, 4, 2 * self.nr_of_expected_pickles_per_config)

            # fitting with an empty config is expected to raise ValueError;
            # the counts passed here are only reached if it does not
            with self.assertRaises(ValueError):
                transform_and_check_folder(
                    {}, 6, 4 * self.nr_of_expected_pickles_per_config)
        finally:
            # always clean up so a failure does not leave cache files behind
            CacheManager.clear_cache_files(cache_folder_base)
            CacheManager.clear_cache_files(cache_folder_neuro)
예제 #14
0
    def predict(self, X, **kwargs):
        """Return the values passed in as ``true_predictions``.

        Raises KeyError when ``true_predictions`` is not among the keyword
        arguments, and AssertionError when its length differs from the
        number of rows in ``X``.
        """
        expected = kwargs["true_predictions"]
        n_samples = X.shape[0]
        assert n_samples == len(expected)
        return expected

    def save(self):
        """Return None; this method does not persist anything."""
        return None


# Load the breast cancer data set that ships with scikit-learn.
X, y = load_breast_cancer(return_X_y=True)

settings = OutputSettings(project_folder='./tmp/')

# Design the pipeline: 3-fold outer and inner cross-validation, with
# accuracy as the best-config metric among the metrics of interest.
my_pipe = Hyperpipe(
    'basic_svm_pipe',
    metrics=['accuracy', 'precision', 'recall', 'balanced_accuracy'],
    best_config_metric='accuracy',
    outer_cv=KFold(n_splits=3),
    inner_cv=KFold(n_splits=3),
    verbosity=1,
    output_settings=settings,
)

my_pipe.add(PipelineElement('StandardScaler'))

# Wrap the custom estimator so it can be used as a pipeline element.
my_pipe += PipelineElement.create(
    "CustomWrapper",
    AdditionalDataWrapper(),
    hyperparameters={},
)

# The labels are additionally passed as keyword data named true_predictions.
my_pipe.fit(X, y, true_predictions=np.array(y))

예제 #15
0
    def test_save_optimum_pipe(self):
        """Fit a stacked hyperpipe, copy the saved best model and reload it.

        Checks that the model file is written to the results folder, that a
        copy of it can be loaded from a different folder with its meta
        information, and that the loaded pipe predicts exactly like the
        in-memory optimum pipe.
        """
        # todo: test .save() of custom model
        tmp_path = os.path.join(self.tmp_folder_path, 'optimum_pipypipe')
        settings = OutputSettings(project_folder=tmp_path,
                                  overwrite_results=True)

        my_pipe = Hyperpipe('hyperpipe',
                            optimizer='random_grid_search',
                            optimizer_params={'n_configurations': 3},
                            metrics=['accuracy', 'precision', 'recall'],
                            best_config_metric='f1_score',
                            outer_cv=KFold(n_splits=2),
                            inner_cv=KFold(n_splits=2),
                            verbosity=1,
                            output_settings=settings)

        preproc = Preprocessing()
        preproc += PipelineElement('StandardScaler')

        # branch with QuantileTransformer and DecisionTreeClassifier
        tree_qua_branch = Branch('tree_branch')
        tree_qua_branch += PipelineElement('QuantileTransformer')
        tree_qua_branch += PipelineElement(
            'DecisionTreeClassifier',
            {'min_samples_split': IntegerRange(2, 4)},
            criterion='gini')

        # branch with MinMaxScaler and SVC
        svm_mima_branch = Branch('svm_branch')
        svm_mima_branch += PipelineElement('MinMaxScaler')
        svm_mima_branch += PipelineElement(
            'SVC', {
                'kernel': Categorical(['rbf', 'linear']),
                'C': 2.0
            },
            gamma='auto')

        # branch with a custom DummyTransformer and KNeighborsClassifier
        knn_sta_branch = Branch('neighbour_branch')
        knn_sta_branch += PipelineElement.create("dummy", DummyTransformer(),
                                                 {})
        knn_sta_branch += PipelineElement('KNeighborsClassifier')

        my_pipe += preproc
        # all three branches go into one stack
        my_pipe += Stack('final_stack',
                         [tree_qua_branch, svm_mima_branch, knn_sta_branch])

        my_pipe += PipelineElement('LogisticRegression', solver='lbfgs')

        my_pipe.fit(self.__X, self.__y)
        results_folder = my_pipe.output_settings.results_folder
        model_path = os.path.join(results_folder, 'photon_best_model.photon')
        self.assertTrue(os.path.exists(model_path))

        # now copy the optimum pipe to a new folder
        test_folder = os.path.join(results_folder, 'new_test_folder')
        new_model_path = os.path.join(test_folder, 'photon_best_model.photon')
        # exist_ok: a folder left over from an earlier run must not abort the test
        os.makedirs(test_folder, exist_ok=True)
        shutil.copyfile(model_path, new_model_path)

        # check that load_optimum_pipe works on the copy
        # and that the meta information is recovered
        loaded_optimum_pipe = Hyperpipe.load_optimum_pipe(new_model_path)
        self.assertIsNotNone(loaded_optimum_pipe._meta_information)
        self.assertIsNotNone(
            loaded_optimum_pipe._meta_information['photon_version'])

        # check that predictions reliably stay the same
        y_pred_loaded = loaded_optimum_pipe.predict(self.__X)
        y_pred = my_pipe.optimum_pipe.predict(self.__X)
        np.testing.assert_array_equal(y_pred_loaded, y_pred)