def test_inverse_transform(self):
    # simple pipe
    sk_pipe = SKPipeline([("SS", self.sk_ss), ("PCA", self.sk_pca)])
    sk_pipe.fit(self.X, self.y)
    sk_transform = sk_pipe.transform(self.X)
    sk_inverse_transformed = sk_pipe.inverse_transform(sk_transform)

    photon_pipe = PhotonPipeline([("SS", self.p_ss), ("PCA", self.p_pca)])
    photon_pipe.fit(self.X, self.y)
    p_transform, _, _ = photon_pipe.transform(self.X)
    p_inverse_transformed, _, _ = photon_pipe.inverse_transform(p_transform)

    self.assertTrue(np.array_equal(sk_inverse_transformed, p_inverse_transformed))

    # now including a stack
    stack = Stack('stack', [self.p_pca])
    stack_pipeline = PhotonPipeline([
        ("stack", stack),
        ('StandardScaler', PipelineElement('StandardScaler')),
        ('LinearSVC', PipelineElement('LinearSVC'))
    ])
    stack_pipeline.fit(self.X, self.y)
    feature_importances = stack_pipeline.feature_importances_
    inversed_data, _, _ = stack_pipeline.inverse_transform(feature_importances)
    self.assertEqual(inversed_data.shape[1], self.X.shape[1])
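# A minimal, self-contained sketch (plain scikit-learn, not part of the test suite)
# of the round-trip property the test above verifies: Pipeline.inverse_transform
# applies each step's inverse in reverse order, so scaling -> PCA -> inverse
# reproduces the input up to float error when no components are dropped.
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.datasets import load_breast_cancer

X, _ = load_breast_cancer(return_X_y=True)
pipe = Pipeline([("scaler", StandardScaler()), ("pca", PCA())])  # PCA keeps all components
X_round_trip = pipe.inverse_transform(pipe.fit_transform(X))
assert np.allclose(X, X_round_trip)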
def test_neuro_module_branch(self):
    nmb = NeuroBranch('best_branch_ever')
    nmb += PipelineElement('SmoothImages', fwhm=10)
    nmb += PipelineElement('ResampleImages', voxel_size=5)
    nmb += PipelineElement('BrainAtlas', rois=['Hippocampus_L', 'Hippocampus_R'],
                           atlas_name="AAL", extract_mode='vec')

    nmb.base_element.cache_folder = self.cache_folder_path
    CacheManager.clear_cache_files(nmb.base_element.cache_folder, True)
    # set the config so that caching works
    nmb.set_params(**{'SmoothImages__fwhm': 10, 'ResampleImages__voxel_size': 5})

    # we transform 7 Niftis through 3 elements, so afterwards there should be 3 * 7 cache files
    nr_niftis = 7
    nmb.transform(self.X[:nr_niftis])
    nr_files_in_folder = len(glob.glob(os.path.join(nmb.base_element.cache_folder, "*.p")))
    self.assertTrue(nr_files_in_folder == 3 * nr_niftis)
    self.assertTrue(len(nmb.base_element.cache_man.cache_index.items()) == (3 * nr_niftis))

    # transform the last two already-cached items plus the remaining ones that need new processing
    nmb.transform(self.X[nr_niftis - 2::])
    # now there should be 3 cache files for every item in X
    nr_files_in_folder = len(glob.glob(os.path.join(nmb.base_element.cache_folder, "*.p")))
    self.assertTrue(nr_files_in_folder == (3 * len(self.X)))
    self.assertTrue(len(nmb.base_element.cache_man.cache_index.items()) == (3 * len(self.X)))
def objective_function_simple(self, cfg):
    cfg = {k: cfg[k] for k in cfg if cfg[k]}
    values = []

    train_indices = list(self.pipe.cross_validation.outer_folds.values())[0].train_indices
    self._validation_X, self._validation_y, _ = PhotonDataHelper.split_data(
        self.X, self.y, kwargs=None, indices=train_indices)

    for inner_fold in list(list(self.pipe.cross_validation.inner_folds.values())[0].values()):
        sc = PipelineElement("StandardScaler", {})
        pca = PipelineElement("PCA", {}, random_state=42)
        svc = PipelineElement("SVC", {}, random_state=42, gamma='auto')
        my_pipe = PhotonPipeline([('StandardScaler', sc), ('PCA', pca), ('SVC', svc)])
        my_pipe.set_params(**cfg)
        my_pipe.fit(self._validation_X[inner_fold.train_indices, :],
                    self._validation_y[inner_fold.train_indices])
        values.append(accuracy_score(self._validation_y[inner_fold.test_indices],
                                     my_pipe.predict(self._validation_X[inner_fold.test_indices, :])))

    return 1 - np.mean(values)
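# Illustrative only: the objective above returns 1 minus the mean inner-fold
# accuracy, so an optimizer that *minimizes* effectively maximizes accuracy.
# A self-contained scikit-learn analogue (the names here are hypothetical,
# not the PHOTON API):
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC

def objective(cfg, X, y):
    # drop entries whose value is falsy, mirroring the cfg filtering above
    cfg = {k: v for k, v in cfg.items() if v}
    model = make_pipeline(StandardScaler(), PCA(), SVC(random_state=42, gamma="auto"))
    model.set_params(**cfg)
    return 1 - np.mean(cross_val_score(model, X, y, cv=5, scoring="accuracy"))

X, y = load_breast_cancer(return_X_y=True)
print(objective({"pca__n_components": 10, "svc__C": 1.0}, X, y))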
def test_neuro_hyperpipe_parallelized_batched_caching(self):
    cache_path = self.cache_folder_path
    self.hyperpipe = Hyperpipe('complex_case',
                               inner_cv=KFold(n_splits=5),
                               outer_cv=KFold(n_splits=3),
                               optimizer='grid_search',
                               cache_folder=cache_path,
                               metrics=['mean_squared_error'],
                               best_config_metric='mean_squared_error',
                               output_settings=OutputSettings(project_folder=self.tmp_folder_path))

    nb = ParallelBranch("SubjectCaching", nr_of_processes=1)
    nb += PipelineElement.create("ResampleImages", StupidAdditionTransformer(),
                                 {'voxel_size': [3, 5, 10]}, batch_size=4)

    self.hyperpipe += nb
    self.hyperpipe += PipelineElement("StandardScaler", {})
    self.hyperpipe += PipelineElement("PCA", {'n_components': [3, 4]})
    self.hyperpipe += PipelineElement("SVR", {'kernel': ['rbf', 'linear']})

    self.hyperpipe.fit(self.X, self.y)

    # assert cache is empty again
    nr_of_p_files = len(glob.glob(os.path.join(self.hyperpipe.cache_folder, "*.p")))
    print(nr_of_p_files)
    self.assertTrue(nr_of_p_files == 0)
def setup_crazy_pipe(self):
    # erase all, we need a complex and crazy task
    self.hyperpipe.elements = list()

    nmb_list = list()
    for i in range(5):
        nmb = ParallelBranch(name=str(i), nr_of_processes=i + 3)
        sp = PipelineElement('PCA', hyperparameters={'n_components': IntegerRange(1, 50)})
        nmb += sp
        nmb_list.append(nmb)

    my_switch = Switch('disabling_test_switch')
    my_switch += nmb_list[0]
    my_switch += nmb_list[1]

    my_stack = Stack('stack_of_branches')
    for i in range(3):
        my_branch = Branch('branch_' + str(i + 2))
        my_branch += PipelineElement('StandardScaler')
        my_branch += nmb_list[i + 2]
        my_stack += my_branch

    self.hyperpipe.add(my_stack)
    self.hyperpipe.add(PipelineElement('StandardScaler'))
    self.hyperpipe.add(my_switch)
    self.hyperpipe.add(PipelineElement('SVC'))
    return nmb_list
def test_confounder_removal_statistically(self):
    cr = PipelineElement("ConfounderRemoval", {}, standardize_covariates=False)
    cr.fit(self.z[:, 1:3], self.z[:, 0], **{"confounder": self.z[:, 3]})
    # use transform to write data to cache
    z_transformed = cr.transform(self.z[:, 1:3], **{"confounder": self.z[:, 3]})

    corr = np.corrcoef(
        np.concatenate([self.z[:, 0].reshape(-1, 1),
                        z_transformed[0],
                        self.z[:, 3].reshape(-1, 1)], axis=1),
        rowvar=False)

    # correlation between target and feature should be lower than 0.25 in this case
    # correlation between covariate and feature should be near zero
    self.assertLess(corr[1, 0], 0.25)
    self.assertLess(corr[2, 0], 0.25)
    self.assertAlmostEqual(corr[3, 1], 0)
    self.assertAlmostEqual(corr[3, 2], 0)
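# A minimal numpy sketch of linear confound removal, the standard technique the
# ConfounderRemoval element is statistically checked against here: regress each
# feature on the confound and keep the residuals. Variable names are illustrative.
import numpy as np

rng = np.random.default_rng(42)
confound = rng.normal(size=(200, 1))
features = 0.8 * confound + rng.normal(size=(200, 2))  # features contaminated by the confound

design = np.hstack([np.ones((200, 1)), confound])      # intercept + confound
beta, *_ = np.linalg.lstsq(design, features, rcond=None)
residuals = features - design @ beta                   # confound-cleaned features

# OLS residuals are orthogonal to the design columns, so the cleaned features
# are (numerically) uncorrelated with the confound
assert abs(np.corrcoef(residuals[:, 0], confound[:, 0])[0, 1]) < 1e-10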
def create_instances_and_transform(neuro_class_str, param_dict, transformed_X):
    for i in range(1, 4):
        if i == 1 or i == 3:
            obj = NeuroBranch(name="single core application", nr_of_processes=1)
        else:
            obj = NeuroBranch(name="multi core application", nr_of_processes=3)

        if i < 3:
            obj += PipelineElement(neuro_class_str, **param_dict)
        if i >= 3:
            obj += PipelineElement(neuro_class_str, batch_size=5, **param_dict)

        # transform data
        obj.base_element.cache_folder = self.cache_folder_path
        obj.base_element.current_config = {"test_suite": 1}
        new_X, _, _ = obj.transform(self.X)
        obj.base_element.clear_cache()

        # compare output to nilearn version
        for index, nilearn_nifti in enumerate(transformed_X):
            photon_nifti = new_X[index]
            if isinstance(photon_nifti, Nifti1Image):
                self.assertTrue(np.array_equal(photon_nifti.dataobj, nilearn_nifti.dataobj))
            else:
                self.assertTrue(np.array_equal(np.asarray(photon_nifti), nilearn_nifti.dataobj))

        print("finished testing object: all images are fine.")
def test_huge_combinations(self):
    hp = Hyperpipe("huge_combinations",
                   metrics=["accuracy"],
                   best_config_metric="accuracy",
                   output_settings=OutputSettings(project_folder=self.tmp_folder_path))

    hp += PipelineElement("PCA", hyperparameters={"n_components": [5, 10]})
    stack = Stack("ensemble")
    for i in range(20):
        stack += PipelineElement("SVC",
                                 hyperparameters={"C": FloatRange(0.001, 5),
                                                  # "poly" is sklearn's name for the polynomial kernel
                                                  "kernel": ["linear", "rbf", "sigmoid", "poly"]})
    hp += stack
    hp += PipelineElement("SVC", hyperparameters={"kernel": ["linear", "rbf", "sigmoid"]})

    X, y = load_breast_cancer(return_X_y=True)
    with self.assertRaises(Warning):
        hp.fit(X, y)
def test_classification_9(self):
    for original_hyperpipe in self.hyperpipes:
        pipe = original_hyperpipe.copy_me()

        # crazy everything
        pipe += PipelineElement('StandardScaler')
        pipe += PipelineElement('SamplePairingClassification',
                                {'draw_limit': [100],
                                 'generator': Categorical(['nearest_pair', 'random_pair'])},
                                distance_metric='euclidean', test_disabled=True)

        # set up pipeline branches with half of the features each;
        # if both PCAs are disabled, features are simply concatenated and passed to the final estimator
        source1_branch = Branch('source1_features')
        # first half of features (for Boston Housing, same as indices=[0, 1, 2, 3, 4, 5])
        source1_branch += DataFilter(indices=np.arange(start=0, stop=int(np.floor(self.X_shape[1] / 2))))
        source1_branch += PipelineElement('PCA', hyperparameters={'n_components': Categorical([None, 5])},
                                          test_disabled=True)

        source2_branch = Branch('source2_features')
        # second half of features (for Boston Housing, same as indices=[6, 7, 8, 9, 10, 11, 12])
        source2_branch += DataFilter(indices=np.arange(start=int(np.floor(self.X_shape[1] / 2)),
                                                       stop=self.X_shape[1]))
        source2_branch += PipelineElement('PCA', hyperparameters={'n_components': Categorical([None, 5])},
                                          test_disabled=True)

        # stack the source branches' output (i.e. horizontal concatenation); see the index sketch below
        pipe += Stack('source_stack', elements=[source1_branch, source2_branch])

        # final estimator with the stack output as features
        pipe += PipelineElement('RandomForestClassifier',
                                hyperparameters={'min_samples_split':
                                                 FloatRange(start=.05, step=.1, stop=.26, range_type='range')})

        self.run_hyperpipe(pipe, self.classification)
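# For reference: the two DataFilter index ranges above simply split the feature
# axis in half. A plain numpy illustration with 13 features, as in Boston Housing:
import numpy as np

n_features = 13
first_half = np.arange(0, n_features // 2)            # array([0, 1, 2, 3, 4, 5])
second_half = np.arange(n_features // 2, n_features)  # array([6, 7, ..., 12])
assert len(first_half) + len(second_half) == n_features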
def test_classification_11(self):
    for original_hyperpipe in self.hyperpipes:
        pipe = original_hyperpipe.copy_me()

        # Simple estimator Stack (train Random Forest on estimator stack proba outputs)
        # create estimator stack
        SVC1 = PipelineElement("SVC",
                               hyperparameters={"kernel": Categorical(["linear"]),
                                                "C": Categorical([0.01, 1, 5])})
        SVC2 = PipelineElement("SVC",
                               hyperparameters={"kernel": Categorical(["rbf"]),
                                                "C": Categorical([0.01, 1, 5])})
        RF = PipelineElement("RandomForestClassifier")

        # add to pipe
        pipe += Stack("estimator_stack", elements=[SVC1, SVC2, RF], use_probabilities=True)
        pipe += PipelineElement("RandomForestClassifier")

        self.run_hyperpipe(pipe, self.classification)
def test_classification_12(self):
    # multiclass classification
    X, y = load_iris(return_X_y=True)

    for original_hyperpipe in self.hyperpipes:
        pipe = original_hyperpipe.copy_me()

        # Simple estimator Stack (train Random Forest on estimator stack proba outputs)
        # create estimator stack
        SVC1 = PipelineElement("SVC",
                               hyperparameters={"kernel": Categorical(["linear"]),
                                                "C": Categorical([0.01, 1, 5])})
        SVC2 = PipelineElement("SVC",
                               hyperparameters={"kernel": Categorical(["rbf"]),
                                                "C": Categorical([0.01, 1, 5])})
        RF = PipelineElement("RandomForestClassifier")

        # add to pipe
        pipe += Stack("estimator_stack", elements=[SVC1, SVC2, RF], use_probabilities=True)
        pipe += PipelineElement("RandomForestClassifier")

        pipe.optimization.metrics = ["accuracy"]
        pipe.optimization.best_config_metric = "accuracy"

        pipe.fit(X, y)
def test_classification_6(self):
    for original_hyperpipe in self.hyperpipes:
        pipe = original_hyperpipe.copy_me()

        # Simple estimator Stack (use mean in the end)
        svc = PipelineElement("SVC",
                              hyperparameters={"kernel": Categorical(["linear", "rbf"]),
                                               "C": Categorical([0.01, 1, 5])})
        rf = PipelineElement("RandomForestClassifier",
                             hyperparameters={"min_samples_split":
                                              FloatRange(start=0.05, step=0.1, stop=0.26, range_type="range")})
        pipe += Stack("estimator_stack", elements=[svc, rf])
        pipe += PipelineElement("PhotonVotingClassifier")

        self.run_hyperpipe(pipe, self.classification)
def test_classification_2(self):
    for original_hyperpipe in self.hyperpipes:
        pipe = original_hyperpipe.copy_me()

        # Simple estimator Switch
        switch = Switch("estimator_switch")
        switch += PipelineElement("SVC",
                                  hyperparameters={"kernel": Categorical(["linear", "rbf"]),
                                                   "C": Categorical([0.01, 1, 5])})
        switch += PipelineElement("RandomForestClassifier",
                                  hyperparameters={"min_samples_split":
                                                   FloatRange(start=0.05, step=0.1, stop=0.26,
                                                              range_type="range")})
        pipe += switch

        self.run_hyperpipe(pipe, self.classification)
def test_regression_9(self):
    for original_hyperpipe in self.hyperpipes:
        pipe = original_hyperpipe.copy_me()

        # sample pairing with confounder removal
        pipe += PipelineElement("StandardScaler")
        pipe += PipelineElement("PCA",
                                hyperparameters={"n_components": Categorical([None, 5])},
                                test_disabled=True)
        pipe += PipelineElement("SamplePairingRegression",
                                {"draw_limit": [100],
                                 "generator": Categorical(["nearest_pair", "random_pair"])},
                                distance_metric="euclidean", test_disabled=False)
        pipe += PipelineElement("SVR",
                                hyperparameters={"kernel": Categorical(["linear", "rbf"]),
                                                 "C": Categorical([0.01, 1, 5])})

        self.run_hyperpipe(pipe, self.regression)
def test_single_subject_resampling(self):
    voxel_size = [3, 3, 3]

    # nilearn
    from nilearn.image import resample_img
    nilearn_resampled_img = resample_img(self.X[0], interpolation="nearest",
                                         target_affine=np.diag(voxel_size))
    nilearn_resampled_array = nilearn_resampled_img.dataobj

    # photon
    resampler = PipelineElement("ResampleImages", hyperparameters={},
                                voxel_size=voxel_size, batch_size=1)
    single_resampled_img, _, _ = resampler.transform(self.X[0])

    branch = NeuroBranch("NeuroBranch", output_img=True)
    branch += resampler
    branch_resampled_img, _, _ = branch.transform(self.X[0])

    # assert
    self.assertIsInstance(single_resampled_img, np.ndarray)
    self.assertIsInstance(branch_resampled_img[0], Nifti1Image)
    self.assertTrue(np.array_equal(nilearn_resampled_array, single_resampled_img))
    self.assertTrue(np.array_equal(single_resampled_img, branch_resampled_img[0].dataobj))
def test_all_atlases(self):
    for atlas in AtlasLibrary().ATLAS_DICTIONARY.keys():
        print("Running tests for atlas {}".format(atlas))
        brain_atlas = PipelineElement("BrainAtlas", atlas_name=atlas, extract_mode="vec")
        brain_atlas.transform(self.X)
def test_single_subject_smoothing(self):
    # nilearn
    from nilearn.image import smooth_img
    nilearn_smoothed_img = smooth_img(self.X[0], fwhm=[3, 3, 3])
    nilearn_smoothed_array = nilearn_smoothed_img.dataobj

    # photon
    smoother = PipelineElement("SmoothImages", hyperparameters={}, fwhm=3, batch_size=1)
    photon_smoothed_array, _, _ = smoother.transform(self.X[0])

    branch = NeuroBranch("NeuroBranch", output_img=True)
    branch += smoother
    photon_smoothed_img, _, _ = branch.transform(self.X[0])

    # assert
    self.assertIsInstance(photon_smoothed_array, np.ndarray)
    self.assertIsInstance(photon_smoothed_img, Nifti1Image)
    self.assertTrue(np.array_equal(photon_smoothed_array, nilearn_smoothed_array))
    self.assertTrue(np.array_equal(photon_smoothed_img.dataobj, nilearn_smoothed_img.dataobj))
def setUp(self):
    super(CachedPhotonPipelineTests, self).setUp()
    # PHOTON version
    ss = PipelineElement("StandardScaler", {})
    pca = PipelineElement("PCA", {'n_components': [3, 10, 50]}, random_state=3)
    svm = PipelineElement("SVC", {'kernel': ['rbf', 'linear']}, random_state=3)
    self.pipe = PhotonPipeline([('StandardScaler', ss), ('PCA', pca), ('SVC', svm)])

    self.pipe.caching = True
    self.pipe.fold_id = "12345643463434"
    CacheManager.clear_cache_files(self.cache_folder_path)
    self.pipe.cache_folder = self.cache_folder_path

    self.config1 = {'PCA__n_components': 4, 'SVC__C': 3, 'SVC__kernel': 'rbf'}
    self.config2 = {'PCA__n_components': 7, 'SVC__C': 1, 'SVC__kernel': 'linear'}

    self.X, self.y = load_breast_cancer(return_X_y=True)
def setUp(self):
    super(InnerFoldTests, self).setUp()
    self.pipe = PhotonPipeline([
        ('StandardScaler', PipelineElement('StandardScaler')),
        ('PCA', PipelineElement('PCA')),
        ('RidgeClassifier', PipelineElement('RidgeClassifier'))
    ])
    self.config = {
        'PCA__n_components': 5,
        'RidgeClassifier__solver': 'svd',
        'RidgeClassifier__random_state': 42
    }
    self.outer_fold_id = 'TestID'
    self.inner_cv = KFold(n_splits=4)
    self.X, self.y = load_breast_cancer(return_X_y=True)
    self.cross_validation = Hyperpipe.CrossValidation(
        self.inner_cv, None, True, 0.2, True, False, False, None)
    self.cross_validation.inner_folds = {
        self.outer_fold_id: {
            i: FoldInfo(i, i + 1, train, test)
            for i, (train, test) in enumerate(self.inner_cv.split(self.X, self.y))
        }
    }
    self.optimization = Hyperpipe.Optimization(
        'grid_search', {}, ['accuracy', 'recall', 'specificity'], 'accuracy', None)
def setup_crazy_pipe(self):
    # erase all, we need a complex and crazy task
    self.hyperpipe.elements = list()

    nmb_list = list()
    for i in range(5):
        nmb = NeuroBranch(name=str(i), nr_of_processes=i + 3)
        nmb += PipelineElement("SmoothImages")
        nmb_list.append(nmb)

    my_switch = Switch("disabling_test_switch")
    my_switch += nmb_list[0]
    my_switch += nmb_list[1]

    my_stack = Stack("stack_of_branches")
    for i in range(3):
        my_branch = Branch("branch_" + str(i + 2))
        my_branch += PipelineElement("StandardScaler")
        my_branch += nmb_list[i + 2]
        my_stack += my_branch

    self.hyperpipe.add(my_stack)
    self.hyperpipe.add(PipelineElement("StandardScaler"))
    self.hyperpipe.add(my_switch)
    self.hyperpipe.add(PipelineElement("SVC"))
    return nmb_list
def test_three_levels_of_feature_importances(self):
    hyperpipe = Hyperpipe('fimps',
                          inner_cv=KFold(n_splits=4),
                          outer_cv=KFold(n_splits=3),
                          metrics=['mean_absolute_error', 'mean_squared_error'],
                          best_config_metric='mean_squared_error',
                          output_settings=OutputSettings(project_folder=self.tmp_folder_path))
    hyperpipe += PipelineElement('StandardScaler')
    hyperpipe += PipelineElement('DecisionTreeRegressor')
    X, y = load_boston(return_X_y=True)
    hyperpipe.fit(X, y)

    expected_nr_of_feature_importances = X.shape[1]
    self.assertTrue(len(hyperpipe.results.best_config_feature_importances)
                    == expected_nr_of_feature_importances)

    for outer_fold in hyperpipe.results.outer_folds:
        self.assertTrue(len(outer_fold.best_config.best_config_score.feature_importances)
                        == expected_nr_of_feature_importances)
        for inner_fold in outer_fold.best_config.inner_folds:
            self.assertTrue(len(inner_fold.feature_importances)
                            == expected_nr_of_feature_importances)
def setUp(self):
    self.batch_size = 10
    nr_features = 3
    origin_list = ["affe", "tiger", "schwein", "giraffe", "löwe"]
    self.data = None
    self.targets = None

    self.neuro_batch = PipelineElement("dummy_batch", batch_size=self.batch_size,
                                       base_element=DummyBatchTransformer())

    for element in origin_list:
        features = [element + str(i) for i in range(0, nr_features)]
        if self.data is None:
            self.data = np.array([features] * self.batch_size)
        else:
            self.data = np.vstack((self.data, [features] * self.batch_size))
        if self.targets is None:
            self.targets = np.array([element] * self.batch_size)
        else:
            self.targets = np.hstack((self.targets, [element] * self.batch_size))

    self.data = np.array(self.data)
    self.targets = np.array(self.targets)
    self.kwargs = {"animals": self.targets}
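# An illustrative sketch of what batch_size means conceptually: the element
# splits the sample axis into chunks of at most batch_size rows and transforms
# them one chunk at a time. Plain numpy, not the PHOTON batching implementation.
import numpy as np

def iterate_batches(data, batch_size):
    for start in range(0, len(data), batch_size):
        yield data[start:start + batch_size]

data = np.arange(25).reshape(25, 1)
sizes = [len(chunk) for chunk in iterate_batches(data, batch_size=10)]
assert sizes == [10, 10, 5]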
def test_multi_subject_resampling(self):
    voxel_size = [3, 3, 3]

    # nilearn
    from nilearn.image import resample_img, index_img
    nilearn_resampled = resample_img(self.X[:3], interpolation="nearest",
                                     target_affine=np.diag(voxel_size))
    nilearn_resampled_img = [index_img(nilearn_resampled, i)
                             for i in range(nilearn_resampled.shape[-1])]
    nilearn_resampled_array = np.moveaxis(nilearn_resampled.dataobj, -1, 0)

    # photon
    resampler = PipelineElement("ResampleImages", hyperparameters={}, voxel_size=voxel_size)
    resampled_img, _, _ = resampler.transform(self.X[:3])

    branch = NeuroBranch("NeuroBranch", output_img=True)
    branch += resampler
    branch_resampled_img, _, _ = branch.transform(self.X[:3])

    # assert
    self.assertIsInstance(resampled_img, np.ndarray)
    self.assertIsInstance(branch_resampled_img, list)
    self.assertIsInstance(branch_resampled_img[0], Nifti1Image)
    self.assertTrue(np.array_equal(nilearn_resampled_array, resampled_img))
    self.assertTrue(np.array_equal(branch_resampled_img[1].dataobj,
                                   nilearn_resampled_img[1].dataobj))
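# Note on the moveaxis call above: nilearn returns one 4D image with subjects on
# the last axis, while the PHOTON element returns a subject-first array. A tiny
# numpy illustration of that axis swap (the shape here is made up):
import numpy as np

vol_4d = np.zeros((61, 73, 61, 3))          # x, y, z, subjects
subject_first = np.moveaxis(vol_4d, -1, 0)  # subjects, x, y, z
assert subject_first.shape == (3, 61, 73, 61)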
def test_class_with_data_preproc(self):
    """Test for a simple pipeline with data preprocessing."""
    X, y = load_breast_cancer(return_X_y=True)

    # DESIGN YOUR PIPELINE
    my_pipe = Hyperpipe('basic_svm_pipe',
                        optimizer='grid_search',
                        metrics=['accuracy', 'precision', 'recall', 'balanced_accuracy'],
                        best_config_metric='accuracy',
                        eval_final_performance=False,
                        outer_cv=KFold(n_splits=2),
                        inner_cv=KFold(n_splits=3),
                        verbosity=1,
                        random_seed=42)

    preprocessing = Preprocessing()
    preprocessing += PipelineElement("LabelEncoder")
    my_pipe += preprocessing

    # ADD ELEMENTS TO YOUR PIPELINE
    # first normalize all features
    my_pipe.add(PipelineElement('StandardScaler'))
    # then do feature selection using a PCA
    my_pipe += PipelineElement('PCA',
                               hyperparameters={'n_components': IntegerRange(10, 12)},
                               test_disabled=True)
    # engage and optimize the good old SVM for classification
    my_pipe += PipelineElement('SVC',
                               hyperparameters={'kernel': Categorical(['rbf', 'linear'])},
                               C=2, gamma='scale')

    # NOW TRAIN YOUR PIPELINE
    my_pipe.fit(X, y)

    json_transformer = JsonTransformer()
    pipe_json = json_transformer.create_json(my_pipe)
    my_pipe_reload = json_transformer.from_json(pipe_json)
    # serialize the reloaded pipe again; both JSON representations must match
    pipe_json_reload = json_transformer.create_json(my_pipe_reload)
    self.assertEqual(pipe_json, pipe_json_reload)

    my_pipe_reload.fit(X, y)
    self.assertDictEqual(my_pipe.best_config, my_pipe_reload.best_config)
    self.assertDictEqual(elements_to_dict(my_pipe.copy_me()),
                         elements_to_dict(my_pipe_reload.copy_me()))
def setUp(self): """ Set up for GridSearchTest. """ self.pipeline_elements = [PipelineElement("StandardScaler"), PipelineElement('PCA', hyperparameters={'n_components': IntegerRange(5, 20)}), PipelineElement("SVC")] self.optimizer = GridSearchOptimizer() self.optimizer_name = 'grid_search'
def test_one_hyperpipe(learning_curves, learning_curves_cut):
    if learning_curves and learning_curves_cut is None:
        learning_curves_cut = FloatRange(0, 1, 'range', 0.2)

    output_settings = OutputSettings(project_folder=self.tmp_folder_path, save_output=False)
    test_hyperpipe = Hyperpipe('test_pipe',
                               learning_curves=learning_curves,
                               learning_curves_cut=learning_curves_cut,
                               metrics=['accuracy', 'recall', 'specificity'],
                               best_config_metric='accuracy',
                               inner_cv=self.inner_cv,
                               output_settings=output_settings)

    self.assertEqual(test_hyperpipe.cross_validation.learning_curves, learning_curves)
    if learning_curves:
        self.assertEqual(test_hyperpipe.cross_validation.learning_curves_cut, learning_curves_cut)
    else:
        self.assertIsNone(test_hyperpipe.cross_validation.learning_curves_cut)

    test_hyperpipe += PipelineElement('StandardScaler')
    test_hyperpipe += PipelineElement('PCA', {'n_components': [1, 2]}, random_state=42)
    test_hyperpipe += PipelineElement('SVC', {'C': [0.1], 'kernel': ['linear']}, random_state=42)
    test_hyperpipe.fit(self.X, self.y)

    config_results = test_hyperpipe.results_handler.results.outer_folds[0].tested_config_list
    config_num = len(config_results)
    for config_nr in range(config_num):
        for inner_fold_nr in range(self.inner_cv.n_splits):
            curves = config_results[config_nr].inner_folds[inner_fold_nr].learning_curves
            if learning_curves:
                self.assertEqual(len(curves), len(learning_curves_cut.values))
                for learning_point_nr in range(len(learning_curves_cut.values)):
                    test_metrics = list(curves[learning_point_nr][1].keys())
                    train_metrics = list(curves[learning_point_nr][2].keys())
                    self.assertEqual(test_hyperpipe.optimization.metrics, test_metrics)
                    self.assertEqual(test_hyperpipe.optimization.metrics, train_metrics)
            else:
                self.assertEqual(curves, [])
def setUp(self): """ Set up for RandomGridSearchOptimizer. """ self.pipeline_elements = [PipelineElement("StandardScaler"), PipelineElement('PCA', hyperparameters={'n_components': IntegerRange(5, 20)}), PipelineElement("SVC")] self.optimizer = RandomSearchOptimizer(n_configurations=5) self.optimizer_name = 'random_search'
def test_false_collection_mode(self):
    custom_atlas = os.path.join(self.atlas_folder, 'AAL_SPM12/AAL.nii.gz')

    with self.assertRaises(ValueError):
        atlas = PipelineElement('BrainAtlas', atlas_name=custom_atlas,
                                extract_mode='vec', batch_size=20)
        atlas.base_element.collection_mode = "array"
        atlas.transform(self.X)
def test_class_with_data_01(self):
    """Test for a simple pipeline with data."""
    X, y = load_breast_cancer(return_X_y=True)

    # DESIGN YOUR PIPELINE
    my_pipe = Hyperpipe("basic_svm_pipe",
                        optimizer="grid_search",
                        metrics=["accuracy", "precision", "recall", "balanced_accuracy"],
                        best_config_metric="accuracy",
                        eval_final_performance=False,
                        outer_cv=KFold(n_splits=2),
                        inner_cv=KFold(n_splits=3),
                        verbosity=1,
                        random_seed=42)

    preprocessing = Preprocessing()
    preprocessing += PipelineElement("LabelEncoder")
    my_pipe += preprocessing

    # ADD ELEMENTS TO YOUR PIPELINE
    # first normalize all features
    my_pipe.add(PipelineElement("StandardScaler"))
    # then do feature selection using a PCA
    my_pipe += PipelineElement("PCA",
                               hyperparameters={"n_components": IntegerRange(10, 12)},
                               test_disabled=True)
    # engage and optimize the good old SVM for classification
    my_pipe += PipelineElement("SVC",
                               hyperparameters={"kernel": Categorical(["rbf", "linear"])},
                               C=2, gamma="scale")

    # NOW TRAIN YOUR PIPELINE
    my_pipe.fit(X, y)

    json_transformer = JsonTransformer()
    pipe_json = json_transformer.create_json(my_pipe)
    my_pipe_reload = json_transformer.from_json(pipe_json)
    # serialize the reloaded pipe again; both JSON representations must match
    pipe_json_reload = json_transformer.create_json(my_pipe_reload)
    self.assertEqual(pipe_json, pipe_json_reload)

    my_pipe_reload.fit(X, y)
    self.assertDictEqual(my_pipe.best_config, my_pipe_reload.best_config)
def test_shall_continue(self):
    X, y = load_boston(return_X_y=True)
    inner_fold_length = 7

    # DESIGN YOUR PIPELINE
    my_pipe = Hyperpipe(name="performance_pipe",
                        optimizer="random_search",
                        optimizer_params={"limit_in_minutes": 2},
                        metrics=["mean_squared_error"],
                        best_config_metric="mean_squared_error",
                        # outer_cv=KFold(n_splits=2, shuffle=True),
                        inner_cv=KFold(n_splits=inner_fold_length),
                        eval_final_performance=True,
                        performance_constraints=[self.constraint_object])

    my_pipe += PipelineElement("StandardScaler")
    my_pipe += PipelineElement("RandomForestRegressor",
                               hyperparameters={"n_estimators": IntegerRange(5, 50)})

    # NOW TRAIN YOUR PIPELINE
    my_pipe.fit(X, y)

    # clip config results: collect the inner-fold errors of every tested config
    results = my_pipe.results.outer_folds[0].tested_config_list
    configs = []
    for i in range(len(results) - 1):
        configs.append([x.validation.metrics["mean_squared_error"]
                        for x in results[i].inner_folds])

    # the first ten configs establish the baseline threshold (best mean error)
    threshold = np.inf
    for val in configs[:10]:
        challenger = np.mean(val)
        if threshold > challenger:
            threshold = challenger

    originals_for_std = configs[:10]
    for i, val in enumerate(configs[10:]):
        std = np.mean([np.std(x) for x in originals_for_std])
        for j, v in enumerate(val):
            # once the running mean exceeds threshold + std, evaluation must have
            # stopped, so this value has to be the last one recorded
            if np.mean(val[:j + 1]) > threshold + std:
                self.assertEqual(v, val[-1])
                continue
        if len(val) == inner_fold_length - 1 and np.mean(val) < threshold + std:
            threshold = np.mean(val)
        if len(val) > 1:
            originals_for_std.append(val)
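# A self-contained sketch of the early-stopping rule the test above re-implements
# in plain Python: keep evaluating a config only while its running mean inner-fold
# error stays within a std-based tolerance of the best mean seen so far. The
# helper name is hypothetical, not the PHOTON performance-constraint API.
import numpy as np

def should_continue(fold_errors, best_mean, tolerance):
    return np.mean(fold_errors) <= best_mean + tolerance

assert should_continue([1.0, 1.1], best_mean=1.2, tolerance=0.1)
assert not should_continue([2.0, 2.5], best_mean=1.2, tolerance=0.1)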