Example #1
    def run(self):

        x = self._input.get_x()
        y = self._input.get_y()
        kernel = self._input.get_kernel()

        self._algorithm = algorithm.DualSVMAlgorithm(
            kernel,
            y,
            balanced=self._balanced,
            grid_search_folds=self._grid_search_folds,
            c_range=self._c_range,
            n_threads=self._n_threads)

        self._validation = validation.RepeatedHoldOut(
            self._algorithm,
            n_iterations=self._n_iterations,
            test_size=self._test_size)

        classifier, best_params, results = self._validation.validate(
            y, n_threads=self._n_threads, splits_indices=self._splits_indices)
        classifier_dir = path.join(self._output_dir, 'classifier')
        if not path.exists(classifier_dir):
            os.makedirs(classifier_dir)

        self._algorithm.save_classifier(classifier, classifier_dir)
        self._algorithm.save_parameters(best_params, classifier_dir)
        weights = self._algorithm.save_weights(classifier, x, classifier_dir)

        self._input.save_weights_as_nifti(weights, classifier_dir)

        self._validation.save_results(self._output_dir)
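
The wrapper classes used above (algorithm.DualSVMAlgorithm, validation.RepeatedHoldOut) handle the grid search over C, class balancing and result bookkeeping internally. As a rough orientation only, below is a minimal plain scikit-learn sketch of the same idea: a linear SVM trained on a precomputed Gram matrix and scored over repeated stratified hold-out splits. The function name and default values are illustrative, not part of the pipeline's API.

import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import balanced_accuracy_score

def repeated_holdout_svm(x, y, n_iterations=100, test_size=0.2, C=1.0):
    # Linear Gram matrix between subjects, shape (n_subjects, n_subjects).
    kernel = x @ x.T
    splitter = StratifiedShuffleSplit(n_splits=n_iterations,
                                      test_size=test_size, random_state=0)
    scores = []
    for train, test in splitter.split(x, y):
        clf = SVC(kernel='precomputed', C=C, class_weight='balanced')
        clf.fit(kernel[np.ix_(train, train)], y[train])
        pred = clf.predict(kernel[np.ix_(test, train)])
        scores.append(balanced_accuracy_score(y[test], pred))
    return float(np.mean(scores))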
Example #2
    def run(self):

        x = self._input.get_x()
        y = self._input.get_y()
        kernel = self._input.get_kernel()  # precomputed linear kernel (Gram) matrix between subjects
        if y[0] == 0:
            print('The first label of the diagnosis is 0: voxels with negative coefficients in the weight image are more likely to be classified as the first label in the diagnosis TSV')
        else:
            print('The first label of the diagnosis is 1: voxels with positive coefficients in the weight image are more likely to be classified as the second label in the diagnosis TSV')

        self._algorithm = algorithm.DualSVMAlgorithm(
            kernel,
            y,
            balanced=self._balanced,
            grid_search_folds=self._grid_search_folds,
            c_range=self._c_range,
            n_threads=self._n_threads)

        self._validation = validation.RepeatedHoldOut(
            self._algorithm,
            n_iterations=self._n_iterations,
            test_size=self._test_size)
        classifier, best_params, results = self._validation.validate(
            y, n_threads=self._n_threads, splits_indices=self._splits_indices)
        classifier_dir = path.join(self._output_dir, 'classifier')
        if not path.exists(classifier_dir):
            os.makedirs(classifier_dir)

        self._algorithm.save_classifier(classifier, classifier_dir)
        self._algorithm.save_parameters(best_params, classifier_dir)
        weights = self._algorithm.save_weights(classifier, x, classifier_dir)

        self._input.save_weights_as_nifti(weights, classifier_dir)

        self._validation.save_results(self._output_dir)
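
The print statements in this example describe how the sign of the voxel weights relates to the two labels. For a linear (precomputed) kernel, the primal weight vector can be recovered from the dual coefficients as w = sum_i alpha_i y_i x_i. The sketch below shows this for a scikit-learn SVC; it only illustrates the mechanism and is not the pipeline's save_weights implementation.

from sklearn.svm import SVC

def primal_weights_from_dual(clf, x):
    # clf is assumed to be an SVC fitted with kernel='precomputed' on x @ x.T.
    # dual_coef_ has shape (1, n_support) for a binary problem and already
    # contains alpha_i * y_i; support_ indexes the rows of x that are support vectors.
    return (clf.dual_coef_ @ x[clf.support_]).ravel()  # shape (n_voxels,)

In scikit-learn's convention a positive decision-function value corresponds to the second of the two sorted labels, which matches the sign interpretation printed above.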
Example #3
    def run(self):

        x = self._input.get_x()
        y = self._input.get_y()

        self._algorithm = algorithm.RandomForest(
            x,
            y,
            balanced=self._balanced,
            grid_search_folds=self._grid_search_folds,
            n_estimators_range=self._n_estimators_range,
            max_depth_range=self._max_depth_range,
            min_samples_split_range=self._min_samples_split_range,
            max_features_range=self._max_features_range,
            n_threads=self._n_threads)

        self._validation = validation.RepeatedHoldOut(
            self._algorithm,
            n_iterations=self._n_iterations,
            test_size=self._test_size)
        classifier, best_params, results = self._validation.validate(
            y, n_threads=self._n_threads, splits_indices=self._splits_indices)

        classifier_dir = path.join(self._output_dir, 'classifier')
        if not path.exists(classifier_dir):
            os.makedirs(classifier_dir)

        self._algorithm.save_classifier(classifier, classifier_dir)
        self._algorithm.save_parameters(best_params, classifier_dir)
        weights = self._algorithm.save_weights(classifier, classifier_dir)
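
For the random-forest variant, the grid search over n_estimators_range, max_depth_range, min_samples_split_range and max_features_range could be written with plain scikit-learn roughly as below. The candidate values shown are placeholders standing in for whatever those *_range attributes contain.

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

def fit_random_forest(x, y, grid_search_folds=10, n_threads=1):
    # Placeholder grids standing in for the *_range attributes of the example.
    param_grid = {
        'n_estimators': [100, 200, 400],
        'max_depth': [None, 5, 10],
        'min_samples_split': [2, 4, 8],
        'max_features': ['sqrt', 0.25, 0.5],
    }
    search = GridSearchCV(
        RandomForestClassifier(class_weight='balanced', n_jobs=n_threads),
        param_grid,
        cv=grid_search_folds,
        scoring='balanced_accuracy')
    search.fit(x, y)
    return search.best_estimator_, search.best_params_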
Example #4
    def run(self):

        x = self._input.get_x()
        y = self._input.get_y()

        # Feature selection on the full dataset (univariate ANOVA F-test, random-forest
        # importances, PCA or RFE). Note that self._top_k is interpreted differently per
        # method: a percentile for ANOVA, an importance threshold for RF, a number of
        # components for PCA and a percentage of voxels for RFE.
        if self._feature_selection_method == 'ANOVA':
            selector = SelectPercentile(f_classif, percentile=self._top_k)
            selector.fit(x, y)
        elif self._feature_selection_method == 'RF':
            clf = RandomForestClassifier(n_estimators=250,
                                         random_state=0,
                                         n_jobs=-1)
            clf.fit(x, y)
            selector = SelectFromModel(clf, threshold=self._top_k)
            selector.fit(x, y)
        elif self._feature_selection_method == 'PCA':
            selector = PCA(n_components=self._top_k)
            selector.fit(x)
        elif self._feature_selection_method == 'RFE':
            svc = SVR(kernel="linear")
            selector = RFE(estimator=svc,
                           n_features_to_select=int(0.01 * self._top_k *
                                                    x.shape[1]),
                           step=0.5)
            selector.fit(x, y)
        else:
            raise NotImplementedError(
                'Feature selection method %s has not been implemented'
                % self._feature_selection_method)

        x_after_fs = selector.transform(x)

        print('In total, there are %d voxels in this task' % x.shape[1])
        print('The feature selection threshold is %f' % self._top_k)
        print('We select the %d most discriminative voxels' % x_after_fs.shape[1])

        kernel = utils.gram_matrix_linear(x_after_fs)
        if y[0] == 0:
            print('The first label of the diagnosis is 0: voxels with negative coefficients in the weight image are more likely to be classified as the first label in the diagnosis TSV')
        else:
            print('The first label of the diagnosis is 1: voxels with positive coefficients in the weight image are more likely to be classified as the second label in the diagnosis TSV')

        self._algorithm = algorithm.DualSVMAlgorithm(
            kernel,
            y,
            balanced=self._balanced,
            grid_search_folds=self._grid_search_folds,
            c_range=self._c_range,
            n_threads=self._n_threads)

        self._validation = validation.RepeatedHoldOut(
            self._algorithm,
            n_iterations=self._n_iterations,
            test_size=self._test_size)
        classifier, best_params, results = self._validation.validate(
            y, n_threads=self._n_threads, splits_indices=self._splits_indices)
        classifier_dir = path.join(self._output_dir, 'classifier')
        if not path.exists(classifier_dir):
            os.makedirs(classifier_dir)

        self._algorithm.save_classifier(classifier, classifier_dir)
        self._algorithm.save_parameters(best_params, classifier_dir)
        weights = self._algorithm.save_weights(classifier, x, classifier_dir)

        self._input.save_weights_as_nifti(weights, classifier_dir)

        self._validation.save_results(self._output_dir)
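
The ANOVA branch above is the simplest of the four: keep the top_k percent of voxels ranked by univariate F-score, then build a linear Gram matrix on the reduced data for the dual SVM. A compact standalone sketch, assuming utils.gram_matrix_linear is equivalent to x_fs @ x_fs.T:

from sklearn.feature_selection import SelectPercentile, f_classif

def anova_selected_kernel(x, y, top_k=50):
    # Keep the top_k percent of voxels ranked by the ANOVA F-statistic.
    selector = SelectPercentile(f_classif, percentile=top_k)
    x_fs = selector.fit_transform(x, y)   # shape (n_subjects, n_selected_voxels)
    # Linear kernel between subjects, computed on the selected voxels only.
    kernel = x_fs @ x_fs.T
    return kernel, selector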