コード例 #1
0
    def run(self):
        """Train a dual SVM with repeated hold-out validation and save it.

        The x data, the labels and the kernel are read from ``self._input``.
        A ``DualSVMAlgorithm`` is validated with ``RepeatedHoldOut``; the
        resulting classifier, its best parameters and its weights are written
        to ``<output_dir>/classifier`` (the weights are additionally exported
        through ``save_weights_as_nifti``), and the validation results are
        saved in ``self._output_dir``.
        """
        features = self._input.get_x()
        labels = self._input.get_y()
        kernel = self._input.get_kernel()

        # Options of the SVM, all configured on the instance beforehand.
        svm_options = dict(balanced=self._balanced,
                           grid_search_folds=self._grid_search_folds,
                           c_range=self._c_range,
                           n_threads=self._n_threads)
        self._algorithm = algorithm.DualSVMAlgorithm(kernel, labels,
                                                     **svm_options)

        # Repeated hold-out validation wrapped around that algorithm.
        self._validation = validation.RepeatedHoldOut(
            self._algorithm,
            n_iterations=self._n_iterations,
            test_size=self._test_size)
        classifier, best_params, _results = self._validation.validate(
            labels,
            n_threads=self._n_threads,
            splits_indices=self._splits_indices)

        # Everything describing the classifier lives in its own sub-folder.
        target_dir = path.join(self._output_dir, 'classifier')
        if not path.exists(target_dir):
            os.makedirs(target_dir)

        self._algorithm.save_classifier(classifier, target_dir)
        self._algorithm.save_parameters(best_params, target_dir)
        weights = self._algorithm.save_weights(classifier, features,
                                               target_dir)
        self._input.save_weights_as_nifti(weights, target_dir)

        self._validation.save_results(self._output_dir)
コード例 #2
0
ファイル: mlworkflow_dwi.py プロジェクト: ngohgia/AD-ML
    def run(self):
        """Train a dual SVM with repeated hold-out validation and save it.

        The x data, the labels and the kernel are read from ``self._input``.
        A message explaining how to read the sign of the weight image is
        printed, then a ``DualSVMAlgorithm`` is validated with
        ``RepeatedHoldOut``. The classifier, its best parameters and its
        weights are written to ``<output_dir>/classifier`` (the weights are
        also exported through ``save_weights_as_nifti``) and the validation
        results are saved in ``self._output_dir``.
        """
        x = self._input.get_x()
        y = self._input.get_y()
        kernel = self._input.get_kernel()

        # A parenthesised single-argument print behaves the same as a
        # Python 2 print statement and is also valid on Python 3.
        if y[0] == 0:
            print('The first label of diagnose is 0, it means the voxels with negative coefficients in the weight image are more likely to be classified as the first label in the diagnose tsv')
        else:
            print('The first label of diagnose is 1, it means the voxels with positive coefficients in the weight image are more likely to be classified as the second label in the diagnose tsv')

        self._algorithm = algorithm.DualSVMAlgorithm(
            kernel,
            y,
            balanced=self._balanced,
            grid_search_folds=self._grid_search_folds,
            c_range=self._c_range,
            n_threads=self._n_threads)

        self._validation = validation.RepeatedHoldOut(
            self._algorithm,
            n_iterations=self._n_iterations,
            test_size=self._test_size)
        # The third return value (the validation results) is not needed
        # here: save_results() below writes them from the validation object.
        classifier, best_params, _ = self._validation.validate(
            y, n_threads=self._n_threads, splits_indices=self._splits_indices)

        classifier_dir = path.join(self._output_dir, 'classifier')
        if not path.exists(classifier_dir):
            os.makedirs(classifier_dir)

        self._algorithm.save_classifier(classifier, classifier_dir)
        self._algorithm.save_parameters(best_params, classifier_dir)
        weights = self._algorithm.save_weights(classifier, x, classifier_dir)

        self._input.save_weights_as_nifti(weights, classifier_dir)

        self._validation.save_results(self._output_dir)
コード例 #3
0
ファイル: model.py プロジェクト: basrie/AD-DL
    def run(self):
        """Run K-fold cross-validation of a dual SVM and save every output.

        The x data, the labels and the kernel are read from ``self._input``.
        A ``DualSVMAlgorithm`` is validated with ``KFoldCV``; the classifier,
        its weights (also exported through ``save_weights_as_nifti``) and its
        best parameters go to ``<output_dir>/classifier`` and the validation
        results to ``self._output_dir``. Afterwards, for each fold, the
        classifier returned by ``apply_best_parameters_each_split`` and its
        training indices are stored in ``<output_dir>/classifier/fold_<n>``.
        """
        # Inputs were computed beforehand; they are only fetched here.
        features = self._input.get_x()
        labels = self._input.get_y()
        kernel = self._input.get_kernel()

        # Dual SVM; see algorithm.py for the meaning of each option.
        svm_options = dict(balanced=self._balanced,
                           grid_search_folds=self._grid_search_folds,
                           c_range=self._c_range,
                           n_threads=self._n_threads)
        self._algorithm = algorithm.DualSVMAlgorithm(kernel, labels,
                                                     **svm_options)

        # K-fold cross-validation around that algorithm.
        self._validation = KFoldCV(self._algorithm)
        cv_options = dict(n_threads=self._n_threads,
                          splits_indices=self._splits_indices,
                          n_folds=self._n_folds)
        classifier, best_params, results = self._validation.validate(
            labels, **cv_options)

        # Folder receiving the classifier-level outputs.
        root_dir = path.join(self._output_dir, 'classifier')
        if not path.exists(root_dir):
            os.makedirs(root_dir)

        self._algorithm.save_classifier(classifier, root_dir)
        weights = self._algorithm.save_weights(classifier, features, root_dir)
        self._algorithm.save_parameters(best_params, root_dir)
        self._validation.save_results(self._output_dir)
        self._input.save_weights_as_nifti(weights, root_dir)

        # One extra model per split, each stored in its own fold folder.
        for n_fold in range(self._n_folds):
            svc, _, train_index = apply_best_parameters_each_split(
                kernel, features, labels, results, self._balanced, n_fold,
                self._diagnoses_tsv, self._output_dir)

            fold_dir = path.join(root_dir, 'fold_' + str(n_fold))
            if not path.exists(fold_dir):
                os.makedirs(fold_dir)
            self._algorithm.save_classifier(svc, fold_dir)

            # Keep the training indices so the split can be reconstructed.
            index_file = path.join(fold_dir, 'train_index.txt')
            np.savetxt(index_file, train_index)
コード例 #4
0
ファイル: workflow.py プロジェクト: ngohgia/AD-ML
    def run(self):
        """Compute a learning curve for a dual SVM and save each point.

        The x data, the labels and the kernel are read from ``self._input``.
        A ``DualSVMAlgorithm`` is validated with
        ``BalancedLearningCurveRepeatedHoldOut``, whose ``validate`` call
        returns classifiers and best parameters indexed by learning point.
        For each of the ``self._n_learning_points`` points, the classifier,
        its parameters and its weights (also exported through
        ``save_weights_as_nifti``) are written to
        ``<output_dir>/learning_split-<i>/classifier``. The validation
        results are finally saved in ``self._output_dir``.
        """
        x = self._input.get_x()
        y = self._input.get_y()
        kernel = self._input.get_kernel()

        self._algorithm = algorithm.DualSVMAlgorithm(
            kernel,
            y,
            balanced=self._balanced,
            grid_search_folds=self._grid_search_folds,
            c_range=self._c_range,
            n_threads=self._n_threads)

        self._validation = BalancedLearningCurveRepeatedHoldOut(
            self._algorithm,
            n_iterations=self._n_iterations,
            test_size=self._test_size,
            n_learning_points=self._n_learning_points)

        # The third return value (the validation results) is not needed
        # here: save_results() below writes them from the validation object.
        classifier, best_params, _ = self._validation.validate(
            y, splits_indices=self._splits_indices, n_threads=self._n_threads)

        for learning_point in range(self._n_learning_points):

            learning_point_dir = path.join(
                self._output_dir, 'learning_split-' + str(learning_point))

            classifier_dir = path.join(learning_point_dir, 'classifier')
            if not path.exists(classifier_dir):
                os.makedirs(classifier_dir)

            # A parenthesised single-argument print behaves the same as a
            # Python 2 print statement and is also valid on Python 3.
            print(classifier_dir)

            self._algorithm.save_classifier(classifier[learning_point],
                                            classifier_dir)
            self._algorithm.save_parameters(best_params[learning_point],
                                            classifier_dir)
            weights = self._algorithm.save_weights(classifier[learning_point],
                                                   x, classifier_dir)

            self._input.save_weights_as_nifti(weights, classifier_dir)

        self._validation.save_results(self._output_dir)
コード例 #5
0
    def run(self):
        """Run K-fold cross-validation of a dual SVM and save its outputs.

        The x data, the labels and the kernel are read from ``self._input``.
        A ``DualSVMAlgorithm`` is validated with ``validation.KFoldCV``; the
        classifier, its weights and its best parameters are written to
        ``<output_dir>/classifier`` and the validation results are saved in
        ``self._output_dir``.
        """
        # Inputs were computed beforehand; they are only fetched here.
        features = self._input.get_x()
        labels = self._input.get_y()
        kernel = self._input.get_kernel()

        # Dual SVM; see algorithm.py for the meaning of each option.
        svm_options = dict(balanced=self._balanced,
                           grid_search_folds=self._grid_search_folds,
                           c_range=self._c_range,
                           n_threads=self._n_threads)
        self._algorithm = algorithm.DualSVMAlgorithm(kernel, labels,
                                                     **svm_options)

        # K-fold cross-validation around that algorithm.
        self._validation = validation.KFoldCV(self._algorithm)
        classifier, best_params, _results = self._validation.validate(
            labels,
            n_folds=self._n_folds,
            n_threads=self._n_threads)

        # Folder receiving the classifier-level outputs.
        target_dir = path.join(self._output_dir, 'classifier')
        if not path.exists(target_dir):
            os.makedirs(target_dir)

        # Outputs selected for saving.
        self._algorithm.save_classifier(classifier, target_dir)
        self._algorithm.save_weights(classifier, features, target_dir)
        self._algorithm.save_parameters(best_params, target_dir)

        self._validation.save_results(self._output_dir)
コード例 #6
0
ファイル: mlworkflow_dwi.py プロジェクト: ngohgia/AD-ML
    def run(self):
        """Select features, then train, validate and save a dual SVM.

        A selector chosen by ``self._feature_selection_method`` ('ANOVA',
        'RF', 'PCA' or 'RFE') is fitted on all the data read from
        ``self._input``. The transformed data is turned into a kernel with
        ``utils.gram_matrix_linear`` and a ``DualSVMAlgorithm`` is validated
        with ``RepeatedHoldOut``. The classifier, its best parameters and its
        weights are written to ``<output_dir>/classifier`` (the weights are
        also exported through ``save_weights_as_nifti``) and the validation
        results are saved in ``self._output_dir``.

        Raises:
            ValueError: if ``self._feature_selection_method`` is not one of
                the implemented methods. Previously this case printed a
                message and then failed on the unbound ``selector`` name.
        """
        x = self._input.get_x()
        y = self._input.get_y()

        # Feature selection is fitted on all the data.
        method = self._feature_selection_method
        if method == 'ANOVA':
            # Univariate selection with the ANOVA F-test.
            selector = SelectPercentile(f_classif, percentile=self._top_k)
            selector.fit(x, y)
        elif method == 'RF':
            clf = RandomForestClassifier(n_estimators=250,
                                         random_state=0,
                                         n_jobs=-1)
            # No separate clf.fit() here: with the default prefit=False,
            # SelectFromModel.fit() fits its own clone of the estimator, so
            # fitting clf beforehand only repeated the same work.
            selector = SelectFromModel(clf, threshold=self._top_k)
            selector.fit(x, y)
        elif method == 'PCA':
            selector = PCA(n_components=self._top_k)
            selector.fit(x)
        elif method == 'RFE':
            svc = SVR(kernel="linear")
            # self._top_k is used as a percentage of the number of columns.
            selector = RFE(estimator=svc,
                           n_features_to_select=int(0.01 * self._top_k *
                                                    x.shape[1]),
                           step=0.5)
            selector.fit(x, y)
        else:
            raise ValueError(
                'Feature selection method %r has not been implemented' %
                (method,))

        x_after_fs = selector.transform(x)

        # Parenthesised single-argument prints behave the same as Python 2
        # print statements and are also valid on Python 3.
        print('In total, there are %d voxels in this task' % x.shape[1])
        print('The threshold is %f' % (self._top_k))
        print('We select the %d most discriminative voxels' %
              x_after_fs.shape[1])

        kernel = utils.gram_matrix_linear(x_after_fs)
        if y[0] == 0:
            print('The first label of diagnose is 0, it means the voxels with negative coefficients in the weight image are more likely to be classified as the first label in the diagnose tsv')
        else:
            print('The first label of diagnose is 1, it means the voxels with positive coefficients in the weight image are more likely to be classified as the second label in the diagnose tsv')

        self._algorithm = algorithm.DualSVMAlgorithm(
            kernel,
            y,
            balanced=self._balanced,
            grid_search_folds=self._grid_search_folds,
            c_range=self._c_range,
            n_threads=self._n_threads)

        self._validation = validation.RepeatedHoldOut(
            self._algorithm,
            n_iterations=self._n_iterations,
            test_size=self._test_size)
        # The third return value (the validation results) is not needed
        # here: save_results() below writes them from the validation object.
        classifier, best_params, _ = self._validation.validate(
            y, n_threads=self._n_threads, splits_indices=self._splits_indices)

        classifier_dir = path.join(self._output_dir, 'classifier')
        if not path.exists(classifier_dir):
            os.makedirs(classifier_dir)

        self._algorithm.save_classifier(classifier, classifier_dir)
        self._algorithm.save_parameters(best_params, classifier_dir)
        # NOTE(review): the weights are computed from the full x, not from
        # x_after_fs, although the kernel was built from x_after_fs --
        # presumably so they can be exported as an image; confirm.
        weights = self._algorithm.save_weights(classifier, x, classifier_dir)

        self._input.save_weights_as_nifti(weights, classifier_dir)

        self._validation.save_results(self._output_dir)