def run(self):
    """Train a DualSVMAlgorithm, validate it with RepeatedHoldOut, save outputs.

    Features, labels and kernel are read from ``self._input``. The algorithm
    and validation objects are kept on ``self._algorithm`` and
    ``self._validation``. The classifier, its best parameters and its weights
    are written to ``<output_dir>/classifier``; the validation results are
    written to ``<output_dir>``.
    """
    features = self._input.get_x()
    labels = self._input.get_y()
    kernel = self._input.get_kernel()

    # Options forwarded unchanged from the attributes set on this object.
    svm_options = dict(
        balanced=self._balanced,
        grid_search_folds=self._grid_search_folds,
        c_range=self._c_range,
        n_threads=self._n_threads,
    )
    self._algorithm = algorithm.DualSVMAlgorithm(kernel, labels, **svm_options)

    self._validation = validation.RepeatedHoldOut(
        self._algorithm,
        n_iterations=self._n_iterations,
        test_size=self._test_size,
    )
    # The third returned value (the results) is not used in this method.
    fitted_model, tuned_params, _results = self._validation.validate(
        labels,
        n_threads=self._n_threads,
        splits_indices=self._splits_indices,
    )

    # Everything describing the classifier goes into one sub-directory.
    model_dir = path.join(self._output_dir, 'classifier')
    if not path.exists(model_dir):
        os.makedirs(model_dir)

    self._algorithm.save_classifier(fitted_model, model_dir)
    self._algorithm.save_parameters(tuned_params, model_dir)
    weight_values = self._algorithm.save_weights(
        fitted_model, features, model_dir)
    self._input.save_weights_as_nifti(weight_values, model_dir)

    self._validation.save_results(self._output_dir)
def run(self):
    """Train a DualSVMAlgorithm, validate it with RepeatedHoldOut, save outputs.

    Before training, a message is printed that explains how to read the sign
    of the coefficients in the saved weight image, depending on whether the
    first label is 0 or not.

    The classifier, its best parameters and its weights are written to
    ``<output_dir>/classifier``; the validation results are written to
    ``<output_dir>``.
    """
    x = self._input.get_x()
    y = self._input.get_y()
    # NOTE(review): presumably the precomputed sample-by-sample kernel (Gram)
    # matrix consumed by the dual SVM -- confirm against the input class.
    kernel = self._input.get_kernel()

    # print(...) with a single argument behaves the same on Python 2 and 3,
    # whereas the bare print statement is a SyntaxError on Python 3.
    if y[0] == 0:
        print('The first label of diagnose is 0, it means the voxels with '
              'negative coefficients in the weight image are more likely to be '
              'classified as the first label in the diagnose tsv')
    else:
        print('The first label of diagnose is 1, it means the voxels with '
              'positive coefficients in the weight image are more likely to be '
              'classified as the second label in the diagnose tsv')

    self._algorithm = algorithm.DualSVMAlgorithm(
        kernel,
        y,
        balanced=self._balanced,
        grid_search_folds=self._grid_search_folds,
        c_range=self._c_range,
        n_threads=self._n_threads)

    self._validation = validation.RepeatedHoldOut(
        self._algorithm,
        n_iterations=self._n_iterations,
        test_size=self._test_size)
    # ``results`` is not used below; save_results() is called on the
    # validation object instead.
    classifier, best_params, results = self._validation.validate(
        y,
        n_threads=self._n_threads,
        splits_indices=self._splits_indices)

    classifier_dir = path.join(self._output_dir, 'classifier')
    if not path.exists(classifier_dir):
        os.makedirs(classifier_dir)

    self._algorithm.save_classifier(classifier, classifier_dir)
    self._algorithm.save_parameters(best_params, classifier_dir)
    weights = self._algorithm.save_weights(classifier, x, classifier_dir)
    self._input.save_weights_as_nifti(weights, classifier_dir)

    self._validation.save_results(self._output_dir)
def run(self):
    """Train a DualSVMAlgorithm, validate it with KFoldCV, save outputs.

    Besides the classifier, weights, parameters and results of the whole
    run, one classifier per fold is saved in
    ``<output_dir>/classifier/fold_<n>`` together with the training indices
    of that fold.
    """
    # Inputs were computed beforehand by the input object.
    features = self._input.get_x()
    labels = self._input.get_y()
    kernel = self._input.get_kernel()

    # Algorithm choice: dual SVM. See algorithm.py for what each option means;
    # the values themselves were set on this object earlier.
    self._algorithm = algorithm.DualSVMAlgorithm(
        kernel,
        labels,
        balanced=self._balanced,
        grid_search_folds=self._grid_search_folds,
        c_range=self._c_range,
        n_threads=self._n_threads,
    )

    # Validation choice: K-fold cross-validation.
    self._validation = KFoldCV(self._algorithm)
    fitted_model, tuned_params, cv_results = self._validation.validate(
        labels,
        n_threads=self._n_threads,
        splits_indices=self._splits_indices,
        n_folds=self._n_folds,
    )

    # Directory that receives the outputs of the whole run.
    model_dir = path.join(self._output_dir, 'classifier')
    if not path.exists(model_dir):
        os.makedirs(model_dir)

    # Outputs kept for the whole run.
    self._algorithm.save_classifier(fitted_model, model_dir)
    weight_values = self._algorithm.save_weights(
        fitted_model, features, model_dir)
    self._algorithm.save_parameters(tuned_params, model_dir)
    self._validation.save_results(self._output_dir)
    self._input.save_weights_as_nifti(weight_values, model_dir)

    # Also save one model per split, fitted with the training data only.
    for fold in range(self._n_folds):
        fold_svc, _, fold_train_index = apply_best_parameters_each_split(
            kernel, features, labels, cv_results, self._balanced, fold,
            self._diagnoses_tsv, self._output_dir)

        fold_dir = path.join(
            self._output_dir, 'classifier', 'fold_' + str(fold))
        if not path.exists(fold_dir):
            os.makedirs(fold_dir)

        self._algorithm.save_classifier(fold_svc, fold_dir)
        # Keep the training indices for reconstruction purposes.
        np.savetxt(path.join(fold_dir, 'train_index.txt'), fold_train_index)
def run(self):
    """Train a DualSVMAlgorithm and validate it along a learning curve.

    Validation uses BalancedLearningCurveRepeatedHoldOut with
    ``self._n_learning_points`` learning points. ``validate`` returns the
    classifiers and best parameters indexed by learning point. For each
    point the classifier, parameters and weights are written to
    ``<output_dir>/learning_split-<point>/classifier``, and the path of that
    directory is printed. The validation results are written to
    ``<output_dir>``.
    """
    x = self._input.get_x()
    y = self._input.get_y()
    kernel = self._input.get_kernel()

    self._algorithm = algorithm.DualSVMAlgorithm(
        kernel,
        y,
        balanced=self._balanced,
        grid_search_folds=self._grid_search_folds,
        c_range=self._c_range,
        n_threads=self._n_threads)

    self._validation = BalancedLearningCurveRepeatedHoldOut(
        self._algorithm,
        n_iterations=self._n_iterations,
        test_size=self._test_size,
        n_learning_points=self._n_learning_points)

    # ``results`` is not used below; save_results() is called on the
    # validation object instead.
    classifier, best_params, results = self._validation.validate(
        y,
        splits_indices=self._splits_indices,
        n_threads=self._n_threads)

    for learning_point in range(self._n_learning_points):
        learning_point_dir = path.join(
            self._output_dir, 'learning_split-' + str(learning_point))
        classifier_dir = path.join(learning_point_dir, 'classifier')
        if not path.exists(classifier_dir):
            os.makedirs(classifier_dir)

        # Call form of print: same output on Python 2, and valid on Python 3
        # (the bare print statement is a SyntaxError there).
        print(classifier_dir)

        self._algorithm.save_classifier(
            classifier[learning_point], classifier_dir)
        self._algorithm.save_parameters(
            best_params[learning_point], classifier_dir)
        weights = self._algorithm.save_weights(
            classifier[learning_point], x, classifier_dir)
        self._input.save_weights_as_nifti(weights, classifier_dir)

    self._validation.save_results(self._output_dir)
def run(self):
    """Train a DualSVMAlgorithm, validate it with validation.KFoldCV, save outputs.

    The classifier, its weights and its best parameters are written to
    ``<output_dir>/classifier``; the validation results are written to
    ``<output_dir>``.
    """
    # Inputs were computed beforehand by the input object.
    features = self._input.get_x()
    labels = self._input.get_y()
    kernel = self._input.get_kernel()

    # Algorithm choice: dual SVM. See algorithm.py for what each option means;
    # the values themselves were set on this object earlier.
    svm_options = dict(
        balanced=self._balanced,
        grid_search_folds=self._grid_search_folds,
        c_range=self._c_range,
        n_threads=self._n_threads,
    )
    self._algorithm = algorithm.DualSVMAlgorithm(kernel, labels, **svm_options)

    # Validation choice: K-fold cross-validation.
    self._validation = validation.KFoldCV(self._algorithm)
    fitted_model, tuned_params, _results = self._validation.validate(
        labels,
        n_folds=self._n_folds,
        n_threads=self._n_threads,
    )

    # Directory where the classifier outputs are stored.
    model_dir = path.join(self._output_dir, 'classifier')
    if not path.exists(model_dir):
        os.makedirs(model_dir)

    # What gets saved; the value returned by save_weights is not needed here.
    self._algorithm.save_classifier(fitted_model, model_dir)
    self._algorithm.save_weights(fitted_model, features, model_dir)
    self._algorithm.save_parameters(tuned_params, model_dir)
    self._validation.save_results(self._output_dir)
def run(self):
    """Select features, then train and validate a DualSVMAlgorithm on them.

    A selector chosen by ``self._feature_selection_method`` ('ANOVA', 'RF',
    'PCA' or 'RFE') is fitted on the data and applied to it. A linear Gram
    matrix of the transformed features (``utils.gram_matrix_linear``) is
    used as the kernel of the SVM, which is validated with RepeatedHoldOut.

    The classifier, its best parameters and its weights are written to
    ``<output_dir>/classifier``; the validation results are written to
    ``<output_dir>``.

    Raises:
        NotImplementedError: if ``self._feature_selection_method`` is not
            one of the supported methods.
    """
    x = self._input.get_x()
    y = self._input.get_y()

    # The selector is fitted on the full x (and y), before validation is run.
    method = self._feature_selection_method
    if method == 'ANOVA':
        # Univariate selection: keep a percentile of features by ANOVA F-test.
        selector = SelectPercentile(f_classif, percentile=self._top_k)
        selector.fit(x, y)
    elif method == 'RF':
        clf = RandomForestClassifier(
            n_estimators=250, random_state=0, n_jobs=-1)
        clf.fit(x, y)
        selector = SelectFromModel(clf, threshold=self._top_k)
        selector.fit(x, y)
    elif method == 'PCA':
        selector = PCA(n_components=self._top_k)
        selector.fit(x)
    elif method == 'RFE':
        svc = SVR(kernel="linear")
        # Here self._top_k is used as a percentage of the columns of x.
        selector = RFE(
            estimator=svc,
            n_features_to_select=int(0.01 * self._top_k * x.shape[1]),
            step=0.5)
        selector.fit(x, y)
    else:
        # Previously this branch only printed a message and execution went
        # on to use an unbound ``selector``; fail explicitly instead.
        raise NotImplementedError(
            'Method has not been implemented: %r' % (method,))

    x_after_fs = selector.transform(x)

    # print(...) with a single argument behaves the same on Python 2 and 3.
    print('In total, there are %d voxels in this task' % x.shape[1])
    print('The threshold is %f' % (self._top_k))
    print('We select the %d most discriminative voxels' % x_after_fs.shape[1])

    kernel = utils.gram_matrix_linear(x_after_fs)

    if y[0] == 0:
        print('The first label of diagnose is 0, it means the voxels with '
              'negative coefficients in the weight image are more likely to be '
              'classified as the first label in the diagnose tsv')
    else:
        print('The first label of diagnose is 1, it means the voxels with '
              'positive coefficients in the weight image are more likely to be '
              'classified as the second label in the diagnose tsv')

    self._algorithm = algorithm.DualSVMAlgorithm(
        kernel,
        y,
        balanced=self._balanced,
        grid_search_folds=self._grid_search_folds,
        c_range=self._c_range,
        n_threads=self._n_threads)

    self._validation = validation.RepeatedHoldOut(
        self._algorithm,
        n_iterations=self._n_iterations,
        test_size=self._test_size)
    # ``results`` is not used below; save_results() is called on the
    # validation object instead.
    classifier, best_params, results = self._validation.validate(
        y,
        n_threads=self._n_threads,
        splits_indices=self._splits_indices)

    classifier_dir = path.join(self._output_dir, 'classifier')
    if not path.exists(classifier_dir):
        os.makedirs(classifier_dir)

    self._algorithm.save_classifier(classifier, classifier_dir)
    self._algorithm.save_parameters(best_params, classifier_dir)
    # NOTE(review): the weights are computed from the full ``x`` although the
    # kernel was built from ``x_after_fs`` -- confirm this is intended.
    weights = self._algorithm.save_weights(classifier, x, classifier_dir)
    self._input.save_weights_as_nifti(weights, classifier_dir)

    self._validation.save_results(self._output_dir)