def crossvalidate_linearsvm(self, folds=None, accuracy_weight=None):
    """Cross-validate a linear SVM and record per-fold and average accuracy.

    For every fold index in ``range(self.folds)`` the train/test indices are
    read from ``self.cv.train_dict`` / ``self.cv.test_dict``, turned into
    matrices by ``self.assign_groups``, normalized with ``simple_normalize``,
    fit with ``self.fit_linearsvc`` and scored with ``self.test_accuracy``.

    Args:
        folds: Forwarded to ``self.setup_crossvalidation``.
        accuracy_weight: Any falsy value records one accuracy per fold.
            ``"group_trials"`` records the fold accuracy once per entry of
            that fold's ``testY``, so folds with more test trials count more
            in the running and final averages.

    Raises:
        ValueError: If ``accuracy_weight`` is truthy but not
            ``"group_trials"``. Previously such a value recorded nothing and
            the first fold ended in a ZeroDivisionError after a full fit.

    Side effects:
        Sets ``self.fold_accuracies`` and ``self.average_accuracy`` and
        prints progress messages.
    """
    # Fail fast, before any cross-validation setup or model fitting happens.
    if accuracy_weight and accuracy_weight != "group_trials":
        raise ValueError(
            "accuracy_weight must be falsy or 'group_trials', got %r" % (accuracy_weight,)
        )

    self.setup_crossvalidation(folds=folds)
    self.fold_accuracies = []

    for i in range(self.folds):
        train_indices = self.cv.train_dict[i]
        test_indices = self.cv.test_dict[i]
        trainX, trainY, testX, testY = self.assign_groups(train_indices, test_indices)

        print("Normalizing training data...")
        trainX = simple_normalize(trainX)
        print("Normalizing testing data...")
        testX = simple_normalize(testX)

        # NOTE(review): the fit_linearsvc defined in this file applies
        # simple_normalize to its input as well, so trainX appears to be
        # normalized twice -- confirm that this is intended.
        curr_clf = self.fit_linearsvc(trainX, trainY)
        curr_acc = self.test_accuracy(curr_clf, testX, testY)

        if not accuracy_weight:
            self.fold_accuracies.append(curr_acc)
        else:
            # "group_trials": one copy of the fold accuracy per test trial.
            self.fold_accuracies.extend(curr_acc for _ in testY)

        current_weighted_accuracy = sum(self.fold_accuracies) / len(self.fold_accuracies)
        # Single pre-formatted string: same text as the Python 2 statement
        # ``print a, b`` while remaining valid Python 3 syntax.
        print("%s %s" % ("Current weighted accuracy: ", current_weighted_accuracy))

    self.average_accuracy = sum(self.fold_accuracies) / len(self.fold_accuracies)
    print("%s %s" % ("Average accuracy: ", self.average_accuracy))
def output_maps(self, nifti_filepath, threshold=0.01, two_tail=True, threshold_type="pvalue"):
    """Fit a linear SVM on all data and write its thresholded coefficients out.

    ``self.data.X`` is normalized with ``simple_normalize`` and a classifier
    is fit through ``self.fit_linearsvc``. The first row of its ``coef_`` is
    stored on ``self.coefs``, thresholded, written with
    ``self.nifti.output_nifti_thrumask``, converted with
    ``self.nifti.convert_to_afni`` and finally passed to
    ``self.nifti.adwarp_to_template_talairach``.

    Args:
        nifti_filepath: Destination path of the NIfTI file. Its last four
            characters are stripped to form the AFNI prefix (presumably a
            ``.nii`` extension -- confirm against callers).
        threshold: Forwarded to the selected thresholding function.
        two_tail: Forwarded to the selected thresholding function.
        threshold_type: ``"pvalue"`` selects ``threshold_by_pvalue``;
            ``"raw_percentage"`` selects ``threshold_by_rawrange``.

    Raises:
        ValueError: If ``threshold_type`` is not one of the two supported
            values. Previously this surfaced as an UnboundLocalError only
            after the data had been normalized and the classifier fit.
    """
    # Validate before the expensive normalization and fit.
    if threshold_type not in ("pvalue", "raw_percentage"):
        raise ValueError(
            "threshold_type must be 'pvalue' or 'raw_percentage', got %r" % (threshold_type,)
        )

    print("Normalizing X matrix...")
    Xnorm = simple_normalize(self.data.X)

    print("Classifying with linear svm...")
    clf = self.fit_linearsvc(Xnorm, self.data.Y)

    print("Thresholding and dumping coefficients to file...")
    self.coefs = clf.coef_[0]

    if threshold_type == "pvalue":
        thresholded_coefs = threshold_by_pvalue(self.coefs, threshold, two_tail=two_tail)
    else:
        thresholded_coefs = threshold_by_rawrange(self.coefs, threshold, two_tail=two_tail)

    self.nifti.output_nifti_thrumask(
        thresholded_coefs,
        self.data.trial_mask,
        self.data.mask_shape,
        len(self.data.selected_trs),
        self.data.raw_affine,
        nifti_filepath,
    )

    # Path with the trailing four characters removed, reused for both calls.
    afni_prefix = nifti_filepath[:-4]
    self.nifti.convert_to_afni(nifti_filepath, afni_prefix)
    self.nifti.adwarp_to_template_talairach(
        afni_prefix + "+orig",
        None,
        self.data.talairach_template_path,
        self.data.dxyz,
        overwrite=True,
    )
def test_graphnet(self, X, Y, coefs):
    """Score every coefficient vector in ``coefs`` against the trials in X/Y.

    X is first passed through ``simple_normalize``. For each coefficient
    vector, a trial is predicted as ``0.`` when the sum of the element-wise
    product of the trial and the coefficients is negative and as ``1.``
    otherwise; the prediction counts as correct when it equals the trial's
    outcome in Y. When ``self.problemkey`` equals ``'RobustGraphNet'`` the
    trailing ``self.trainX_shape[0]`` entries of each coefficient vector are
    dropped before scoring.

    Returns:
        A list with one accuracy (fraction of correct trials) per coefficient
        vector, in the order of ``coefs``.
    """
    X = simple_normalize(X)
    accuracies = []

    for index, weights in enumerate(coefs):
        print('Checking accuracy for test group')

        if self.problemkey == 'RobustGraphNet':
            weights = weights[:-self.trainX_shape[0]]

        hits = []
        for trial, outcome in zip(X, Y):
            activation = np.sum(trial * weights)
            predicted = 0. if np.sign(activation) < 0. else 1.
            hits.append(predicted == outcome)

        fold_accuracy = np.sum(hits) * 1. / len(hits)

        # Single pre-formatted strings reproduce ``print a, b`` output.
        print("%s %s" % ('coef number:', index))
        print("%s %s" % ('fold accuracy: ', fold_accuracy))
        accuracies.append(fold_accuracy)

    return accuracies
def pls_train(self, X, Y, verbose=True):
    """Fit a default ``PLSRegression`` model on normalized X and return it.

    Args:
        X: Training matrix; passed through ``simple_normalize`` before fitting.
        Y: Training targets, handed to ``fit`` unchanged.
        verbose: When truthy, print a progress message before fitting.

    Returns:
        The fitted ``PLSRegression`` instance.
    """
    normalized = simple_normalize(X)
    model = PLSRegression()

    if verbose:
        print('fitting canonical pls...')

    model.fit(normalized, Y)
    return model
def pls_test(self, X, Y, pls):
    """Return the fraction of trials whose predicted sign matches the sign of Y.

    X is normalized with ``simple_normalize`` and fed to ``pls.predict``; the
    first element of each prediction row is taken as that trial's prediction.

    Args:
        X: Test matrix.
        Y: Test targets; must expose ``shape`` (its first dimension is the
            denominator of the accuracy).
        pls: Fitted model exposing ``predict``.

    Returns:
        The accuracy for this fold, which is also printed.
    """
    normalized = simple_normalize(X)

    first_components = [row[0] for row in pls.predict(normalized)]
    predicted_signs = np.sign(first_components)
    target_signs = np.sign(Y)

    matches = (target_signs == predicted_signs)
    accuracy = matches.sum() * 1. / Y.shape[0]

    # Single pre-formatted string reproduces ``print a, b`` output.
    print("%s %s" % ('accuracy in fold: ', accuracy))
    return accuracy
def test_svm(self, X, Y, clf):
    """Return the accuracy of ``clf`` on the normalized test set.

    X is normalized with ``simple_normalize`` and classified with a single
    batch call to ``clf.predict``. The original code called ``predict`` once
    per 1-D trial, which is slow and is rejected by scikit-learn releases
    that require 2-D input; the batch call yields the same labels.

    Args:
        X: Test matrix, one trial per row.
        Y: Expected outcome for each trial.
        clf: Fitted classifier exposing ``predict``.

    Returns:
        Fraction of trials whose predicted label equals the outcome (float).
        The value is also printed.
    """
    X = simple_normalize(X)
    print('Checking accuracy of next test group...')

    predictions = clf.predict(X)
    correct = [predicted == outcome for predicted, outcome in zip(predictions, Y)]

    accuracy = float(sum(correct)) / float(len(correct))
    # Single pre-formatted string reproduces ``print a, b`` output.
    print("%s %s" % ('Test group accuracy: ', accuracy))
    return accuracy
def normalize_x(self, Xset):
    """Return ``Xset`` after passing it through ``simple_normalize``."""
    return simple_normalize(Xset)
def normalize_xset(self, Xset):
    """Return ``Xset`` normalized by ``simple_normalize``.

    A progress message is printed first when ``self.verbose`` is truthy.
    """
    if self.verbose:
        print('normalizing X...')
    return simple_normalize(Xset)
def train_graphnet(self, X, Y, trial_mask=None, G=None, l1=None, l2=None, l3=None, delta=None,
                   svmdelta=None, initial=None, adaptive=False, svm=False, scipy_compare=False,
                   tol=1e-5, greymatter_mask=None, initial_l1weights=None, use_adj_time=True):
    """Fit a coordinate-wise penalized regression selected from the penalties.

    X is normalized with ``simple_normalize``; the problem type is chosen by
    ``self.regression_type_selector(l1, l2, l3, delta, svmdelta)`` and solved
    with ``cwpath.CoordWise``. The resulting coefficients and residuals are
    stored on ``self.coefficients`` / ``self.residuals``; ``self.problemkey``
    and ``self.trainX_shape`` are recorded as well.

    Args:
        X: Design matrix.
        Y: Targets. NOTE: for the ``'HuberSVMGraphNet'`` problem these are
            replaced by random +/-1 labels (see the review note in the body).
        trial_mask: Passed to ``prepare_adj`` when ``G`` is None.
        G: Optional pre-built adjacency; a copy is used instead of calling
            ``prepare_adj``.
        l1: A penalty value or a list/tuple of them; a scalar is wrapped in
            a list.
        l2, l3, delta: Forwarded to ``assign_penalty`` for the problem types
            that accept them.
        svmdelta: Only passed to ``regression_type_selector``.
        initial: Initial coefficients forwarded to ``CoordWise`` and ``fit``.
        adaptive: When true, refit with l1 weights derived from the last
            coefficient vector of the first pass; the first-pass results are
            kept on ``self.firstpass_coefficients`` / ``self.firstpass_residuals``.
        svm: Unused.
        scipy_compare: When true, minimize the same objective with
            ``scipy.optimize.fmin_powell`` and assert that both solutions agree.
        tol: Tolerance for ``fit`` and for the scipy comparison.
        greymatter_mask: Passed to ``prepare_adj`` as ``gm_mask``.
        initial_l1weights: Forwarded as ``l1weights``; when given, ``l1`` is
            also forwarded as ``newl1``.
        use_adj_time: Selects ``numt=1`` (true) or ``numt=0`` (false) for
            ``prepare_adj``.

    Returns:
        ``self.coefficients``, or ``False`` when the selector returns an
        unrecognized problem key.
    """
    def report(*parts):
        # Same text as the Python 2 statement ``print a, b, c`` for the
        # messages used here, while remaining valid Python 3 syntax.
        print(' '.join(str(part) for part in parts))

    if not isinstance(l1, (list, tuple)):
        l1 = [l1]

    X = simple_normalize(X)
    tic = time.clock()

    problemkey = self.regression_type_selector(l1, l2, l3, delta, svmdelta)
    self.problemkey = problemkey
    self.trainX_shape = X.shape

    # Only the graph-penalized problems need an adjacency structure.
    if problemkey in ('HuberSVMGraphNet', 'RobustGraphNet', 'NaiveGraphNet'):
        if G is None:
            A = prepare_adj(trial_mask, numt=1 if use_adj_time else 0, gm_mask=greymatter_mask)
        else:
            A = G.copy()

    newl1 = l1 if initial_l1weights is not None else None

    # Problem keys are compared with ``==``: ``is`` on string literals only
    # works when the interpreter happens to intern both strings.
    if problemkey == 'RobustGraphNet':
        problemtype = graphnet.RobustGraphNet
        report('Robust GraphNet with penalties (l1, l2, l3, delta): ', l1, l2, l3, delta)
        solver = cwpath.CoordWise((X, Y, A), problemtype, initial_coefs=initial)
        solver.problem.assign_penalty(path_key='l1', l1=l1, l2=l2, l3=l3, delta=delta,
                                      l1weights=initial_l1weights, newl1=newl1)

    elif problemkey == 'HuberSVMGraphNet':
        problemtype = graphnet.GraphSVM
        report('HuberSVM GraphNet with penalties (l1, l2, l3, delta): ', l1, l2, l3, delta)
        # NOTE(review): this discards the caller's Y and trains on random
        # +/-1 labels. It looks like leftover test scaffolding; kept as-is
        # because the intended label encoding is not visible here -- confirm
        # and replace with a proper mapping of Y.
        Y = 2*np.round(np.random.uniform(0, 1, len(Y)))-1
        solver = cwpath.CoordWise((X, Y, A), problemtype)
        solver.problem.assign_penalty(path_key='l1', l1=l1, l2=l2, l3=l3, delta=delta,
                                      l1weights=initial_l1weights, newl1=newl1)

    elif problemkey == 'NaiveGraphNet':
        problemtype = graphnet.NaiveGraphNet
        report('Testing GraphNet with penalties (l1, l2, l3): ', l1, l2, l3)
        solver = cwpath.CoordWise((X, Y, A), problemtype, initial_coefs=initial)
        solver.problem.assign_penalty(path_key='l1', l1=l1, l2=l2, l3=l3,
                                      l1weights=initial_l1weights, newl1=newl1)

    elif problemkey == 'NaiveENet':
        problemtype = graphnet.NaiveENet
        report('Testing ENET with penalties (l1, l2): ', l1, l2)
        solver = cwpath.CoordWise((X, Y), problemtype, initial_coefs=initial)
        solver.problem.assign_penalty(path_key='l1', l1=l1, l2=l2,
                                      l1weights=initial_l1weights, newl1=newl1)

    elif problemkey == 'Lasso':
        problemtype = graphnet.Lasso
        report('Testing LASSO with penalty (l1): ', l1)
        solver = cwpath.CoordWise((X, Y), problemtype, initial_coefs=initial)
        solver.problem.assign_penalty(path_key='l1', l1=l1,
                                      l1weights=initial_l1weights, newl1=newl1)

    else:
        print('Incorrect parameters set (no problem key).')
        return False

    # Solve the problem:
    print('Solving the problem...')
    coefficients, residuals = solver.fit(tol=tol, initial=initial)
    self.coefficients = coefficients
    self.residuals = residuals
    report('\t---> Fitting problem with coordinate decesnt took: ', time.clock()-tic, 'seconds.')

    if adaptive:
        tic = time.clock()
        safety = 1e-5
        # NOTE(review): weights are built from the signed coefficients, so a
        # coefficient near -safety gives a huge or negative weight; adaptive
        # schemes usually use the absolute value -- confirm.
        l1weights = 1./(self.coefficients[-1]+safety)
        # NOTE(review): A is only bound for the graph problem types, so
        # adaptive=True with 'NaiveENet' or 'Lasso' raises NameError here.
        solver = cwpath.CoordWise((X, Y, A), problemtype, initial_coefs=initial)
        solver.problem.assign_penalty(path_key='l1', l1=l1, l2=l2, l3=l3, delta=delta,
                                      l1weights=l1weights, newl1=l1)
        adaptive_coefficients, adaptive_residuals = solver.fit(tol=tol, initial=initial)
        report('\t---> Fitting Adaptive GraphNet problem with coordinate descent took: ',
               time.clock()-tic, 'seconds.')

        self.firstpass_coefficients = self.coefficients
        self.firstpass_residuals = self.residuals
        self.coefficients = adaptive_coefficients
        self.residuals = adaptive_residuals

    if scipy_compare:
        # Compare against the last penalty on the path only.
        l1 = l1[-1]
        beta = self.coefficients[-1]
        print('\t---> Fitting with scipy for comparison...')
        tic = time.clock()

        # NOTE(review): the graph objectives below reference ``Afull``, which
        # is never assigned in this method, and the robust objective uses
        # ``l1weights``, which only exists after an adaptive pass. Those
        # comparisons raise NameError until the names are provided.
        if problemkey == 'RobustGraphNet':
            def f(beta):
                huber_sum = self.huber(Y - np.dot(X, beta), delta).sum()/2
                beta_l1 = l1*np.dot(np.fabs(beta), l1weights)
                beta_l2 = l2*np.linalg.norm(beta)**2/2
                beta_l3 = l3*np.dot(beta, np.dot(Afull, beta))/2
                return huber_sum + beta_l1 + beta_l2 + beta_l3

        elif problemkey == 'HuberSVMGraphNet':
            # Prepend a column of ones to X.
            Xp2 = np.hstack([np.ones(X.shape[0])[:, np.newaxis], X])

            def f(beta):
                # Penalties apply to every entry of beta except the first.
                penalized = beta[1:]
                huber_err_sum = self.huber_svm_error(beta, Y, Xp2, delta).sum()
                beta_l1 = np.fabs(penalized).sum()*l1
                beta_l2 = l2*(np.linalg.norm(penalized)**2/2)
                beta_l3 = l3*(np.dot(penalized, np.dot(Afull, penalized)))/2
                # Fixed: the original returned the undefined name
                # ``huber_error_sum``.
                return huber_err_sum + beta_l1 + beta_l2 + beta_l3

        elif problemkey == 'NaiveGraphNet':
            def f(beta):
                beta_XY = np.linalg.norm(Y - np.dot(X, beta))**2/2
                beta_l1 = l1*np.fabs(beta).sum()
                beta_l2 = l2*np.linalg.norm(beta)**2/2
                beta_l3 = l3*np.dot(beta, np.dot(Afull, beta))/2
                return beta_XY + beta_l1 + beta_l2 + beta_l3

        elif problemkey == 'NaiveENet':
            def f(beta):
                beta_XY = np.linalg.norm(Y - np.dot(X, beta))**2/2
                beta_l1 = l1*np.fabs(beta).sum()
                # Fixed: the l2 factor and the return statement were missing,
                # so the objective evaluated to None.
                beta_l2 = l2*np.linalg.norm(beta)**2/2
                return beta_XY + beta_l1 + beta_l2

        elif problemkey == 'Lasso':
            def f(beta):
                beta_XY = np.linalg.norm(Y - np.dot(X, beta))**2/2
                beta_l1 = l1*np.fabs(beta).sum()
                # Fixed: the return statement was missing.
                return beta_XY + beta_l1

        if problemkey == 'HuberSVMGraphNet':
            v = scipy.optimize.fmin_powell(f, np.zeros(Xp2.shape[1]),
                                           ftol=1.0e-14, xtol=1.0e-14, maxfun=100000)
        else:
            v = scipy.optimize.fmin_powell(f, np.zeros(X.shape[1]),
                                           ftol=1.0e-10, xtol=1.0e-10, maxfun=100000)
        v = np.asarray(v)
        report('\t---> Fitting GraphNet with scipy took: ', time.clock()-tic, 'seconds.')

        # Objective values must agree to within tol (relative).
        assert_true(np.fabs(f(v) - f(beta)) / np.fabs(f(v) + f(beta)) < tol)
        # Solutions must agree too; a near-zero scipy solution is compared
        # against an absolute threshold to avoid dividing by ~0.
        if np.linalg.norm(v) > 1e-8:
            assert_true(np.linalg.norm(v - beta) / np.linalg.norm(v) < tol)
        else:
            assert_true(np.linalg.norm(beta) < 1e-8)

        print('\t---> Coordinate-wise and Scipy optimization agree.')

    return self.coefficients
def fit_linearsvc(self, X, Y, class_weight='auto'):
    """Fit and return an ``svm.LinearSVC`` on normalized X.

    Args:
        X: Training matrix; passed through ``simple_normalize`` before fitting.
        Y: Training labels.
        class_weight: Forwarded to the ``LinearSVC`` constructor.

    Returns:
        The fitted classifier.
    """
    print('fitting linearsvm')
    normalized = simple_normalize(X)

    classifier = svm.LinearSVC(class_weight=class_weight)
    classifier.fit(normalized, Y)
    return classifier
def fit_svc(self, X, Y, cache_size=5000, class_weight='auto'):
    """Fit and return an ``svm.SVC`` on normalized X.

    Args:
        X: Training matrix; passed through ``simple_normalize`` before fitting.
        Y: Training labels.
        cache_size: Forwarded to the ``SVC`` constructor.
        class_weight: Forwarded to the ``SVC`` constructor.

    Returns:
        The fitted classifier.
    """
    normalized = simple_normalize(X)

    classifier = svm.SVC(cache_size=cache_size, class_weight=class_weight)
    classifier.fit(normalized, Y)
    return classifier