def ordinary_logistic(data):
    """Cross-validate a weakly regularized logistic regression and print mean metrics.

    Args:
        data: Object exposing ``get_gene_exp_matrix()`` and ``get_labels()``
            (presumably a ``TCGAData`` instance -- confirm against callers).

    Prints:
        A list holding the element-wise mean, across folds, of the per-fold
        results returned by ``kFoldCrossValid``. An empty result prints ``[]``.
    """
    X = data.get_gene_exp_matrix()
    y = data.get_labels()
    # In scikit-learn, C is the inverse of the regularization strength, so a
    # very large C means very little regularization.
    logReg = LogisticRegression(C=1000000, tol=0.01, dual=True)
    logis_eval = kFoldCrossValid(X, y, logReg)
    # float() keeps the mean from being truncated by Python 2 integer division.
    fold_count = float(len(logis_eval))
    # zip(*...) groups the i-th metric of every fold together; building a real
    # list (rather than map()) keeps the printed value readable on Python 3.
    eval_avg = [sum(column) / fold_count for column in zip(*logis_eval)]
    print(eval_avg)
def main():
    """Evaluate a Gaussian naive Bayes classifier on the TCGA data.

    Loads the expression matrix and labels from a fresh ``TCGAData`` object,
    passes them to ``kFoldCrossValid`` together with a ``GaussianNB`` model,
    and prints whatever that call returns.
    """
    dataset = TCGAData()
    features = dataset.get_gene_exp_matrix()
    targets = dataset.get_labels()
    result = kFoldCrossValid(features, targets, GaussianNB())
    print(result)
def svmfn(featureSelectionMethod='none'):
    """Cross-validate an SVM classifier on the TCGA data and print the result.

    Args:
        featureSelectionMethod: Forwarded unchanged to ``kFoldCrossValid`` as
            its ``selection`` argument; defaults to ``'none'``.
    """
    dataset = TCGAData()
    expression = dataset.get_gene_exp_matrix()
    targets = dataset.get_labels()
    gene_names = dataset.get_gene_names()
    # gamma and C were copied from an example rather than tuned for this data.
    classifier = svm.SVC(gamma=0.001, C=100.)
    result = kFoldCrossValid(
        expression,
        targets,
        classifier,
        k=4,
        names=gene_names,
        selection=featureSelectionMethod,
    )
    print(result)
def with_l1_penalty(data, C_list):
    """Sweep C for L1-penalized logistic regression and plot the outcome.

    Adapted from sklearn website,
    Authors: Alexandre Gramfort <*****@*****.**>
             Mathieu Blondel <*****@*****.**>
             Andreas Mueller <*****@*****.**>
    License: BSD Style.

    For every value in ``C_list`` the function prints the mean sparsity
    reported by ``kFoldGetSparsity`` and collects the fold-averaged results of
    ``kFoldCrossValid``. It then prints the C values, precisions and recalls,
    and shows a two-panel figure: precision/recall against C, and sparsity
    against C.

    Args:
        data: Should be a ``TCGAData`` object; only ``get_gene_exp_matrix()``
            and ``get_labels()`` are called on it directly.
        C_list: Sequence of C values to try.
    """
    X = data.get_gene_exp_matrix()
    y = data.get_labels()
    evaluations = []
    sparsityList = []
    for C in C_list:
        l1_logReg = LogisticRegression(C=C, penalty='l1', tol=0.01)
        sparsities = kFoldGetSparsity(data, l1_logReg)
        # float() keeps the mean from being truncated by Python 2 integer division.
        sparsity = sum(sparsities) / float(len(sparsities))
        sparsityList.append(sparsity)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("C=%.4f" % C)
        print("Sparsity with L1 penalty: %.2f%%" % sparsity)
        l1_eval = kFoldCrossValid(X, y, l1_logReg)
        fold_count = float(len(l1_eval))
        # zip(*...) groups the i-th metric of every fold together; a real list
        # (not a map object) is needed because the result is indexed below.
        l1_eval_avg = [sum(column) / fold_count for column in zip(*l1_eval)]
        evaluations.append(l1_eval_avg)
    C_list = np.array(C_list)
    print(C_list)
    # Positions 1 and 2 of each averaged result are plotted as precision and
    # recall; both are scaled to percentages.
    precisions = [100 * fold_avg[1] for fold_avg in evaluations]
    print(precisions)
    recalls = [100 * fold_avg[2] for fold_avg in evaluations]
    print(recalls)
    fig = pl.figure()
    precis_recall = fig.add_subplot(211)
    sparse = fig.add_subplot(212)
    precis_recall.set_title('Precision/Recall with Varying C')
    precis_recall.plot(C_list, precisions, color="red")
    precis_recall.plot(C_list, recalls, color="blue")
    precis_recall.set_xlabel("C")
    precis_recall.set_ylabel("Precision (red)/Recall (blue)")
    sparse.set_title('Sparsity with Varying C')
    sparse.plot(C_list, sparsityList, color="black")
    sparse.set_xlabel("C")
    sparse.set_ylabel("Sparsity")
    pl.show()