def svmfn(featureSelectionMethod='none', numFeaturesA='330'):
    """Report training-set performance of a linear-kernel SVM on the gene data.

    Loads the expression matrix, labels and gene names from ``Data()``, wraps
    ``svm.SVC(C=125., kernel='linear')`` in ``OneVsRestClassifier`` and hands
    everything to ``trainingSetPerformance``.  The value that helper returns
    is printed after the label ``'trainingError is'``.

    NOTE(review): another ``svmfn`` is defined later in this file; if both
    live at module level, the later definition rebinds the name and this one
    is unreachable -- confirm which one is intended.

    Args:
        featureSelectionMethod: forwarded to ``trainingSetPerformance`` as
            its ``selection`` keyword.
        numFeaturesA: forwarded to ``trainingSetPerformance`` as its
            ``numFeatures`` keyword.

    Returns:
        None.  The result is only printed.
    """
    dataset = Data()
    expression = dataset.get_gene_exp_matrix()
    class_labels = dataset.get_labels()
    gene_names = dataset.get_gene_names()

    # The SVC is wrapped in OneVsRestClassifier, so the multiclass problem is
    # handled with one binary classifier per class.  Other SVC kernels that
    # could be tried here: poly, rbf, sigmoid.
    one_vs_rest = OneVsRestClassifier(svm.SVC(C=125., kernel='linear'))

    # Leave-one-out cross-validation (leaveOneOutCrossValid) is disabled in
    # this version; only the training-set figure is reported.
    training_error = trainingSetPerformance(
        expression,
        class_labels,
        one_vs_rest,
        names=gene_names,
        selection=featureSelectionMethod,
        numFeatures=numFeaturesA,
    )

    print('trainingError is')
    print(training_error)
def _class_mean_expression(expression, indices):
    """Return the per-gene mean over the rows of *expression* in *indices*.

    ``expression`` is a 2-D float array (samples x genes).  With an empty
    ``indices`` the division is 0/0 and the result is an all-NaN vector, as
    in the original accumulate-then-divide code.
    """
    rows = expression[numpy.asarray(indices, dtype=int)]
    return numpy.divide(numpy.sum(rows, axis=0), len(indices))


def _positive_signature(coefs, names, mean_expr, limit):
    """Format the top *limit* strictly positive coefficients as strings.

    Each entry is ``'<coef>,<gene name>:<mean expression>'``.  Coefficients
    are ranked in descending order (ties broken by the higher gene index,
    because ``(coef, index)`` tuples are sorted in reverse).
    """
    ranked = sorted(zip(coefs, range(len(coefs))), reverse=True)[:limit]
    signature = []
    for weight, gene_idx in ranked:
        # Stop at the first non-positive weight; ``not weight > 0`` also
        # stops on NaN, as the original ``> 0`` loop condition did.
        if not weight > 0:
            break
        signature.append(
            str(weight) + ',' + names[gene_idx] + ':' + str(mean_expr[gene_idx]))
    return signature


def svmfn(featureSelectionMethod='none', numFeatures='330', signatureSize=80):
    """Fit an L1-penalised linear SVM and print a gene signature per class.

    Loads the expression matrix, labels and gene names from ``Data()``, wraps
    ``svm.LinearSVC(C=125, penalty="l1", dual=False)`` in
    ``OneVsRestClassifier`` and passes it to ``trainingSetPerformance``, whose
    return value is printed.  Then, for every fitted per-class estimator, the
    genes with the largest strictly positive coefficients are printed together
    with their mean expression over the samples of that class, followed by
    the average signature length across classes.

    Changes relative to the previous revision:
      * the gene count is taken from the expression matrix instead of the
        hard-coded 11927;
      * the average is divided by the actual number of estimators instead of
        the hard-coded 35, and is a true (float) average rather than a
        Python 2 truncating integer division;
      * the signature loop no longer raises IndexError when fewer than
        ``signatureSize`` coefficients exist and all of them are positive.

    Args:
        featureSelectionMethod: forwarded to ``trainingSetPerformance`` as
            its ``selection`` keyword.
        numFeatures: forwarded to ``trainingSetPerformance`` as its
            ``numFeatures`` keyword.
        signatureSize: maximum number of genes listed per class (default 80,
            the value that used to be hard-coded).

    Returns:
        None.  All results are printed.
    """
    data = Data()
    gene_exp = data.get_gene_exp_matrix()
    labels = data.get_labels()
    names = data.get_gene_names()

    clfwrapper = OneVsRestClassifier(
        svm.LinearSVC(C=125, penalty="l1", dual=False))

    # Presumably this call fits ``clfwrapper`` in place: ``estimators_`` is
    # read from it right afterwards -- confirm against trainingSetPerformance.
    trainingError = trainingSetPerformance(
        gene_exp, labels, clfwrapper,
        names=names,
        selection=featureSelectionMethod,
        numFeatures=numFeatures,
    )

    # Kept for output compatibility.  No value follows this label because the
    # leave-one-out accuracy computation is disabled.
    print('accuracy is')
    print('trainingError is')
    print(trainingError)

    # Convert once so per-class row selection works for list-of-lists input
    # as well as for ndarrays.
    expression = numpy.asarray(gene_exp, dtype=float)

    estimators = clfwrapper.estimators_
    total_signature_genes = 0
    for class_idx, estimator in enumerate(estimators):
        print('estimator for class')
        print(data.getCellName(class_idx))

        mean_expr = _class_mean_expression(
            expression, data.indices_of_celltype(class_idx))

        # NOTE(review): coefficient positions are used directly as indices
        # into ``names`` and the expression columns.  That assumes the model
        # was trained on all columns (selection == 'none') -- confirm for
        # other feature-selection modes.
        geneList = _positive_signature(
            estimator.coef_[0], names, mean_expr, signatureSize)

        print(geneList)
        print(len(geneList))
        total_signature_genes += len(geneList)

    num_classes = len(estimators)
    print('avg gene signature size:')
    print(float(total_signature_genes) / num_classes if num_classes else 0.0)