Example #1
0
def svmfn(featureSelectionMethod = 'none',numFeaturesA = '330'):
    """Evaluate a one-vs-rest linear SVM on the training set and print the result.

    The expression matrix, labels and gene names are read from a fresh
    ``Data`` object and handed, together with the classifier, to
    ``trainingSetPerformance``.  Whatever that helper returns is printed
    under the heading ``trainingError is``; nothing is returned.

    featureSelectionMethod -- forwarded unchanged as ``selection``.
    numFeaturesA           -- forwarded unchanged as ``numFeatures``.
    """
    dataset = Data()
    expression = dataset.get_gene_exp_matrix()
    classLabels = dataset.get_labels()
    geneNames = dataset.get_gene_names()

    # svm.SVC on its own handles multiclass problems one-vs-one; wrapping it in
    # OneVsRestClassifier trains one binary SVC per class instead.
    # Other kernels accepted by SVC: poly, rbf, sigmoid.
    baseSvm = svm.SVC(C=125., kernel='linear')
    oneVsRest = OneVsRestClassifier(baseSvm)

    score = trainingSetPerformance(
        expression,
        classLabels,
        oneVsRest,
        names=geneNames,
        selection=featureSelectionMethod,
        numFeatures=numFeaturesA,
    )

    # A parenthesised single argument prints exactly like the bare print statement.
    print('trainingError is')
    print(score)
Example #2
0
def svmfn(featureSelectionMethod = 'none',numFeatures = '330',maxSignatureSize = 80):
    """Fit a one-vs-rest L1-penalised linear SVM and print a gene signature per class.

    The expression matrix, labels and gene names are read from a fresh
    ``Data`` object.  ``trainingSetPerformance`` is called with the wrapped
    classifier and its return value is printed under ``trainingError is``.
    Then, for every per-class estimator, the genes with the largest strictly
    positive coefficients are printed as ``coef,name:mean_expression`` where
    the mean is taken over the samples returned by
    ``data.indices_of_celltype`` for that class.  Finally the mean signature
    length over all classes is printed.  Nothing is returned.

    featureSelectionMethod -- forwarded unchanged as ``selection``.
    numFeatures            -- forwarded unchanged as ``numFeatures``.
    maxSignatureSize       -- upper bound on genes listed per class
                              (non-negative; defaults to the previous
                              hard-coded value of 80).
    """
    data = Data()
    gene_exp = data.get_gene_exp_matrix()
    labels = data.get_labels()
    names = data.get_gene_names()

    clf = svm.LinearSVC(C=125,penalty="l1",dual=False)
    clfwrapper = OneVsRestClassifier(clf)
    trainingError = trainingSetPerformance(gene_exp,labels,clfwrapper,names=names,selection=featureSelectionMethod,numFeatures=numFeatures)
    # The old orphan 'accuracy is' heading was dropped: no accuracy is computed here.
    print('trainingError is')
    print(trainingError)

    # estimators_ only exists on a fitted wrapper; presumably
    # trainingSetPerformance fits clfwrapper in place -- TODO confirm.
    estimators = clfwrapper.estimators_

    # Size the accumulator from the data instead of a hard-coded gene count.
    numGenes = len(gene_exp[0])

    totalGeneListLength = 0
    for classIndex, estimator in enumerate(estimators):
        print('estimator for class')
        print(data.getCellName(classIndex))

        weights = estimator.coef_[0]
        # (coefficient, gene index) pairs, largest coefficient first.
        ranked = sorted(zip(weights, range(len(weights))), reverse=True)[:maxSignatureSize]

        # Mean expression of every gene over this class's samples.
        indices = data.indices_of_celltype(classIndex)
        arraysum = numpy.zeros(numGenes)
        for sampleIndex in indices:
            arraysum = numpy.add(arraysum, gene_exp[sampleIndex])
        arrayavg = numpy.divide(arraysum, len(indices))

        geneList = []
        # Iterating over the slice (rather than counting to a fixed limit)
        # cannot run past the end when there are fewer coefficients than
        # maxSignatureSize.
        for weight, geneIndex in ranked:
            if not weight > 0:
                # Sorted descending, so no later entry can be positive.
                break
            avg_expr = arrayavg[geneIndex]
            geneList.append(str(weight) + ',' + names[geneIndex] + ':' + str(avg_expr))

        print(geneList)
        print(len(geneList))
        totalGeneListLength += len(geneList)

    print('avg gene signature size:')
    numClasses = len(estimators)
    if numClasses:
        # True division over the actual number of classes; the previous
        # integer division by a fixed 35 truncated the average.
        print(float(totalGeneListLength) / numClasses)
    else:
        print(0)