Example #1
def testBroad():
    # Load the expression data set and print a few rows to sanity-check its shape.
    data = Data()
    geneExp = data.get_gene_exp_matrix()
    labels = data.get_labels()
    geneNames = data.get_gene_names()  # not used below; fetched as a sanity check
    print(labels)
    print(geneExp[0])
    print(geneExp[5])
    print(geneExp[17])
    print(len(geneExp))     # number of samples
    print(len(geneExp[0]))  # number of genes per sample
Example #2
File: ecoc.py  Project: susanctu/229project
def ecocfn(code_size=52./38, C=50, linSVC_L1=False, kernel='linear', selection='chi2', numFeatures=330):  # try different code sizes
    """The L1 penalty is applied only when linSVC_L1=True."""
    data = Data()
    gene_exp = data.get_gene_exp_matrix()
    labels = data.get_labels()
    if linSVC_L1:
        clf = svm.LinearSVC(C=C, penalty='l1', dual=False)
    else:
        clf = svm.SVC(C=C, kernel=kernel)
    # Wrap the base classifier in an error-correcting output code (ECOC) scheme.
    ecocAlgo = OutputCodeClassifier(clf, code_size=code_size, random_state=0)
    print(leaveOneOutCrossValid(gene_exp, labels, ecocAlgo, selection=selection, numFeatures=numFeatures))
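For readers unfamiliar with OutputCodeClassifier, here is a minimal, self-contained sketch of how the ECOC wrapper used above behaves on synthetic data. It assumes nothing from the susanctu/229project code base (Data, leaveOneOutCrossValid), and the numbers are illustrative only; code_size is the ratio of the binary code length to the number of classes.

# Standalone ECOC illustration; not project code.
from sklearn.datasets import make_classification
from sklearn.multiclass import OutputCodeClassifier
from sklearn.svm import LinearSVC

X, y = make_classification(n_samples=200, n_features=20, n_informative=5,
                           n_classes=4, random_state=0)
# code_size=1.5 assigns each class a binary code 1.5x as long as the class count;
# one binary LinearSVC is trained per code bit.
ecoc = OutputCodeClassifier(LinearSVC(C=1.0, dual=False), code_size=1.5, random_state=0)
ecoc.fit(X, y)
print(ecoc.score(X, y))  # training accuracy of the ECOC ensemble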
Example #3
def svmfn(featureSelectionMethod='none', numFeaturesA=330):
    data = Data()
    gene_exp = data.get_gene_exp_matrix()
    labels = data.get_labels()
    names = data.get_gene_names()
    # SVC handles multi-class one-vs-one internally, but the OneVsRestClassifier
    # wrapper below trains one binary SVM per class (one-vs-rest) instead.
    clf = svm.SVC(C=125., kernel='linear')  # kernel can be poly, rbf, linear, sigmoid
    clfwrapper = OneVsRestClassifier(clf)
    #accuracy = leaveOneOutCrossValid(gene_exp, labels, clfwrapper, names=names, selection=featureSelectionMethod, numFeatures=numFeaturesA)
    #print('accuracy is')
    #print(accuracy)
    trainingError = trainingSetPerformance(gene_exp, labels, clfwrapper, names=names, selection=featureSelectionMethod, numFeatures=numFeaturesA)
    print('trainingError is')
    print(trainingError)
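The helpers leaveOneOutCrossValid and trainingSetPerformance are project-specific and not shown on this page. A rough stand-in for the former, built on scikit-learn's own leave-one-out splitter, might look like the sketch below; this is an assumption about its behaviour (the real helper also takes selection/numFeatures arguments, i.e. it presumably performs per-fold feature selection, which is omitted here).

# Hypothetical stand-in for leaveOneOutCrossValid; not the project's implementation.
import numpy
from sklearn.model_selection import LeaveOneOut, cross_val_score

def loo_accuracy(clf, gene_exp, labels):
    X = numpy.asarray(gene_exp)
    y = numpy.asarray(labels)
    # One fold per sample: train on all other samples, test on the held-out one.
    scores = cross_val_score(clf, X, y, cv=LeaveOneOut())
    return scores.mean()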
Example #4
def svmfn(featureSelectionMethod='none', numFeatures=330):
    data = Data()
    #data = RaviNormal()
    gene_exp = data.get_gene_exp_matrix()
    labels = data.get_labels()
    names = data.get_gene_names()
    #clf = svm.LinearSVC(C=125., penalty="l1", dual=False, class_weight='auto')
    clf = svm.LinearSVC(C=125, penalty="l1", dual=False)
    clfwrapper = OneVsRestClassifier(clf)
    #accuracy = leaveOneOutCrossValid(gene_exp, labels, clfwrapper, names=names, selection=featureSelectionMethod, numFeatures=numFeatures)
    trainingError = trainingSetPerformance(gene_exp, labels, clfwrapper, names=names, selection=featureSelectionMethod, numFeatures=numFeatures)
    #print('accuracy is')
    #print(accuracy)
    print('trainingError is')
    print(trainingError)
    # Inspect the fitted one-vs-rest estimators: for each class, list the genes
    # with the largest positive L1-SVM weights together with their average
    # expression over the samples of that class.
    estimators = clfwrapper.estimators_
    j = 0
    totalGeneListLength = 0
    for estimator in estimators:
        print('estimator for class')
        print(data.getCellName(j))
        idx = range(0, len(estimator.coef_[0]))
        b = sorted(zip(estimator.coef_[0], idx), reverse=True)[:80]  # TODO CHANGE
        indices = data.indices_of_celltype(j)
        #print('indices of this class:')
        #print(indices)
        arraysum = numpy.zeros(11927)  # one entry per gene
        for i in indices:
            arraysum = numpy.add(arraysum, gene_exp[i])
        arrayavg = numpy.divide(arraysum, len(indices))
        k = 0
        geneList = []
        while k < 80 and b[k][0] > 0:  # TODO CHANGE
            avg_expr = arrayavg[b[k][1]]
            geneStr = str(b[k][0]) + ',' + names[b[k][1]] + ':' + str(avg_expr)
            geneList = geneList + [geneStr]
            k = k + 1
        j = j + 1
        print(geneList)
        print(len(geneList))
        totalGeneListLength += len(geneList)
    print('avg gene signature size:')
    print(totalGeneListLength / 35)  # 35 cell-type classes
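The per-class loop in the example above is mostly bookkeeping. Its core step, ranking genes by their positive L1-SVM weight for each one-vs-rest estimator, can be written more compactly with numpy.argsort; the following is an illustrative reformulation, not code from the project.

# Compact, hypothetical rewrite of the signature-extraction loop above.
import numpy

def top_genes_per_class(clfwrapper, names, top_k=80):
    signatures = []
    for estimator in clfwrapper.estimators_:
        weights = estimator.coef_[0]
        order = numpy.argsort(weights)[::-1][:top_k]  # indices of the largest weights
        signatures.append([names[i] for i in order if weights[i] > 0])
    return signatures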