Ejemplo n.º 1
0
# Best-so-far trackers for the (k1, k2, C) grid search below.
bestROC = -numpy.inf
bestROC50 = -numpy.inf
bestSuccessRate = None
bestW = None
bestK1 = 0
bestK2 = 0
# Candidate values for the k1 / k2 arguments of get_spectrum_data.
K1 = list(range(8, 14))
K2 = list(range(8, 14))


for k1 in K1:
    for k2 in K2:
        # Only evaluate pairs in which k2 is at least k1.
        if k2 < k1:
            continue
        for C in Cs:
            print(
                "**** Train/Test with K1: " + str(k1)
                + ", k2: " + str(k2)
                + ", C: " + str(C)
            )
            trainData = generate_model.get_spectrum_data(
                trainSeqFile,
                k1,
                k2,
                trainPosLen,
                trainNegLen,
                normalize=True,
                repeatCount=True
            )
            s = svm.SVM(C=C)
            s.train(trainData)
            results = s.stratifiedCV(trainData, numFolds=numFolds, seen=1)
            labels = results.getGivenClass()

            # Collect the three quality measures reported for this setting.
            roc = results.getROC()
            roc50 = results.getROCn()
            successRate = results.getSuccessRate()
            print(
                "ROC: " + str(roc)
                + ", ROC50: " + str(roc50)
                + ", Success Rate: " + str(successRate)
            )
            # NOTE(review): bestROC is not updated in the lines visible here;
            # presumably that happens further down in this branch -- confirm.
            if roc > bestROC:
                # Drop the previously saved data file, if there is one.
                if os.path.exists(dataFile):
                    os.remove(dataFile)
                # Save the training data for this K1, K2 combination.
                trainData.save(dataFile)
Ejemplo n.º 2
0
    # The first command-line argument is the path handed to ReadConfFile.
    confMap = ReadConfFile(sys.argv[1])

    # Settings are read from the "data", "kspectrum" and "output" sections.
    dataFile = confMap["data"]["dataFile"]
    k1 = int(confMap["kspectrum"]["k1"])
    k2 = int(confMap["kspectrum"]["k2"])
    C = float(confMap["kspectrum"]["C"])
    posLen = int(confMap["data"]["posLen"])
    negLen = int(confMap["data"]["negLen"])
    numFolds = int(confMap["kspectrum"]["folds"])
    featureFile = confMap["output"]["featureFile"]
    resultFile = confMap["output"]["resultsFile"]
    modelFile = confMap["output"]["modelFile"]

    # Hard-coded switch: as written, only the cross-validation path runs.
    buildModel = False

    trainData = generate_model.get_spectrum_data(
        dataFile, k1, k2, posLen, negLen, normalize=True, repeatCount=True
    )

    # Step 1: obtain results, either from a built model or from
    # cross-validation alone.
    if buildModel:
        results, m = BuildModel(trainData, modelFile, numFolds)
    else:
        results = CrossValidate(trainData, numFolds)

    # Step 2: report the results (the same call on both paths).
    print_results(results, resultFile, k1, k2)

    # Step 3: feature search; the model's C is passed only when a model
    # was built.
    if buildModel:
        find_features(trainData, featureFile, m.classifier.C)
    else:
        find_features(trainData, featureFile)
Ejemplo n.º 3
0
outFeatureFile = sys.argv[5]


# Best-so-far trackers for the (k1, k2, C) grid search below.
bestC = None
bestAUC = -numpy.inf
bestFP = None
bestTP = None
# Candidate values for the k1 / k2 arguments of get_spectrum_data.
K1 = list(range(7, 14))
K2 = list(range(7, 14))

# NOTE(review): this handle is neither written to nor closed in the lines
# visible here -- confirm that later code does both.
result_file = open("K-spectrum.txt", "w")
for k1 in K1:
    for k2 in K2:
        for C in Cs:
            print(
                "**** Train/Test with K1: " + str(k1)
                + ", k2: " + str(k2)
                + ", C: " + str(C)
            )
            trainData = generate_model.get_spectrum_data(
                trainSeqFile, k1, k2, trainLen, trainLen, True
            )
            # A fresh list is started for every (k1, k2, C) combination.
            folds = []
            s = svm.SVM(C=C)
            s.train(trainData)

            # Disabled alternative: testData = SparseDataSet(testFeatureFile)
            testData = demo_utils.get_spectrum_data(
                testSeqFile, k1, k2, testLen, testLen, True
            )

            # Evaluate on the test set and keep (decision values, labels).
            results = s.test(testData)
            labels = results.getGivenClass()
            dvals = results.getDecisionFunction()
            folds.append((dvals, labels))

            demo_utils.print_results(results)
            print("Results Log: ")
            # NOTE(review): the return value of getLog() is discarded here;
            # confirm whether it prints on its own or should be printed.
            results.getLog()
Ejemplo n.º 4
0
from PyML import *
import demo_utils
import generate_model




## Program starts here.
# Builds the spectrum data for a test sequence file via
# generate_model.get_spectrum_data and saves it to <testDataFile>.
# NOTE(review): `sys` is not imported in the lines visible above; presumably it
# comes from `from PyML import *` or an earlier import -- confirm.
numArgs = len(sys.argv)
print(numArgs)
# Five positional arguments are read below (sys.argv[1] .. sys.argv[5]), so
# sys.argv needs at least six entries counting the script name. The previous
# `< 4` guard let 3- and 4-argument calls through to an IndexError.
if numArgs < 6:
	print("USAGE: python createTestSpectrumData.py <test seq file> <lenTest> <k1> <k2> <testDataFile>")
	sys.exit(1)

testSeqFile = sys.argv[1]
testLen = int(sys.argv[2])  # equal pos/neg len
k1 = int(sys.argv[3])
k2 = int(sys.argv[4])
testDatFile = sys.argv[5]

# testLen is passed for both length arguments of get_spectrum_data.
testData = generate_model.get_spectrum_data(testSeqFile, k1, k2, testLen, testLen, True)
testData.save(testDatFile)

if __name__ == "__main__":
	import sys

	# Command line: <combined file> <prefix> <conf file>
	combinedFile = sys.argv[1]
	# Same output as the two-item print statement: the items joined by a space.
	print("%s %s" % ("Combined File: ", combinedFile))
	prefix = sys.argv[2]
	confFile = sys.argv[3]
	CreateConf(combinedFile, prefix, confFile)
Ejemplo n.º 5
0
	confMap = readConfFile(configFile);
	return confMap;


if __name__ == "__main__":
	import sys

	# The first command-line argument is the path handed to ReadConfFile.
	confMap = ReadConfFile(sys.argv[1])

	# Settings are read from the "data", "kspectrum" and "output" sections.
	dataFile = confMap["data"]["dataFile"]
	k1 = int(confMap["kspectrum"]["k1"])
	k2 = int(confMap["kspectrum"]["k2"])
	C = float(confMap["kspectrum"]["C"])
	posLen = int(confMap["data"]["posLen"])
	negLen = int(confMap["data"]["negLen"])
	numFolds = int(confMap["kspectrum"]["folds"])
	featureFile = confMap["output"]["featureFile"]
	resultFile = confMap["output"]["resultsFile"]
	modelFile = confMap["output"]["modelFile"]

	# Hard-coded switch: as written, only the cross-validation path runs.
	buildModel = False

	trainData = generate_model.get_spectrum_data(
		dataFile,
		k1,
		k2,
		posLen,
		negLen,
		normalize=True,
		repeatCount=False
	)

	# Step 1: obtain results, either from a built model or from
	# cross-validation alone.
	if buildModel:
		results, m = BuildModel(trainData, modelFile, numFolds)
	else:
		results = CrossValidate(trainData, numFolds)

	# Step 2: report the results (the same call on both paths).
	print_results(results, resultFile, k1, k2)

	# Step 3: feature search; the model's C is passed only when a model
	# was built.
	if buildModel:
		find_features(trainData, featureFile, m.classifier.C)
	else:
		find_features(trainData, featureFile)
Ejemplo n.º 6
0
bestK1 = 0
bestK2 = 0
# First grid definition.
Cs = [10 ** exp for exp in xrange(-10, 5)]
K1 = list(range(8, 14))
K2 = list(range(8, 14))

# NOTE(review): these three assignments replace the grid defined just above,
# so only k1=8, k2=9 and the shorter C range are actually searched --
# confirm which grid is intended.
Cs = [10 ** exp for exp in xrange(-10, 1)]
K1 = [8]
K2 = [9]


for k1 in K1:
   for k2 in K2:
      # Only evaluate pairs in which k2 is at least k1.
      if k2 < k1:
         continue
      for C in Cs:
         print(
            "**** Train/Test with K1: " + str(k1)
            + ", k2: " + str(k2)
            + ", C: " + str(C)
         )
         trainData = generate_model.get_spectrum_data(
            trainSeqFile,
            k1,
            k2,
            trainPosLen,
            trainNegLen,
            normalize=True,
            repeatCount=True
         )
         s = svm.SVM(C=C)
         s.train(trainData)
         results = s.stratifiedCV(trainData, numFolds=numFolds, seen=1)
         labels = results.getGivenClass()

         # Collect the three quality measures reported for this setting.
         roc = results.getROC()
         roc50 = results.getROCn()
         successRate = results.getSuccessRate()
         print(
            "ROC: " + str(roc)
            + ", ROC50: " + str(roc50)
            + ", Success Rate: " + str(successRate)
         )
         # NOTE(review): bestROC is not updated in the lines visible here;
         # presumably that happens further down in this branch -- confirm.
         if roc > bestROC:
            # Drop the previously saved data file, if there is one.
            if os.path.exists(dataFile):
               os.remove(dataFile)
            # Save the training data for this K1, K2 combination.
            trainData.save(dataFile)