# Grid search over (k1, k2, C): for every combination the training data is
# rebuilt, an SVM is trained and cross-validated, and the scores are printed.

# bestROC starts at negative infinity so the first finite ROC compares
# greater. numpy.inf is used because the numpy.Inf alias was removed in
# NumPy 2.0; numpy.inf is available in every NumPy release.
bestROC = -numpy.inf
bestROC50 = -numpy.inf
bestSuccessRate = None
bestW = None
bestK1 = 0
bestK2 = 0

K1 = [8, 9, 10, 11, 12, 13]
K2 = [8, 9, 10, 11, 12, 13]

for k1 in K1:
    # Only pairs with k2 >= k1 are evaluated.
    for k2 in [x for x in K2 if x >= k1]:
        for C in Cs:
            # A parenthesised single-argument print produces the same output
            # under Python 2 and Python 3.
            print("**** Train/Test with K1: " + str(k1) + ", k2: " + str(k2) + ", C: " + str(C))
            trainData = generate_model.get_spectrum_data(
                trainSeqFile,
                k1,
                k2,
                trainPosLen,
                trainNegLen,
                normalize=True,
                repeatCount=True
            )

            s = svm.SVM(C=C)
            s.train(trainData)
            results = s.stratifiedCV(trainData, numFolds=numFolds, seen=1)

            labels = results.getGivenClass()
            roc = results.getROC()
            roc50 = results.getROCn()
            successRate = results.getSuccessRate()
            print("ROC: " + str(roc) + ", ROC50: " + str(roc50) + ", Success Rate: " + str(successRate))

            if roc > bestROC:
                # Delete the old data file.
                if os.path.exists(dataFile):
                    os.remove(dataFile)
                # Save the training data for K1, K2 combination.
                trainData.save(dataFile)
                # NOTE(review): no statement visible in this excerpt updates
                # bestROC (or bestK1/bestK2) after an improvement -- confirm
                # that the bookkeeping follows, otherwise every iteration
                # overwrites the saved data file.
# Driver: read the run settings from the configuration file named by the
# first command-line argument, build the training data, then either build a
# model or cross-validate, and write results and features.
confMap = ReadConfFile(sys.argv[1])

# Settings are read in the same order as before; sibling keys of one section
# are unpacked together.
dataFile = confMap["data"]["dataFile"]
k1, k2 = (int(confMap["kspectrum"][key]) for key in ("k1", "k2"))
C = float(confMap["kspectrum"]["C"])
posLen, negLen = (int(confMap["data"][key]) for key in ("posLen", "negLen"))
numFolds = int(confMap["kspectrum"]["folds"])
featureFile, resultFile, modelFile = (
    confMap["output"][key]
    for key in ("featureFile", "resultsFile", "modelFile")
)

# Hard-coded switch: while it is False only the cross-validation path runs.
buildModel = False

trainData = generate_model.get_spectrum_data(
    dataFile,
    k1,
    k2,
    posLen,
    negLen,
    normalize=True,
    repeatCount=True
)

if not buildModel:
    results = CrossValidate(trainData, numFolds)
    print_results(results, resultFile, k1, k2)
    find_features(trainData, featureFile)
else:
    results, m = BuildModel(trainData, modelFile, numFolds)
    print_results(results, resultFile, k1, k2)
    # The model-building path passes the classifier's C on to find_features.
    find_features(trainData, featureFile, m.classifier.C)
# Grid search over (k1, k2, C): for every combination an SVM is trained on
# the training sequences and evaluated on the separate test sequences.
outFeatureFile = sys.argv[5]

bestC = None
# numpy.inf is used because the numpy.Inf alias was removed in NumPy 2.0;
# numpy.inf is available in every NumPy release.
bestAUC = -numpy.inf
bestFP = None
bestTP = None

K1 = [7, 8, 9, 10, 11, 12, 13]
K2 = [7, 8, 9, 10, 11, 12, 13]

# NOTE(review): nothing visible in this excerpt writes to or closes this
# handle -- confirm it is closed further down.
result_file = open("K-spectrum.txt", "w")

for k1 in K1:
    for k2 in K2:
        for C in Cs:
            # A parenthesised single-argument print produces the same output
            # under Python 2 and Python 3.
            print("**** Train/Test with K1: " + str(k1) + ", k2: " + str(k2) + ", C: " + str(C))
            trainData = generate_model.get_spectrum_data(trainSeqFile, k1, k2, trainLen, trainLen, True)

            # NOTE(review): folds is reset on every iteration, so it only
            # ever holds the current (dvals, labels) pair -- confirm intent.
            folds = []

            s = svm.SVM(C=C)
            s.train(trainData)

            # The test set is built by demo_utils, while the training set
            # above is built by generate_model.
            testData = demo_utils.get_spectrum_data(testSeqFile, k1, k2, testLen, testLen, True)
            results = s.test(testData)

            labels = results.getGivenClass()
            dvals = results.getDecisionFunction()
            folds.append((dvals, labels))

            demo_utils.print_results(results)
            print("Results Log: ")
            # NOTE(review): the return value of getLog() is discarded here;
            # if it returns the log rather than printing it, nothing is shown
            # after the heading above -- confirm.
            results.getLog()
# sys is imported explicitly: the script reads sys.argv at module level and
# no other import of sys is visible before that point.
import sys

from PyML import *
import demo_utils
import generate_model

## Program starts here.
numArgs = len(sys.argv)
print(numArgs)

# The script reads sys.argv[1] through sys.argv[5], so argv must hold at
# least six entries (the program name plus the five arguments listed in the
# usage text). The previous bound of 4 let short invocations fall through to
# an IndexError instead of the usage message.
if numArgs < 6:
    print("USAGE: python createTestSpectrumData.py <test seq file> <lenTest> <k1> <k2> <testDataFile>")
    sys.exit(1)

testSeqFile = sys.argv[1]
testLen = int(sys.argv[2])  # equal pos/neg len
k1 = int(sys.argv[3])
k2 = int(sys.argv[4])
testDatFile = sys.argv[5]

# Build the data set from the test sequences and save it to the given path.
testData = generate_model.get_spectrum_data(testSeqFile, k1, k2, testLen, testLen, True)
testData.save(testDatFile)

if __name__ == '__main__':
    import sys
    combinedFile = sys.argv[1]
    # Joining with a single space reproduces the output of the original
    # two-argument print statement, and behaves the same on Python 2 and 3.
    print(" ".join(("Combined File: ", combinedFile)))
    prefix = sys.argv[2]
    confFile = sys.argv[3]
    CreateConf(combinedFile, prefix, confFile)
confMap = readConfFile(configFile); return confMap; if __name__ == '__main__': import sys confMap = ReadConfFile(sys.argv[1]); dataFile = confMap["data"]["dataFile"] k1 = int(confMap["kspectrum"]["k1"]) k2 = int(confMap["kspectrum"]["k2"]) C = float(confMap["kspectrum"]["C"]) posLen = int(confMap["data"]["posLen"]) negLen = int(confMap["data"]["negLen"]) numFolds = int(confMap["kspectrum"]["folds"]); featureFile = confMap["output"]["featureFile"]; resultFile = confMap["output"]["resultsFile"] modelFile = confMap["output"]["modelFile"] buildModel = False; trainData = generate_model.get_spectrum_data(dataFile, k1, k2, posLen, negLen, normalize=True, repeatCount=False); if buildModel: results, m = BuildModel(trainData, modelFile, numFolds); print_results(results, resultFile, k1, k2) find_features(trainData, featureFile, m.classifier.C); else: results = CrossValidate(trainData, numFolds) print_results(results, resultFile, k1, k2) find_features(trainData, featureFile);
# Grid search over (k1, k2, C): for every combination the training data is
# rebuilt, an SVM is trained and cross-validated, and the scores are printed.
bestK1 = 0
bestK2 = 0

# Wide search grid.
Cs = [10**x for x in range(-10, 5)]
K1 = [8, 9, 10, 11, 12, 13]
K2 = [8, 9, 10, 11, 12, 13]

# NOTE(review): these three assignments immediately replace the grid above,
# so only k1=8, k2=9 and the C values 10**-10 .. 10**0 are searched. This
# looks like a narrowed debugging run -- confirm which grid is intended.
Cs = [10**x for x in range(-10, 1)]
K1 = [8]
K2 = [9]

for k1 in K1:
    for k2 in K2:
        # Only pairs with k2 >= k1 are evaluated.
        if k2 < k1:
            continue
        for C in Cs:
            print("".join([
                "**** Train/Test with K1: ", str(k1),
                ", k2: ", str(k2),
                ", C: ", str(C)
            ]))
            trainData = generate_model.get_spectrum_data(
                trainSeqFile,
                k1,
                k2,
                trainPosLen,
                trainNegLen,
                normalize=True,
                repeatCount=True
            )

            s = svm.SVM(C=C)
            s.train(trainData)
            results = s.stratifiedCV(trainData, numFolds=numFolds, seen=1)

            labels = results.getGivenClass()
            roc = results.getROC()
            roc50 = results.getROCn()
            successRate = results.getSuccessRate()
            print("".join([
                "ROC: ", str(roc),
                ", ROC50: ", str(roc50),
                ", Success Rate: ", str(successRate)
            ]))

            if roc > bestROC:
                # Delete the old data file.
                if os.path.exists(dataFile):
                    os.remove(dataFile)
                # Save the training data for K1, K2 combination.
                trainData.save(dataFile)