def saveInfo(c):
    """Save per-fold details (and optional likelihood plots) for one architecture.

    Reads the module-level settings dict ``d`` and the module-level
    ``toArr`` table. Creates ``<d['-o'][1]>/Arch_<c+1>`` with one
    ``Fold_<k>`` sub-directory per fold and calls ``sf.saveDetails`` on
    ``toArr[c][fold]`` for each of them. If any directory cannot be created
    (``OSError``), an error is printed and the process exits with status 2.

    When ``c`` is non-zero and ``d['-v']`` is not 0, the hidden per-seed
    trace file ``<out>/.<c+1>_<fold>_<seed>`` is moved into the fold
    directory, plotted with gnuplot using the script(s) listed in
    ``d['-v']``, and then removed.
    """
    outRoot = d['-o'][1]
    archNo = str(c + 1)
    tDir = outRoot + "/" + "Arch_" + archNo

    # Save HTML file for the best model of each fold of this architecture.
    try:
        os.mkdir(tDir)
        for foldIdx in range(d['-kfold']):
            foldDir = tDir + "/Fold_" + str(foldIdx + 1)
            os.mkdir(foldDir)
            sf.saveDetails(toArr[c][foldIdx], foldDir, d['-i'], [], d['-tss'], 1,
                           d['-plotExtra'], d['-pCol'], d['-sortBy'], d['-eps'])
    except OSError:
        print("ERROR: Cannot create directory in " + outRoot)
        exit(2)

    # Likelihood plots are produced only when -v is set, and never for the
    # first architecture (c == 0).
    if c == 0 or d['-v'] == 0:
        return

    for foldIdx in range(d['-kfold']):
        foldNo = str(foldIdx + 1)
        foldPrefix = tDir + "/Fold_" + foldNo + "/"
        for seed in range(d['-lcount']):
            hiddenTrace = outRoot + "/." + archNo + "_" + foldNo + "_" + str(seed)
            if not os.path.isfile(hiddenTrace):
                continue
            # Only a seed whose trace file exists gets moved and plotted.
            traceInFold = foldPrefix + plotFileHiddenName
            os.system("mv " + hiddenTrace + " " + traceInFold)
            os.system("gnuplot -e 'filename=\"" + traceInFold + "\"; var=\""
                      + foldPrefix + plotLikelihoodImage + "\"' " + d['-v'][0])
            if len(d['-v']) != 1:
                # A second script in d['-v'] renders the EPS variant.
                os.system("gnuplot -e 'filename=\"" + traceInFold + "\"; var=\""
                          + foldPrefix + plotLikelihoodImageEPS + "\"' " + d['-v'][1])
            os.system("rm -f " + traceInFold)
def getLabels(d):
    """Assign labels based on an input model and save the resulting details.

    ``d`` is the settings dict. The model is learnt by ``ld.learn`` from
    ``d['-f']`` and ``d['-m']``, with labels written to
    ``<d['-o'][1]>/<tempLabelsFile>``; the result is then handed to
    ``sf.saveDetails``. Returns the output directory ``d['-o'][1]`` as it
    was on entry.

    NOTE(review): a second ``getLabels`` definition follows this one in the
    file; if both stay at module level the later one replaces this binding.
    """
    outDir = d['-o'][1]
    feats = getFeatures(d['-f'])
    printLearnDetails(d['-o'])

    labelsPath = outDir + "/" + tempLabelsFile
    model = ld.learn(d['-f'], labelsPath, d['-m'], feats)

    print("\nModel learnt successfully.\nSaving details...")
    sf.saveDetails(model, outDir, d['-i'], [], d['-tss'], 0,
                   d['-plotExtra'], d['-pCol'], d['-sortBy'], d['-eps'])

    # Drop the local references before forcing a collection.
    del feats, model
    print("Goodbye!")
    gc.collect()
    return outDir
def getLabels(d):
    """Assign labels based on an input model, save details and append scores.

    ``d`` is the settings dict. ``ld.learn`` is called with the labels path
    ``<out>/<tempLabelsFile>`` and the scores path ``<out>/<learnScoresFile>``
    (``out`` being ``d['-o'][1]``). After ``sf.saveDetails`` has run, the
    cluster details file is copied to a ``...1`` sibling, rebuilt by
    ``paste``-ing that copy next to the scores file, and both the scores
    file and the copy are removed. Returns ``d['-o'][1]`` as it was on entry.

    NOTE(review): an earlier ``getLabels`` definition precedes this one in
    the file; at module level this later definition is the one that stays
    bound.
    """
    outDir = d['-o'][1]
    feats = getFeatures(d['-f'])
    printLearnDetails(d['-o'])

    labelsPath = outDir + "/" + tempLabelsFile
    scoresPath = outDir + "/" + learnScoresFile
    model = ld.learn(d['-f'], labelsPath, d['-m'], feats, scoresPath)

    print("\nModel learnt successfully.\nSaving details...")
    sf.saveDetails(model, outDir, d['-i'], [], d['-tss'], 0,
                   d['-plotExtra'], d['-pCol'], d['-sortBy'], d['-eps'])

    # Merge the scores column-wise into the cluster details file. `paste`
    # cannot write to one of its own inputs, hence the temporary "1" copy.
    detailsPath = outDir + "/" + clusterDetailsFile
    backupPath = detailsPath + "1"
    os.system(" ".join(["cp", detailsPath, backupPath]))
    os.system(" ".join(["paste", backupPath, scoresPath, ">", detailsPath]))
    os.system("rm " + scoresPath)
    os.system("rm " + backupPath)

    # Drop the local references before forcing a collection.
    del feats, model
    print("Goodbye!")
    gc.collect()
    return outDir
def getModel(d):
    """Learn the best model for the settings in ``d`` and save its details.

    Side effects on ``d``: ``d['-proc']`` is replaced by the number of
    processes actually used, and ``d['-tss']`` is defaulted to half of the
    value returned by ``getFeatures`` when it was 0. Exits with status 1 if
    ``d['-tss']`` exceeds that value. Returns the output directory
    ``d['-o'][1]`` as it was on entry.
    """
    # Captured up front and used for the final save and the return value.
    # NOTE(review): the `learn` shown in this file rewrites dt['-o'][1]; if
    # ev.learn is that function, this early capture is what keeps the
    # top-level directory - confirm.
    outDir = d['-o'][1]

    # Process count: start from the CPU count, honour a smaller positive
    # -proc request, and never exceed 3 * kfold * lcount.
    workers = mp.cpu_count()
    requested = d['-proc']
    if 0 < requested < workers:
        workers = requested
    upperBound = 3 * d['-kfold'] * d['-lcount']
    if upperBound < workers:
        workers = upperBound
    d['-proc'] = workers

    feats = getFeatures(d['-f'])
    if d['-tss'] == 0:
        # NOTE(review): under Python 2 this is floor division when `feats`
        # is an int - confirm before porting to Python 3.
        d['-tss'] = feats / 2
    if d['-tss'] > feats:
        print("ERROR: -tss is more than the length of sequences")
        exit(1)

    count = d['-maxarch'] - d['-minarch'] + 1
    printDetails(d['-o'], count)
    saveSettings(d)

    m, cvals = ev.learn(d, outDir + "/" + tempLabelsFile, count)

    print("\n\nModel learnt successfully.\nSaving details...")
    sf.saveDetails(m, outDir, d['-i'], cvals, d['-tss'], 0,
                   d['-plotExtra'], d['-pCol'], d['-sortBy'], d['-eps'])
    print("Goodbye!")
    gc.collect()
    return outDir
def _runLambda(dt, outfile, count, finalOut, lam):
    """Learn and save the models for one lambda value in its own directory.

    Sets ``dt['-lambda']`` to ``lam`` and points ``dt['-o'][1]`` at
    ``<finalOut>/<defaultLambdaFile><lam>``, creates that directory, copies
    the temporary labels file into it, runs ``learnDiffLambda`` on the
    module-level ``ds``/``trainSets``/``testSets`` and passes the result to
    ``sf.saveDetails``. Prints an error and exits with status 2 if the
    directory cannot be created.

    Returns the ``(model, cvals)`` pair produced by ``learnDiffLambda``.
    """
    # Single-argument print(...) emits the same bytes as the original
    # `print a, b, c` statement, including the space before the newlines.
    print("\n\nTrying Lambda " + str(lam) + " \n\n")
    dt['-lambda'] = lam
    dt['-o'][1] = finalOut + "/" + defaultLambdaFile + str(lam)
    try:
        os.mkdir(dt['-o'][1])
    except OSError:
        # Report the path that actually failed (from dt, not the global d).
        print("ERROR: Cannot create directory " + dt['-o'][1])
        exit(2)
    os.system("cp " + finalOut + "/" + tempLabelsFile + " " + dt['-o'][1] + "/")
    model, cvals = learnDiffLambda(dt, outfile, count, ds, trainSets, testSets)
    sf.saveDetails(model, dt['-o'][1] + "/", dt['-i'], cvals, dt['-tss'], 0,
                   dt['-plotExtra'], dt['-pCol'], dt['-sortBy'], dt['-eps'])
    return model, cvals


def learn(dt, outfile, count):
    """Learn models over the k-fold splits, optionally searching over lambda.

    Loads the data set from ``dt['-f']``, appends one train and one test
    subset per fold to the module-level ``trainSets``/``testSets`` lists, and
    then either

    * runs ``learnDiffLambda`` once when ``dt['-lambda']`` is not -1, or
    * tries lambda = 0, 1, 3, 5, ... each in its own sub-directory of the
      original ``dt['-o'][1]``, keeping the most recent accepted model.

    The lambda search stops when the smallest entry of
    ``m1['m']['posCount']`` is 0 or ``best[1]`` dropped below the value seen
    for the previous lambda (that candidate is discarded), or after
    accepting a candidate whose smallest ``posCount`` is below 5.

    Side effects: sets the module-level ``lcount`` and ``ds``; in the search
    branch ``dt['-lambda']``, ``dt['-outFile']`` and ``dt['-o'][1]`` are
    overwritten and ``dt['-o'][1]`` is left pointing at the last lambda
    directory tried. Native data is released with ``libctest.freeData``
    before returning.

    Returns the ``(model, cvals)`` pair of the selected model.
    """
    global d, ds, trainSets, testSets, lcount
    lcount = dt['-lcount']
    ds = getData(libctest.getData(dt['-f'], outfile))
    pos = libctest.posList(ds.contents.n)

    # Learn best model directly if -minarch and -maxarch are same
    # (disabled shortcut, kept for reference):
    # if dt['-maxarch'] == dt['-minarch']:
    #     d = dt
    #     printBestModel(dt['-maxarch'])
    #     m = learnModel(dt['-maxarch'], dt['-o'][1] + "/" + tempFile)
    #     return m, []

    # Get randomized train sets and test sets for every fold.
    for fold in range(dt['-kfold']):
        trainSets.append(getData(libctest.getTrainSubset(ds, fold, dt['-kfold'], pos)))
        testSets.append(getData(libctest.getTestSubset(ds, fold, dt['-kfold'], pos)))

    if dt['-lambda'] != -1:
        # Learn models by single lambda value.
        m, cvals = learnDiffLambda(dt, outfile, count, ds, trainSets, testSets)
    else:
        # Learn models by varying lambda.
        finalOut = dt['-o'][1]
        dt['-outFile'] = finalOut
        m, cvals = _runLambda(dt, outfile, count, finalOut, 0)
        # NOTE(review): `best` is a module-level value, presumably refreshed
        # by learnDiffLambda on every call - confirm.
        bestCVL = best[1]
        lam = 1
        while True:
            m1, cvals1 = _runLambda(dt, outfile, count, finalOut, lam)
            posMin = min(m1['m']['posCount'])
            # Exit when minimum number of important features is 0 or when
            # best cross validation likelihood is lesser compared to the one
            # by previous lambda; the rejected candidate is dropped.
            if posMin == 0 or best[1] < bestCVL:
                del m1, cvals1
                break
            bestCVL = best[1]
            m, cvals = m1, cvals1
            if posMin < 5:
                break
            lam += 2

    for fold in range(dt['-kfold']):
        libctest.freeData(trainSets[fold])
        libctest.freeData(testSets[fold])
    libctest.freeData(ds)
    # These names are declared global above, so this removes the
    # module-level bindings.
    # NOTE(review): a second call to learn() needs them re-created first.
    del trainSets, testSets
    return m, cvals