Beispiel #1
0
def saveInfo(c):
    """Write the per-fold output for architecture index ``c`` (0-based).

    Creates ``<out>/Arch_<c+1>`` and one ``Fold_<k>`` directory per fold
    inside it (``<out>`` is ``d['-o'][1]``), calling ``sf.saveDetails`` on
    ``toArr[c][fold]`` for each fold. Any ``OSError`` raised while doing so
    prints an error message and exits the process with status 2.

    Unless ``c`` is 0 or ``d['-v']`` is 0, every fold's hidden likelihood
    file ``<out>/.<c+1>_<k>_<sd>`` is then moved into the fold directory,
    plotted by running gnuplot with the script(s) listed in ``d['-v']``,
    and finally removed.
    """
    archNo = str(c + 1)
    tDir = d['-o'][1] + "/" + "Arch_" + archNo
    try:
        # Save HTML file for best model of each fold of an architecture
        os.mkdir(tDir)
        for k in range(d['-kfold']):
            foldDir = tDir + "/Fold_" + str(k + 1)
            os.mkdir(foldDir)
            sf.saveDetails(toArr[c][k], foldDir, d['-i'], [], d['-tss'], 1,
                           d['-plotExtra'], d['-pCol'], d['-sortBy'], d['-eps'])
    except OSError:
        print(" ".join(["ERROR: Cannot create directory in", d['-o'][1]]))
        exit(2)

    if c == 0 or d['-v'] == 0:
        return

    # Save likelihood plot if -v flag is set
    for k in range(d['-kfold']):
        foldNo = str(k + 1)
        foldDir = tDir + "/Fold_" + foldNo
        hiddenPlot = foldDir + "/" + plotFileHiddenName

        # Every existing hidden file is moved onto the same destination,
        # so the last one found is the one that gets plotted.
        for sd in range(d['-lcount']):
            src = d['-o'][1] + "/." + archNo + "_" + foldNo + "_" + str(sd)
            if os.path.isfile(src):
                os.system("mv " + src + " " + hiddenPlot)

        # Shared prefix of the gnuplot -e argument; the image name, closing
        # quotes and script path are appended per invocation.
        plotArgs = "'filename=\"" + hiddenPlot + "\"; var=\"" + foldDir + "/"
        os.system("gnuplot -e " + plotArgs + plotLikelihoodImage + "\"' " + d['-v'][0])
        if len(d['-v']) != 1:
            os.system("gnuplot -e " + plotArgs + plotLikelihoodImageEPS + "\"' " + d['-v'][1])

        os.system("rm -f " + hiddenPlot)
Beispiel #2
0
def saveInfo(c):
    """Write the per-fold output for architecture index ``c`` (0-based).

    Creates ``<out>/Arch_<c+1>`` and one ``Fold_<k>`` directory per fold
    inside it (``<out>`` is ``d['-o'][1]``), calling ``sf.saveDetails`` on
    ``toArr[c][fold]`` for each fold. Any ``OSError`` raised while doing so
    prints an error message and exits the process with status 2.

    Unless ``c`` is 0 or ``d['-v']`` is 0, every fold's hidden likelihood
    file ``<out>/.<c+1>_<k>_<sd>`` is then moved into the fold directory,
    plotted by running gnuplot with the script(s) listed in ``d['-v']``,
    and finally removed.
    """
    archNo = str(c + 1)
    tDir = d['-o'][1] + "/" + "Arch_" + archNo
    try:
        # Save HTML file for best model of each fold of an architecture
        os.mkdir(tDir)
        for k in range(d['-kfold']):
            foldDir = tDir + "/Fold_" + str(k + 1)
            os.mkdir(foldDir)
            sf.saveDetails(toArr[c][k], foldDir, d['-i'], [], d['-tss'], 1,
                           d['-plotExtra'], d['-pCol'], d['-sortBy'], d['-eps'])
    except OSError:
        print(" ".join(["ERROR: Cannot create directory in", d['-o'][1]]))
        exit(2)

    if c == 0 or d['-v'] == 0:
        return

    # Save likelihood plot if -v flag is set
    for k in range(d['-kfold']):
        foldNo = str(k + 1)
        foldDir = tDir + "/Fold_" + foldNo
        hiddenPlot = foldDir + "/" + plotFileHiddenName

        # Every existing hidden file is moved onto the same destination,
        # so the last one found is the one that gets plotted.
        for sd in range(d['-lcount']):
            src = d['-o'][1] + "/." + archNo + "_" + foldNo + "_" + str(sd)
            if os.path.isfile(src):
                os.system("mv " + src + " " + hiddenPlot)

        # Shared prefix of the gnuplot -e argument; the image name, closing
        # quotes and script path are appended per invocation.
        plotArgs = "'filename=\"" + hiddenPlot + "\"; var=\"" + foldDir + "/"
        os.system("gnuplot -e " + plotArgs + plotLikelihoodImage + "\"' " + d['-v'][0])
        if len(d['-v']) != 1:
            os.system("gnuplot -e " + plotArgs + plotLikelihoodImageEPS + "\"' " + d['-v'][1])

        os.system("rm -f " + hiddenPlot)
Beispiel #3
0
def getLabels(d):    # Assign labels based on input model
    """Learn from an existing model and save the resulting details.

    ``d`` is the settings dictionary. The output directory is ``d['-o'][1]``;
    ``ld.learn`` is given the input file ``d['-f']``, the temporary labels
    path inside the output directory, the model ``d['-m']`` and the value
    returned by ``getFeatures(d['-f'])``. The learnt model is then passed to
    ``sf.saveDetails``.

    Returns the output directory path.
    """
    outDir = d['-o'][1]
    nFeatures = getFeatures(d['-f'])
    printLearnDetails(d['-o'])

    labelsPath = outDir + "/" + tempLabelsFile
    model = ld.learn(d['-f'], labelsPath, d['-m'], nFeatures)
    print("\nModel learnt successfully.\nSaving details...")
    sf.saveDetails(model, outDir, d['-i'], [], d['-tss'], 0,
                   d['-plotExtra'], d['-pCol'], d['-sortBy'], d['-eps'])

    # Drop the local references before forcing a collection.
    del nFeatures, model
    print("Goodbye!")
    gc.collect()
    return outDir
Beispiel #4
0
def getLabels(d):    # Assign labels based on input model
    """Learn from an existing model, save details and append learn scores.

    ``d`` is the settings dictionary. The output directory is ``d['-o'][1]``;
    ``ld.learn`` is given the input file ``d['-f']``, the temporary labels
    path, the model ``d['-m']``, the value returned by
    ``getFeatures(d['-f'])`` and the learn-scores path. After
    ``sf.saveDetails`` has run, the cluster details file is pasted together
    column-wise with the learn-scores file (via the shell ``paste`` command)
    and the intermediate files are removed.

    Returns the output directory path.
    """
    outDir = d['-o'][1]
    nFeatures = getFeatures(d['-f'])
    printLearnDetails(d['-o'])

    scoresPath = outDir + "/" + learnScoresFile
    model = ld.learn(d['-f'], outDir + "/" + tempLabelsFile, d['-m'], nFeatures, scoresPath)
    print("\nModel learnt successfully.\nSaving details...")
    sf.saveDetails(model, outDir, d['-i'], [], d['-tss'], 0,
                   d['-plotExtra'], d['-pCol'], d['-sortBy'], d['-eps'])

    detailsPath = outDir + "/" + clusterDetailsFile
    backupPath = detailsPath + "1"
    # Copy the details aside, rebuild them with the scores appended, then
    # remove both temporary inputs. Order matters: each step feeds the next.
    for cmd in (
        "cp " + detailsPath + " " + backupPath,
        "paste " + backupPath + " " + scoresPath + " > " + detailsPath,
        "rm " + scoresPath,
        "rm " + backupPath,
    ):
        os.system(cmd)

    # Drop the local references before forcing a collection.
    del nFeatures, model
    print("Goodbye!")
    gc.collect()
    return outDir
Beispiel #5
0
def getModel(d):    # Learn best model
    """Learn the best model for the settings in ``d`` and save its details.

    Side effects on ``d``:
      * ``d['-proc']`` is replaced by the number of processes to use: the CPU
        count, lowered to the requested ``-proc`` when that is positive and
        smaller, and capped at ``3 * -kfold * -lcount``.
      * ``d['-tss']`` is set to half of ``getFeatures(d['-f'])`` when it is 0.

    Prints an error and exits with status 1 when ``-tss`` exceeds the value
    returned by ``getFeatures``.

    Returns the output directory ``d['-o'][1]`` as read on entry.
    """
    # Read once, up front; this is the value used for saving and returned.
    outDir = d['-o'][1]

    workers = mp.cpu_count()
    if 0 < d['-proc'] < workers:
        workers = d['-proc']
    workers = min(workers, 3*d['-kfold']*d['-lcount'])
    d['-proc'] = workers

    nFeatures = getFeatures(d['-f'])
    if d['-tss'] == 0:
        d['-tss'] = nFeatures/2
    if d['-tss'] > nFeatures:
        print("ERROR: -tss is more than the length of sequences")
        exit(1)

    archCount = d['-maxarch'] - d['-minarch'] + 1
    printDetails(d['-o'], archCount)
    saveSettings(d)

    model, cvLikelihoods = ev.learn(d, outDir + "/" + tempLabelsFile, archCount)
    print("\n\nModel learnt successfully.\nSaving details...")
    sf.saveDetails(model, outDir, d['-i'], cvLikelihoods, d['-tss'], 0,
                   d['-plotExtra'], d['-pCol'], d['-sortBy'], d['-eps'])
    print("Goodbye!")
    gc.collect()
    return outDir
Beispiel #6
0
def learn(dt, outfile, count):
    """Learn models for the settings in ``dt`` and return ``(m, cvals)``.

    Loads the data set from ``dt['-f']``, builds one train/test subset pair
    per fold (``dt['-kfold']``) and then either:

    * learns once with the given ``dt['-lambda']`` when it is not -1, or
    * tries lambda 0, 1, 3, 5, ... in turn, writing each attempt into its own
      ``<out>/<defaultLambdaFile><lambda>`` directory, and keeps the last
      attempt that was accepted (see the stop conditions in the loop).

    In the varying-lambda case ``dt['-lambda']``, ``dt['-outFile']`` and
    ``dt['-o'][1]`` are modified and left at their last values.

    All settings are read from the ``dt`` argument. (The original read the
    ``-i`` setting and the error-message path from the module-global ``d``,
    which this function never assigns.)

    Exits the process with status 2 when a lambda directory cannot be
    created.
    """
    global ds, trainSets, testSets, lcount
    lcount = dt['-lcount']
    ds = getData(libctest.getData(dt['-f'], outfile))
    pos = libctest.posList(ds.contents.n)

    # Learn best model directly if -minarch and -maxarch are same

    # if dt['-maxarch'] == dt['-minarch']:
    #     d = dt
    #     printBestModel(dt['-maxarch'])
    #     m = learnModel(dt['-maxarch'], dt['-o'][1] + "/" + tempFile)
    #     return m, []

    # Get randomized train sets and test sets for every fold
    for fold in range(dt['-kfold']):
        trainSets.append(getData(libctest.getTrainSubset(ds, fold, dt['-kfold'], pos)))
        testSets.append(getData(libctest.getTestSubset(ds, fold, dt['-kfold'], pos)))

    if dt['-lambda'] != -1:    # Learn models by single lambda value
        m, cvals = learnDiffLambda(dt, outfile, count, ds, trainSets, testSets)
    else:    # Learn models by varying lambda
        finalOut = dt['-o'][1]
        dt['-outFile'] = finalOut

        def _runLambda(lam):
            # Learn and save one attempt for lambda value `lam` in its own
            # sub-directory of finalOut; returns (model, cvals).
            print(" ".join(["\n\nTrying Lambda", str(lam), "\n\n"]))
            dt['-lambda'] = lam
            dt['-o'][1] = finalOut + "/" + defaultLambdaFile + str(lam)
            try:
                os.mkdir(dt['-o'][1])
            except OSError:
                print("ERROR: Cannot create directory " + dt['-o'][1])
                exit(2)
            os.system("cp " + finalOut + "/" + tempLabelsFile + " " + dt['-o'][1] + "/")
            model, likelihoods = learnDiffLambda(dt, outfile, count, ds, trainSets, testSets)
            sf.saveDetails(model, dt['-o'][1] + "/", dt['-i'], likelihoods, dt['-tss'], 0,
                           dt['-plotExtra'], dt['-pCol'], dt['-sortBy'], dt['-eps'])
            return model, likelihoods

        m, cvals = _runLambda(0)
        # `best` is a module-level value not assigned here; presumably it is
        # refreshed by learnDiffLambda -- TODO confirm.
        bestCVL = best[1]
        lam = 1
        while True:
            m1, cvals1 = _runLambda(lam)
            posMin = min(m1['m']['posCount'])
            # Exit when minimum number of important features is 0 or when
            # best cross validation likelihood is lesser compared to the one
            # by previous lambda; the previous attempt stays the result.
            if posMin == 0 or best[1] < bestCVL:
                del m1, cvals1
                break
            bestCVL = best[1]
            m, cvals = m1, cvals1
            # Accept this attempt but stop once fewer than 5 remain.
            if posMin < 5:
                break
            lam += 2

    for fold in range(dt['-kfold']):
        libctest.freeData(trainSets[fold])
        libctest.freeData(testSets[fold])
    libctest.freeData(ds)
    # Empty the module-level lists in place. `del trainSets, testSets` under
    # the `global` declaration would remove the names themselves, making the
    # append calls above fail on any later call.
    del trainSets[:]
    del testSets[:]
    return m, cvals
Beispiel #7
0
def learn(dt, outfile, count):
    """Learn models for the settings in ``dt`` and return ``(m, cvals)``.

    Loads the data set from ``dt['-f']``, builds one train/test subset pair
    per fold (``dt['-kfold']``) and then either:

    * learns once with the given ``dt['-lambda']`` when it is not -1, or
    * tries lambda 0, 1, 3, 5, ... in turn, writing each attempt into its own
      ``<out>/<defaultLambdaFile><lambda>`` directory, and keeps the last
      attempt that was accepted (see the stop conditions in the loop).

    In the varying-lambda case ``dt['-lambda']``, ``dt['-outFile']`` and
    ``dt['-o'][1]`` are modified and left at their last values.

    All settings are read from the ``dt`` argument. (The original read the
    ``-i`` setting and the error-message path from the module-global ``d``,
    which this function never assigns.)

    Exits the process with status 2 when a lambda directory cannot be
    created.
    """
    global ds, trainSets, testSets, lcount
    lcount = dt['-lcount']
    ds = getData(libctest.getData(dt['-f'], outfile))
    pos = libctest.posList(ds.contents.n)

    # Learn best model directly if -minarch and -maxarch are same

    # if dt['-maxarch'] == dt['-minarch']:
    #     d = dt
    #     printBestModel(dt['-maxarch'])
    #     m = learnModel(dt['-maxarch'], dt['-o'][1] + "/" + tempFile)
    #     return m, []

    # Get randomized train sets and test sets for every fold
    for fold in range(dt['-kfold']):
        trainSets.append(getData(libctest.getTrainSubset(ds, fold, dt['-kfold'], pos)))
        testSets.append(getData(libctest.getTestSubset(ds, fold, dt['-kfold'], pos)))

    if dt['-lambda'] != -1:    # Learn models by single lambda value
        m, cvals = learnDiffLambda(dt, outfile, count, ds, trainSets, testSets)
    else:    # Learn models by varying lambda
        finalOut = dt['-o'][1]
        dt['-outFile'] = finalOut

        def _runLambda(lam):
            # Learn and save one attempt for lambda value `lam` in its own
            # sub-directory of finalOut; returns (model, cvals).
            print(" ".join(["\n\nTrying Lambda", str(lam), "\n\n"]))
            dt['-lambda'] = lam
            dt['-o'][1] = finalOut + "/" + defaultLambdaFile + str(lam)
            try:
                os.mkdir(dt['-o'][1])
            except OSError:
                print("ERROR: Cannot create directory " + dt['-o'][1])
                exit(2)
            os.system("cp " + finalOut + "/" + tempLabelsFile + " " + dt['-o'][1] + "/")
            model, likelihoods = learnDiffLambda(dt, outfile, count, ds, trainSets, testSets)
            sf.saveDetails(model, dt['-o'][1] + "/", dt['-i'], likelihoods, dt['-tss'], 0,
                           dt['-plotExtra'], dt['-pCol'], dt['-sortBy'], dt['-eps'])
            return model, likelihoods

        m, cvals = _runLambda(0)
        # `best` is a module-level value not assigned here; presumably it is
        # refreshed by learnDiffLambda -- TODO confirm.
        bestCVL = best[1]
        lam = 1
        while True:
            m1, cvals1 = _runLambda(lam)
            posMin = min(m1['m']['posCount'])
            # Exit when minimum number of important features is 0 or when
            # best cross validation likelihood is lesser compared to the one
            # by previous lambda; the previous attempt stays the result.
            if posMin == 0 or best[1] < bestCVL:
                del m1, cvals1
                break
            bestCVL = best[1]
            m, cvals = m1, cvals1
            # Accept this attempt but stop once fewer than 5 remain.
            if posMin < 5:
                break
            lam += 2

    for fold in range(dt['-kfold']):
        libctest.freeData(trainSets[fold])
        libctest.freeData(testSets[fold])
    libctest.freeData(ds)
    # Empty the module-level lists in place. `del trainSets, testSets` under
    # the `global` declaration would remove the names themselves, making the
    # append calls above fail on any later call.
    del trainSets[:]
    del testSets[:]
    return m, cvals