def CombineInputVectors(inputVectorFilenames, outputVectorFilename):
    """Concatenate vector files, write a combined header, and shuffle the result.

    The contents of every file in ``inputVectorFilenames`` are appended, in
    order, to ``outputVectorFilename``.  A header ``<output>.hdr`` is then
    written: its first two lines are copied from the header of the first
    input (``inputVectorFilenames[0] + ".hdr"``) and its last line is
    ``TVC <sum>``, where ``<sum>`` is the sum of the values returned by
    ``getTVCs`` for the inputs (presumably per-file vector counts -- confirm
    against ``ConfigurationParser``).  Finally ``ShuffleVectorsModule`` is
    run through the shell with ``--resampleProportion 1``.

    Args:
        inputVectorFilenames: Sequence of input vector file paths; each is
            read as text, and the first one must have a ``.hdr`` companion.
        outputVectorFilename: Path of the combined vector file to create.

    Returns:
        A 2-tuple ``(shuffled, header)`` where ``shuffled`` is the absolute
        path of ``outputVectorFilename + "_ShuffledANN"`` and ``header`` is
        ``outputVectorFilename + "_ShuffledANN.hdr"`` (not made absolute).

    Raises:
        SystemExit: If launching the shuffle command raises an exception.
            The exit status of the command itself is not inspected.
    """
    import os
    import shutil
    import subprocess
    import sys

    from ConfigurationParser import getTVCs

    # Stream each input into the combined file; the context managers make
    # sure every handle is closed even if a copy fails midway.
    with open(outputVectorFilename, "w") as outFile:
        for inFilename in inputVectorFilenames:
            with open(inFilename, "r") as inFile:
                shutil.copyfileobj(inFile, outFile)

    TVC = getTVCs(inputVectorFilenames)
    newTVC = sum(TVC.values())

    # The input header is opened first so a missing header does not leave an
    # empty output header behind.
    outHdrFilename = os.path.abspath(outputVectorFilename) + ".hdr"
    with open(inputVectorFilenames[0] + ".hdr", "r") as inHdrFile:
        with open(outHdrFilename, "w") as outHdrFile:
            outHdrFile.write(inHdrFile.readline())
            outHdrFile.write(inHdrFile.readline())
            outHdrFile.write("TVC {value}".format(value=newTVC))

    outFileBase = outputVectorFilename
    outShuffleFileBase = outFileBase + "_ShuffledANN"

    # Single-element list: with shell=True the one string is the whole
    # command line handed to the shell.
    shuffleVector = [
        "ShuffleVectorsModule "
        + " --outputVectorFileBaseName "
        + outShuffleFileBase
        + " --inputVectorFileBaseName "
        + outFileBase
        + " --resampleProportion 1 "
    ]

    try:
        subprocess.call(shuffleVector, shell=True)
    except Exception:
        # Only launch failures land here; KeyboardInterrupt and SystemExit
        # are deliberately left to propagate.
        print(
            """ERROR
              fail to run {str}""".format(
                str=shuffleVector
            )
        )
        # Non-zero status so callers and shell scripts can see the failure.
        sys.exit(1)
    return os.path.abspath(outShuffleFileBase), outShuffleFileBase + ".hdr"
Example #2
0
def BalanceInputVectors(inputVectorFilenames):
    """Upsample every input vector file up to the largest file's TVC.

    ``getTVCs`` supplies one value per input file (presumably its vector
    count -- confirm against ``ConfigurationParser``).  For each input,
    ``ShuffleVectorsModule`` is run through the shell with
    ``--resampleProportion`` set to ``max TVC / that file's TVC`` and an
    output base name of ``<input>_upsampled.txtANN``.

    Args:
        inputVectorFilenames: Iterable of input vector file paths.  Repeated
            paths are processed each time but reported once in the result.

    Returns:
        A 2-tuple of lists ``(vectorFiles, headerFiles)``.  ``vectorFiles``
        holds the absolute path of each ``<input>_upsampled.txtANN``;
        ``headerFiles`` holds ``<input>_upsampled.txtANN.hdr`` exactly as
        built from the input name (not made absolute).

    Raises:
        ZeroDivisionError: If the TVC of any input file is zero.
    """
    import operator
    import os
    import subprocess

    from ConfigurationParser import getTVCs

    TVC = getTVCs(inputVectorFilenames)
    print(TVC)

    # Entry with the largest TVC; ties go to the first one in TVC's order.
    maxFile = max(TVC.items(), key=operator.itemgetter(1))[0]
    print(maxFile)
    maxTVC = TVC[maxFile]

    outputVectorFilenames = {}
    outputVectorHdrFilenames = {}
    for inputVectorFile in inputVectorFilenames:
        outputBaseName = inputVectorFile + "_upsampled.txtANN"
        # Both operands are floats, so this is plain true division.
        resampleProportion = float(maxTVC) / float(TVC[inputVectorFile])
        # Single-element list: with shell=True the one string is the whole
        # command line handed to the shell.
        upsampleCMD = [
            "ShuffleVectorsModule "
            + " --outputVectorFileBaseName "
            + outputBaseName
            + " --inputVectorFileBaseName "
            + inputVectorFile
            + " --resampleProportion "
            + str(resampleProportion)
        ]
        print(("HACK:  UPSAMPPLING: {0}".format(upsampleCMD)))
        subprocess.call(upsampleCMD, shell=True)
        # Key on the file handled in this iteration so every entry, not just
        # the last input's, ends up as an absolute path.
        outputVectorFilenames[inputVectorFile] = os.path.abspath(outputBaseName)
        outputVectorHdrFilenames[inputVectorFile] = outputBaseName + ".hdr"

    ## return list of upsampled file names
    return list(outputVectorFilenames.values()), list(
        outputVectorHdrFilenames.values())
def BalanceInputVectors(inputVectorFilenames):
    """Upsample every input vector file up to the largest file's TVC.

    ``getTVCs`` supplies one value per input file (presumably its vector
    count -- confirm against ``ConfigurationParser``).  For each input,
    ``ShuffleVectorsModule`` is run through the shell with
    ``--resampleProportion`` set to ``max TVC / that file's TVC`` and an
    output base name of ``<input>_upsampled.txtANN``.

    Args:
        inputVectorFilenames: Iterable of input vector file paths.  Repeated
            paths are processed each time but reported once in the result.

    Returns:
        A 2-tuple of lists ``(vectorFiles, headerFiles)``.  ``vectorFiles``
        holds the absolute path of each ``<input>_upsampled.txtANN``;
        ``headerFiles`` holds ``<input>_upsampled.txtANN.hdr`` exactly as
        built from the input name (not made absolute).

    Raises:
        ZeroDivisionError: If the TVC of any input file is zero.
    """
    import operator
    import os
    import subprocess

    from ConfigurationParser import getTVCs

    TVC = getTVCs(inputVectorFilenames)
    print(TVC)

    # Entry with the largest TVC; ties go to the first one in TVC's order.
    maxFile = max(TVC.items(), key=operator.itemgetter(1))[0]
    print(maxFile)
    maxTVC = TVC[maxFile]

    outputVectorFilenames = {}
    outputVectorHdrFilenames = {}
    for inputVectorFile in inputVectorFilenames:
        outputBaseName = inputVectorFile + "_upsampled.txtANN"
        # Both operands are floats, so this is plain true division.
        resampleProportion = float(maxTVC) / float(TVC[inputVectorFile])
        # Single-element list: with shell=True the one string is the whole
        # command line handed to the shell.
        upsampleCMD = [
            "ShuffleVectorsModule "
            + " --outputVectorFileBaseName "
            + outputBaseName
            + " --inputVectorFileBaseName "
            + inputVectorFile
            + " --resampleProportion "
            + str(resampleProportion)
        ]
        print(("HACK:  UPSAMPPLING: {0}".format(upsampleCMD)))
        subprocess.call(upsampleCMD, shell=True)
        # Key on the file handled in this iteration so every entry, not just
        # the last input's, ends up as an absolute path.
        outputVectorFilenames[inputVectorFile] = os.path.abspath(outputBaseName)
        outputVectorHdrFilenames[inputVectorFile] = outputBaseName + ".hdr"

    ## return list of upsampled file names
    return list(outputVectorFilenames.values()), list(outputVectorHdrFilenames.values())
Example #4
0
def CombineInputVectors(inputVectorFilenames, outputVectorFilename):
    """Concatenate vector files, write a combined header, and shuffle the result.

    The contents of every file in ``inputVectorFilenames`` are appended, in
    order, to ``outputVectorFilename``.  A header ``<output>.hdr`` is then
    written: its first two lines are copied from the header of the first
    input (``inputVectorFilenames[0] + ".hdr"``) and its last line is
    ``TVC <sum>``, where ``<sum>`` is the sum of the values returned by
    ``getTVCs`` for the inputs (presumably per-file vector counts -- confirm
    against ``ConfigurationParser``).  Finally ``ShuffleVectorsModule`` is
    run through the shell with ``--resampleProportion 1``.

    Args:
        inputVectorFilenames: Sequence of input vector file paths; each is
            read as text, and the first one must have a ``.hdr`` companion.
        outputVectorFilename: Path of the combined vector file to create.

    Returns:
        A 2-tuple ``(shuffled, header)`` where ``shuffled`` is the absolute
        path of ``outputVectorFilename + "_ShuffledANN"`` and ``header`` is
        ``outputVectorFilename + "_ShuffledANN.hdr"`` (not made absolute).

    Raises:
        SystemExit: If launching the shuffle command raises an exception.
            The exit status of the command itself is not inspected.
    """
    import os
    import shutil
    import subprocess
    import sys

    from ConfigurationParser import getTVCs

    # Stream each input into the combined file; the context managers make
    # sure every handle is closed even if a copy fails midway.
    with open(outputVectorFilename, "w") as outFile:
        for inFilename in inputVectorFilenames:
            with open(inFilename, "r") as inFile:
                shutil.copyfileobj(inFile, outFile)

    TVC = getTVCs(inputVectorFilenames)
    newTVC = sum(TVC.values())

    # The input header is opened first so a missing header does not leave an
    # empty output header behind.
    outHdrFilename = os.path.abspath(outputVectorFilename) + ".hdr"
    with open(inputVectorFilenames[0] + ".hdr", "r") as inHdrFile:
        with open(outHdrFilename, "w") as outHdrFile:
            outHdrFile.write(inHdrFile.readline())
            outHdrFile.write(inHdrFile.readline())
            outHdrFile.write("TVC {value}".format(value=newTVC))

    outFileBase = outputVectorFilename
    outShuffleFileBase = outFileBase + "_ShuffledANN"

    # Single-element list: with shell=True the one string is the whole
    # command line handed to the shell.
    shuffleVector = [
        "ShuffleVectorsModule " + " --outputVectorFileBaseName " +
        outShuffleFileBase + " --inputVectorFileBaseName " + outFileBase +
        " --resampleProportion 1 "
    ]

    try:
        subprocess.call(shuffleVector, shell=True)
    except Exception:
        # Only launch failures land here; KeyboardInterrupt and SystemExit
        # are deliberately left to propagate.
        print(("""ERROR
              fail to run {str}""".format(str=shuffleVector)))
        # Non-zero status so callers and shell scripts can see the failure.
        sys.exit(1)
    return os.path.abspath(outShuffleFileBase), outShuffleFileBase + ".hdr"