def CombineInputVectors(inputVectorFilenames, outputVectorFilename):
    """Concatenate vector files, write a merged header, and shuffle the result.

    NOTE: a second definition of ``CombineInputVectors`` appears later in this
    file; at import time that later definition replaces this one.

    Steps:
      1. Append the contents of every file in ``inputVectorFilenames``, in
         order, to ``outputVectorFilename`` (the output is truncated first).
      2. Write ``<abs output>.hdr``: the first two lines of the first input's
         ``.hdr`` file, followed by a ``TVC`` line holding the sum of the
         values returned by ``getTVCs`` for the inputs.
      3. Run ``ShuffleVectorsModule`` on the combined file with
         ``--resampleProportion 1``.

    Args:
        inputVectorFilenames: sequence of vector file names; each is expected
            to have a sibling ``<name>.hdr`` (only the first one is read here).
        outputVectorFilename: name of the combined vector file to create.

    Returns:
        A 2-tuple ``(absolute path of "<output>_ShuffledANN",
        "<output>_ShuffledANN.hdr")``. The second element is NOT made
        absolute; this matches the historical return value.

    Raises:
        SystemExit: if ``ShuffleVectorsModule`` cannot be launched.
    """
    import os
    import shutil
    import subprocess
    import sys

    from ConfigurationParser import getTVCs

    # Context managers guarantee every handle is closed, including the input
    # handles that used to be left open.
    with open(outputVectorFilename, "w") as outFile:
        for inFilename in inputVectorFilenames:
            with open(inFilename, "r") as inFile:
                # Stream the copy instead of reading each file fully into memory.
                shutil.copyfileobj(inFile, outFile)

    TVC = getTVCs(inputVectorFilenames)
    newTVC = sum(TVC.values())

    outHdrFilename = os.path.abspath(outputVectorFilename) + ".hdr"
    with open(inputVectorFilenames[0] + ".hdr", "r") as inHdrFile:
        with open(outHdrFilename, "w") as outHdrFile:
            # The first two header lines are copied verbatim from the first input.
            outHdrFile.write(inHdrFile.readline())
            outHdrFile.write(inHdrFile.readline())
            # No trailing newline is written, exactly as before.
            outHdrFile.write("TVC {value}".format(value=newTVC))

    outFileBase = outputVectorFilename
    outShuffleFileBase = outFileBase + "_ShuffledANN"

    # Pass an argument list without a shell so file names containing spaces or
    # shell metacharacters reach the tool unchanged.
    shuffleVector = [
        "ShuffleVectorsModule",
        "--outputVectorFileBaseName",
        outShuffleFileBase,
        "--inputVectorFileBaseName",
        outFileBase,
        "--resampleProportion",
        "1",
    ]
    try:
        status = subprocess.call(shuffleVector)
    except OSError:
        # Without a shell, a missing or non-executable tool raises OSError.
        print("""ERROR fail to run {str}""".format(str=shuffleVector))
        sys.exit(1)
    if status != 0:
        # A non-zero status was silently ignored before; report it but keep
        # going so existing pipelines are not turned into hard failures.
        print(
            "WARNING: {cmd} exited with status {status}".format(
                cmd=shuffleVector, status=status
            )
        )

    return os.path.abspath(outShuffleFileBase), outShuffleFileBase + ".hdr"
def BalanceInputVectors(inputVectorFilenames):
    """Upsample every input vector file to match the largest ``TVC`` value.

    NOTE: a second definition of ``BalanceInputVectors`` appears later in this
    file; at import time that later definition replaces this one.

    ``getTVCs`` supplies a mapping from input file name to its ``TVC`` value.
    For each input, ``ShuffleVectorsModule`` is run with
    ``--resampleProportion`` set to ``max TVC / this file's TVC`` and output
    base name ``<input>_upsampled.txtANN``.

    Args:
        inputVectorFilenames: sequence of vector file names.

    Returns:
        A 2-tuple ``(vector_files, header_files)`` of lists. Vector file names
        are absolute paths; header names are ``<input>_upsampled.txtANN.hdr``
        exactly as built from the input names (not made absolute).

    Raises:
        ValueError: if ``getTVCs`` returns an empty mapping.
        ZeroDivisionError: if an input's ``TVC`` value is zero.
        OSError: if ``ShuffleVectorsModule`` cannot be launched.
    """
    import operator
    import os
    import subprocess

    from ConfigurationParser import getTVCs

    ## read header file
    TVC = getTVCs(inputVectorFilenames)
    print(TVC)

    # File with the largest TVC; every other file is upsampled towards it.
    maxFile = max(TVC.items(), key=operator.itemgetter(1))[0]
    print(maxFile)
    maxTVC = TVC[maxFile]

    outputVectorFilenames = {}
    outputVectorHdrFilenames = {}
    for inFile in inputVectorFilenames:
        outputVectorFilenames[inFile] = inFile + "_upsampled.txtANN"
        outputVectorHdrFilenames[inFile] = inFile + "_upsampled.txtANN.hdr"

    ## upsample all other files
    for inputVectorFile in inputVectorFilenames:
        # Both operands are floats, so this is ordinary true division.
        proportion = float(maxTVC) / float(TVC[inputVectorFile])
        # Argument list without a shell: file names containing spaces or
        # shell metacharacters reach the tool unchanged.
        upsampleCMD = [
            "ShuffleVectorsModule",
            "--outputVectorFileBaseName",
            outputVectorFilenames[inputVectorFile],
            "--inputVectorFileBaseName",
            inputVectorFile,
            "--resampleProportion",
            str(proportion),
        ]
        print("HACK: UPSAMPPLING: {0}".format(upsampleCMD))
        status = subprocess.call(upsampleCMD)
        if status != 0:
            # Previously ignored; report it without aborting the other files.
            print(
                "WARNING: {cmd} exited with status {status}".format(
                    cmd=upsampleCMD, status=status
                )
            )
        # Index by the current file. The old code used the stale ``inFile``
        # left over from the naming loop, so only the last input's entry was
        # ever converted to an absolute path.
        outputVectorFilenames[inputVectorFile] = os.path.abspath(
            outputVectorFilenames[inputVectorFile]
        )

    ## return list of upsampled file names
    return (
        list(outputVectorFilenames.values()),
        list(outputVectorHdrFilenames.values()),
    )
def BalanceInputVectors(inputVectorFilenames):
    """Upsample every input vector file to match the largest ``TVC`` value.

    NOTE: an earlier definition of ``BalanceInputVectors`` exists in this
    file; this later one is the definition that stays bound after import.

    ``getTVCs`` supplies a mapping from input file name to its ``TVC`` value.
    For each input, ``ShuffleVectorsModule`` is run with
    ``--resampleProportion`` set to ``max TVC / this file's TVC`` and output
    base name ``<input>_upsampled.txtANN``.

    Args:
        inputVectorFilenames: sequence of vector file names.

    Returns:
        A 2-tuple ``(vector_files, header_files)`` of lists. Vector file names
        are absolute paths; header names are ``<input>_upsampled.txtANN.hdr``
        exactly as built from the input names (not made absolute).

    Raises:
        ValueError: if ``getTVCs`` returns an empty mapping.
        ZeroDivisionError: if an input's ``TVC`` value is zero.
        OSError: if ``ShuffleVectorsModule`` cannot be launched.
    """
    import operator
    import os
    import subprocess

    from ConfigurationParser import getTVCs

    ## read header file
    TVC = getTVCs(inputVectorFilenames)
    print(TVC)

    # File with the largest TVC; every other file is upsampled towards it.
    maxFile = max(TVC.items(), key=operator.itemgetter(1))[0]
    print(maxFile)
    maxTVC = TVC[maxFile]

    outputVectorFilenames = {}
    outputVectorHdrFilenames = {}
    for inFile in inputVectorFilenames:
        outputVectorFilenames[inFile] = inFile + "_upsampled.txtANN"
        outputVectorHdrFilenames[inFile] = inFile + "_upsampled.txtANN.hdr"

    ## upsample all other files
    for inputVectorFile in inputVectorFilenames:
        # Both operands are floats, so this is ordinary true division.
        proportion = float(maxTVC) / float(TVC[inputVectorFile])
        # Argument list without a shell: file names containing spaces or
        # shell metacharacters reach the tool unchanged.
        upsampleCMD = [
            "ShuffleVectorsModule",
            "--outputVectorFileBaseName",
            outputVectorFilenames[inputVectorFile],
            "--inputVectorFileBaseName",
            inputVectorFile,
            "--resampleProportion",
            str(proportion),
        ]
        print("HACK: UPSAMPPLING: {0}".format(upsampleCMD))
        status = subprocess.call(upsampleCMD)
        if status != 0:
            # Previously ignored; report it without aborting the other files.
            print(
                "WARNING: {cmd} exited with status {status}".format(
                    cmd=upsampleCMD, status=status
                )
            )
        # Index by the current file. The old code used the stale ``inFile``
        # left over from the naming loop, so only the last input's entry was
        # ever converted to an absolute path.
        outputVectorFilenames[inputVectorFile] = os.path.abspath(
            outputVectorFilenames[inputVectorFile]
        )

    ## return list of upsampled file names
    return (
        list(outputVectorFilenames.values()),
        list(outputVectorHdrFilenames.values()),
    )
def CombineInputVectors(inputVectorFilenames, outputVectorFilename):
    """Concatenate vector files, write a merged header, and shuffle the result.

    NOTE: an earlier definition of ``CombineInputVectors`` exists in this
    file; this later one is the definition that stays bound after import.

    Steps:
      1. Append the contents of every file in ``inputVectorFilenames``, in
         order, to ``outputVectorFilename`` (the output is truncated first).
      2. Write ``<abs output>.hdr``: the first two lines of the first input's
         ``.hdr`` file, followed by a ``TVC`` line holding the sum of the
         values returned by ``getTVCs`` for the inputs.
      3. Run ``ShuffleVectorsModule`` on the combined file with
         ``--resampleProportion 1``.

    Args:
        inputVectorFilenames: sequence of vector file names; each is expected
            to have a sibling ``<name>.hdr`` (only the first one is read here).
        outputVectorFilename: name of the combined vector file to create.

    Returns:
        A 2-tuple ``(absolute path of "<output>_ShuffledANN",
        "<output>_ShuffledANN.hdr")``. The second element is NOT made
        absolute; this matches the historical return value.

    Raises:
        SystemExit: if ``ShuffleVectorsModule`` cannot be launched.
    """
    import os
    import shutil
    import subprocess
    import sys

    from ConfigurationParser import getTVCs

    # Context managers guarantee every handle is closed, including the input
    # handles that used to be left open.
    with open(outputVectorFilename, "w") as outFile:
        for inFilename in inputVectorFilenames:
            with open(inFilename, "r") as inFile:
                # Stream the copy instead of reading each file fully into memory.
                shutil.copyfileobj(inFile, outFile)

    TVC = getTVCs(inputVectorFilenames)
    newTVC = sum(TVC.values())

    outHdrFilename = os.path.abspath(outputVectorFilename) + ".hdr"
    with open(inputVectorFilenames[0] + ".hdr", "r") as inHdrFile:
        with open(outHdrFilename, "w") as outHdrFile:
            # The first two header lines are copied verbatim from the first input.
            outHdrFile.write(inHdrFile.readline())
            outHdrFile.write(inHdrFile.readline())
            # No trailing newline is written, exactly as before.
            outHdrFile.write("TVC {value}".format(value=newTVC))

    outFileBase = outputVectorFilename
    outShuffleFileBase = outFileBase + "_ShuffledANN"

    # Pass an argument list without a shell so file names containing spaces or
    # shell metacharacters reach the tool unchanged.
    shuffleVector = [
        "ShuffleVectorsModule",
        "--outputVectorFileBaseName",
        outShuffleFileBase,
        "--inputVectorFileBaseName",
        outFileBase,
        "--resampleProportion",
        "1",
    ]
    try:
        status = subprocess.call(shuffleVector)
    except OSError:
        # Without a shell, a missing or non-executable tool raises OSError.
        print("""ERROR fail to run {str}""".format(str=shuffleVector))
        sys.exit(1)
    if status != 0:
        # A non-zero status was silently ignored before; report it but keep
        # going so existing pipelines are not turned into hard failures.
        print(
            "WARNING: {cmd} exited with status {status}".format(
                cmd=shuffleVector, status=status
            )
        )

    return os.path.abspath(outShuffleFileBase), outShuffleFileBase + ".hdr"