def extractApproxRepeatStatistics(folderName, topX):
    """Print and plot the neighbourhood of the longest repeats in a folder.

    Calls ``ioLib.generateOneLineFile`` for ``folderName``, reads the repeat
    records from ``<folderName>/long_repeats.txt``, sorts them by their third
    field in descending order and, for at most ``topX`` records, prints the
    two start positions and hands them to
    ``graphPlottingLib.plotPatternBeyondRepeat``.

    Args:
        folderName: Directory path without a trailing slash; file names are
            appended as ``folderName + "/<file>"``.
        topX: Maximum number of repeat records to report.

    Returns:
        None. Returns early, plotting nothing, when no repeat record is left
        to process.

    NOTE(review): this function is defined again further down the file with
    the same body; the later definition is the one in effect once the module
    is loaded.
    """
    # presumably this produces the oneLine*.fasta files used below -- confirm
    ioLib.generateOneLineFile(folderName, folderName + "/genome.fasta")
    listOfData = ioLib.readFromMummerOutput(folderName + "/long_repeats.txt")
    listOfData = sorted(listOfData, key=itemgetter(2), reverse=True)

    # Drop a leading record that starts at position 1 in both copies
    # (presumably the sequence matching itself -- TODO confirm).
    if listOfData and listOfData[0][0] == 1 and listOfData[0][1] == 1:
        listOfData.pop(0)

    # Nothing to report; previously this case raised IndexError below.
    if not listOfData:
        return

    # Every plot is sized from the longest remaining record, not from the
    # record being plotted.
    length = listOfData[0][2]
    margin = int(1.5 * length)
    window = int(length * 5 / 10)

    oneLinePath = folderName + "/oneLine.fasta"
    oneLine2Path = folderName + "/oneLine2.fasta"

    # max(topX, 0) keeps a negative topX meaning "report nothing".
    for record in listOfData[:max(topX, 0)]:
        start1 = record[0]
        start2 = record[1]

        # Built as single strings so the output is identical under the
        # Python 2 print statement and the Python 3 print function.
        print("Starting Location 1:" + " " + str(start1))
        print("Starting Location 2:" + " " + str(start2))
        print("")

        # A disabled earlier variant wrote per-repeat statistics to
        # <folderName>/outputfile<index>.txt via reportPatternBeyondRepeat
        # and graphPlottingLib.readAndPlotStat instead of plotting directly.
        graphPlottingLib.plotPatternBeyondRepeat(
            oneLinePath,
            oneLine2Path,
            start1 - margin,
            start1 - margin - 1,
            start2 - margin,
            # NOTE(review): no "- 1" here, unlike the start1 pair above and
            # the disabled call; kept as in the original -- confirm intent.
            start2 - margin,
            window,
        )
def batchComputationOfStatistics(folders):
    """Run the repeat-pattern batch computation for every folder given.

    For each entry of ``folders`` this calls ``ioLib.generateOneLineFile``
    on ``<folder>/genome.fasta`` and then ``batchPatternBeyondRepeat`` with
    the folder's ``long_repeats.txt``, ``oneLine.fasta``, ``oneLine2.fasta``
    and ``result`` paths.

    Args:
        folders: Iterable of directory paths without a trailing slash.
    """
    for folder in folders:
        genomePath = folder + "/genome.fasta"
        ioLib.generateOneLineFile(folder, genomePath)

        repeatsPath = folder + "/long_repeats.txt"
        oneLinePath = folder + "/oneLine.fasta"
        oneLine2Path = folder + "/oneLine2.fasta"
        resultPath = folder + "/result"
        batchPatternBeyondRepeat(
            repeatsPath, oneLinePath, oneLine2Path, resultPath
        )
def extractApproxRepeatStatistics(folderName, topX):
    """Print and plot the neighbourhood of the longest repeats in a folder.

    Calls ``ioLib.generateOneLineFile`` for ``folderName``, reads the repeat
    records from ``<folderName>/long_repeats.txt``, sorts them by their third
    field in descending order and, for at most ``topX`` records, prints the
    two start positions and hands them to
    ``graphPlottingLib.plotPatternBeyondRepeat``.

    Args:
        folderName: Directory path without a trailing slash; file names are
            appended as ``folderName + "/<file>"``.
        topX: Maximum number of repeat records to report.

    Returns:
        None. Returns early, plotting nothing, when no repeat record is left
        to process.

    NOTE(review): a definition with the same name and body appears earlier
    in the file; this later one replaces it when the module is loaded.
    """
    # presumably this produces the oneLine*.fasta files used below -- confirm
    ioLib.generateOneLineFile(folderName, folderName + "/genome.fasta")
    listOfData = ioLib.readFromMummerOutput(folderName + "/long_repeats.txt")
    listOfData = sorted(listOfData, key=itemgetter(2), reverse=True)

    # Drop a leading record that starts at position 1 in both copies
    # (presumably the sequence matching itself -- TODO confirm).
    if listOfData and listOfData[0][0] == 1 and listOfData[0][1] == 1:
        listOfData.pop(0)

    # Nothing to report; previously this case raised IndexError below.
    if not listOfData:
        return

    # Every plot is sized from the longest remaining record, not from the
    # record being plotted.
    length = listOfData[0][2]
    margin = int(1.5 * length)
    window = int(length * 5 / 10)

    oneLinePath = folderName + "/oneLine.fasta"
    oneLine2Path = folderName + "/oneLine2.fasta"

    # max(topX, 0) keeps a negative topX meaning "report nothing".
    for record in listOfData[:max(topX, 0)]:
        start1 = record[0]
        start2 = record[1]

        # Built as single strings so the output is identical under the
        # Python 2 print statement and the Python 3 print function.
        print("Starting Location 1:" + " " + str(start1))
        print("Starting Location 2:" + " " + str(start2))
        print("")

        # A disabled earlier variant wrote per-repeat statistics to
        # <folderName>/outputfile<index>.txt via reportPatternBeyondRepeat
        # and graphPlottingLib.readAndPlotStat instead of plotting directly.
        graphPlottingLib.plotPatternBeyondRepeat(
            oneLinePath,
            oneLine2Path,
            start1 - margin,
            start1 - margin - 1,
            start2 - margin,
            # NOTE(review): no "- 1" here, unlike the start1 pair above and
            # the disabled call; kept as in the original -- confirm intent.
            start2 - margin,
            window,
        )
def batchComputationOfStatistics(folders):
    """Run the repeat-pattern batch computation for every folder given.

    For each entry of ``folders`` this calls ``ioLib.generateOneLineFile``
    on ``<folder>/genome.fasta`` and then ``batchPatternBeyondRepeat`` with
    the folder's ``long_repeats.txt``, ``oneLine.fasta``, ``oneLine2.fasta``
    and ``result`` paths.

    Args:
        folders: Iterable of directory paths without a trailing slash.
    """
    for folder in folders:
        genomePath = folder + "/genome.fasta"
        ioLib.generateOneLineFile(folder, genomePath)

        repeatsPath = folder + "/long_repeats.txt"
        oneLinePath = folder + "/oneLine.fasta"
        oneLine2Path = folder + "/oneLine2.fasta"
        resultPath = folder + "/result"
        batchPatternBeyondRepeat(
            repeatsPath, oneLinePath, oneLine2Path, resultPath
        )