def extractApproxRepeatStatistics(folderName, topX):
    """Plot the sequence pattern around the longest repeats of a genome.

    Steps, in order:
      1. call ``ioLib.generateOneLineFile`` on ``<folderName>/genome.fasta``;
      2. read repeat records from ``<folderName>/long_repeats.txt``;
      3. sort the records by their third field, largest first;
      4. drop the leading record when both of its start positions are 1;
      5. for each of the first ``topX`` records, print the two start
         positions and call ``graphPlottingLib.plotPatternBeyondRepeat``.

    Each record is indexed as ``(start1, start2, length, ...)``.  The window
    handed to the plotter is derived from the length of the *first* sorted
    record and is reused for every plotted repeat.

    Args:
        folderName: Directory containing ``genome.fasta`` and
            ``long_repeats.txt``; the ``oneLine.fasta`` / ``oneLine2.fasta``
            paths passed to the plotter are built from it as well.
        topX: Maximum number of repeats to plot.  Values <= 0 plot nothing.

    Returns:
        None.  When no repeat record is left after step 4 the function
        returns without plotting instead of raising ``IndexError``.

    NOTE(review): within the visible source a second definition with this
    same name appears later in the file; the later one is the one bound at
    import time.
    """
    ioLib.generateOneLineFile(folderName, folderName + "/genome.fasta")

    listOfData = ioLib.readFromMummerOutput(folderName + "/long_repeats.txt")
    listOfData = sorted(listOfData, key=itemgetter(2), reverse=True)

    # A leading record starting at (1, 1) is discarded.
    # NOTE(review): presumably the whole sequence matching itself - confirm.
    if listOfData and listOfData[0][0] == 1 and listOfData[0][1] == 1:
        listOfData.pop(0)

    # Nothing to report: avoid indexing into an empty list below.
    if not listOfData:
        return

    # The window is sized from the first (largest third-field) record only.
    length = listOfData[0][2]
    margin = int(1.5 * length)
    windowLength = int(length * 5 / 10)

    oneLinePath = folderName + "/oneLine.fasta"
    oneLine2Path = folderName + "/oneLine2.fasta"

    # max(..., 0) keeps the "non-positive topX plots nothing" behaviour that
    # range(min(topX, len(listOfData))) had.
    for record in listOfData[:max(topX, 0)]:
        start1 = record[0]
        start2 = record[1]

        # Single-argument parenthesised prints produce the same text under
        # the Python 2 print statement and the Python 3 print function.
        print("Starting Location 1: %s" % (start1,))
        print("Starting Location 2: %s" % (start2,))
        print("")

        # A file-based path (reportPatternBeyondRepeat followed by
        # graphPlottingLib.readAndPlotStat on "/outputfile<index>.txt") used
        # to live here as commented-out code; plotting is done directly.
        #
        # NOTE(review): the second argument carries an extra "- 1" while the
        # fourth does not.  Kept exactly as in the original; confirm against
        # plotPatternBeyondRepeat's parameter contract.
        graphPlottingLib.plotPatternBeyondRepeat(
            oneLinePath,
            oneLine2Path,
            start1 - margin,
            start1 - margin - 1,
            start2 - margin,
            start2 - margin,
            windowLength,
        )
def testCase(folderName):
    """Run the approximate-repeat debugging pipeline on a single repeat.

    The third entry returned by ``ioLib.preprocessingRepeatInfo`` is used as
    the test repeat.  The function then, printing a "Debugging: ..." banner
    before each stage:
      1. estimates the approximate repeat with
         ``approximateRepeatLib.findapproxrepeatLength``;
      2. plots the detailed pattern for that repeat;
      3. plots the pattern beyond the repeat on figure 3;
      4. writes the same pattern to ``<folderName>/outputResult.txt``;
      5. runs the edit-distance variant on figure 4;
      6. shows the figures.

    Args:
        folderName: Directory holding the repeat information and the
            ``oneLine.fasta`` / ``oneLine2.fasta`` files.

    Raises:
        IndexError: if fewer than three repeats are available (the test
            repeat is taken with ``dataList[2]``; assumes a list-like
            return value).
    """
    oneLinePath = folderName + "/oneLine.fasta"
    oneLine2Path = folderName + "/oneLine2.fasta"

    # Single-argument parenthesised prints produce the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("Debugging: preprocessingRepeatInfo ---------------")
    dataList = ioLib.preprocessingRepeatInfo(folderName)
    # An optional ioLib.filterData(dataList) pass was disabled here.
    testRepeat = dataList[2]
    # E.g. ['EcoliK12/long_repeats.txt', 3465034, 2724834, 3027]

    ### Plot distance between consecutive rise
    print("Debugging: findapproxrepeatLength ---------------")
    startIndex1, startIndex2, lapprox, mutationRate = (
        approximateRepeatLib.findapproxrepeatLength(
            oneLinePath,
            oneLine2Path,
            testRepeat[1],
            testRepeat[2],
            testRepeat[3],
        )
    )
    # E.g. [2724708 3464908 3154 0.000317057704502]

    # Approximate-repeat result followed by the original repeat's fields
    # 3, 1 and 2, in that order.
    testApproxRepeat = [
        startIndex1,
        startIndex2,
        lapprox,
        mutationRate,
        testRepeat[3],
        testRepeat[1],
        testRepeat[2],
    ]

    print("Debugging: plotdetailedPatternForTopRepeats2 ---------------")
    approximateRepeatLib.plotdetailedPatternForTopRepeats(
        folderName, 1, [testApproxRepeat], inverted=False)

    # Window shared by the plotting and reporting calls below: it begins
    # 1.5 approximate-repeat lengths before each start index.
    margin = int(1.5 * lapprox)
    windowStart1 = startIndex1 - margin
    windowStart2 = startIndex2 - margin
    windowLength = int(lapprox * 5 / 10)

    ### Plot sliding hamming window
    print("Debugging: extractApproxRepeatStatistics ---------------")
    plt.figure(3)
    graphPlottingLib.plotPatternBeyondRepeat(
        oneLinePath,
        oneLine2Path,
        windowStart1,
        windowStart1,
        windowStart2,
        windowStart2,
        windowLength,
    )

    ### Outputting values
    print("Debugging: reportPatternBeyondRepeat ---------------")
    # The output path uses "/" like every other path in this function.  The
    # previous "\outputResult.txt" relied on an unrecognised escape and, on
    # POSIX systems, did not place the file inside folderName.
    approximateRepeatLib.reportPatternBeyondRepeat(
        oneLinePath,
        oneLine2Path,
        windowStart1,
        windowStart1,
        windowStart2,
        windowStart2,
        windowLength,
        folderName + "/outputResult.txt",
    )

    ### Edit distance Plot
    print("Debugging: findapproxrepeatLengthEditDistance ---------------")
    plt.figure(4)
    approximateRepeatLib.findapproxrepeatLengthEditDistance(
        oneLinePath,
        oneLine2Path,
        testRepeat[1],
        testRepeat[2],
        testRepeat[3],
    )

    ###
    # NOTE(review): treated as an active call following a bare "###" section
    # marker, like the other section headers in this function - confirm.
    plt.show()
def extractApproxRepeatStatistics(folderName, topX):
    """Plot the sequence pattern around the longest repeats of a genome.

    Steps, in order:
      1. call ``ioLib.generateOneLineFile`` on ``<folderName>/genome.fasta``;
      2. read repeat records from ``<folderName>/long_repeats.txt``;
      3. sort the records by their third field, largest first;
      4. drop the leading record when both of its start positions are 1;
      5. for each of the first ``topX`` records, print the two start
         positions and call ``graphPlottingLib.plotPatternBeyondRepeat``.

    Each record is indexed as ``(start1, start2, length, ...)``.  The window
    handed to the plotter is derived from the length of the *first* sorted
    record and is reused for every plotted repeat.

    Args:
        folderName: Directory containing ``genome.fasta`` and
            ``long_repeats.txt``; the ``oneLine.fasta`` / ``oneLine2.fasta``
            paths passed to the plotter are built from it as well.
        topX: Maximum number of repeats to plot.  Values <= 0 plot nothing.

    Returns:
        None.  When no repeat record is left after step 4 the function
        returns without plotting instead of raising ``IndexError``.

    NOTE(review): within the visible source an earlier definition with this
    same name exists in the file; this later one is the one bound at import
    time.
    """
    ioLib.generateOneLineFile(folderName, folderName + "/genome.fasta")

    listOfData = ioLib.readFromMummerOutput(folderName + "/long_repeats.txt")
    listOfData = sorted(listOfData, key=itemgetter(2), reverse=True)

    # A leading record starting at (1, 1) is discarded.
    # NOTE(review): presumably the whole sequence matching itself - confirm.
    if listOfData and listOfData[0][0] == 1 and listOfData[0][1] == 1:
        listOfData.pop(0)

    # Nothing to report: avoid indexing into an empty list below.
    if not listOfData:
        return

    # The window is sized from the first (largest third-field) record only.
    length = listOfData[0][2]
    margin = int(1.5 * length)
    windowLength = int(length * 5 / 10)

    oneLinePath = folderName + "/oneLine.fasta"
    oneLine2Path = folderName + "/oneLine2.fasta"

    # max(..., 0) keeps the "non-positive topX plots nothing" behaviour that
    # range(min(topX, len(listOfData))) had.
    for record in listOfData[:max(topX, 0)]:
        start1 = record[0]
        start2 = record[1]

        # Single-argument parenthesised prints produce the same text under
        # the Python 2 print statement and the Python 3 print function.
        print("Starting Location 1: %s" % (start1,))
        print("Starting Location 2: %s" % (start2,))
        print("")

        # A file-based path (reportPatternBeyondRepeat followed by
        # graphPlottingLib.readAndPlotStat on "/outputfile<index>.txt") used
        # to live here as commented-out code; plotting is done directly.
        #
        # NOTE(review): the second argument carries an extra "- 1" while the
        # fourth does not.  Kept exactly as in the original; confirm against
        # plotPatternBeyondRepeat's parameter contract.
        graphPlottingLib.plotPatternBeyondRepeat(
            oneLinePath,
            oneLine2Path,
            start1 - margin,
            start1 - margin - 1,
            start2 - margin,
            start2 - margin,
            windowLength,
        )