def extractApproxRepeatStatistics(folderName, topX): ioLib.generateOneLineFile(folderName, folderName + "/genome.fasta") listOfData = ioLib.readFromMummerOutput(folderName + "/long_repeats.txt") listOfData = sorted(listOfData, key=itemgetter(2), reverse=True) if listOfData[0][0] == 1 and listOfData[0][1] == 1: listOfData.pop(0) length = listOfData[0][2] for index in range(min(topX, len(listOfData))): start1 = listOfData[index][0] start2 = listOfData[index][1] print "Starting Location 1:", start1 print "Starting Location 2:", start2 print "" #reportPatternBeyondRepeat( folderName+ "/oneLine.fasta", folderName+ "/oneLine2.fasta", start1 - int( 1.5*length) ,start1 - int( 1.5*length) -1 , start2 - int( 1.5* length) ,start2 - int( 1.5* length) -1 , int(length*5/10),folderName+"/outputfile" + str(index)+".txt") #graphPlottingLib.readAndPlotStat(folderName+"/outputfile" + str(index)+".txt",folderName+"/outputfile" + str(index)+".txt2",folderName, index) graphPlottingLib.plotPatternBeyondRepeat( folderName + "/oneLine.fasta", folderName + "/oneLine2.fasta", start1 - int(1.5 * length), start1 - int(1.5 * length) - 1, start2 - int(1.5 * length), start2 - int(1.5 * length), int(length * 5 / 10))
def testreadFromMummerOutput(): listOfData = ioLib.readFromMummerOutput("ecoli536") listOfData = sorted(listOfData, key = itemgetter(2,0,1)) ## sort by order of 2, 0, 1 in the list for eachitem in listOfData: print eachitem graphPlottingLib.plotExactRepeatStatistics(listOfData)
def testIndelStatistics(): listOfData = ioLib.readFromMummerOutput("ecoli536") listOfData = ioLib.filterData(listOfData) listOfData =ioLib.transformMummerOutput(listOfData) listOfData = sorted(listOfData) listOfData= approximateRepeatLib.findApproximateIndelRepeatStatistics(listOfData, "oneLine.fasta","oneLine2.fasta") print listOfData
def batchPatternBeyondRepeat(sourceFile,genomeSource1, genomeSource2,outputfile): listOfData = ioLib.readFromMummerOutput(sourceFile) listOfData= sorted(listOfData,key = itemgetter(2), reverse = True) if listOfData[0][0] == 1 and listOfData[0][1] == 1: listOfData.pop(0) start1 = listOfData[0][0] start2 = listOfData[0][1] length = listOfData[0][2] print listOfData print start1 print start2 print length graphPlottingLib.plotPatternBeyondRepeat(genomeSource1,genomeSource2, start1 + int( length/2) ,start1 + int( length/2) +1 , start2 + int( length/2) ,start2 + int( length/2) +1 , int(length*5/10))
def extractApproxRepeatStatistics(folderName, topX): ioLib.generateOneLineFile(folderName, folderName+ "/genome.fasta") listOfData = ioLib.readFromMummerOutput(folderName+ "/long_repeats.txt") listOfData= sorted(listOfData,key = itemgetter(2), reverse = True) if listOfData[0][0] == 1 and listOfData[0][1] == 1: listOfData.pop(0) length = listOfData[0][2] for index in range(min(topX,len(listOfData))): start1 = listOfData[index][0] start2 = listOfData[index][1] print "Starting Location 1:" , start1 print "Starting Location 2:" , start2 print "" #reportPatternBeyondRepeat( folderName+ "/oneLine.fasta", folderName+ "/oneLine2.fasta", start1 - int( 1.5*length) ,start1 - int( 1.5*length) -1 , start2 - int( 1.5* length) ,start2 - int( 1.5* length) -1 , int(length*5/10),folderName+"/outputfile" + str(index)+".txt") #graphPlottingLib.readAndPlotStat(folderName+"/outputfile" + str(index)+".txt",folderName+"/outputfile" + str(index)+".txt2",folderName, index) graphPlottingLib.plotPatternBeyondRepeat( folderName+ "/oneLine.fasta",folderName+ "/oneLine2.fasta", start1 - int( 1.5*length), start1 - int( 1.5*length) -1, start2 - int( 1.5* length) , start2 - int( 1.5* length) ,int(length*5/10))
# batchGenerateApproximateRepeatStatIndel(genomeSource1, genomeSource2,
#                                         mummerOutput, HDrange)
#
# Builds arrayOfListOfData and hands it to plotgraph(arrayOfListOfData,
# HDrange), which is the last call in the body.
#
# Sequence of calls as written:
#   1. ioLib.readFromMummerOutput(mummerOutput), sorted by field 2, then
#      ioLib.filterData and ioLib.transformMummerOutput; the result is the
#      first entry appended to arrayOfListOfData.
#   2. A loop over range(1, HDrange) that replaces listOfData with
#      approximateRepeatLib.findApproximateIndelRepeatStatistics(...),
#      runs ioLib.filterData on it and appends it to arrayOfListOfData.
#   3. Debug output: the "Approx repeat indel" header and two slices taken
#      from the end of listOfData.
#
# NOTE(review): the indentation of this definition has been lost (the whole
# body sits on one line), so the text does not show whether the `temp`
# assignment and the print statements belong to the loop body or follow it.
# Restore the layout from version control before relying on this function.
# NOTE(review): genomeSource1 / genomeSource2 are not used by live code; the
# helper is called with the fixed names "oneLine.fasta" / "oneLine2.fasta",
# whereas batchGenerateApproximateRepeatStat forwards its genome arguments.
# Confirm this is intended.
# NOTE(review): `temp` is only referenced by the commented-out checking(...)
# call, and the loop variable `index` is never read.
# NOTE(review): a second definition with the same name appears later in the
# visible part of this file.
def batchGenerateApproximateRepeatStatIndel(genomeSource1,genomeSource2,mummerOutput,HDrange): arrayOfListOfData = [] listOfData = ioLib.readFromMummerOutput(mummerOutput) listOfData = sorted(listOfData,key = itemgetter(2)) listOfData = ioLib.filterData(listOfData) listOfData = ioLib.transformMummerOutput(listOfData) arrayOfListOfData.append(listOfData) for index in range(1,HDrange): listOfData= approximateRepeatLib.findApproximateIndelRepeatStatistics(listOfData, "oneLine.fasta","oneLine2.fasta") listOfData = ioLib.filterData(listOfData) arrayOfListOfData.append(listOfData) temp = listOfData[len(listOfData)-1] print "Approx repeat indel" print listOfData[len(listOfData)-5:len(listOfData)-1] #checking(temp,genomeSource1,genomeSource2 ) print listOfData[len(listOfData)-2:len(listOfData)] plotgraph(arrayOfListOfData, HDrange)
# batchGenerateApproximateRepeatStat(genomeSource1, genomeSource2,
#                                    mummerOutput, HDrange)
#
# Sequence of calls as written:
#   1. ioLib.readFromMummerOutput(mummerOutput), sorted by field 2, then
#      ioLib.filterData; the result is the first entry appended to
#      arrayOfListOfData.
#   2. `temp` is set to the last element of that list (only the
#      commented-out debuggingLib.checking(...) call refers to it).
#   3. A loop over range(1, HDrange) that replaces listOfData with
#      approximateRepeatLib.findApproxRepeatStatistics(listOfData,
#      genomeSource1, genomeSource2), runs ioLib.filterData on it and
#      appends it to arrayOfListOfData.
#   4. Debug output: a slice taken from the end of listOfData is printed.
#
# No return statement or plotting call is visible, so arrayOfListOfData is
# built but not handed to anything in the text shown here.
#
# NOTE(review): the indentation of this definition has been lost (the whole
# body sits on one line), so the text does not show whether the final `temp`
# assignment and print belong to the loop body or follow it.  Restore the
# layout from version control before relying on this function.
# NOTE(review): `temp = listOfData[len(listOfData)-1]` raises IndexError when
# the filtered list is empty, although its value is never read by live code.
# NOTE(review): a second definition with the same name appears later in the
# visible part of this file.
def batchGenerateApproximateRepeatStat(genomeSource1,genomeSource2,mummerOutput,HDrange): arrayOfListOfData = [] listOfData = ioLib.readFromMummerOutput(mummerOutput) listOfData = sorted(listOfData,key = itemgetter(2)) listOfData = ioLib.filterData(listOfData) arrayOfListOfData.append(listOfData) ### Checking temp = listOfData[len(listOfData)-1] #debuggingLib.checking(temp,genomeSource1,genomeSource2 ) ### End Checking for index in range(1,HDrange): listOfData = approximateRepeatLib.findApproxRepeatStatistics(listOfData, genomeSource1, genomeSource2) listOfData = ioLib.filterData(listOfData) arrayOfListOfData.append(listOfData) temp = listOfData[len(listOfData)-1] print listOfData[len(listOfData)-5:len(listOfData)-1]
# batchGenerateApproximateRepeatStatIndel(genomeSource1, genomeSource2,
#                                         mummerOutput, HDrange)
#
# Builds arrayOfListOfData and hands it to plotgraph(arrayOfListOfData,
# HDrange), which is the last call in the body.
#
# Sequence of calls as written:
#   1. ioLib.readFromMummerOutput(mummerOutput), sorted by field 2, then
#      ioLib.filterData and ioLib.transformMummerOutput; the result is the
#      first entry appended to arrayOfListOfData.
#   2. A loop over range(1, HDrange) that replaces listOfData with
#      approximateRepeatLib.findApproximateIndelRepeatStatistics(...),
#      runs ioLib.filterData on it and appends it to arrayOfListOfData.
#   3. Debug output: the "Approx repeat indel" header and two slices taken
#      from the end of listOfData.
#
# NOTE(review): the indentation of this definition has been lost (the whole
# body sits on one line), so the text does not show whether the `temp`
# assignment and the print statements belong to the loop body or follow it.
# Restore the layout from version control before relying on this function.
# NOTE(review): genomeSource1 / genomeSource2 are not used by live code; the
# helper is called with the fixed names "oneLine.fasta" / "oneLine2.fasta",
# whereas batchGenerateApproximateRepeatStat forwards its genome arguments.
# Confirm this is intended.
# NOTE(review): `temp` is only referenced by the commented-out checking(...)
# call, and the loop variable `index` is never read.
# NOTE(review): an earlier definition with the same name in the visible part
# of this file is replaced by this one at import time.
def batchGenerateApproximateRepeatStatIndel(genomeSource1, genomeSource2, mummerOutput, HDrange): arrayOfListOfData = [] listOfData = ioLib.readFromMummerOutput(mummerOutput) listOfData = sorted(listOfData, key=itemgetter(2)) listOfData = ioLib.filterData(listOfData) listOfData = ioLib.transformMummerOutput(listOfData) arrayOfListOfData.append(listOfData) for index in range(1, HDrange): listOfData = approximateRepeatLib.findApproximateIndelRepeatStatistics( listOfData, "oneLine.fasta", "oneLine2.fasta") listOfData = ioLib.filterData(listOfData) arrayOfListOfData.append(listOfData) temp = listOfData[len(listOfData) - 1] print "Approx repeat indel" print listOfData[len(listOfData) - 5:len(listOfData) - 1] #checking(temp,genomeSource1,genomeSource2 ) print listOfData[len(listOfData) - 2:len(listOfData)] plotgraph(arrayOfListOfData, HDrange)
# batchGenerateApproximateRepeatStat(genomeSource1, genomeSource2,
#                                    mummerOutput, HDrange)
#
# Sequence of calls as written:
#   1. ioLib.readFromMummerOutput(mummerOutput), sorted by field 2, then
#      ioLib.filterData; the result is the first entry appended to
#      arrayOfListOfData.
#   2. `temp` is set to the last element of that list (only the
#      commented-out debuggingLib.checking(...) call refers to it).
#   3. A loop over range(1, HDrange) that replaces listOfData with
#      approximateRepeatLib.findApproxRepeatStatistics(listOfData,
#      genomeSource1, genomeSource2), runs ioLib.filterData on it and
#      appends it to arrayOfListOfData.
#   4. Debug output: a slice taken from the end of listOfData is printed.
#
# No return statement or plotting call is visible, so arrayOfListOfData is
# built but not handed to anything in the text shown here.
#
# NOTE(review): the indentation of this definition has been lost (the whole
# body sits on one line), so the text does not show whether the final `temp`
# assignment and print belong to the loop body or follow it.  Restore the
# layout from version control before relying on this function.
# NOTE(review): `temp = listOfData[len(listOfData) - 1]` raises IndexError
# when the filtered list is empty, although its value is never read by live
# code.
# NOTE(review): an earlier definition with the same name in the visible part
# of this file is replaced by this one at import time.
def batchGenerateApproximateRepeatStat(genomeSource1, genomeSource2, mummerOutput, HDrange): arrayOfListOfData = [] listOfData = ioLib.readFromMummerOutput(mummerOutput) listOfData = sorted(listOfData, key=itemgetter(2)) listOfData = ioLib.filterData(listOfData) arrayOfListOfData.append(listOfData) ### Checking temp = listOfData[len(listOfData) - 1] #debuggingLib.checking(temp,genomeSource1,genomeSource2 ) ### End Checking for index in range(1, HDrange): listOfData = approximateRepeatLib.findApproxRepeatStatistics( listOfData, genomeSource1, genomeSource2) listOfData = ioLib.filterData(listOfData) arrayOfListOfData.append(listOfData) temp = listOfData[len(listOfData) - 1] print listOfData[len(listOfData) - 5:len(listOfData) - 1]
def batchPatternBeyondRepeat(sourceFile, genomeSource1, genomeSource2, outputfile): listOfData = ioLib.readFromMummerOutput(sourceFile) listOfData = sorted(listOfData, key=itemgetter(2), reverse=True) if listOfData[0][0] == 1 and listOfData[0][1] == 1: listOfData.pop(0) start1 = listOfData[0][0] start2 = listOfData[0][1] length = listOfData[0][2] print listOfData print start1 print start2 print length graphPlottingLib.plotPatternBeyondRepeat(genomeSource1, genomeSource2, start1 + int(length / 2), start1 + int(length / 2) + 1, start2 + int(length / 2), start2 + int(length / 2) + 1, int(length * 5 / 10))