def extractApproxRepeatStatistics(folderName, topX):
    """Plot the sequence pattern around the longest repeats of a genome folder.

    Calls ``ioLib.generateOneLineFile`` for ``<folderName>/genome.fasta``,
    reads the match records from ``<folderName>/long_repeats.txt``, orders
    them by descending field 2 (used below as the repeat length) and, for up
    to ``topX`` records, prints both start positions and calls
    ``graphPlottingLib.plotPatternBeyondRepeat``.

    Args:
        folderName: Directory containing ``genome.fasta`` and
            ``long_repeats.txt``. ``oneLine.fasta`` and ``oneLine2.fasta``
            are read from the same directory (presumably produced by
            ``generateOneLineFile`` -- confirm).
        topX: Maximum number of repeats to plot; values <= 0 plot nothing.

    Returns:
        None. When there is no repeat to plot a notice is printed and the
        function returns instead of raising ``IndexError``.
    """
    # NOTE(review): a second definition of this function appears later in
    # this file; in Python the later ``def`` is the one the name ends up
    # bound to.
    ioLib.generateOneLineFile(folderName, folderName + "/genome.fasta")
    repeats = ioLib.readFromMummerOutput(folderName + "/long_repeats.txt")
    repeats = sorted(repeats, key=itemgetter(2), reverse=True)

    # Drop a leading record that starts at position 1 in both sequences
    # (presumably the trivial match of the genome against itself).
    if repeats and repeats[0][0] == 1 and repeats[0][1] == 1:
        repeats.pop(0)

    if not repeats:
        print("No repeats to plot in " + folderName + "/long_repeats.txt")
        return

    # The longest repeat sets the offset and window used for every plot.
    length = repeats[0][2]
    offset = int(1.5 * length)
    window = int(length * 5 / 10)

    for record in repeats[:max(topX, 0)]:
        start1 = record[0]
        start2 = record[1]

        print("Starting Location 1: " + str(start1))
        print("Starting Location 2: " + str(start2))
        print("")

        # NOTE(review): the fourth positional argument has no "- 1" while the
        # second one does; kept exactly as before -- confirm it is intended.
        graphPlottingLib.plotPatternBeyondRepeat(
            folderName + "/oneLine.fasta", folderName + "/oneLine2.fasta",
            start1 - offset, start1 - offset - 1,
            start2 - offset, start2 - offset,
            window)
def testreadFromMummerOutput():
    """Read the records from "ecoli536", print them sorted, then plot them.

    The records are ordered by field 2, then field 0, then field 1 before
    being printed one per line and handed to
    ``graphPlottingLib.plotExactRepeatStatistics``.
    """
    records = ioLib.readFromMummerOutput("ecoli536")

    # Sort key: field 2 first, ties broken by field 0 and then field 1.
    sortKey = itemgetter(2, 0, 1)
    orderedRecords = sorted(records, key=sortKey)

    for record in orderedRecords:
        print(record)

    graphPlottingLib.plotExactRepeatStatistics(orderedRecords)
def testIndelStatistics():
    """Run one indel approximate-repeat pass on "ecoli536" and print it.

    The records are read, filtered and transformed through ``ioLib``, sorted
    in their natural order, and passed together with "oneLine.fasta" and
    "oneLine2.fasta" to
    ``approximateRepeatLib.findApproximateIndelRepeatStatistics``.
    """
    rawRecords = ioLib.readFromMummerOutput("ecoli536")
    prepared = sorted(
        ioLib.transformMummerOutput(ioLib.filterData(rawRecords)))

    indelStats = approximateRepeatLib.findApproximateIndelRepeatStatistics(
        prepared, "oneLine.fasta", "oneLine2.fasta")
    print(indelStats)
def batchPatternBeyondRepeat(sourceFile,genomeSource1, genomeSource2,outputfile):
    """Plot the pattern around the midpoint of the longest repeat.

    Reads the match records from ``sourceFile``, orders them by descending
    field 2 (used as the repeat length), prints the records together with the
    chosen start positions and length, and calls
    ``graphPlottingLib.plotPatternBeyondRepeat`` at the midpoint of the
    longest repeat in both sequences.

    Args:
        sourceFile: Path handed to ``ioLib.readFromMummerOutput``.
        genomeSource1: First genome source, forwarded to the plotting call.
        genomeSource2: Second genome source, forwarded to the plotting call.
        outputfile: Not used by this function; kept for compatibility with
            existing callers.

    Returns:
        None. When there is no repeat to plot a notice is printed and the
        function returns instead of raising ``IndexError``.
    """
    # NOTE(review): a second definition of this function appears later in
    # this file; in Python the later ``def`` is the one the name ends up
    # bound to.
    repeats = ioLib.readFromMummerOutput(sourceFile)
    repeats = sorted(repeats, key=itemgetter(2), reverse=True)

    # Drop a leading record that starts at position 1 in both sequences
    # (presumably the trivial match of the genome against itself).
    if repeats and repeats[0][0] == 1 and repeats[0][1] == 1:
        repeats.pop(0)

    if not repeats:
        print("No repeats to plot in " + str(sourceFile))
        return

    longest = repeats[0]
    start1 = longest[0]
    start2 = longest[1]
    length = longest[2]

    print(repeats)
    print(start1)
    print(start2)
    print(length)

    halfLength = int(length / 2)
    graphPlottingLib.plotPatternBeyondRepeat(
        genomeSource1, genomeSource2,
        start1 + halfLength, start1 + halfLength + 1,
        start2 + halfLength, start2 + halfLength + 1,
        int(length * 5 / 10))
def extractApproxRepeatStatistics(folderName, topX):
    """Plot the sequence pattern around the longest repeats of a genome folder.

    Calls ``ioLib.generateOneLineFile`` for ``<folderName>/genome.fasta``,
    reads the match records from ``<folderName>/long_repeats.txt``, orders
    them by descending field 2 (used below as the repeat length) and, for up
    to ``topX`` records, prints both start positions and calls
    ``graphPlottingLib.plotPatternBeyondRepeat``.

    Args:
        folderName: Directory containing ``genome.fasta`` and
            ``long_repeats.txt``. ``oneLine.fasta`` and ``oneLine2.fasta``
            are read from the same directory (presumably produced by
            ``generateOneLineFile`` -- confirm).
        topX: Maximum number of repeats to plot; values <= 0 plot nothing.

    Returns:
        None. When there is no repeat to plot a notice is printed and the
        function returns instead of raising ``IndexError``.
    """
    # NOTE(review): an earlier definition with the same name appears above in
    # this file; this later ``def`` replaces it when the module is loaded.
    ioLib.generateOneLineFile(folderName, folderName + "/genome.fasta")
    repeats = ioLib.readFromMummerOutput(folderName + "/long_repeats.txt")
    repeats = sorted(repeats, key=itemgetter(2), reverse=True)

    # Drop a leading record that starts at position 1 in both sequences
    # (presumably the trivial match of the genome against itself).
    if repeats and repeats[0][0] == 1 and repeats[0][1] == 1:
        repeats.pop(0)

    if not repeats:
        print("No repeats to plot in " + folderName + "/long_repeats.txt")
        return

    # The longest repeat sets the offset and window used for every plot.
    length = repeats[0][2]
    offset = int(1.5 * length)
    window = int(length * 5 / 10)

    for record in repeats[:max(topX, 0)]:
        start1 = record[0]
        start2 = record[1]

        print("Starting Location 1: " + str(start1))
        print("Starting Location 2: " + str(start2))
        print("")

        # NOTE(review): the fourth positional argument has no "- 1" while the
        # second one does; kept exactly as before -- confirm it is intended.
        graphPlottingLib.plotPatternBeyondRepeat(
            folderName + "/oneLine.fasta", folderName + "/oneLine2.fasta",
            start1 - offset, start1 - offset - 1,
            start2 - offset, start2 - offset,
            window)
def batchGenerateApproximateRepeatStatIndel(genomeSource1,genomeSource2,mummerOutput,HDrange):
    """Collect indel approximate-repeat statistics round by round and plot them.

    The records read from ``mummerOutput`` are sorted by field 2, filtered
    and transformed through ``ioLib``; that list is round 0. Each further
    round feeds the previous round's list into
    ``approximateRepeatLib.findApproximateIndelRepeatStatistics`` and filters
    the result. The tail of every round is printed for inspection, and all
    rounds are finally passed to ``plotgraph``.

    Args:
        genomeSource1: Currently unused (see the review note in the loop).
        genomeSource2: Currently unused (see the review note in the loop).
        mummerOutput: Path handed to ``ioLib.readFromMummerOutput``.
        HDrange: Total number of rounds including round 0; the loop runs
            ``HDrange - 1`` times.
    """
    # NOTE(review): a second definition of this function appears later in
    # this file; in Python the later ``def`` is the one the name ends up
    # bound to.
    statsPerRound = []

    matches = ioLib.readFromMummerOutput(mummerOutput)
    matches = sorted(matches, key=itemgetter(2))
    matches = ioLib.filterData(matches)
    matches = ioLib.transformMummerOutput(matches)

    statsPerRound.append(matches)

    for _ in range(1, HDrange):
        # NOTE(review): the FASTA names are hard-coded here although
        # genomeSource1/genomeSource2 are available, and
        # batchGenerateApproximateRepeatStat forwards them -- confirm which
        # is intended before changing.
        matches = approximateRepeatLib.findApproximateIndelRepeatStatistics(
            matches, "oneLine.fasta", "oneLine2.fasta")
        matches = ioLib.filterData(matches)
        statsPerRound.append(matches)

        print("Approx repeat indel")
        # Negative indices keep these windows correct for short lists; a
        # start of len - 5 goes negative below five entries and wraps around.
        print(matches[-5:-1])
        print(matches[-2:])

    plotgraph(statsPerRound, HDrange)
def batchGenerateApproximateRepeatStat(genomeSource1,genomeSource2,mummerOutput,HDrange):
    """Compute approximate-repeat statistics round by round, printing each tail.

    The records read from ``mummerOutput`` are sorted by field 2 and filtered
    through ``ioLib``; that list is round 0. Each further round feeds the
    previous round's list into
    ``approximateRepeatLib.findApproxRepeatStatistics`` together with both
    genome sources and filters the result. The tail of every new round is
    printed for inspection.

    Args:
        genomeSource1: First genome source, forwarded to
            ``findApproxRepeatStatistics``.
        genomeSource2: Second genome source, forwarded to
            ``findApproxRepeatStatistics``.
        mummerOutput: Path handed to ``ioLib.readFromMummerOutput``.
        HDrange: Total number of rounds including round 0; the loop runs
            ``HDrange - 1`` times.
    """
    # NOTE(review): a second definition of this function appears later in
    # this file; in Python the later ``def`` is the one the name ends up
    # bound to.
    statsPerRound = []

    matches = ioLib.readFromMummerOutput(mummerOutput)
    matches = sorted(matches, key=itemgetter(2))
    matches = ioLib.filterData(matches)

    statsPerRound.append(matches)

    for _ in range(1, HDrange):
        matches = approximateRepeatLib.findApproxRepeatStatistics(
            matches, genomeSource1, genomeSource2)
        matches = ioLib.filterData(matches)
        statsPerRound.append(matches)

        # Negative indices keep this window correct for short lists; a start
        # of len - 5 goes negative below five entries and wraps around.
        print(matches[-5:-1])

    # NOTE(review): statsPerRound is neither returned nor plotted, whereas
    # batchGenerateApproximateRepeatStatIndel passes its rounds to plotgraph
    # -- confirm whether a plot/return is missing here.
def batchGenerateApproximateRepeatStatIndel(genomeSource1, genomeSource2,
                                            mummerOutput, HDrange):
    """Collect indel approximate-repeat statistics round by round and plot them.

    The records read from ``mummerOutput`` are sorted by field 2, filtered
    and transformed through ``ioLib``; that list is round 0. Each further
    round feeds the previous round's list into
    ``approximateRepeatLib.findApproximateIndelRepeatStatistics`` and filters
    the result. The tail of every round is printed for inspection, and all
    rounds are finally passed to ``plotgraph``.

    Args:
        genomeSource1: Currently unused (see the review note in the loop).
        genomeSource2: Currently unused (see the review note in the loop).
        mummerOutput: Path handed to ``ioLib.readFromMummerOutput``.
        HDrange: Total number of rounds including round 0; the loop runs
            ``HDrange - 1`` times.
    """
    # NOTE(review): an earlier definition with the same name appears above in
    # this file; this later ``def`` replaces it when the module is loaded.
    statsPerRound = []

    matches = ioLib.readFromMummerOutput(mummerOutput)
    matches = sorted(matches, key=itemgetter(2))
    matches = ioLib.filterData(matches)
    matches = ioLib.transformMummerOutput(matches)

    statsPerRound.append(matches)

    for _ in range(1, HDrange):
        # NOTE(review): the FASTA names are hard-coded here although
        # genomeSource1/genomeSource2 are available, and
        # batchGenerateApproximateRepeatStat forwards them -- confirm which
        # is intended before changing.
        matches = approximateRepeatLib.findApproximateIndelRepeatStatistics(
            matches, "oneLine.fasta", "oneLine2.fasta")
        matches = ioLib.filterData(matches)
        statsPerRound.append(matches)

        print("Approx repeat indel")
        # Negative indices keep these windows correct for short lists; a
        # start of len - 5 goes negative below five entries and wraps around.
        print(matches[-5:-1])
        print(matches[-2:])

    plotgraph(statsPerRound, HDrange)
def batchGenerateApproximateRepeatStat(genomeSource1, genomeSource2,
                                       mummerOutput, HDrange):
    """Compute approximate-repeat statistics round by round, printing each tail.

    The records read from ``mummerOutput`` are sorted by field 2 and filtered
    through ``ioLib``; that list is round 0. Each further round feeds the
    previous round's list into
    ``approximateRepeatLib.findApproxRepeatStatistics`` together with both
    genome sources and filters the result. The tail of every new round is
    printed for inspection.

    Args:
        genomeSource1: First genome source, forwarded to
            ``findApproxRepeatStatistics``.
        genomeSource2: Second genome source, forwarded to
            ``findApproxRepeatStatistics``.
        mummerOutput: Path handed to ``ioLib.readFromMummerOutput``.
        HDrange: Total number of rounds including round 0; the loop runs
            ``HDrange - 1`` times.
    """
    # NOTE(review): an earlier definition with the same name appears above in
    # this file; this later ``def`` replaces it when the module is loaded.
    statsPerRound = []

    matches = ioLib.readFromMummerOutput(mummerOutput)
    matches = sorted(matches, key=itemgetter(2))
    matches = ioLib.filterData(matches)

    statsPerRound.append(matches)

    for _ in range(1, HDrange):
        matches = approximateRepeatLib.findApproxRepeatStatistics(
            matches, genomeSource1, genomeSource2)
        matches = ioLib.filterData(matches)
        statsPerRound.append(matches)

        # Negative indices keep this window correct for short lists; a start
        # of len - 5 goes negative below five entries and wraps around.
        print(matches[-5:-1])

    # NOTE(review): statsPerRound is neither returned nor plotted, whereas
    # batchGenerateApproximateRepeatStatIndel passes its rounds to plotgraph
    # -- confirm whether a plot/return is missing here.
def batchPatternBeyondRepeat(sourceFile, genomeSource1, genomeSource2,
                             outputfile):
    """Plot the pattern around the midpoint of the longest repeat.

    Reads the match records from ``sourceFile``, orders them by descending
    field 2 (used as the repeat length), prints the records together with the
    chosen start positions and length, and calls
    ``graphPlottingLib.plotPatternBeyondRepeat`` at the midpoint of the
    longest repeat in both sequences.

    Args:
        sourceFile: Path handed to ``ioLib.readFromMummerOutput``.
        genomeSource1: First genome source, forwarded to the plotting call.
        genomeSource2: Second genome source, forwarded to the plotting call.
        outputfile: Not used by this function; kept for compatibility with
            existing callers.

    Returns:
        None. When there is no repeat to plot a notice is printed and the
        function returns instead of raising ``IndexError``.
    """
    # NOTE(review): an earlier definition with the same name appears above in
    # this file; this later ``def`` replaces it when the module is loaded.
    repeats = ioLib.readFromMummerOutput(sourceFile)
    repeats = sorted(repeats, key=itemgetter(2), reverse=True)

    # Drop a leading record that starts at position 1 in both sequences
    # (presumably the trivial match of the genome against itself).
    if repeats and repeats[0][0] == 1 and repeats[0][1] == 1:
        repeats.pop(0)

    if not repeats:
        print("No repeats to plot in " + str(sourceFile))
        return

    longest = repeats[0]
    start1 = longest[0]
    start2 = longest[1]
    length = longest[2]

    print(repeats)
    print(start1)
    print(start2)
    print(length)

    halfLength = int(length / 2)
    graphPlottingLib.plotPatternBeyondRepeat(
        genomeSource1, genomeSource2,
        start1 + halfLength, start1 + halfLength + 1,
        start2 + halfLength, start2 + halfLength + 1,
        int(length * 5 / 10))