def condenseEdgeRemove(self, G_ContigRead, folderName, mummerLink, contigFilename):
        print "condenseEdgeRemove"
        thresPass = 100
        thresForStrangeCut = 5000
        ### kkdebug

        toRemoveList = []
        
        for eachnode in self.graphNodesList:
            if len(eachnode.nodeIndexList) > 0:
                if len(eachnode.listOfNextNodes) ==1  :
                    nextNodeIndex = eachnode.listOfNextNodes[0][0]
                    nextNode= self.graphNodesList[nextNodeIndex]
                    if len(nextNode.listOfPrevNodes) == 1 : 
                        currentName = eachnode.nodeIndex
                        nextName =  nextNode.nodeIndex

                        contigReadPaths = findAllPathK(currentName,nextName, G_ContigRead, 5)

                        cName =  abunHouseKeeper.parseIDToName(currentName,'C',0)
                        nName =  abunHouseKeeper.parseIDToName(nextName,'C',0)

                        noGoNext = self.readInJSON(folderName, "noGoNext.json")
                        noGoPrev = self.readInJSON(folderName, "noGoPrev.json")

                        overlap = [-1, -1]
                        ctr = 0 

                        for eachpath in contigReadPaths:
                            if len(eachpath) > 2: 
                                ctr = ctr + 1 
                                
                            elif len(eachpath) == 2:     
                                
                                contigName = cName
                                leftSeg = IORobot.myRead(folderName, contigFilename + "_Double.fasta", contigName)

                                contigName = nName
                                rightSeg = IORobot.myRead(folderName, contigFilename + "_Double.fasta", contigName)
                                
                                overlap = IORobot.align(leftSeg, rightSeg, folderName, mummerLink)


                        if ctr <= thresPass and  (cName in noGoNext or nName in noGoPrev or overlap[0] > thresForStrangeCut ):
                    
                            self.removeEdge(currentName, nextName)
                            toRemoveList.append([currentName, nextName])


        ### kkdebug
        #with open( "dataFolder/toRemoveList.json", 'w') as f:
        #    json.dump(toRemoveList, f)    

        self.findAdjList()
Beispiel #2
0
    def condenseEdgeRemove(self, G_ContigRead, folderName, mummerLink, contigFilename):
        print "condenseEdgeRemove"
        thresPass = 100
        thresForStrangeCut = 5000
        ### kkdebug

        toRemoveList = []
        
        for eachnode in self.graphNodesList:
            if len(eachnode.nodeIndexList) > 0:
                if len(eachnode.listOfNextNodes) ==1  :
                    nextNodeIndex = eachnode.listOfNextNodes[0][0]
                    nextNode= self.graphNodesList[nextNodeIndex]
                    if len(nextNode.listOfPrevNodes) == 1 : 
                        currentName = eachnode.nodeIndex
                        nextName =  nextNode.nodeIndex

                        contigReadPaths = findAllPathK(currentName,nextName, G_ContigRead, 5)

                        cName =  abunHouseKeeper.parseIDToName(currentName,'C',0)
                        nName =  abunHouseKeeper.parseIDToName(nextName,'C',0)

                        noGoNext = self.readInJSON(folderName, "noGoNext.json")
                        noGoPrev = self.readInJSON(folderName, "noGoPrev.json")

                        overlap = [-1, -1]
                        ctr = 0 

                        for eachpath in contigReadPaths:
                            if len(eachpath) > 2: 
                                ctr = ctr + 1 
                                
                            elif len(eachpath) == 2:     
                                
                                contigName = cName
                                leftSeg = IORobot.myRead(folderName, contigFilename + "_Double.fasta", contigName)

                                contigName = nName
                                rightSeg = IORobot.myRead(folderName, contigFilename + "_Double.fasta", contigName)
                                
                                overlap = IORobot.align(leftSeg, rightSeg, folderName, mummerLink)


                        if ctr <= thresPass and  (cName in noGoNext or nName in noGoPrev or overlap[0] > thresForStrangeCut ):
                    
                            self.removeEdge(currentName, nextName)
                            toRemoveList.append([currentName, nextName])


        ### kkdebug
        #with open( "dataFolder/toRemoveList.json", 'w') as f:
        #    json.dump(toRemoveList, f)    

        self.findAdjList()
Beispiel #3
0
def singleGapLookUp(eachmatchpair,folderName, N1,  mummerLink,  contigReadGraph, contigFilename,readsetFilename):
    #print eachmatchpair
    leftCtgIndex ,rightCtgIndex, leftEnd, rightStart, middleContent = eachmatchpair[0],eachmatchpair[-1],0,0,""
    
    succReadsList = []
    G = seqGraphWt(0)
    G.loadFromFile(folderName, contigReadGraph)
    succReadsList = BFS(leftCtgIndex,rightCtgIndex, G, N1)

    if len(succReadsList) > 0:
        succReadsList.pop(0)
        succReadsList.pop(-1)
    else:
        print "interesting item for future study"

    print "succReadsList" , succReadsList
    
    if len(succReadsList) == 0:
        contigName = abunHouseKeeper.parseIDToName(leftCtgIndex, 'C', N1)
        leftSeg = IORobot.myRead(folderName, contigFilename + "_Double.fasta", contigName)

        contigName = abunHouseKeeper.parseIDToName(rightCtgIndex, 'C', N1)
        rightSeg = IORobot.myRead(folderName, contigFilename + "_Double.fasta", contigName)
        
        overlap = IORobot.alignWithName(leftSeg, rightSeg, folderName, mummerLink, str(leftCtgIndex) + "_" + str(rightCtgIndex) )
        
        print "overlap contig : ", overlap
        
        leftEnd = len(leftSeg) - overlap[0]
        middleContent = ""
        
    else:
        
        contigName = abunHouseKeeper.parseIDToName(leftCtgIndex, 'C', N1)
        print contigName
        leftSeg = IORobot.myRead(folderName, contigFilename + "_Double.fasta", contigName)
        
        readName = abunHouseKeeper.parseIDToName(succReadsList[0], 'R', N1)
        print readName
        rightSeg  = IORobot.myRead(folderName, readsetFilename + "_Double.fasta", readName)
        
        overlap = IORobot.alignWithName(leftSeg, rightSeg, folderName, mummerLink, str(leftCtgIndex) + "_" + str(rightCtgIndex) )
        
        print "overlap start read : ", overlap
        
        leftEnd = len(leftSeg) - overlap[0]
        
        middleContent = ""
        
        for i in range(len(succReadsList)-1):
            readName = abunHouseKeeper.parseIDToName(succReadsList[i], 'R', N1)
            leftSeg  = IORobot.myRead(folderName, readsetFilename + "_Double.fasta", readName)
        
            readName = abunHouseKeeper.parseIDToName(succReadsList[i+1], 'R', N1)
            rightSeg  = IORobot.myRead(folderName, readsetFilename + "_Double.fasta", readName)
            
            overlap = IORobot.alignWithName(leftSeg, rightSeg, folderName, mummerLink, str(leftCtgIndex) + "_" + str(rightCtgIndex) )
            print "overlap middle read : ", overlap
            middleContent = middleContent + leftSeg[0:len(leftSeg)-overlap[0]] 
        
        
        readName = abunHouseKeeper.parseIDToName(succReadsList[-1], 'R', N1)
        leftSeg  = IORobot.myRead(folderName, readsetFilename + "_Double.fasta", readName)
        
        contigName = abunHouseKeeper.parseIDToName(rightCtgIndex, 'C', N1)
        rightSeg = IORobot.myRead(folderName, contigFilename + "_Double.fasta", contigName)
        
        overlap = IORobot.alignWithName(leftSeg, rightSeg, folderName, mummerLink, str(leftCtgIndex) + "_" + str(rightCtgIndex) )
        print "overlap end read : ", overlap
        
        middleContent = middleContent + leftSeg[0:len(leftSeg)-overlap[0]]

    return [leftCtgIndex ,rightCtgIndex, leftEnd, rightStart, middleContent]
def singleGapLookUp(eachmatchpair, folderName, N1, mummerLink, contigReadGraph, contigFilename, readsetFilename):

    print eachmatchpair
    leftCtgIndex, rightCtgIndex, leftEnd, rightStart, middleContent = eachmatchpair[0], eachmatchpair[-1], 0, 0, ""

    succReadsList = abunGraphLib.findPathBtwEnds(folderName, leftCtgIndex, rightCtgIndex, contigReadGraph, N1)

    succReadsList = []
    G = graphLib.seqGraph(0)
    G.loadFromFile(folderName, contigReadGraph)

    allPaths = abunGraphLib.findAllPathK(leftCtgIndex, rightCtgIndex, G, 5)
    # shuffle(allPaths)

    print "allPaths", allPaths

    possibleList = []
    for p in allPaths:
        noContig = True
        for pp in p[1:-1]:
            if pp < N1:
                noContig = False
        if noContig == True:
            possibleList.append(p)
    print "possibleList", possibleList

    minListLen = 1000
    for p in possibleList:
        if len(p) < minListLen:
            succReadsList = p
            minListLen = len(p)

    if len(succReadsList) > 0:
        succReadsList.pop(0)
        succReadsList.pop(-1)
    else:
        print "interesting item for future study"

    print "succReadsList", succReadsList

    if len(succReadsList) == 0:
        contigName = abunHouseKeeper.parseIDToName(leftCtgIndex, "C", N1)
        leftSeg = IORobot.myRead(folderName, contigFilename + "_Double.fasta", contigName)

        contigName = abunHouseKeeper.parseIDToName(rightCtgIndex, "C", N1)
        rightSeg = IORobot.myRead(folderName, contigFilename + "_Double.fasta", contigName)

        overlap = IORobot.alignWithName(
            leftSeg, rightSeg, folderName, mummerLink, str(leftCtgIndex) + "_" + str(rightCtgIndex)
        )

        print "overlap contig : ", overlap

        leftEnd = len(leftSeg) - overlap[0]
        middleContent = ""

    else:

        contigName = abunHouseKeeper.parseIDToName(leftCtgIndex, "C", N1)
        print contigName
        leftSeg = IORobot.myRead(folderName, contigFilename + "_Double.fasta", contigName)

        readName = abunHouseKeeper.parseIDToName(succReadsList[0], "R", N1)
        print readName
        rightSeg = IORobot.myRead(folderName, readsetFilename + "_Double.fasta", readName)

        overlap = IORobot.alignWithName(
            leftSeg, rightSeg, folderName, mummerLink, str(leftCtgIndex) + "_" + str(rightCtgIndex)
        )

        print "overlap start read : ", overlap

        leftEnd = len(leftSeg) - overlap[0]

        middleContent = ""

        for i in range(len(succReadsList) - 1):
            readName = abunHouseKeeper.parseIDToName(succReadsList[i], "R", N1)
            leftSeg = IORobot.myRead(folderName, readsetFilename + "_Double.fasta", readName)

            readName = abunHouseKeeper.parseIDToName(succReadsList[i + 1], "R", N1)
            rightSeg = IORobot.myRead(folderName, readsetFilename + "_Double.fasta", readName)

            overlap = IORobot.alignWithName(
                leftSeg, rightSeg, folderName, mummerLink, str(leftCtgIndex) + "_" + str(rightCtgIndex)
            )
            print "overlap middle read : ", overlap
            middleContent = middleContent + leftSeg[0 : len(leftSeg) - overlap[0]]

        readName = abunHouseKeeper.parseIDToName(succReadsList[-1], "R", N1)
        leftSeg = IORobot.myRead(folderName, readsetFilename + "_Double.fasta", readName)

        contigName = abunHouseKeeper.parseIDToName(rightCtgIndex, "C", N1)
        rightSeg = IORobot.myRead(folderName, contigFilename + "_Double.fasta", contigName)

        overlap = IORobot.alignWithName(
            leftSeg, rightSeg, folderName, mummerLink, str(leftCtgIndex) + "_" + str(rightCtgIndex)
        )
        print "overlap end read : ", overlap

        middleContent = middleContent + leftSeg[0 : len(leftSeg) - overlap[0]]

    return [leftCtgIndex, rightCtgIndex, leftEnd, rightStart, middleContent]