def condenseEdgeRemove(self, G_ContigRead, folderName, mummerLink, contigFilename):
    """Remove suspicious one-to-one edges, then rebuild the adjacency lists.

    An edge ``current -> next`` is examined only when ``current`` has a
    non-empty ``nodeIndexList``, exactly one entry in ``listOfNextNodes``,
    and the node that entry points to has exactly one entry in
    ``listOfPrevNodes``.

    For each such edge, the paths returned by
    ``findAllPathK(current, next, G_ContigRead, 5)`` are scanned: paths with
    more than two entries are counted, and every two-entry path triggers an
    alignment (``IORobot.align``) of the two contig sequences read from
    ``contigFilename + "_Double.fasta"``.  The edge is removed when the count
    is at most 100 and any of the following holds:

    * the current contig's name is in ``noGoNext.json``,
    * the next contig's name is in ``noGoPrev.json``,
    * the first element of the alignment result exceeds 5000.

    Args:
        G_ContigRead: graph handed unchanged to ``findAllPathK``.
        folderName: folder passed to ``readInJSON``, ``IORobot.myRead`` and
            ``IORobot.align``.
        mummerLink: passed unchanged to ``IORobot.align``.
        contigFilename: base name; ``"_Double.fasta"`` is appended to it.

    Returns:
        None.  The graph is modified through ``self.removeEdge`` and
        ``self.findAdjList`` is always called once at the end.

    NOTE(review): an identical definition of this method sits right next to
    this one in the file; if both live in the same class the later one
    silently replaces the earlier -- confirm and delete one of them.
    """
    # Single-argument print(...) produces the same output under the Python 2
    # print statement that the rest of this file uses.
    print("condenseEdgeRemove")

    thresPass = 100
    thresForStrangeCut = 5000

    # The two JSON lists are read with constant arguments, so they are loaded
    # on first use and reused instead of being re-read for every candidate
    # edge.  Assumes removeEdge does not rewrite these files -- TODO confirm.
    noGoNext = None
    noGoPrev = None

    for eachnode in self.graphNodesList:
        if len(eachnode.nodeIndexList) == 0:
            continue
        if len(eachnode.listOfNextNodes) != 1:
            continue

        nextNodeIndex = eachnode.listOfNextNodes[0][0]
        nextNode = self.graphNodesList[nextNodeIndex]
        if len(nextNode.listOfPrevNodes) != 1:
            continue

        currentName = eachnode.nodeIndex
        nextName = nextNode.nodeIndex

        # The trailing 5 is presumably a path-length bound -- TODO confirm
        # against findAllPathK.
        contigReadPaths = findAllPathK(currentName, nextName, G_ContigRead, 5)

        cName = abunHouseKeeper.parseIDToName(currentName, 'C', 0)
        nName = abunHouseKeeper.parseIDToName(nextName, 'C', 0)

        if noGoNext is None or noGoPrev is None:
            noGoNext = self.readInJSON(folderName, "noGoNext.json")
            noGoPrev = self.readInJSON(folderName, "noGoPrev.json")

        # [-1, -1] stands for "no direct two-entry path was aligned".
        overlap = [-1, -1]
        ctr = 0
        for eachpath in contigReadPaths:
            if len(eachpath) > 2:
                ctr = ctr + 1
            elif len(eachpath) == 2:
                leftSeg = IORobot.myRead(folderName, contigFilename + "_Double.fasta", cName)
                rightSeg = IORobot.myRead(folderName, contigFilename + "_Double.fasta", nName)
                overlap = IORobot.align(leftSeg, rightSeg, folderName, mummerLink)

        flagged = (
            cName in noGoNext
            or nName in noGoPrev
            or overlap[0] > thresForStrangeCut
        )
        if ctr <= thresPass and flagged:
            self.removeEdge(currentName, nextName)

    self.findAdjList()
def condenseEdgeRemove(self, G_ContigRead, folderName, mummerLink, contigFilename):
    """Remove suspicious one-to-one edges, then rebuild the adjacency lists.

    An edge ``current -> next`` is examined only when ``current`` has a
    non-empty ``nodeIndexList``, exactly one entry in ``listOfNextNodes``,
    and the node that entry points to has exactly one entry in
    ``listOfPrevNodes``.

    For each such edge, the paths returned by
    ``findAllPathK(current, next, G_ContigRead, 5)`` are scanned: paths with
    more than two entries are counted, and every two-entry path triggers an
    alignment (``IORobot.align``) of the two contig sequences read from
    ``contigFilename + "_Double.fasta"``.  The edge is removed when the count
    is at most 100 and any of the following holds:

    * the current contig's name is in ``noGoNext.json``,
    * the next contig's name is in ``noGoPrev.json``,
    * the first element of the alignment result exceeds 5000.

    Args:
        G_ContigRead: graph handed unchanged to ``findAllPathK``.
        folderName: folder passed to ``readInJSON``, ``IORobot.myRead`` and
            ``IORobot.align``.
        mummerLink: passed unchanged to ``IORobot.align``.
        contigFilename: base name; ``"_Double.fasta"`` is appended to it.

    Returns:
        None.  The graph is modified through ``self.removeEdge`` and
        ``self.findAdjList`` is always called once at the end.

    NOTE(review): an identical definition of this method sits right next to
    this one in the file; if both live in the same class this later one
    silently replaces the earlier -- confirm and delete one of them.
    """
    # Single-argument print(...) produces the same output under the Python 2
    # print statement that the rest of this file uses.
    print("condenseEdgeRemove")

    thresPass = 100
    thresForStrangeCut = 5000

    # The two JSON lists are read with constant arguments, so they are loaded
    # on first use and reused instead of being re-read for every candidate
    # edge.  Assumes removeEdge does not rewrite these files -- TODO confirm.
    noGoNext = None
    noGoPrev = None

    for eachnode in self.graphNodesList:
        if len(eachnode.nodeIndexList) == 0:
            continue
        if len(eachnode.listOfNextNodes) != 1:
            continue

        nextNodeIndex = eachnode.listOfNextNodes[0][0]
        nextNode = self.graphNodesList[nextNodeIndex]
        if len(nextNode.listOfPrevNodes) != 1:
            continue

        currentName = eachnode.nodeIndex
        nextName = nextNode.nodeIndex

        # The trailing 5 is presumably a path-length bound -- TODO confirm
        # against findAllPathK.
        contigReadPaths = findAllPathK(currentName, nextName, G_ContigRead, 5)

        cName = abunHouseKeeper.parseIDToName(currentName, 'C', 0)
        nName = abunHouseKeeper.parseIDToName(nextName, 'C', 0)

        if noGoNext is None or noGoPrev is None:
            noGoNext = self.readInJSON(folderName, "noGoNext.json")
            noGoPrev = self.readInJSON(folderName, "noGoPrev.json")

        # [-1, -1] stands for "no direct two-entry path was aligned".
        overlap = [-1, -1]
        ctr = 0
        for eachpath in contigReadPaths:
            if len(eachpath) > 2:
                ctr = ctr + 1
            elif len(eachpath) == 2:
                leftSeg = IORobot.myRead(folderName, contigFilename + "_Double.fasta", cName)
                rightSeg = IORobot.myRead(folderName, contigFilename + "_Double.fasta", nName)
                overlap = IORobot.align(leftSeg, rightSeg, folderName, mummerLink)

        flagged = (
            cName in noGoNext
            or nName in noGoPrev
            or overlap[0] > thresForStrangeCut
        )
        if ctr <= thresPass and flagged:
            self.removeEdge(currentName, nextName)

    self.findAdjList()
def singleGapLookUp(eachmatchpair, folderName, N1, mummerLink, contigReadGraph, contigFilename, readsetFilename):
    """Work out how to bridge the gap between two contigs.

    The first and last entries of ``eachmatchpair`` are the left and right
    contig indices.  A chain of nodes between them is obtained with ``BFS``
    on the graph loaded from ``contigReadGraph``; its first and last entries
    are dropped (presumably the two contigs themselves -- confirm against
    ``BFS``) and the remainder is treated as reads.

    * No reads left: the two contigs are aligned directly.
    * Otherwise: left contig vs. first read, each read vs. its successor,
      and last read vs. right contig are aligned in that order.

    Every alignment yields ``overlap``; ``len(leftSeg) - overlap[0]`` is the
    number of leading characters of the left-hand sequence that is kept.

    Args:
        eachmatchpair: indexable; only ``[0]`` and ``[-1]`` are used.
        folderName: working folder for graph, FASTA reads and alignments.
        N1: forwarded to ``BFS`` and ``abunHouseKeeper.parseIDToName``.
        mummerLink: forwarded to ``IORobot.alignWithName``.
        contigReadGraph: graph file name given to ``loadFromFile``.
        contigFilename: contig base name (``"_Double.fasta"`` is appended).
        readsetFilename: read base name (``"_Double.fasta"`` is appended).

    Returns:
        ``[leftCtgIndex, rightCtgIndex, leftEnd, rightStart, middleContent]``
        where ``leftEnd`` is the kept length of the left contig,
        ``rightStart`` is always 0 in this function, and ``middleContent``
        is the concatenation of the kept prefix of every read in the chain
        (empty when the contigs are joined directly).
    """
    leftCtgIndex = eachmatchpair[0]
    rightCtgIndex = eachmatchpair[-1]
    leftEnd = 0
    rightStart = 0  # never reassigned below

    graph = seqGraphWt(0)
    graph.loadFromFile(folderName, contigReadGraph)
    succReadsList = BFS(leftCtgIndex, rightCtgIndex, graph, N1)

    # All print(...) calls take one pre-formatted argument so the output is
    # the same as the Python 2 print statements they replace.
    if len(succReadsList) == 0:
        print("interesting item for future study")
    else:
        succReadsList.pop(0)
        succReadsList.pop(-1)

    print("%s %s" % ("succReadsList", succReadsList))

    def fetchSeq(nodeIndex, kind, baseName, echo=False):
        # Resolve a node id to its sequence name, optionally print it, then
        # read that sequence from the doubled FASTA file.
        seqName = abunHouseKeeper.parseIDToName(nodeIndex, kind, N1)
        if echo:
            print(seqName)
        return IORobot.myRead(folderName, baseName + "_Double.fasta", seqName)

    def keptLength(leftSeg, rightSeg, label):
        # Align the pair, log the result, and return how much of leftSeg is kept.
        overlap = IORobot.alignWithName(
            leftSeg, rightSeg, folderName, mummerLink,
            str(leftCtgIndex) + "_" + str(rightCtgIndex)
        )
        print("%s %s" % (label, overlap))
        return len(leftSeg) - overlap[0]

    pieces = []

    if len(succReadsList) == 0:
        # Direct contig-to-contig join.
        leftSeg = fetchSeq(leftCtgIndex, 'C', contigFilename)
        rightSeg = fetchSeq(rightCtgIndex, 'C', contigFilename)
        leftEnd = keptLength(leftSeg, rightSeg, "overlap contig : ")
    else:
        # Left contig -> first read.
        leftSeg = fetchSeq(leftCtgIndex, 'C', contigFilename, echo=True)
        rightSeg = fetchSeq(succReadsList[0], 'R', readsetFilename, echo=True)
        leftEnd = keptLength(leftSeg, rightSeg, "overlap start read : ")

        # Read -> next read, for every consecutive pair in the chain.
        for currentRead, followingRead in zip(succReadsList, succReadsList[1:]):
            leftSeg = fetchSeq(currentRead, 'R', readsetFilename)
            rightSeg = fetchSeq(followingRead, 'R', readsetFilename)
            keep = keptLength(leftSeg, rightSeg, "overlap middle read : ")
            pieces.append(leftSeg[0:keep])

        # Last read -> right contig.
        leftSeg = fetchSeq(succReadsList[-1], 'R', readsetFilename)
        rightSeg = fetchSeq(rightCtgIndex, 'C', contigFilename)
        keep = keptLength(leftSeg, rightSeg, "overlap end read : ")
        pieces.append(leftSeg[0:keep])

    middleContent = "".join(pieces)

    return [leftCtgIndex, rightCtgIndex, leftEnd, rightStart, middleContent]
def singleGapLookUp(eachmatchpair, folderName, N1, mummerLink, contigReadGraph, contigFilename, readsetFilename):
    """Work out how to bridge the gap between two contigs.

    The first and last entries of ``eachmatchpair`` are the left and right
    contig indices.  Candidate paths between them come from
    ``abunGraphLib.findAllPathK(..., 5)`` on the graph loaded from
    ``contigReadGraph``.  Paths whose interior contains an id below ``N1``
    are discarded (the original code labels such ids as contigs), and the
    first shortest remaining path with fewer than 1000 entries is chosen.
    Its first and last entries are dropped; the rest is treated as reads.

    * No reads left: the two contigs are aligned directly.
    * Otherwise: left contig vs. first read, each read vs. its successor,
      and last read vs. right contig are aligned in that order.

    Every alignment yields ``overlap``; ``len(leftSeg) - overlap[0]`` is the
    number of leading characters of the left-hand sequence that is kept.

    Args:
        eachmatchpair: indexable; only ``[0]`` and ``[-1]`` are used.
        folderName: working folder for graph, FASTA reads and alignments.
        N1: id threshold used for filtering paths; also forwarded to
            ``abunHouseKeeper.parseIDToName``.
        mummerLink: forwarded to ``IORobot.alignWithName``.
        contigReadGraph: graph file name given to ``loadFromFile``.
        contigFilename: contig base name (``"_Double.fasta"`` is appended).
        readsetFilename: read base name (``"_Double.fasta"`` is appended).

    Returns:
        ``[leftCtgIndex, rightCtgIndex, leftEnd, rightStart, middleContent]``
        where ``leftEnd`` is the kept length of the left contig,
        ``rightStart`` is always 0 in this function, and ``middleContent``
        is the concatenation of the kept prefix of every read in the chain
        (empty when the contigs are joined directly).
    """
    # All print(...) calls take one pre-formatted argument so the output is
    # the same as the Python 2 print statements they replace.
    print(eachmatchpair)

    leftCtgIndex = eachmatchpair[0]
    rightCtgIndex = eachmatchpair[-1]
    leftEnd = 0
    rightStart = 0  # never reassigned below
    middleContent = ""

    # NOTE(review): the value returned here was overwritten immediately in
    # the original code, so it is not bound.  The call is kept only because
    # its side effects are not visible from this function -- confirm it can
    # be deleted.
    abunGraphLib.findPathBtwEnds(folderName, leftCtgIndex, rightCtgIndex, contigReadGraph, N1)

    graph = graphLib.seqGraph(0)
    graph.loadFromFile(folderName, contigReadGraph)

    allPaths = abunGraphLib.findAllPathK(leftCtgIndex, rightCtgIndex, graph, 5)
    print("%s %s" % ("allPaths", allPaths))

    # Keep only paths whose interior nodes all have ids >= N1.
    possibleList = [
        path for path in allPaths
        if not any(node < N1 for node in path[1:-1])
    ]
    print("%s %s" % ("possibleList", possibleList))

    # First shortest candidate wins; candidates with 1000+ entries are never chosen.
    shortEnough = [path for path in possibleList if len(path) < 1000]
    succReadsList = min(shortEnough, key=len) if shortEnough else []

    if len(succReadsList) == 0:
        print("interesting item for future study")
    else:
        succReadsList.pop(0)
        succReadsList.pop(-1)

    print("%s %s" % ("succReadsList", succReadsList))

    def fetchSeq(nodeIndex, kind, baseName, echo=False):
        # Resolve a node id to its sequence name, optionally print it, then
        # read that sequence from the doubled FASTA file.
        seqName = abunHouseKeeper.parseIDToName(nodeIndex, kind, N1)
        if echo:
            print(seqName)
        return IORobot.myRead(folderName, baseName + "_Double.fasta", seqName)

    def keptLength(leftSeg, rightSeg, label):
        # Align the pair, log the result, and return how much of leftSeg is kept.
        overlap = IORobot.alignWithName(
            leftSeg, rightSeg, folderName, mummerLink,
            str(leftCtgIndex) + "_" + str(rightCtgIndex)
        )
        print("%s %s" % (label, overlap))
        return len(leftSeg) - overlap[0]

    if len(succReadsList) == 0:
        # Direct contig-to-contig join.
        leftSeg = fetchSeq(leftCtgIndex, "C", contigFilename)
        rightSeg = fetchSeq(rightCtgIndex, "C", contigFilename)
        leftEnd = keptLength(leftSeg, rightSeg, "overlap contig : ")
    else:
        # Left contig -> first read.
        leftSeg = fetchSeq(leftCtgIndex, "C", contigFilename, echo=True)
        rightSeg = fetchSeq(succReadsList[0], "R", readsetFilename, echo=True)
        leftEnd = keptLength(leftSeg, rightSeg, "overlap start read : ")

        # Read -> next read, for every consecutive pair in the chain.
        for currentRead, followingRead in zip(succReadsList, succReadsList[1:]):
            leftSeg = fetchSeq(currentRead, "R", readsetFilename)
            rightSeg = fetchSeq(followingRead, "R", readsetFilename)
            keep = keptLength(leftSeg, rightSeg, "overlap middle read : ")
            middleContent = middleContent + leftSeg[0:keep]

        # Last read -> right contig.
        leftSeg = fetchSeq(succReadsList[-1], "R", readsetFilename)
        rightSeg = fetchSeq(rightCtgIndex, "C", contigFilename)
        keep = keptLength(leftSeg, rightSeg, "overlap end read : ")
        middleContent = middleContent + leftSeg[0:keep]

    return [leftCtgIndex, rightCtgIndex, leftEnd, rightStart, middleContent]