def xResolve(self, xResolvedList):
    '''
    Apply the resolved paths in xResolvedList to the graph.

    xResolvedList is indexed by node: xResolvedList[i] holds the paths
    resolved through node i, for i in range(self.N1).  Only the first and
    last entry of each path (the node entered from and the node exited to)
    are used here.

    For every path through node i:
      * if node i has exactly one predecessor and one successor, nothing is
        rewired and the two edges (in, i) and (i, out) are recorded as-is;
      * otherwise a new node with index self.N1 + self.runningCtr is
        appended to self.graphNodesList, the edges (in, i) and (i, out)
        are removed, and the path is re-routed through the new node.

    Side effects:
      self.mapDummyToRealDic       -- reset, then maps each counter value
                                      used here to [[i], nodeIndexList of i]
      self.xResolvedSimplifiedList -- reset, then filled with [from, to]
                                      pairs, two per path
      self.graphNodesList, self.runningCtr and the edge lists (through
      self.removeEdge / self.insertEdge).
    '''
    self.mapDummyToRealDic = {}
    self.xResolvedSimplifiedList = []

    for i in range(self.N1):
        for y in xResolvedList[i]:
            node = self.graphNodesList[i]
            inNode = y[0]
            outNode = y[-1]

            # A neighbour with index >= self.N1 is a node added by this
            # routine; use it as the endpoint when its recorded entry
            # matches the path's endpoint.
            # NOTE(review): entry [0] is stored below as the list [i], so
            # this comparison only succeeds if y[0] / y[-1] are lists of
            # the same form -- confirm against the caller.
            # NOTE(review): the lookup raises KeyError for neighbours
            # >= self.N1 that were not created during this call.
            for eachprev in node.listOfPrevNodes:
                key = eachprev[0]
                if key >= self.N1 and self.mapDummyToRealDic[key - self.N1][0] == y[0]:
                    inNode = key

            for eachnext in node.listOfNextNodes:
                key = eachnext[0]
                if key >= self.N1 and self.mapDummyToRealDic[key - self.N1][0] == y[-1]:
                    outNode = key

            if len(node.listOfPrevNodes) == 1 and len(node.listOfNextNodes) == 1:
                # Node i is already unambiguous: keep it in place.
                self.xResolvedSimplifiedList.append([inNode, i])
                self.xResolvedSimplifiedList.append([i, outNode])
                continue

            # Route this path through a fresh node instead of node i.
            newIndex = self.N1 + self.runningCtr
            self.graphNodesList.append(graphLib.seqGraphNode(newIndex))

            self.removeEdge(inNode, i)
            self.removeEdge(i, outNode)

            # 1997 is the weight given to both new edges; its meaning is not
            # visible here (presumably a placeholder) -- TODO confirm.
            self.insertEdge(inNode, newIndex, 1997)
            self.insertEdge(newIndex, outNode, 1997)

            self.mapDummyToRealDic[self.runningCtr] = [[i], self.graphNodesList[i].nodeIndexList]

            self.xResolvedSimplifiedList.append([inNode, newIndex])
            self.xResolvedSimplifiedList.append([newIndex, outNode])

            self.runningCtr = self.runningCtr + 1
def _flipStrand(indexList):
    '''Return indexList reversed, with every even index k replaced by k + 1 and every odd k by k - 1.'''
    return [idx + pow(-1, idx) for idx in indexList[::-1]]


def _appendRenamedSeeds(folderName, combinedName, N1):
    '''Copy improved3_Double.fasta to combinedName and append the renamed phasing-seed records.'''
    # Function-scope import so the block does not depend on the file header.
    import shutil

    combinedPath = folderName + combinedName
    # copyfile, not a shell "cp": paths with spaces or shell metacharacters
    # work, and a missing source file raises instead of being ignored.
    shutil.copyfile(folderName + "improved3_Double.fasta", combinedPath)

    with open(combinedPath, 'a') as fout:
        with open(folderName + "phasingSeedName_Double.fasta", 'r') as fin:
            for rawLine in fin:
                line = rawLine.rstrip()
                if not line:
                    # The first blank line ends the input, same as end of file.
                    break
                if line[0] != ">":
                    fout.write(line + "\n")
                else:
                    # Header: drop the first five characters, split the rest
                    # on "_" and shift the numeric id by N1 // 2.
                    # Floor division keeps the id an integer on any Python.
                    infoArr = line[5:].split("_")
                    fout.write(">Contig" + str(int(infoArr[0]) + N1 // 2))
                    fout.write("_" + infoArr[1] + "\n")


def _withReverseComplements(toPhaseList):
    '''Return [repeat, flanking, result] for every item, each followed by its reverse complement.'''
    # Example of an item's last three fields, and of one spliced path:
    #   [28], [[2, 690, 28], [6, 126, 28], [28, 212, 0], [28, 216, 4]], 1
    #   [2 , 690, 28, 212, 0]
    completePhaseList = []
    for eachitem in toPhaseList:
        repeat, flanking, result = eachitem[-3], eachitem[-2], eachitem[-1]

        # On the opposite strand the flanks at positions 2 and 3 take
        # positions 0 and 1 and vice versa; the result flag is unchanged.
        revflanking = [
            _flipStrand(flanking[2]),
            _flipStrand(flanking[3]),
            _flipStrand(flanking[0]),
            _flipStrand(flanking[1]),
        ]

        completePhaseList.append([repeat, flanking, result])
        completePhaseList.append([_flipStrand(repeat), revflanking, result])
    return completePhaseList


def _buildPhasedGraph(completePhaseList, N1, G):
    '''Build the read-out graph G2 and the map from G2 node index to node index in G.'''
    G2 = graphLib.seqGraph(N1)
    nameDic = {}
    for i in range(N1):
        nameDic[i] = i

    for repeat, flanking, result in completePhaseList:
        # result == 0 pairs flank 0 with 2 and 1 with 3; anything else
        # pairs 0 with 3 and 1 with 2.  The repeat replaces the shared
        # end / start element of the two flanks.
        if result == 0:
            paths = [flanking[0][0:-1] + repeat + flanking[2][1:],
                     flanking[1][0:-1] + repeat + flanking[3][1:]]
        else:
            paths = [flanking[0][0:-1] + repeat + flanking[3][1:],
                     flanking[1][0:-1] + repeat + flanking[2][1:]]

        print("%s %s" % (paths[0], paths[1]))

        for eachpath in paths:
            currentNode = G2.graphNodesList[eachpath[0]]
            tail = eachpath[1:]
            lastCtr = len(tail) - 1

            for ctr, nextNodeIndex in enumerate(tail):
                if ctr != lastCtr:
                    # Interior nodes get a fresh copy in G2 so that the two
                    # paths do not share them; nameDic remembers the original.
                    myindex = len(G2.graphNodesList)
                    nameDic[myindex] = nextNodeIndex
                    newNode = graphLib.seqGraphNode(myindex)
                    G2.graphNodesList.append(newNode)
                else:
                    # The path ends on the existing G2 node.
                    newNode = G2.graphNodesList[nextNodeIndex]

                # Reuse the edge weight from G when the edge exists there,
                # otherwise 0.
                wt = 0
                for eachck in G.graphNodesList[nameDic[currentNode.nodeIndex]].listOfNextNodes:
                    if eachck[0] == nextNodeIndex:
                        wt = eachck[1]
                        break

                newNode.listOfPrevNodes.append([currentNode.nodeIndex, wt])
                currentNode.listOfNextNodes.append([newNode.nodeIndex, wt])
                currentNode = newNode

    return G2, nameDic


def outputResults(folderName, mummerLink, toPhaseList, N1, G):
    '''
    Write out the contigs obtained from the phasing decisions in toPhaseList.

    Algorithm :
    a) Write as contigs
    b) Add back reverse complement
    c) Create G2 as the readOut part
    d) Output the contigs by a function call

    folderName  -- directory prefix, concatenated directly with file names
                   (so it is expected to end with a path separator)
    mummerLink  -- passed through unchanged to IORobot.readContigOut
    toPhaseList -- items whose last three fields are repeat, flanking, result
    N1          -- number of nodes G2 is created with; N1 // 2 is also the
                   offset added to the seed ids
    G           -- graph whose edge weights are copied into G2

    Reads  improved3_Double.fasta and phasingSeedName_Double.fasta.
    Writes contigAndRead_Double.fasta and the graph file "phaseGraphFinal"
    in folderName, then calls IORobot.readContigOut with the output name
    "improved4.fasta".
    Raises IOError / OSError when an input file is missing.
    '''
    # a)
    combinedName = "contigAndRead_Double.fasta"
    _appendRenamedSeeds(folderName, combinedName, N1)

    # b)
    completePhaseList = _withReverseComplements(toPhaseList)
    print("completePhaseList %s" % (completePhaseList,))

    # c)
    G2, nameDic = _buildPhasedGraph(completePhaseList, N1, G)

    # d)
    graphFileName = "phaseGraphFinal"
    G2.condense()
    G2.saveToFile(folderName, graphFileName)

    IORobot.readContigOut(folderName, mummerLink, graphFileName, combinedName, "improved4.fasta", "outOpenListphaing", nameDic)