def formPathSeq(folderName, mummerPath, directPathList, indirectPathList, contigFile, readFile):
    '''
    Convert the direct and indirect path lists to sequence lists and hand
    each one to IORobot.writeSegOut.

    Input : directPathList, indirectPathList, contigFile, readFile
            (folderName and mummerPath are forwarded to the IORobot helpers)
    Output: directPath.fasta, indirectPath.fasta
    '''
    contigs = IORobot.readContigsFromFile(folderName, contigFile)
    reads = IORobot.readContigsFromFile(folderName, readFile)

    # Transform both path lists first; nothing is written until both
    # transforms have completed.
    pathSeqLists = [
        IORobot.pathListToSeqListTransform(pathList, contigs, reads, mummerPath, folderName)
        for pathList in (directPathList, indirectPathList)
    ]

    outputNames = ("directPath.fasta", "indirectPath.fasta")
    for seqList, outName in zip(pathSeqLists, outputNames):
        IORobot.writeSegOut(seqList, folderName, outName)
def formPathSeq(folderName, mummerPath, directPathList, indirectPathList, contigFile, readFile):
    '''
    Produce sequence lists for the direct and indirect paths and pass them
    to IORobot.writeSegOut under fixed output names.

    Input : directPathList, indirectPathList, contigFile, readFile
            (folderName and mummerPath are forwarded to the IORobot helpers)
    Output: directPath.fasta, indirectPath.fasta
    '''
    contigSeqs = IORobot.readContigsFromFile(folderName, contigFile)
    readSeqs = IORobot.readContigsFromFile(folderName, readFile)

    def toSeqList(pathList):
        # Both path lists share every transform argument except the paths.
        return IORobot.pathListToSeqListTransform(
            pathList, contigSeqs, readSeqs, mummerPath, folderName)

    directSeqs = toSeqList(directPathList)
    indirectSeqs = toSeqList(indirectPathList)

    IORobot.writeSegOut(directSeqs, folderName, "directPath.fasta")
    IORobot.writeSegOut(indirectSeqs, folderName, "indirectPath.fasta")
def readContigForAbunSplit(folderName, mummerLink, contigFilename, readsetFilename, N1, contigReadGraph): json_data = open(folderName + "mapDummyToRealDic.json", 'r') mapDummyToRealDic = json.load(json_data) G = [] G = graphLib.seqGraph(0) G.loadFromFile(folderName, "xResolvedGraph") gapContentLookUpDic = {} furtherGapList = [] for i in range(N1): if len(G.graphNodesList[i].nodeIndexList) > 1: for j in range(len(G.graphNodesList[i].nodeIndexList) - 1): bk, fwd = G.graphNodesList[i].nodeIndexList[ j], G.graphNodesList[i].nodeIndexList[j + 1] key = str(bk) + "_" + str(fwd) if not key in gapContentLookUpDic: furtherGapList.append([bk, fwd]) with open(folderName + "furtherGapList.json", 'w') as f: json.dump(furtherGapList, f) furtherGapContentLookUpList = generateGapContentLookup( folderName, mummerLink, furtherGapList, contigReadGraph, contigFilename, readsetFilename, mapDummyToRealDic) for eachitem in furtherGapContentLookUpList: gapContentLookUpDic[str(eachitem[0]) + "_" + str(eachitem[1])] = [ eachitem[2], eachitem[3], eachitem[4] ] print eachitem[2:4], len(eachitem[4]) #segLookUp = IORobot.readContigsFromFile(folderName, "LC_n_Double.fasta") print "Final step: really hacking a file" os.system("cp " + folderName + contigFilename + "_Double.fasta " + folderName + "tmpWithDummy.fasta") contigList = IORobot.readContigsFromFile(folderName, contigFilename + "_Double.fasta") IORobot.extractGraphToContigs(G, folderName, mummerLink, "abunPre.fasta", "tmpWithDummy.fasta", gapContentLookUpDic, mapDummyToRealDic) if True: nonRedundantResolver.removeRedundantWithFile(folderName, mummerLink, "abunPre", "abunMum", "abun")
def readContigForAbunSplit(folderName, mummerLink, contigFilename, readsetFilename, N1, contigReadGraph): json_data = open(folderName + "mapDummyToRealDic.json", "r") mapDummyToRealDic = json.load(json_data) G = [] G = graphLib.seqGraph(0) G.loadFromFile(folderName, "xResolvedGraph") gapContentLookUpDic = {} furtherGapList = [] for i in range(N1): if len(G.graphNodesList[i].nodeIndexList) > 1: for j in range(len(G.graphNodesList[i].nodeIndexList) - 1): bk, fwd = G.graphNodesList[i].nodeIndexList[j], G.graphNodesList[i].nodeIndexList[j + 1] key = str(bk) + "_" + str(fwd) if not key in gapContentLookUpDic: furtherGapList.append([bk, fwd]) with open(folderName + "furtherGapList.json", "w") as f: json.dump(furtherGapList, f) furtherGapContentLookUpList = generateGapContentLookup( folderName, mummerLink, furtherGapList, contigReadGraph, contigFilename, readsetFilename, mapDummyToRealDic ) for eachitem in furtherGapContentLookUpList: gapContentLookUpDic[str(eachitem[0]) + "_" + str(eachitem[1])] = [eachitem[2], eachitem[3], eachitem[4]] print eachitem[2:4], len(eachitem[4]) # segLookUp = IORobot.readContigsFromFile(folderName, "LC_n_Double.fasta") print "Final step: really hacking a file" os.system("cp " + folderName + contigFilename + "_Double.fasta " + folderName + "tmpWithDummy.fasta") contigList = IORobot.readContigsFromFile(folderName, contigFilename + "_Double.fasta") IORobot.extractGraphToContigs( G, folderName, mummerLink, "abunPre.fasta", "tmpWithDummy.fasta", gapContentLookUpDic, mapDummyToRealDic ) if True: nonRedundantResolver.removeRedundantWithFile(folderName, mummerLink, "abunPre", "abunMum", "abun")
def mapStrangePairs(): folderName = "Apr10Test/" json_data = open(folderName + "furtherGapList.json", 'r') furtherGapList = json.load(json_data) segLookUp = IORobot.readContigsFromFile(folderName, "LC_n_Double.fasta") f = open(folderName + "wrongCondense.fasta", 'w') ctr = 0 for eachitem in furtherGapList: beforeI, afterI = eachitem[0], eachitem[1] f.write(">Segkk"+str(ctr)+"\n") f.write(segLookUp[beforeI]+"\n") ctr = ctr + 1 f.write(">Segkk"+str(ctr)+"\n") f.write(segLookUp[afterI]+"\n") ctr = ctr + 1 f.close() if False: alignerRobot.useMummerAlign("/usr/bin/", folderName, "wrongCondenseDebug", "reference.fasta", "wrongCondense.fasta") dataList = alignerRobot.extractMumData(folderName, "wrongCondenseDebugOut") dataList.sort(key = itemgetter(-1)) mappedDic = {} for key, items in groupby(dataList, itemgetter(-1)): print "key", key matchLen = -1 for eachitem in items: if eachitem[-4] > matchLen: mappedDic[key] = eachitem matchLen = eachitem[-4] for eachitem in mappedDic: print "results : ", eachitem, mappedDic[eachitem]
def abunSplitWithXResolve(folderName, mummerLink, myCountDic, contigReadGraph, contigFilename, readsetFilename):
    '''
    Staged abundance-split pipeline with X-node resolution. As written, only
    the debug section runs; every later stage sits behind `if False:`.

    Input : folderName (prefixed directly onto file names), mummerLink,
            myCountDic (only its length is used in the live code),
            contigReadGraph (graph name loaded from folderName),
            contigFilename, readsetFilename (used only by disabled stages)
    Output: prints N1 and calls Gnew.reportEdge(). The disabled stages would
            write resolvedList.json, mapDummyToRealDic.json,
            gapContentLookUpDic.json, the "xResolvedGraph" graph and
            tmpWithDummy.fasta, and pass "abun.fasta" to
            IORobot.extractGraphToContigs.

    NOTE(review): the source formatting was collapsed; stage boundaries are
    reconstructed from where each stage reloads its inputs -- confirm.
    '''
    N1 = len(myCountDic) * 2
    print "N1", N1

    # Debug
    # Load the contig/read graph and rebuild a graph on the first N1 nodes
    # whose edges are whatever abunGraphLib.findAllReachable returns.
    G = graphLib.seqGraph(0)
    G.loadFromFile(folderName, contigReadGraph)

    adj = [[] for i in range(N1)]

    for i in range(N1):
        adj[i] = abunGraphLib.findAllReachable(i, N1, G)

    Gnew = graphLib.seqGraph(N1)

    for i in range(N1):
        for j in adj[i]:
            Gnew.insertEdge(i, j, 1)

    Gnew.reportEdge()
    # End Debug

    # Stage 1 (disabled): resolve repeats and persist the resolved edge list.
    if False:
        # NOTE(review): handle is never closed.
        json_data = open(folderName + "phaseRepeat.txt", "r")
        repeatPairs = json.load(json_data)
        repeatPairs = obtainNonEmpty(repeatPairs)

        biResolvedCombineList = []
        for eachitem in repeatPairs:
            inList, outList = eachitem[0], eachitem[1]
            resolvedList = determindMatch(inList, outList, myCountDic, folderName, contigReadGraph, N1)

            biResolvedCombineList += resolvedList

        ### Xnode repeatResolution
        xResolvedList, mapDummyToRealDic = xNodeResolving(folderName, contigReadGraph)

        ### Combine resolution
        resolvedList = xResolvedList + biResolvedCombineList
        resolvedList = abunHouseKeeper.getDistinct(resolvedList)
        print "resolvedList, len(resolvedList),len(xResolvedList), len(biResolvedCombineList) ", resolvedList, len(
            resolvedList
        ), len(xResolvedList), len(biResolvedCombineList)

        with open(folderName + "resolvedList.json", "w") as f:
            json.dump(resolvedList, f)

        with open(folderName + "mapDummyToRealDic.json", "w") as f:
            json.dump(mapDummyToRealDic, f)

    # Stage 2 (disabled): build the gap-content lookup from the saved results.
    if False:
        # NOTE(review): json_data is rebound without closing the first handle.
        json_data = open(folderName + "resolvedList.json", "r")
        resolvedList = json.load(json_data)

        json_data = open(folderName + "mapDummyToRealDic.json", "r")
        mapDummyToRealDic = json.load(json_data)

        gapContentLookUpList = []
        gapContentLookUpList = generateGapContentLookup(
            folderName, mummerLink, resolvedList, contigReadGraph, contigFilename, readsetFilename, mapDummyToRealDic
        )
        gapContentLookUpDic = {}
        gapContentLookUpList.sort()

        # Key is "<item0>_<item1>"; value keeps fields 2..4 of each record.
        for eachitem in gapContentLookUpList:
            gapContentLookUpDic[str(eachitem[0]) + "_" + str(eachitem[1])] = [eachitem[2], eachitem[3], eachitem[4]]
            print eachitem[2:4], len(eachitem[4])

        with open(folderName + "gapContentLookUpDic.json", "w") as f:
            json.dump(gapContentLookUpDic, f)

    # Stage 3 (disabled): build, condense and save the resolved graph.
    if False:
        json_data = open(folderName + "resolvedList.json", "r")
        resolvedList = json.load(json_data)

        json_data = open(folderName + "mapDummyToRealDic.json", "r")
        mapDummyToRealDic = json.load(json_data)

        # Graph has room for the N1 real nodes plus one node per dummy entry.
        G = graphLib.seqGraph(N1 + len(mapDummyToRealDic))
        addEdges(G, resolvedList)
        G.condense()
        G.saveToFile(folderName, "xResolvedGraph")

    # Stage 4 (disabled): append dummy segments and extract contigs.
    if False:
        json_data = open(folderName + "mapDummyToRealDic.json", "r")
        mapDummyToRealDic = json.load(json_data)

        G = graphLib.seqGraph(0)
        G.loadFromFile(folderName, "xResolvedGraph")

        json_data = open(folderName + "gapContentLookUpDic.json", "r")
        gapContentLookUpDic = json.load(json_data)

        print "Final step: really hacking a file"
        os.system("cp " + folderName + contigFilename + "_Double.fasta " + folderName + "tmpWithDummy.fasta")
        contigList = IORobot.readContigsFromFile(folderName, contigFilename + "_Double.fasta")

        # Append one ">SegDum<i>" record per dummy, using the contig that the
        # dummy maps to. JSON object keys are strings, hence str(i).
        f = open(folderName + "tmpWithDummy.fasta", "a")
        for i in range(len(mapDummyToRealDic)):
            # NOTE(review): `id` shadows the builtin of the same name.
            id = mapDummyToRealDic[str(i)]
            f.write(">SegDum" + str(i) + "\n")
            f.write(contigList[id] + "\n")
        f.close()

        IORobot.extractGraphToContigs(
            G, folderName, mummerLink, "abun.fasta", "tmpWithDummy.fasta", gapContentLookUpDic, mapDummyToRealDic
        )
def abunSplitWithXResolve(folderName, mummerLink, myCountDic, contigReadGraph, contigFilename, readsetFilename):
    '''
    Staged abundance-split pipeline with X-node resolution. As written, only
    the debug section executes; each later stage is guarded by `if False:`.

    Input : folderName (prefixed directly onto file names), mummerLink,
            myCountDic (only its length is used in the live code),
            contigReadGraph (graph name loaded from folderName),
            contigFilename, readsetFilename (used only by disabled stages)
    Output: prints N1 and calls Gnew.reportEdge(). The disabled stages would
            write resolvedList.json, mapDummyToRealDic.json,
            gapContentLookUpDic.json, the "xResolvedGraph" graph and
            tmpWithDummy.fasta, and pass "abun.fasta" to
            IORobot.extractGraphToContigs.

    NOTE(review): the source formatting was collapsed; stage boundaries are
    reconstructed from where each stage reloads its inputs -- confirm.
    '''
    N1 = len(myCountDic) * 2
    print "N1", N1

    # Debug
    # Load the contig/read graph and rebuild a graph on the first N1 nodes
    # whose edges are whatever abunGraphLib.findAllReachable returns.
    G = graphLib.seqGraph(0)
    G.loadFromFile(folderName, contigReadGraph)

    adj = [[] for i in range(N1)]

    for i in range(N1):
        adj[i] = abunGraphLib.findAllReachable(i, N1, G)

    Gnew = graphLib.seqGraph(N1)

    for i in range(N1):
        for j in adj[i]:
            Gnew.insertEdge(i, j, 1)

    Gnew.reportEdge()
    # End Debug

    # Stage 1 (disabled): resolve repeats and persist the resolved edge list.
    if False:
        # NOTE(review): handle is never closed.
        json_data = open(folderName + "phaseRepeat.txt", 'r')
        repeatPairs = json.load(json_data)
        repeatPairs = obtainNonEmpty(repeatPairs)

        biResolvedCombineList = []
        for eachitem in repeatPairs:
            inList, outList = eachitem[0], eachitem[1]
            resolvedList = determindMatch(inList, outList, myCountDic, folderName, contigReadGraph, N1)

            biResolvedCombineList += resolvedList

        ### Xnode repeatResolution
        xResolvedList, mapDummyToRealDic = xNodeResolving(
            folderName, contigReadGraph)

        ### Combine resolution
        resolvedList = xResolvedList + biResolvedCombineList
        resolvedList = abunHouseKeeper.getDistinct(resolvedList)
        print "resolvedList, len(resolvedList),len(xResolvedList), len(biResolvedCombineList) ", resolvedList, len(
            resolvedList), len(xResolvedList), len(biResolvedCombineList)

        with open(folderName + "resolvedList.json", 'w') as f:
            json.dump(resolvedList, f)

        with open(folderName + "mapDummyToRealDic.json", 'w') as f:
            json.dump(mapDummyToRealDic, f)

    # Stage 2 (disabled): build the gap-content lookup from the saved results.
    if False:
        # NOTE(review): json_data is rebound without closing the first handle.
        json_data = open(folderName + "resolvedList.json", 'r')
        resolvedList = json.load(json_data)

        json_data = open(folderName + "mapDummyToRealDic.json", 'r')
        mapDummyToRealDic = json.load(json_data)

        gapContentLookUpList = []
        gapContentLookUpList = generateGapContentLookup(
            folderName, mummerLink, resolvedList, contigReadGraph,
            contigFilename, readsetFilename, mapDummyToRealDic)
        gapContentLookUpDic = {}
        gapContentLookUpList.sort()

        # Key is "<item0>_<item1>"; value keeps fields 2..4 of each record.
        for eachitem in gapContentLookUpList:
            gapContentLookUpDic[str(eachitem[0]) + "_" + str(eachitem[1])] = [
                eachitem[2], eachitem[3], eachitem[4]
            ]
            print eachitem[2:4], len(eachitem[4])

        with open(folderName + "gapContentLookUpDic.json", 'w') as f:
            json.dump(gapContentLookUpDic, f)

    # Stage 3 (disabled): build, condense and save the resolved graph.
    if False:
        json_data = open(folderName + "resolvedList.json", 'r')
        resolvedList = json.load(json_data)

        json_data = open(folderName + "mapDummyToRealDic.json", 'r')
        mapDummyToRealDic = json.load(json_data)

        # Graph has room for the N1 real nodes plus one node per dummy entry.
        G = graphLib.seqGraph(N1 + len(mapDummyToRealDic))
        addEdges(G, resolvedList)
        G.condense()
        G.saveToFile(folderName, "xResolvedGraph")

    # Stage 4 (disabled): append dummy segments and extract contigs.
    if False:
        json_data = open(folderName + "mapDummyToRealDic.json", 'r')
        mapDummyToRealDic = json.load(json_data)

        G = graphLib.seqGraph(0)
        G.loadFromFile(folderName, "xResolvedGraph")

        json_data = open(folderName + "gapContentLookUpDic.json", 'r')
        gapContentLookUpDic = json.load(json_data)

        print "Final step: really hacking a file"
        os.system("cp " + folderName + contigFilename + "_Double.fasta " + folderName + "tmpWithDummy.fasta")
        contigList = IORobot.readContigsFromFile(
            folderName, contigFilename + "_Double.fasta")

        # Append one ">SegDum<i>" record per dummy, using the contig that the
        # dummy maps to. JSON object keys are strings, hence str(i).
        f = open(folderName + "tmpWithDummy.fasta", 'a')
        for i in range(len(mapDummyToRealDic)):
            # NOTE(review): `id` shadows the builtin of the same name.
            id = mapDummyToRealDic[str(i)]
            f.write(">SegDum" + str(i) + "\n")
            f.write(contigList[id] + "\n")
        f.close()

        IORobot.extractGraphToContigs(G, folderName, mummerLink, "abun.fasta",
                                      "tmpWithDummy.fasta", gapContentLookUpDic,
                                      mapDummyToRealDic)