Ejemplo n.º 1
0
def formPathSeq(folderName, mummerPath, directPathList, indirectPathList, contigFile, readFile):
    '''
    Convert the direct and indirect path lists to sequence lists and write
    each list to its own FASTA file.

    Input : folderName, mummerPath, directPathList, indirectPathList, contigFile, readFile
    Output: directPath.fasta, indirectPath.fasta (written through
            IORobot.writeSegOut with folderName)
    '''
    contigs = IORobot.readContigsFromFile(folderName, contigFile)
    reads = IORobot.readContigsFromFile(folderName, readFile)

    # (path list, output file name) for each of the two outputs.
    jobs = (
        (directPathList, "directPath.fasta"),
        (indirectPathList, "indirectPath.fasta"),
    )

    # Transform both path lists first; the output files are written only afterwards.
    seqLists = [
        IORobot.pathListToSeqListTransform(pathList, contigs, reads, mummerPath, folderName)
        for pathList, _ in jobs
    ]

    for seqList, (_, outName) in zip(seqLists, jobs):
        IORobot.writeSegOut(seqList, folderName, outName)
Ejemplo n.º 2
0
def formPathSeq(folderName, mummerPath, directPathList, indirectPathList, contigFile, readFile):
    '''
    Build sequence lists for the direct and indirect paths and save them.

    Input : folderName, mummerPath, directPathList, indirectPathList, contigFile, readFile
    Output: directPath.fasta, indirectPath.fasta (written through
            IORobot.writeSegOut with folderName)
    '''
    contigSeqs = IORobot.readContigsFromFile(folderName, contigFile)
    readSeqs = IORobot.readContigsFromFile(folderName, readFile)

    def toSeqList(pathList):
        # Same transform for both path lists; only the path list differs.
        return IORobot.pathListToSeqListTransform(pathList, contigSeqs, readSeqs, mummerPath, folderName)

    directSeqs = toSeqList(directPathList)
    indirectSeqs = toSeqList(indirectPathList)

    # Files are written only after both transforms have completed.
    outputs = ((directSeqs, "directPath.fasta"), (indirectSeqs, "indirectPath.fasta"))
    for seqs, outName in outputs:
        IORobot.writeSegOut(seqs, folderName, outName)
Ejemplo n.º 3
0
def readContigForAbunSplit(folderName, mummerLink, contigFilename,
                           readsetFilename, N1, contigReadGraph):

    json_data = open(folderName + "mapDummyToRealDic.json", 'r')
    mapDummyToRealDic = json.load(json_data)

    G = []
    G = graphLib.seqGraph(0)
    G.loadFromFile(folderName, "xResolvedGraph")

    gapContentLookUpDic = {}

    furtherGapList = []
    for i in range(N1):
        if len(G.graphNodesList[i].nodeIndexList) > 1:
            for j in range(len(G.graphNodesList[i].nodeIndexList) - 1):

                bk, fwd = G.graphNodesList[i].nodeIndexList[
                    j], G.graphNodesList[i].nodeIndexList[j + 1]

                key = str(bk) + "_" + str(fwd)

                if not key in gapContentLookUpDic:
                    furtherGapList.append([bk, fwd])

    with open(folderName + "furtherGapList.json", 'w') as f:
        json.dump(furtherGapList, f)

    furtherGapContentLookUpList = generateGapContentLookup(
        folderName, mummerLink, furtherGapList, contigReadGraph,
        contigFilename, readsetFilename, mapDummyToRealDic)

    for eachitem in furtherGapContentLookUpList:
        gapContentLookUpDic[str(eachitem[0]) + "_" + str(eachitem[1])] = [
            eachitem[2], eachitem[3], eachitem[4]
        ]
        print eachitem[2:4], len(eachitem[4])

    #segLookUp = IORobot.readContigsFromFile(folderName, "LC_n_Double.fasta")

    print "Final step: really hacking a file"
    os.system("cp " + folderName + contigFilename + "_Double.fasta " +
              folderName + "tmpWithDummy.fasta")
    contigList = IORobot.readContigsFromFile(folderName,
                                             contigFilename + "_Double.fasta")

    IORobot.extractGraphToContigs(G, folderName, mummerLink, "abunPre.fasta",
                                  "tmpWithDummy.fasta", gapContentLookUpDic,
                                  mapDummyToRealDic)

    if True:
        nonRedundantResolver.removeRedundantWithFile(folderName, mummerLink,
                                                     "abunPre", "abunMum",
                                                     "abun")
Ejemplo n.º 4
0
def readContigForAbunSplit(folderName, mummerLink, contigFilename, readsetFilename, N1, contigReadGraph):

    json_data = open(folderName + "mapDummyToRealDic.json", "r")
    mapDummyToRealDic = json.load(json_data)

    G = []
    G = graphLib.seqGraph(0)
    G.loadFromFile(folderName, "xResolvedGraph")

    gapContentLookUpDic = {}

    furtherGapList = []
    for i in range(N1):
        if len(G.graphNodesList[i].nodeIndexList) > 1:
            for j in range(len(G.graphNodesList[i].nodeIndexList) - 1):

                bk, fwd = G.graphNodesList[i].nodeIndexList[j], G.graphNodesList[i].nodeIndexList[j + 1]

                key = str(bk) + "_" + str(fwd)

                if not key in gapContentLookUpDic:
                    furtherGapList.append([bk, fwd])

    with open(folderName + "furtherGapList.json", "w") as f:
        json.dump(furtherGapList, f)

    furtherGapContentLookUpList = generateGapContentLookup(
        folderName, mummerLink, furtherGapList, contigReadGraph, contigFilename, readsetFilename, mapDummyToRealDic
    )

    for eachitem in furtherGapContentLookUpList:
        gapContentLookUpDic[str(eachitem[0]) + "_" + str(eachitem[1])] = [eachitem[2], eachitem[3], eachitem[4]]
        print eachitem[2:4], len(eachitem[4])

    # segLookUp = IORobot.readContigsFromFile(folderName, "LC_n_Double.fasta")

    print "Final step: really hacking a file"
    os.system("cp " + folderName + contigFilename + "_Double.fasta " + folderName + "tmpWithDummy.fasta")
    contigList = IORobot.readContigsFromFile(folderName, contigFilename + "_Double.fasta")

    IORobot.extractGraphToContigs(
        G, folderName, mummerLink, "abunPre.fasta", "tmpWithDummy.fasta", gapContentLookUpDic, mapDummyToRealDic
    )

    if True:
        nonRedundantResolver.removeRedundantWithFile(folderName, mummerLink, "abunPre", "abunMum", "abun")
Ejemplo n.º 5
0
def mapStrangePairs():
	folderName = "Apr10Test/"
	
	json_data = open(folderName + "furtherGapList.json", 'r')
	furtherGapList = json.load(json_data)
	
	segLookUp = IORobot.readContigsFromFile(folderName, "LC_n_Double.fasta")
	
	f = open(folderName + "wrongCondense.fasta", 'w')
	ctr = 0
	for eachitem in furtherGapList:
		beforeI, afterI = eachitem[0], eachitem[1]
		
		f.write(">Segkk"+str(ctr)+"\n")
		f.write(segLookUp[beforeI]+"\n")
		ctr = ctr + 1 
		
		f.write(">Segkk"+str(ctr)+"\n")
		f.write(segLookUp[afterI]+"\n")
		ctr = ctr + 1 
	
	f.close()
	
	if False:
		alignerRobot.useMummerAlign("/usr/bin/", folderName, "wrongCondenseDebug", "reference.fasta", "wrongCondense.fasta")
	
	dataList = alignerRobot.extractMumData(folderName, "wrongCondenseDebugOut")
	
	dataList.sort(key = itemgetter(-1))
	
	mappedDic = {}
	
	for key, items in groupby(dataList, itemgetter(-1)):
		print "key", key
		matchLen = -1
		
		for eachitem in items: 
			if eachitem[-4] > matchLen:
				mappedDic[key]  = eachitem
				matchLen = eachitem[-4]
				
	
	for eachitem in mappedDic:
		print "results : ", eachitem, mappedDic[eachitem]
Ejemplo n.º 6
0
def abunSplitWithXResolve(folderName, mummerLink, myCountDic, contigReadGraph, contigFilename, readsetFilename):
    N1 = len(myCountDic) * 2
    print "N1", N1

    # Debug
    G = graphLib.seqGraph(0)
    G.loadFromFile(folderName, contigReadGraph)

    adj = [[] for i in range(N1)]

    for i in range(N1):
        adj[i] = abunGraphLib.findAllReachable(i, N1, G)

    Gnew = graphLib.seqGraph(N1)

    for i in range(N1):
        for j in adj[i]:
            Gnew.insertEdge(i, j, 1)

    Gnew.reportEdge()
    # End Debug

    if False:
        json_data = open(folderName + "phaseRepeat.txt", "r")
        repeatPairs = json.load(json_data)
        repeatPairs = obtainNonEmpty(repeatPairs)

        biResolvedCombineList = []
        for eachitem in repeatPairs:
            inList, outList = eachitem[0], eachitem[1]
            resolvedList = determindMatch(inList, outList, myCountDic, folderName, contigReadGraph, N1)

            biResolvedCombineList += resolvedList

        ### Xnode repeatResolution
        xResolvedList, mapDummyToRealDic = xNodeResolving(folderName, contigReadGraph)

        ### Combine resolution
        resolvedList = xResolvedList + biResolvedCombineList
        resolvedList = abunHouseKeeper.getDistinct(resolvedList)
        print "resolvedList, len(resolvedList),len(xResolvedList), len(biResolvedCombineList) ", resolvedList, len(
            resolvedList
        ), len(xResolvedList), len(biResolvedCombineList)

        with open(folderName + "resolvedList.json", "w") as f:
            json.dump(resolvedList, f)

        with open(folderName + "mapDummyToRealDic.json", "w") as f:
            json.dump(mapDummyToRealDic, f)

    if False:
        json_data = open(folderName + "resolvedList.json", "r")
        resolvedList = json.load(json_data)

        json_data = open(folderName + "mapDummyToRealDic.json", "r")
        mapDummyToRealDic = json.load(json_data)

        gapContentLookUpList = []
        gapContentLookUpList = generateGapContentLookup(
            folderName, mummerLink, resolvedList, contigReadGraph, contigFilename, readsetFilename, mapDummyToRealDic
        )
        gapContentLookUpDic = {}
        gapContentLookUpList.sort()

        for eachitem in gapContentLookUpList:
            gapContentLookUpDic[str(eachitem[0]) + "_" + str(eachitem[1])] = [eachitem[2], eachitem[3], eachitem[4]]
            print eachitem[2:4], len(eachitem[4])

        with open(folderName + "gapContentLookUpDic.json", "w") as f:
            json.dump(gapContentLookUpDic, f)

    if False:
        json_data = open(folderName + "resolvedList.json", "r")
        resolvedList = json.load(json_data)

        json_data = open(folderName + "mapDummyToRealDic.json", "r")
        mapDummyToRealDic = json.load(json_data)

        G = graphLib.seqGraph(N1 + len(mapDummyToRealDic))
        addEdges(G, resolvedList)
        G.condense()

        G.saveToFile(folderName, "xResolvedGraph")

    if False:
        json_data = open(folderName + "mapDummyToRealDic.json", "r")
        mapDummyToRealDic = json.load(json_data)

        G = graphLib.seqGraph(0)
        G.loadFromFile(folderName, "xResolvedGraph")

        json_data = open(folderName + "gapContentLookUpDic.json", "r")
        gapContentLookUpDic = json.load(json_data)

        print "Final step: really hacking a file"
        os.system("cp " + folderName + contigFilename + "_Double.fasta " + folderName + "tmpWithDummy.fasta")
        contigList = IORobot.readContigsFromFile(folderName, contigFilename + "_Double.fasta")

        f = open(folderName + "tmpWithDummy.fasta", "a")
        for i in range(len(mapDummyToRealDic)):
            id = mapDummyToRealDic[str(i)]
            f.write(">SegDum" + str(i) + "\n")
            f.write(contigList[id] + "\n")
        f.close()

        IORobot.extractGraphToContigs(
            G, folderName, mummerLink, "abun.fasta", "tmpWithDummy.fasta", gapContentLookUpDic, mapDummyToRealDic
        )
Ejemplo n.º 7
0
def abunSplitWithXResolve(folderName, mummerLink, myCountDic, contigReadGraph,
                          contigFilename, readsetFilename):
    N1 = len(myCountDic) * 2
    print "N1", N1

    # Debug
    G = graphLib.seqGraph(0)
    G.loadFromFile(folderName, contigReadGraph)

    adj = [[] for i in range(N1)]

    for i in range(N1):
        adj[i] = abunGraphLib.findAllReachable(i, N1, G)

    Gnew = graphLib.seqGraph(N1)

    for i in range(N1):
        for j in adj[i]:
            Gnew.insertEdge(i, j, 1)

    Gnew.reportEdge()
    # End Debug

    if False:
        json_data = open(folderName + "phaseRepeat.txt", 'r')
        repeatPairs = json.load(json_data)
        repeatPairs = obtainNonEmpty(repeatPairs)

        biResolvedCombineList = []
        for eachitem in repeatPairs:
            inList, outList = eachitem[0], eachitem[1]
            resolvedList = determindMatch(inList, outList, myCountDic,
                                          folderName, contigReadGraph, N1)

            biResolvedCombineList += resolvedList

        ### Xnode repeatResolution
        xResolvedList, mapDummyToRealDic = xNodeResolving(
            folderName, contigReadGraph)

        ### Combine resolution
        resolvedList = xResolvedList + biResolvedCombineList
        resolvedList = abunHouseKeeper.getDistinct(resolvedList)
        print "resolvedList, len(resolvedList),len(xResolvedList), len(biResolvedCombineList) ", resolvedList, len(
            resolvedList), len(xResolvedList), len(biResolvedCombineList)

        with open(folderName + "resolvedList.json", 'w') as f:
            json.dump(resolvedList, f)

        with open(folderName + "mapDummyToRealDic.json", 'w') as f:
            json.dump(mapDummyToRealDic, f)

    if False:
        json_data = open(folderName + "resolvedList.json", 'r')
        resolvedList = json.load(json_data)

        json_data = open(folderName + "mapDummyToRealDic.json", 'r')
        mapDummyToRealDic = json.load(json_data)

        gapContentLookUpList = []
        gapContentLookUpList = generateGapContentLookup(
            folderName, mummerLink, resolvedList, contigReadGraph,
            contigFilename, readsetFilename, mapDummyToRealDic)
        gapContentLookUpDic = {}
        gapContentLookUpList.sort()

        for eachitem in gapContentLookUpList:
            gapContentLookUpDic[str(eachitem[0]) + "_" + str(eachitem[1])] = [
                eachitem[2], eachitem[3], eachitem[4]
            ]
            print eachitem[2:4], len(eachitem[4])

        with open(folderName + "gapContentLookUpDic.json", 'w') as f:
            json.dump(gapContentLookUpDic, f)

    if False:
        json_data = open(folderName + "resolvedList.json", 'r')
        resolvedList = json.load(json_data)

        json_data = open(folderName + "mapDummyToRealDic.json", 'r')
        mapDummyToRealDic = json.load(json_data)

        G = graphLib.seqGraph(N1 + len(mapDummyToRealDic))
        addEdges(G, resolvedList)
        G.condense()

        G.saveToFile(folderName, "xResolvedGraph")

    if False:
        json_data = open(folderName + "mapDummyToRealDic.json", 'r')
        mapDummyToRealDic = json.load(json_data)

        G = graphLib.seqGraph(0)
        G.loadFromFile(folderName, "xResolvedGraph")

        json_data = open(folderName + "gapContentLookUpDic.json", 'r')
        gapContentLookUpDic = json.load(json_data)

        print "Final step: really hacking a file"
        os.system("cp " + folderName + contigFilename + "_Double.fasta " +
                  folderName + "tmpWithDummy.fasta")
        contigList = IORobot.readContigsFromFile(
            folderName, contigFilename + "_Double.fasta")

        f = open(folderName + "tmpWithDummy.fasta", 'a')
        for i in range(len(mapDummyToRealDic)):
            id = mapDummyToRealDic[str(i)]
            f.write(">SegDum" + str(i) + "\n")
            f.write(contigList[id] + "\n")
        f.close()

        IORobot.extractGraphToContigs(G, folderName, mummerLink, "abun.fasta",
                                      "tmpWithDummy.fasta",
                                      gapContentLookUpDic, mapDummyToRealDic)