Beispiel #1
0
def mainFlow(folderName, mummerLink, inputContigsFilename, inputReadsFilename, useSpades, noAlignment, scoreListOutputName, outputContigsFilename, mScoreThres, conScoreThres, setCoverOption):
    """Run the whole read-to-contig pipeline and finish with rankAndMerge.

    The function is a pure orchestrator; in call order it:
      1. calls houseKeeperLib.transformFileHeaders for the contigs file and
         for the reads file, passing "tmp"-prefixed names as the third
         argument (presumably the output file names -- confirm in
         houseKeeperLib);
      2. calls alignmentLib.extractRead2Contig on the "tmp" files;
      3. derives connecting reads and span reads via readConnectivityLib;
      4. builds a contig graph with graphLib.formContigGraph and asks it for
         condense candidates and edge multiplicities;
      5. calls setCoverLib.extendConnectivityFromReads;
      6. assigns coverage to the graph with one of two cTestLib helpers,
         chosen by ``useSpades``;
      7. calls cTestLib.calculateConfidenceScore, then
         setCoverLib.assignRepeatedNodesToDummy;
      8. calls rankingLib.rankAndMerge with the accumulated results.

    Args:
        folderName: forwarded to every helper that takes a folder.
        mummerLink: forwarded to alignmentLib.extractRead2Contig.
        inputContigsFilename: contigs file name; "tmp" is prepended to form
            the working name.
        inputReadsFilename: reads file name; "tmp" is prepended to form the
            working name.
        useSpades: when equal to True, coverage is assigned with
            assignCoverageFromHeader, otherwise with
            assignCoverageFromDataList.
        noAlignment: forwarded to transformFileHeaders and
            extractRead2Contig.
        scoreListOutputName: forwarded to rankingLib.rankAndMerge.
        outputContigsFilename: forwarded to rankingLib.rankAndMerge.
        mScoreThres: forwarded to rankingLib.rankAndMerge.
        conScoreThres: forwarded to rankingLib.rankAndMerge.
        setCoverOption: forwarded to setCoverLib.extendConnectivityFromReads.

    Returns:
        None; the function has no return statement.
    """
    outputHeader = "readToContigHeader"
    splitNum = 20
    parallelNum = 20
    contigsFilename = "tmp" + inputContigsFilename
    readsFilename = "tmp" + inputReadsFilename

    # Both header transforms are run; only the contigs result is used later
    # (by assignCoverageFromHeader). The reads result is never read.
    contigHeaderLookup = houseKeeperLib.transformFileHeaders(
        folderName, inputContigsFilename, contigsFilename, noAlignment
    )
    _readHeaderLookup = houseKeeperLib.transformFileHeaders(
        folderName, inputReadsFilename, readsFilename, noAlignment
    )

    alignmentRecords = alignmentLib.extractRead2Contig(
        folderName,
        mummerLink,
        readsFilename,
        contigsFilename,
        splitNum,
        outputHeader,
        parallelNum,
        noAlignment,
    )

    connectingReads = readConnectivityLib.findConnectingReadsList(alignmentRecords)
    spanResult = readConnectivityLib.findSpanReadsList(connectingReads)
    spanReads, gapReadLookup = spanResult

    contigNames = alignmentLib.findContigsNames(folderName, contigsFilename)

    contigGraph = graphLib.formContigGraph(spanReads, contigNames)
    condenseCandidates = contigGraph.findCondenseCandidatesList()
    edgeMultiplicity = contigGraph.findEdgeMultiplicity()

    potentialMerges = setCoverLib.extendConnectivityFromReads(
        condenseCandidates,
        connectingReads,
        contigNames,
        setCoverOption,
        edgeMultiplicity,
    )

    # The explicit comparison is kept on purpose: only values that compare
    # equal to True select the header-based coverage path.
    if useSpades == True:
        cTestLib.assignCoverageFromHeader(
            contigGraph, folderName, contigsFilename, contigHeaderLookup
        )
    else:
        cTestLib.assignCoverageFromDataList(
            contigGraph, alignmentRecords, folderName, contigsFilename
        )

    scoreStructs = cTestLib.calculateConfidenceScore(contigGraph, potentialMerges)
    dummyResult = setCoverLib.assignRepeatedNodesToDummy(scoreStructs)
    scoresWithDummy, dummyNodeData = dummyResult

    rankingLib.rankAndMerge(
        folderName,
        contigNames,
        contigsFilename,
        readsFilename,
        scoresWithDummy,
        gapReadLookup,
        mScoreThres,
        conScoreThres,
        scoreListOutputName,
        outputContigsFilename,
        dummyNodeData,
    )
Beispiel #2
0
    def test_findSpanReadsListEmbed(self):
        """Check findSpanReadsList for one read with L, B, B and R entries.

        Four connecting-read entries for the single read "ReadDummy" are fed
        to readConnectivityLib.findSpanReadsList. The test expects three span
        entries (L->B1, B1->B2, B2->R) and a gap lookup with one key per
        span, each mapping to the pair of alignment records involved.
        """
        contigDummyLRecord = [601, 800, 1, 200, 200, 200, 100.0, 800, 400, "ContigDummyL", "ReadDummy"]
        contigDummyBRecord1 = [1, 200, 101, 300, 200, 200, 100.0, 200, 400, "ContigDummyB1", "ReadDummy"]
        contigDummyBRecord2 = [1, 200, 350, 151, 200, 200, 100.0, 200, 400, "ContigDummyB2", "ReadDummy"]
        contigDummyRRecord = [1, 200, 201, 400, 200, 200, 100.0, 800, 400, "ContigDummyR", "ReadDummy"]

        connectingReadsList = [
            ["ReadDummy", "L", "ContigDummyL", contigDummyLRecord],
            ["ReadDummy", "B", "ContigDummyB1", contigDummyBRecord1],
            ["ReadDummy", "B", "ContigDummyB2", contigDummyBRecord2],
            ["ReadDummy", "R", "ContigDummyR", contigDummyRRecord],
        ]
        spanReadsList, contigGapReadLookUpDic = readConnectivityLib.findSpanReadsList(connectingReadsList)

        expectedSpanReadsList = [
            ["ContigDummyL_p", "ContigDummyB1_p", "ReadDummy"],
            ["ContigDummyB1_p", "ContigDummyB2_d", "ReadDummy"],
            ["ContigDummyB2_d", "ContigDummyR_p", "ReadDummy"],
        ]

        # list.sort() sorts in place and returns None, so comparing the
        # results of two .sort() calls is always None == None and can never
        # fail. sorted() returns the sorted list, giving an order-insensitive
        # comparison that actually checks the contents.
        assert sorted(spanReadsList) == sorted(expectedSpanReadsList)
        assert len(contigGapReadLookUpDic) == 3

        expectedGapReads = {
            "ContigDummyL_p-ContigDummyB1_p": [[contigDummyLRecord, contigDummyBRecord1]],
            "ContigDummyB1_p-ContigDummyB2_d": [[contigDummyBRecord1, contigDummyBRecord2]],
            "ContigDummyB2_d-ContigDummyR_p": [[contigDummyBRecord2, contigDummyRRecord]],
        }
        for gapKey, expectedRecords in expectedGapReads.items():
            assert sorted(contigGapReadLookUpDic[gapKey]) == sorted(expectedRecords), gapKey
Beispiel #3
0
 def test_findSpanReadsList(self):
     """Check findSpanReadsList for one read with a single L and R entry.

     Two connecting-read entries for "ReadDummy" are passed to
     readConnectivityLib.findSpanReadsList. The test expects exactly one span
     entry and one gap-lookup key mapping to the pair of alignment records.
     """
     contigDummyLRecord = [601, 800, 1, 200, 200, 200, 100.0, 800, 400, 'ContigDummyL', 'ReadDummy']
     contigDummyRRecord = [1, 200, 201, 400, 200, 200, 100.0, 800, 400, 'ContigDummyR', 'ReadDummy']
     connectingReadsList = [
         ['ReadDummy', 'L', 'ContigDummyL', contigDummyLRecord],
         ['ReadDummy', 'R', 'ContigDummyR', contigDummyRRecord],
     ]
     spanReadsList, contigGapReadLookUpDic = readConnectivityLib.findSpanReadsList(connectingReadsList)

     assert spanReadsList == [['ContigDummyL_p', 'ContigDummyR_p', 'ReadDummy']]
     assert len(contigGapReadLookUpDic) == 1

     # list.sort() returns None, so the former ".sort() == .sort()" check was
     # always None == None. sorted() returns the list, so the contents of the
     # lookup entry are really compared.
     expectedGapReads = [[contigDummyLRecord, contigDummyRRecord]]
     assert sorted(contigGapReadLookUpDic['ContigDummyL_p-ContigDummyR_p']) == sorted(expectedGapReads)
Beispiel #4
0
def mainFlow(folderName, mummerLink, inputContigsFilename, inputReadsFilename,
             useSpades, noAlignment, scoreListOutputName,
             outputContigsFilename, mScoreThres, conScoreThres,
             setCoverOption):
    """Orchestrate the pipeline from header transformation to rankAndMerge.

    Nothing is computed here directly; each stage is delegated to a library
    module and its result is threaded into the next stage:

      * houseKeeperLib.transformFileHeaders -- called once for contigs and
        once for reads, each time with a "tmp"-prefixed name as the third
        argument (presumably the destination file; confirm in the library).
      * alignmentLib.extractRead2Contig -- called on the "tmp" files.
      * readConnectivityLib.findConnectingReadsList / findSpanReadsList.
      * alignmentLib.findContigsNames and graphLib.formContigGraph, followed
        by the graph's findCondenseCandidatesList and findEdgeMultiplicity.
      * setCoverLib.extendConnectivityFromReads.
      * cTestLib.assignCoverageFromHeader when ``useSpades == True``, else
        cTestLib.assignCoverageFromDataList.
      * cTestLib.calculateConfidenceScore and
        setCoverLib.assignRepeatedNodesToDummy.
      * rankingLib.rankAndMerge, which receives the output names and both
        score thresholds.

    Args:
        folderName: passed to each helper that accepts a folder argument.
        mummerLink: passed to extractRead2Contig.
        inputContigsFilename: contigs file name; working name is "tmp" + it.
        inputReadsFilename: reads file name; working name is "tmp" + it.
        useSpades: selects the coverage-assignment helper (see above).
        noAlignment: passed to transformFileHeaders and extractRead2Contig.
        scoreListOutputName: passed to rankAndMerge.
        outputContigsFilename: passed to rankAndMerge.
        mScoreThres: passed to rankAndMerge.
        conScoreThres: passed to rankAndMerge.
        setCoverOption: passed to extendConnectivityFromReads.

    Returns:
        None (there is no return statement).
    """
    outputHeader = "readToContigHeader"
    splitNum = parallelNum = 20
    contigsFilename = "tmp" + inputContigsFilename
    readsFilename = "tmp" + inputReadsFilename

    # Header transformation runs for both inputs. The value returned for the
    # reads file is not used anywhere below.
    contigNameMap = houseKeeperLib.transformFileHeaders(folderName, inputContigsFilename, contigsFilename, noAlignment)
    _readNameMap = houseKeeperLib.transformFileHeaders(folderName, inputReadsFilename, readsFilename, noAlignment)

    read2ContigData = alignmentLib.extractRead2Contig(folderName, mummerLink, readsFilename, contigsFilename, splitNum, outputHeader, parallelNum, noAlignment)

    connectors = readConnectivityLib.findConnectingReadsList(read2ContigData)
    spanners, gapLookup = readConnectivityLib.findSpanReadsList(connectors)

    names = alignmentLib.findContigsNames(folderName, contigsFilename)

    graph = graphLib.formContigGraph(spanners, names)
    candidates = graph.findCondenseCandidatesList()
    multiplicities = graph.findEdgeMultiplicity()

    merges = setCoverLib.extendConnectivityFromReads(candidates, connectors, names, setCoverOption, multiplicities)

    # Explicit equality with True is retained so that only values comparing
    # equal to True take the header-based branch.
    if useSpades == True:
        cTestLib.assignCoverageFromHeader(graph, folderName, contigsFilename, contigNameMap)
    else:
        cTestLib.assignCoverageFromDataList(graph, read2ContigData, folderName, contigsFilename)

    scores = cTestLib.calculateConfidenceScore(graph, merges)
    scoresWithDummy, dummyRobot = setCoverLib.assignRepeatedNodesToDummy(scores)

    rankingLib.rankAndMerge(folderName, names, contigsFilename, readsFilename, scoresWithDummy, gapLookup, mScoreThres, conScoreThres, scoreListOutputName, outputContigsFilename, dummyRobot)
Beispiel #5
0
    def test_findSpanReadsList(self):
        """Check findSpanReadsList for a read connecting an L and an R contig.

        Builds two connecting-read entries for "ReadDummy", passes them to
        readConnectivityLib.findSpanReadsList, and expects one span entry
        plus one gap-lookup key whose value holds the two alignment records.
        """
        contigDummyLRecord = [601, 800, 1, 200, 200, 200, 100.0, 800, 400, "ContigDummyL", "ReadDummy"]
        contigDummyRRecord = [1, 200, 201, 400, 200, 200, 100.0, 800, 400, "ContigDummyR", "ReadDummy"]
        connectingReadsList = [
            ["ReadDummy", "L", "ContigDummyL", contigDummyLRecord],
            ["ReadDummy", "R", "ContigDummyR", contigDummyRRecord],
        ]
        spanReadsList, contigGapReadLookUpDic = readConnectivityLib.findSpanReadsList(connectingReadsList)

        assert spanReadsList == [["ContigDummyL_p", "ContigDummyR_p", "ReadDummy"]]
        assert len(contigGapReadLookUpDic) == 1

        # The previous check compared list.sort() results, which are both
        # None, so it passed regardless of content. sorted() returns the
        # sorted list and makes this a real, order-insensitive comparison.
        actualGapReads = contigGapReadLookUpDic["ContigDummyL_p-ContigDummyR_p"]
        expectedGapReads = [[contigDummyLRecord, contigDummyRRecord]]
        assert sorted(actualGapReads) == sorted(expectedGapReads)
Beispiel #6
0
    def test_findSpanReadsListEmbed(self):
        """Check findSpanReadsList when two 'B' entries sit between L and R.

        The single read "ReadDummy" has four connecting-read entries tagged
        'L', 'B', 'B', 'R'. The test expects readConnectivityLib.findSpanReadsList
        to return three span entries chaining the contigs together and a gap
        lookup with three keys, each mapping to the pair of alignment records
        for that link.
        """
        contigDummyLRecord = [601, 800, 1, 200, 200, 200, 100.0, 800, 400, 'ContigDummyL', 'ReadDummy']
        contigDummyBRecord1 = [1, 200, 101, 300, 200, 200, 100.0, 200, 400, 'ContigDummyB1', 'ReadDummy']
        contigDummyBRecord2 = [1, 200, 350, 151, 200, 200, 100.0, 200, 400, 'ContigDummyB2', 'ReadDummy']
        contigDummyRRecord = [1, 200, 201, 400, 200, 200, 100.0, 800, 400, 'ContigDummyR', 'ReadDummy']

        connectingReadsList = [
            ['ReadDummy', 'L', 'ContigDummyL', contigDummyLRecord],
            ['ReadDummy', 'B', 'ContigDummyB1', contigDummyBRecord1],
            ['ReadDummy', 'B', 'ContigDummyB2', contigDummyBRecord2],
            ['ReadDummy', 'R', 'ContigDummyR', contigDummyRRecord],
        ]
        spanReadsList, contigGapReadLookUpDic = readConnectivityLib.findSpanReadsList(connectingReadsList)

        expectedSpanReadsList = [
            ['ContigDummyL_p', 'ContigDummyB1_p', 'ReadDummy'],
            ['ContigDummyB1_p', 'ContigDummyB2_d', 'ReadDummy'],
            ['ContigDummyB2_d', 'ContigDummyR_p', 'ReadDummy'],
        ]

        # list.sort() mutates in place and returns None; "a.sort() == b.sort()"
        # therefore always evaluated None == None. sorted() yields the sorted
        # lists so the assertions below can actually fail on a mismatch.
        assert sorted(spanReadsList) == sorted(expectedSpanReadsList)
        assert len(contigGapReadLookUpDic) == 3

        assert sorted(contigGapReadLookUpDic['ContigDummyL_p-ContigDummyB1_p']) == sorted([[contigDummyLRecord, contigDummyBRecord1]])
        assert sorted(contigGapReadLookUpDic['ContigDummyB1_p-ContigDummyB2_d']) == sorted([[contigDummyBRecord1, contigDummyBRecord2]])
        assert sorted(contigGapReadLookUpDic['ContigDummyB2_d-ContigDummyR_p']) == sorted([[contigDummyBRecord2, contigDummyRRecord]])