Example #1
0
def mainFlow(folderName, mummerLink, inputContigsFilename, inputReadsFilename,
             useSpades, noAlignment, scoreListOutputName, outputContigsFilename,
             mScoreThres, conScoreThres, setCoverOption):
    """Run the library steps in sequence, ending with rankingLib.rankAndMerge.

    The steps, in order: transform the headers of the contigs and reads files,
    extract read-to-contig data, find connecting and spanning reads, build the
    contig graph, derive candidate merges, assign coverage, score the
    candidates, and hand everything to rankingLib.rankAndMerge.

    Args:
        folderName: Forwarded to every helper that takes a folder argument.
        mummerLink: Forwarded to alignmentLib.extractRead2Contig.
        inputContigsFilename: Contigs filename; "tmp" is prefixed to it to form
            the name used by the later steps.
        inputReadsFilename: Reads filename; handled the same way as the contigs
            filename.
        useSpades: When equal to True, coverage is assigned with
            cTestLib.assignCoverageFromHeader; otherwise with
            cTestLib.assignCoverageFromDataList.
        noAlignment: Forwarded to transformFileHeaders and extractRead2Contig.
        scoreListOutputName: Forwarded to rankingLib.rankAndMerge.
        outputContigsFilename: Forwarded to rankingLib.rankAndMerge.
        mScoreThres: Forwarded to rankingLib.rankAndMerge.
        conScoreThres: Forwarded to rankingLib.rankAndMerge.
        setCoverOption: Forwarded to setCoverLib.extendConnectivityFromReads.

    Returns:
        None. The result of rankingLib.rankAndMerge is not returned.
    """
    # Fixed settings forwarded to alignmentLib.extractRead2Contig.
    alignHeader = "readToContigHeader"
    numSplits = 20
    numParallel = 20

    # Names passed to transformFileHeaders alongside the input names
    # (presumably the files it writes -- confirm in houseKeeperLib).
    tmpContigs = "tmp" + inputContigsFilename
    tmpReads = "tmp" + inputReadsFilename

    contigNameMap = houseKeeperLib.transformFileHeaders(
        folderName, inputContigsFilename, tmpContigs, noAlignment
    )
    # The value returned for the reads file is not read anywhere below.
    _readNameMap = houseKeeperLib.transformFileHeaders(
        folderName, inputReadsFilename, tmpReads, noAlignment
    )

    alignmentData = alignmentLib.extractRead2Contig(
        folderName, mummerLink, tmpReads, tmpContigs,
        numSplits, alignHeader, numParallel, noAlignment
    )

    connectingReads = readConnectivityLib.findConnectingReadsList(alignmentData)
    spanReads, gapReadLookup = readConnectivityLib.findSpanReadsList(connectingReads)

    contigNames = alignmentLib.findContigsNames(folderName, tmpContigs)
    contigGraph = graphLib.formContigGraph(spanReads, contigNames)

    condenseCandidates = contigGraph.findCondenseCandidatesList()
    edgeMultiplicity = contigGraph.findEdgeMultiplicity()

    candidateMerges = setCoverLib.extendConnectivityFromReads(
        condenseCandidates, connectingReads, contigNames,
        setCoverOption, edgeMultiplicity
    )

    # Equality against True (not plain truthiness) decides the coverage source.
    if useSpades == True:
        cTestLib.assignCoverageFromHeader(
            contigGraph, folderName, tmpContigs, contigNameMap
        )
    else:
        cTestLib.assignCoverageFromDataList(
            contigGraph, alignmentData, folderName, tmpContigs
        )

    scoredMerges = cTestLib.calculateConfidenceScore(contigGraph, candidateMerges)
    scoredWithDummy, dummyNodeData = setCoverLib.assignRepeatedNodesToDummy(scoredMerges)

    rankingLib.rankAndMerge(
        folderName, contigNames, tmpContigs, tmpReads, scoredWithDummy,
        gapReadLookup, mScoreThres, conScoreThres, scoreListOutputName,
        outputContigsFilename, dummyNodeData
    )
Example #2
0
def mainFlow(
    folderName,
    mummerLink,
    inputContigsFilename,
    inputReadsFilename,
    useSpades,
    noAlignment,
    scoreListOutputName,
    outputContigsFilename,
    mScoreThres,
    conScoreThres,
    setCoverOption,
):
    """Chain the header, alignment, graph, scoring and ranking library calls.

    Every argument is forwarded to one or more helper calls. ``useSpades``
    alone affects control flow: when it equals True, coverage comes from
    cTestLib.assignCoverageFromHeader, otherwise from
    cTestLib.assignCoverageFromDataList. The function has no return
    statement, so it returns None.
    """
    # --- Stage 1: header transformation -----------------------------------
    # Both "tmp"-prefixed names are built before the first helper call.
    workContigs = "tmp" + inputContigsFilename
    workReads = "tmp" + inputReadsFilename

    sourceContigNames = houseKeeperLib.transformFileHeaders(
        folderName, inputContigsFilename, workContigs, noAlignment)
    # This second return value is never consulted later in the function.
    _sourceReadNames = houseKeeperLib.transformFileHeaders(
        folderName, inputReadsFilename, workReads, noAlignment)

    # --- Stage 2: read-to-contig extraction --------------------------------
    headerTag = "readToContigHeader"
    chunkCount = 20
    workerCount = 20
    read2Contig = alignmentLib.extractRead2Contig(
        folderName, mummerLink, workReads, workContigs,
        chunkCount, headerTag, workerCount, noAlignment)

    # --- Stage 3: connectivity and graph -----------------------------------
    linkingReads = readConnectivityLib.findConnectingReadsList(read2Contig)
    spanning, gapLookup = readConnectivityLib.findSpanReadsList(linkingReads)
    names = alignmentLib.findContigsNames(folderName, workContigs)
    graph = graphLib.formContigGraph(spanning, names)

    # --- Stage 4: candidate merges -----------------------------------------
    candidates = graph.findCondenseCandidatesList()
    multiplicity = graph.findEdgeMultiplicity()
    merges = setCoverLib.extendConnectivityFromReads(
        candidates, linkingReads, names, setCoverOption, multiplicity)

    # --- Stage 5: coverage and scoring -------------------------------------
    # The flag is compared for equality with True, not tested for truthiness.
    if useSpades == True:
        cTestLib.assignCoverageFromHeader(
            graph, folderName, workContigs, sourceContigNames)
    else:
        cTestLib.assignCoverageFromDataList(
            graph, read2Contig, folderName, workContigs)

    scores = cTestLib.calculateConfidenceScore(graph, merges)
    scoresWithDummy, dummyRobot = setCoverLib.assignRepeatedNodesToDummy(scores)

    # --- Stage 6: ranking and merging --------------------------------------
    rankingLib.rankAndMerge(
        folderName, names, workContigs, workReads, scoresWithDummy,
        gapLookup, mScoreThres, conScoreThres,
        scoreListOutputName, outputContigsFilename, dummyRobot)
Example #3
0
    def test_assignCoverageFromDataList(self):
        # Writes a two-contig FASTA file, builds a contig graph from one
        # spanning-read entry, runs cTestLib.assignCoverageFromDataList with a
        # single data row, then checks contigLength and readToContigCount on
        # both contig nodes.

        # Sequences are 6 and 7 bases long, matching the contigLength checks.
        contigRecords = [
            SeqRecord(Seq("AAACCC", generic_dna), id="ContigDummyL", description=""),
            SeqRecord(Seq("CCCTTTT", generic_dna), id="ContigDummyR", description=""),
        ]
        fastaPath = self.folderName + self.contigsFilename
        SeqIO.write(contigRecords, fastaPath, "fasta")

        # The only data row names ContigDummyL and ReadDummy.
        alignmentRows = [[1, 6, 1, 6, 6, 6, 100.0, 6, 6, 'ContigDummyL', 'ReadDummy']]
        spanningReads = [['ContigDummyL_p', 'ContigDummyR_p', 'ReadDummy']]
        contigNames = ['ContigDummyL', 'ContigDummyR']

        graph = graphLib.formContigGraph(spanningReads, contigNames)
        cTestLib.assignCoverageFromDataList(
            graph, alignmentRows, self.folderName, self.contigsFilename
        )

        nodes = graph.dicOfContigNodes
        assert nodes['ContigDummyL'].contigLength == 6
        assert nodes['ContigDummyR'].contigLength == 7
        assert nodes['ContigDummyL'].readToContigCount == 1
        assert nodes['ContigDummyR'].readToContigCount == 0
Example #4
0
    def test_calculateConfidenceScore(self):
        # Builds the same two-contig fixture as the coverage test, assigns
        # coverage from a single data row, then checks the first entry that
        # cTestLib.calculateConfidenceScore returns for one candidate merge.
        mergeLabel = 'ContigDummyL_R~ContigDummyR_L~1'
        candidates = [[[mergeLabel], False]]
        spanningReads = [['ContigDummyL_p', 'ContigDummyR_p', 'ReadDummy']]
        contigNames = ['ContigDummyL', 'ContigDummyR']

        alignmentRows = [[1, 6, 1, 6, 6, 6, 100.0, 6, 6, 'ContigDummyL', 'ReadDummy']]
        contigRecords = [
            SeqRecord(Seq("AAACCC", generic_dna), id="ContigDummyL", description=""),
            SeqRecord(Seq("CCCTTTT", generic_dna), id="ContigDummyR", description=""),
        ]
        SeqIO.write(contigRecords, self.folderName + self.contigsFilename, "fasta")

        graph = graphLib.formContigGraph(spanningReads, contigNames)
        cTestLib.assignCoverageFromDataList(
            graph, alignmentRows, self.folderName, self.contigsFilename
        )
        scored = cTestLib.calculateConfidenceScore(graph, candidates)

        # scored[0][0][0] is indexed at positions 0, 1 and 2 below.
        firstItem = scored[0][0][0]
        assert firstItem[0] == mergeLabel
        assert abs(firstItem[1] - 0.53846153846153844) < 0.01
        assert firstItem[2] == 1
        # Equality with False (not identity) is what this check verifies.
        assert scored[0][1] == False
Example #5
0
    def test_assignCoverageFromDataList(self):
        # Fixture: a FASTA file holding ContigDummyL ("AAACCC") and
        # ContigDummyR ("CCCTTTT"), one data row naming ContigDummyL, and one
        # spanning-read entry linking the two contigs. After
        # cTestLib.assignCoverageFromDataList runs, each node's contigLength
        # and readToContigCount are compared with fixed expected values.
        rows = [[1, 6, 1, 6, 6, 6, 100.0, 6, 6, "ContigDummyL", "ReadDummy"]]

        records = []
        for sequence, contigId in (("AAACCC", "ContigDummyL"), ("CCCTTTT", "ContigDummyR")):
            records.append(SeqRecord(Seq(sequence, generic_dna), id=contigId, description=""))
        SeqIO.write(records, self.folderName + self.contigsFilename, "fasta")

        spanning = [["ContigDummyL_p", "ContigDummyR_p", "ReadDummy"]]
        names = ["ContigDummyL", "ContigDummyR"]
        G = graphLib.formContigGraph(spanning, names)
        cTestLib.assignCoverageFromDataList(G, rows, self.folderName, self.contigsFilename)

        # (contig name, node attribute, expected value), checked in this order.
        expectations = (
            ("ContigDummyL", "contigLength", 6),
            ("ContigDummyR", "contigLength", 7),
            ("ContigDummyL", "readToContigCount", 1),
            ("ContigDummyR", "readToContigCount", 0),
        )
        for contigName, attrName, expected in expectations:
            assert getattr(G.dicOfContigNodes[contigName], attrName) == expected
Example #6
0
    def test_calculateConfidenceScore(self):
        # Scores a single candidate merge on a two-contig graph and checks the
        # label, score (within a 0.01 tolerance), third field and trailing flag
        # of the first returned entry.
        expectedLabel = "ContigDummyL_R~ContigDummyR_L~1"
        expectedScore = 0.53846153846153844
        tolerance = 0.01

        candidateList = [[[expectedLabel], False]]
        spanning = [["ContigDummyL_p", "ContigDummyR_p", "ReadDummy"]]
        names = ["ContigDummyL", "ContigDummyR"]

        rows = [[1, 6, 1, 6, 6, 6, 100.0, 6, 6, "ContigDummyL", "ReadDummy"]]
        leftRecord = SeqRecord(Seq("AAACCC", generic_dna), id="ContigDummyL", description="")
        rightRecord = SeqRecord(Seq("CCCTTTT", generic_dna), id="ContigDummyR", description="")
        SeqIO.write([leftRecord, rightRecord], self.folderName + self.contigsFilename, "fasta")

        G = graphLib.formContigGraph(spanning, names)
        cTestLib.assignCoverageFromDataList(G, rows, self.folderName, self.contigsFilename)
        result = cTestLib.calculateConfidenceScore(G, candidateList)

        # Only the first entry of the result is inspected.
        topEntry = result[0]
        topItem = topEntry[0][0]
        assert topItem[0] == expectedLabel
        assert abs(topItem[1] - expectedScore) < tolerance
        assert topItem[2] == 1
        # The flag is compared by equality with False, not by identity.
        assert topEntry[1] == False