Exemple #1
0
def mainFlow(folderName, mummerLink, inputContigsFilename, inputReadsFilename, useSpades, noAlignment, scoreListOutputName, outputContigsFilename, mScoreThres, conScoreThres, setCoverOption):
    """Drive the whole pipeline from the input contig/read files to the merge step.

    The stages run strictly in this order: header transformation of both
    input files into "tmp"-prefixed working files, read-to-contig extraction,
    connecting/spanning read detection, contig graph construction, potential
    merge discovery, coverage assignment, confidence scoring, dummy-node
    assignment and finally ``rankingLib.rankAndMerge``.

    Args:
        folderName: forwarded to every library call that takes a folder
            (presumably the working directory prefix -- confirm).
        mummerLink: forwarded to ``alignmentLib.extractRead2Contig``.
        inputContigsFilename: name of the contigs file; the working copy is
            named ``"tmp" + inputContigsFilename``.
        inputReadsFilename: name of the reads file; the working copy is
            named ``"tmp" + inputReadsFilename``.
        useSpades: when it compares equal to ``True`` coverage is assigned
            with ``cTestLib.assignCoverageFromHeader``; otherwise with
            ``cTestLib.assignCoverageFromDataList``.
        noAlignment: forwarded to the header transformation and to the
            read-to-contig extraction.
        scoreListOutputName: forwarded to ``rankingLib.rankAndMerge``.
        outputContigsFilename: forwarded to ``rankingLib.rankAndMerge``.
        mScoreThres: forwarded to ``rankingLib.rankAndMerge``.
        conScoreThres: forwarded to ``rankingLib.rankAndMerge``.
        setCoverOption: forwarded to
            ``setCoverLib.extendConnectivityFromReads``.

    Returns:
        None; the function has no return statement.
    """
    # Fixed settings handed to the read-to-contig extraction.
    alignmentHeader = "readToContigHeader"
    numSplits = 20
    numParallel = 20

    tmpContigsName = "tmp" + inputContigsFilename
    tmpReadsName = "tmp" + inputReadsFilename

    contigNameMap = houseKeeperLib.transformFileHeaders(
        folderName, inputContigsFilename, tmpContigsName, noAlignment
    )
    # The returned read-name mapping is never used afterwards; the call is
    # kept because it presumably produces the "tmp" reads file used below.
    houseKeeperLib.transformFileHeaders(
        folderName, inputReadsFilename, tmpReadsName, noAlignment
    )

    alignmentRecords = alignmentLib.extractRead2Contig(
        folderName,
        mummerLink,
        tmpReadsName,
        tmpContigsName,
        numSplits,
        alignmentHeader,
        numParallel,
        noAlignment,
    )

    connectingReads = readConnectivityLib.findConnectingReadsList(alignmentRecords)
    spanResult = readConnectivityLib.findSpanReadsList(connectingReads)
    spanReads, gapReadLookUp = spanResult

    contigNames = alignmentLib.findContigsNames(folderName, tmpContigsName)

    contigGraph = graphLib.formContigGraph(spanReads, contigNames)
    condenseCandidates = contigGraph.findCondenseCandidatesList()
    edgeMultiplicity = contigGraph.findEdgeMultiplicity()

    candidateMerges = setCoverLib.extendConnectivityFromReads(
        condenseCandidates,
        connectingReads,
        contigNames,
        setCoverOption,
        edgeMultiplicity,
    )

    # Explicit comparison kept on purpose: only values equal to True pick
    # the header-based coverage path.
    if useSpades == True:
        cTestLib.assignCoverageFromHeader(
            contigGraph, folderName, tmpContigsName, contigNameMap
        )
    else:
        cTestLib.assignCoverageFromDataList(
            contigGraph, alignmentRecords, folderName, tmpContigsName
        )

    mergeScores = cTestLib.calculateConfidenceScore(contigGraph, candidateMerges)

    dummyResult = setCoverLib.assignRepeatedNodesToDummy(mergeScores)
    scoresWithDummy, dummyController = dummyResult

    rankingLib.rankAndMerge(
        folderName,
        contigNames,
        tmpContigsName,
        tmpReadsName,
        scoresWithDummy,
        gapReadLookUp,
        mScoreThres,
        conScoreThres,
        scoreListOutputName,
        outputContigsFilename,
        dummyController,
    )
Exemple #2
0
    def test_rankAndMerge(self):
        """Check the sequence rankAndMerge writes for two contigs and one read.

        Two contigs ("AAACCC", "CCCTTTT") and one read ("CCCGGGCCC") are
        written as FASTA files, a single score entry and a single gap-read
        lookup entry are supplied, and the first record of "improved.fasta"
        must equal "AAACCC" + "GGG" + "CCCTTTT".
        """
        leftContig = SeqRecord(Seq("AAACCC", generic_dna), id="ContigDummyL", description="")
        rightContig = SeqRecord(Seq("CCCTTTT", generic_dna), id="ContigDummyR", description="")
        contigsPath = self.folderName + self.contigsFilename
        SeqIO.write([leftContig, rightContig], contigsPath, "fasta")

        bridgingRead = SeqRecord(Seq("CCCGGGCCC", generic_dna), id="ReadDummy", description="")
        readsPath = self.folderName + self.readsFilename
        SeqIO.write([bridgingRead], readsPath, "fasta")

        mergeScores = [['ContigDummyL_R~ContigDummyR_L~1', 1, 1]]

        # One record per contig, both naming 'ReadDummy'; the field layout
        # presumably follows the alignment records used by the library -- confirm.
        leftHit = [4, 6, 1, 3, 3, 3, 100.0, 6, 9, 'ContigDummyL', 'ReadDummy']
        rightHit = [1, 3, 7, 9, 3, 3, 100.0, 7, 9, 'ContigDummyR', 'ReadDummy']
        gapReadLookUp = {'ContigDummyL_p-ContigDummyR_p': [[leftHit, rightHit]]}

        contigNames = alignmentLib.findContigsNames(self.folderName, self.contigsFilename)

        # Each contig maps to itself in both directions (two separate dicts).
        dummyController = setCoverLib.dummyNodeController()
        dummyController.realToDummyDic = dict([('ContigDummyL', 'ContigDummyL'), ('ContigDummyR', 'ContigDummyR')])
        dummyController.dummyToRealDic = dict([('ContigDummyL', 'ContigDummyL'), ('ContigDummyR', 'ContigDummyR')])

        rankingLib.rankAndMerge(
            self.folderName,
            contigNames,
            self.contigsFilename,
            self.readsFilename,
            mergeScores,
            gapReadLookUp,
            1,
            0.95,
            "scoreList.json",
            "improved.fasta",
            dummyController,
        )

        mergedRecords = list(SeqIO.parse(self.folderName + "improved.fasta", "fasta"))
        firstSequence = str(mergedRecords[0].seq)
        expectedSequence = "AAACCC" + "GGG" + "CCCTTTT"
        assert expectedSequence == firstSequence
Exemple #3
0
def mainFlow(folderName, mummerLink, inputContigsFilename, inputReadsFilename,
             useSpades, noAlignment, scoreListOutputName,
             outputContigsFilename, mScoreThres, conScoreThres,
             setCoverOption):
    """Run every pipeline stage in sequence, ending with rankAndMerge.

    Stages, in call order:
      1. ``houseKeeperLib.transformFileHeaders`` for contigs, then reads
         (working files are named with a "tmp" prefix).
      2. ``alignmentLib.extractRead2Contig``.
      3. ``readConnectivityLib.findConnectingReadsList`` and
         ``findSpanReadsList``.
      4. ``alignmentLib.findContigsNames`` and ``graphLib.formContigGraph``.
      5. ``setCoverLib.extendConnectivityFromReads``.
      6. Coverage assignment through ``cTestLib`` (path chosen by
         ``useSpades``), then ``cTestLib.calculateConfidenceScore``.
      7. ``setCoverLib.assignRepeatedNodesToDummy`` and
         ``rankingLib.rankAndMerge``.

    ``useSpades`` selects ``assignCoverageFromHeader`` only when it compares
    equal to ``True``; any other value selects ``assignCoverageFromDataList``.
    ``mummerLink``, ``noAlignment``, ``setCoverOption``, the two thresholds
    and the two output names are passed through unchanged to the library
    calls that take them. Nothing is returned.
    """
    # Constants for the read-to-contig extraction step.
    headerLabel = "readToContigHeader"
    splitCount = 20
    workerCount = 20

    workContigs = "tmp" + inputContigsFilename
    workReads = "tmp" + inputReadsFilename

    sourceContigNames = houseKeeperLib.transformFileHeaders(folderName, inputContigsFilename, workContigs, noAlignment)
    # Return value for the reads file is not consumed anywhere below.
    houseKeeperLib.transformFileHeaders(folderName, inputReadsFilename, workReads, noAlignment)

    readToContigData = alignmentLib.extractRead2Contig(
        folderName, mummerLink, workReads, workContigs,
        splitCount, headerLabel, workerCount, noAlignment)

    linkingReads = readConnectivityLib.findConnectingReadsList(readToContigData)
    spanningReads, gapLookUp = readConnectivityLib.findSpanReadsList(linkingReads)

    namesOfContigs = alignmentLib.findContigsNames(folderName, workContigs)

    graph = graphLib.formContigGraph(spanningReads, namesOfContigs)
    candidatesToCondense = graph.findCondenseCandidatesList()
    multiplicityByEdge = graph.findEdgeMultiplicity()

    mergeCandidates = setCoverLib.extendConnectivityFromReads(
        candidatesToCondense, linkingReads, namesOfContigs,
        setCoverOption, multiplicityByEdge)

    # Equality with True (not plain truthiness) decides the coverage source.
    if useSpades == True:
        cTestLib.assignCoverageFromHeader(graph, folderName, workContigs, sourceContigNames)
    else:
        cTestLib.assignCoverageFromDataList(graph, readToContigData, folderName, workContigs)

    scoredMerges = cTestLib.calculateConfidenceScore(graph, mergeCandidates)

    scoredWithDummy, dummyRobot = setCoverLib.assignRepeatedNodesToDummy(scoredMerges)

    rankingLib.rankAndMerge(
        folderName, namesOfContigs, workContigs, workReads,
        scoredWithDummy, gapLookUp, mScoreThres, conScoreThres,
        scoreListOutputName, outputContigsFilename, dummyRobot)
Exemple #4
0
    def test_rankAndMerge(self):
        """Exercise rankingLib.rankAndMerge on a two-contig, one-read fixture.

        The fixture files are written with SeqIO under ``self.folderName``;
        after the call, the first sequence in "improved.fasta" is compared
        with the concatenation "AAACCC" + "GGG" + "CCCTTTT".
        """
        # Fixture: contigs file with the left and right contig, in that order.
        fixtureContigs = [
            SeqRecord(Seq("AAACCC", generic_dna), id="ContigDummyL", description=""),
            SeqRecord(Seq("CCCTTTT", generic_dna), id="ContigDummyR", description=""),
        ]
        SeqIO.write(fixtureContigs, self.folderName + self.contigsFilename, "fasta")

        # Fixture: reads file with a single read.
        fixtureReads = [SeqRecord(Seq("CCCGGGCCC", generic_dna), id="ReadDummy", description="")]
        SeqIO.write(fixtureReads, self.folderName + self.readsFilename, "fasta")

        scores = [["ContigDummyL_R~ContigDummyR_L~1", 1, 1]]

        # Record pair stored under the L->R key; the meaning of the numeric
        # fields is not visible here -- check against the alignment library.
        recordForLeft = [4, 6, 1, 3, 3, 3, 100.0, 6, 9, "ContigDummyL", "ReadDummy"]
        recordForRight = [1, 3, 7, 9, 3, 3, 100.0, 7, 9, "ContigDummyR", "ReadDummy"]
        lookUp = {}
        lookUp["ContigDummyL_p-ContigDummyR_p"] = [[recordForLeft, recordForRight]]

        names = alignmentLib.findContigsNames(self.folderName, self.contigsFilename)

        robot = setCoverLib.dummyNodeController()
        # Identity mappings, built as two independent dict objects.
        robot.realToDummyDic = {}
        robot.realToDummyDic["ContigDummyL"] = "ContigDummyL"
        robot.realToDummyDic["ContigDummyR"] = "ContigDummyR"
        robot.dummyToRealDic = {}
        robot.dummyToRealDic["ContigDummyL"] = "ContigDummyL"
        robot.dummyToRealDic["ContigDummyR"] = "ContigDummyR"

        rankingLib.rankAndMerge(
            self.folderName, names, self.contigsFilename, self.readsFilename,
            scores, lookUp, 1, 0.95, "scoreList.json", "improved.fasta", robot,
        )

        wanted = "AAACCC" + "GGG" + "CCCTTTT"
        produced = list(SeqIO.parse(self.folderName + "improved.fasta", "fasta"))
        mergedSequence = str(produced[0].seq)
        assert wanted == mergedSequence