Ejemplo n.º 1
0
def mainFlow(folderName, mummerLink, inputContigsFilename, inputReadsFilename,
             useSpades, noAlignment, scoreListOutputName, outputContigsFilename,
             mScoreThres, conScoreThres, setCoverOption):
    """Run every pipeline stage in sequence, ending with rankingLib.rankAndMerge.

    The stages are, in order: header transformation of both input files,
    read-to-contig extraction, connecting/spanning read discovery, contig
    graph construction, merge-candidate extension, coverage assignment,
    confidence scoring, dummy-node assignment and finally rank-and-merge.

    Args:
        folderName: Forwarded to every stage that takes a folder argument.
        mummerLink: Forwarded to alignmentLib.extractRead2Contig.
        inputContigsFilename: Contig file name; the working name is this
            value prefixed with "tmp".
        inputReadsFilename: Read file name; the working name is this value
            prefixed with "tmp".
        useSpades: When this compares equal to True, coverage is assigned
            with cTestLib.assignCoverageFromHeader; otherwise with
            cTestLib.assignCoverageFromDataList.
        noAlignment: Forwarded to the header transformation and to
            alignmentLib.extractRead2Contig.
        scoreListOutputName: Forwarded to rankingLib.rankAndMerge.
        outputContigsFilename: Forwarded to rankingLib.rankAndMerge.
        mScoreThres: Forwarded to rankingLib.rankAndMerge.
        conScoreThres: Forwarded to rankingLib.rankAndMerge.
        setCoverOption: Forwarded to setCoverLib.extendConnectivityFromReads.

    Returns:
        None. The result of rankingLib.rankAndMerge is not returned.
    """
    # Fixed settings handed to the extraction step.
    extractionHeader = "readToContigHeader"
    splitCount = 20
    parallelCount = 20

    # Working file names: caller-supplied names with a "tmp" prefix.
    tmpContigsName = "tmp" + inputContigsFilename
    tmpReadsName = "tmp" + inputReadsFilename

    contigNameMap = houseKeeperLib.transformFileHeaders(
        folderName, inputContigsFilename, tmpContigsName, noAlignment)
    # The value returned for the reads file is bound but not read again below.
    readNameMap = houseKeeperLib.transformFileHeaders(
        folderName, inputReadsFilename, tmpReadsName, noAlignment)

    alignmentRecords = alignmentLib.extractRead2Contig(
        folderName, mummerLink, tmpReadsName, tmpContigsName,
        splitCount, extractionHeader, parallelCount, noAlignment)

    connectingReads = readConnectivityLib.findConnectingReadsList(alignmentRecords)
    spanResult = readConnectivityLib.findSpanReadsList(connectingReads)
    spanningReads, gapReadLookup = spanResult

    contigNames = alignmentLib.findContigsNames(folderName, tmpContigsName)

    contigGraph = graphLib.formContigGraph(spanningReads, contigNames)
    condenseCandidates = contigGraph.findCondenseCandidatesList()
    edgeMultiplicity = contigGraph.findEdgeMultiplicity()

    candidateMerges = setCoverLib.extendConnectivityFromReads(
        condenseCandidates, connectingReads, contigNames,
        setCoverOption, edgeMultiplicity)

    # Explicit comparison kept on purpose: only values equal to True take
    # the header-based path; every other value falls through to the else.
    if useSpades == True:
        cTestLib.assignCoverageFromHeader(
            contigGraph, folderName, tmpContigsName, contigNameMap)
    else:
        cTestLib.assignCoverageFromDataList(
            contigGraph, alignmentRecords, folderName, tmpContigsName)

    confidenceScores = cTestLib.calculateConfidenceScore(contigGraph, candidateMerges)

    dummyResult = setCoverLib.assignRepeatedNodesToDummy(confidenceScores)
    scoresWithDummy, dummyNodeData = dummyResult

    rankingLib.rankAndMerge(
        folderName, contigNames, tmpContigsName, tmpReadsName,
        scoresWithDummy, gapReadLookup, mScoreThres, conScoreThres,
        scoreListOutputName, outputContigsFilename, dummyNodeData)
Ejemplo n.º 2
0
def mainFlow(folderName, mummerLink, inputContigsFilename, inputReadsFilename,
             useSpades, noAlignment, scoreListOutputName,
             outputContigsFilename, mScoreThres, conScoreThres,
             setCoverOption):
    """Chain the library stages from input files through to rankAndMerge.

    Both input file names are given a "tmp" prefix and passed through
    houseKeeperLib.transformFileHeaders. Reads are then related to contigs
    with alignmentLib.extractRead2Contig, a contig graph is formed from
    the spanning reads, candidate merges are extended and scored, and the
    scored list is handed to rankingLib.rankAndMerge.

    Parameters other than ``useSpades`` are forwarded unchanged to the
    library calls below. ``useSpades`` selects the coverage source: a value
    comparing equal to True uses cTestLib.assignCoverageFromHeader, any
    other value uses cTestLib.assignCoverageFromDataList.

    The function has no return statement, so it returns None.
    """
    headerTag = "readToContigHeader"
    nSplit = 20
    nParallel = 20

    workContigs = "tmp" + inputContigsFilename
    workReads = "tmp" + inputReadsFilename

    # Stage 1: header transformation for contigs, then for reads.
    contigHeaderMap = houseKeeperLib.transformFileHeaders(
        folderName, inputContigsFilename, workContigs, noAlignment
    )
    # Bound for parity with the contig call; nothing below reads it.
    readHeaderMap = houseKeeperLib.transformFileHeaders(
        folderName, inputReadsFilename, workReads, noAlignment
    )

    # Stage 2: read-to-contig records and the reads derived from them.
    readContigData = alignmentLib.extractRead2Contig(
        folderName, mummerLink, workReads, workContigs,
        nSplit, headerTag, nParallel, noAlignment
    )
    linkReads = readConnectivityLib.findConnectingReadsList(readContigData)
    spanReads, gapLookup = readConnectivityLib.findSpanReadsList(linkReads)

    # Stage 3: contig graph and the quantities queried from it.
    allContigNames = alignmentLib.findContigsNames(folderName, workContigs)
    graph = graphLib.formContigGraph(spanReads, allContigNames)
    candidates = graph.findCondenseCandidatesList()
    multiplicity = graph.findEdgeMultiplicity()

    # Stage 4: extend candidates into potential merges.
    merges = setCoverLib.extendConnectivityFromReads(
        candidates, linkReads, allContigNames, setCoverOption, multiplicity
    )

    # Stage 5: coverage assignment. The equality test against True is
    # deliberate; truthy values that are not equal to True use the else path.
    if useSpades == True:
        cTestLib.assignCoverageFromHeader(
            graph, folderName, workContigs, contigHeaderMap
        )
    else:
        cTestLib.assignCoverageFromDataList(
            graph, readContigData, folderName, workContigs
        )

    # Stage 6: scoring, dummy-node assignment, then ranking and merging.
    scored = cTestLib.calculateConfidenceScore(graph, merges)
    scoredWithDummy, dummyRobot = setCoverLib.assignRepeatedNodesToDummy(scored)

    rankingLib.rankAndMerge(
        folderName, allContigNames, workContigs, workReads,
        scoredWithDummy, gapLookup, mScoreThres, conScoreThres,
        scoreListOutputName, outputContigsFilename, dummyRobot
    )
Ejemplo n.º 3
0
 def test_extractRead2Contig(self):
     """Check extractRead2Contig output after createSimpleFasta has run."""
     self.createSimpleFasta()

     # The single record the call is expected to return.
     expectedRecords = [
         [11, 80, 1, 70, 70, 70, 100.0, 100, 70, "ContigDummy", "ReadDummy"],
     ]

     # Positional arguments, gathered in the order the function takes them.
     callArgs = (
         self.folderName,
         self.mummerLink,
         self.readsFilename,
         self.contigsFilename,
         self.splitNum,
         self.outputHeader,
         self.parallelNum,
         self.debug,
     )
     observedRecords = alignmentLib.extractRead2Contig(*callArgs)

     assert observedRecords == expectedRecords
Ejemplo n.º 4
0
 def test_extractRead2Contig(self):
     """Run extractRead2Contig after createSimpleFasta and compare with the known record."""
     self.createSimpleFasta()
     result = alignmentLib.extractRead2Contig(
         self.folderName,
         self.mummerLink,
         self.readsFilename,
         self.contigsFilename,
         self.splitNum,
         self.outputHeader,
         self.parallelNum,
         self.debug,
     )
     # Exactly one row is expected back.
     onlyRow = [11, 80, 1, 70, 70, 70, 100.0, 100, 70, 'ContigDummy', 'ReadDummy']
     assert result == [onlyRow]