def callGenome(self, taskPrefix="", dependencies=None):
    """
    Schedule the variant caller over every genome segment, followed by
    the tasks that combine the per-segment output.

    taskPrefix   -- prefix combined with each task name through preJoin()
    dependencies -- dependencies given to the temporary directory task

    Returns the set of finishing tasks: the "denovo" concatIndexVcf task,
    the mergeRunStats task and, when self.params.isOutputCallableRegions
    is set, the "callableRegions" concatIndexBed task. The temporary
    directory removal task is scheduled (unless
    self.params.isRetainTempFiles is set) but is not part of the result.

    Raises Exception when the segment loop produced no tasks.
    """

    def label(name):
        # full task label for one step of this workflow
        return preJoin(taskPrefix, name)

    segmentDir = self.paths.getTmpSegmentDir()
    mkdirTask = self.addTask(label("makeTmpDir"),
                             getMkdirCmd() + [segmentDir],
                             dependencies=dependencies,
                             isForceLocal=True)

    # every segment is scheduled behind the directory creation task
    perSegmentFiles = TempSegmentFiles()
    perSegmentTasks = set()
    for segment in getNextGenomeSegment(self.params):
        perSegmentTasks |= callGenomeSegment(self,
                                             segment,
                                             perSegmentFiles,
                                             dependencies=mkdirTask)

    if not perSegmentTasks:
        raise Exception(
            "No genome regions to analyze. Possible target region parse error."
        )

    # single checkpoint task that all finishing tasks depend on
    checkpoint = self.addTask(label("completedAllGenomeSegments"),
                              dependencies=perSegmentTasks)

    denovoTask = self.concatIndexVcf(taskPrefix, checkpoint,
                                     perSegmentFiles.denovo,
                                     self.paths.getDenovoOutputPath(),
                                     "denovo")

    # merge segment stats
    statsTask = self.mergeRunStats(taskPrefix, checkpoint,
                                   perSegmentFiles.stats)

    finishTasks = set([denovoTask, statsTask])

    if self.params.isOutputCallableRegions:
        callableTask = self.concatIndexBed(taskPrefix, checkpoint,
                                           perSegmentFiles.callable,
                                           self.paths.getRegionOutputPath(),
                                           "callableRegions")
        finishTasks.add(callableTask)

    if not self.params.isRetainTempFiles:
        # the temporary directory is only removed after all finishing tasks
        removeCmd = getRmdirCmd() + [segmentDir]
        self.addTask(label("rmTmpDir"),
                     removeCmd,
                     dependencies=finishTasks,
                     isForceLocal=True)

    return finishTasks
 def getStrelkaGenomeSegmentGroupIterator(self,
                                          contigsExcludedFromGrouping=None):
     """
     Build the genome segment group iteration used for germline and
     somatic calling.

     The segments from getNextGenomeSegment(self.params) are passed through
     self.filterUncalledChromosomeSegments(), and the filtered iterator is
     handed to getGenomeSegmentGroups() together with
     contigsExcludedFromGrouping, which clumps small segments into groups.

     Returns whatever getGenomeSegmentGroups() returns.
     """
     allSegments = getNextGenomeSegment(self.params)
     calledSegments = self.filterUncalledChromosomeSegments(allSegments)
     return getGenomeSegmentGroups(calledSegments, contigsExcludedFromGrouping)
def callGenome(self, taskPrefix="", dependencies=None):
    """
    Schedule the counter over every genome segment, followed by the tasks
    that merge the per-segment output.

    taskPrefix   -- prefix combined with each task name through preJoin()
    dependencies -- dependencies given to the temporary directory task

    Returns the set of finishing tasks: the mergeSequenceErrorCounts task
    and, when self.params.isReportObservedIndels is set, the
    "observedIndels" concatIndexBed task. The temporary directory removal
    task is scheduled (unless self.params.isRetainTempFiles is set) but is
    not part of the result.

    Raises Exception when the segment loop produced no tasks.
    """

    scratchDir = self.paths.getTmpSegmentDir()
    makeDirTask = self.addTask(preJoin(taskPrefix, "makeTmpDir"),
                               getMkdirCmd() + [scratchDir],
                               dependencies=dependencies,
                               isForceLocal=True)

    # schedule each segment behind the directory creation task
    segFiles = TempSegmentFiles()
    countTasks = set()
    for segment in getNextGenomeSegment(self.params):
        countTasks |= callGenomeSegment(self,
                                        segment,
                                        segFiles,
                                        dependencies=makeDirTask)

    if not countTasks:
        raise Exception(
            "No genome regions to analyze. Possible target region parse error."
        )

    # checkpoint task covering all segments
    allSegmentsDone = self.addTask(
        preJoin(taskPrefix, "completedAllGenomeSegments"),
        dependencies=countTasks)

    # merge segment stats
    mergeCountsTask = mergeSequenceErrorCounts(self, taskPrefix,
                                               allSegmentsDone,
                                               segFiles.counts)
    finishTasks = set([mergeCountsTask])

    if self.params.isReportObservedIndels:
        indelBedTask = self.concatIndexBed(taskPrefix, allSegmentsDone,
                                           segFiles.observedIndelBed,
                                           self.paths.getObservedIndelBedPath(),
                                           "observedIndels")
        finishTasks.add(indelBedTask)

    if not self.params.isRetainTempFiles:
        # remove the temporary directory once every finishing task is done
        removeCmd = getRmdirCmd() + [scratchDir]
        self.addTask(preJoin(taskPrefix, "rmTmpDir"),
                     removeCmd,
                     dependencies=finishTasks,
                     isForceLocal=True)

    return finishTasks
def callGenome(self, taskPrefix="", dependencies=None):
    """
    Run the variant caller on all genome segments and finalize the gVCF.

    A temporary segment directory is created first, and every genome
    segment is scheduled through callGenomeSegment() with the directory
    task as its dependency. After all segment tasks, the per-segment gVCF
    files are combined into self.paths.getGvcfOutputPath() and indexed
    with tabix, and the temporary directory is removed.

    taskPrefix   -- prefix combined with each task name through preJoin()
    dependencies -- dependencies given to the temporary directory task

    Returns the set of finalize tasks. The "cleanTmpDir" task is scheduled
    but is not part of the returned set.

    Raises Exception when the segment loop produced no tasks.
    """

    # NOTE: paths are interpolated unquoted into shell command strings below
    tmpGraphDir = self.paths.getTmpSegmentDir()
    dirTask = self.addTask(preJoin(taskPrefix, "makeTmpDir"),
                           "mkdir -p " + tmpGraphDir,
                           dependencies=dependencies,
                           isForceLocal=True)

    graphTasks = set()

    segFiles = TempSegmentFiles()
    for gseg in getNextGenomeSegment(self.params):

        graphTasks |= callGenomeSegment(self,
                                        gseg,
                                        segFiles,
                                        dependencies=dirTask)

    # Fail explicitly here instead of relying on the assert in finishVcf(),
    # which is stripped when python runs with -O.
    if not graphTasks:
        raise Exception(
            "No genome regions to analyze. Possible target region parse error."
        )

    # create a checkpoint for all segments:
    completeSegmentsTask = self.addTask(preJoin(taskPrefix,
                                                "completedAllGenomeSegments"),
                                        dependencies=graphTasks)

    finishTasks = set()

    def finishVcf(tmpList, output, label):
        """
        Schedule a task combining the files in tmpList into output and
        indexing the result with tabix; the task is added to finishTasks.
        """
        assert (len(tmpList) > 0)

        if len(tmpList) > 1:
            # several segment files: concatenate them with bgcat
            catCmd = [self.params.bgcatBin, "-o", output]
            catCmd.extend(tmpList)
            catCmd = " ".join(catCmd)
        else:
            # a single segment file only needs to be moved into place
            catCmd = "mv -f %s %s" % (tmpList[0], output)

        catCmd += " && %s -p vcf %s" % (self.params.tabixBin, output)
        finishTasks.add(
            self.addTask(preJoin(taskPrefix, label + "_finalizeVCF"),
                         catCmd,
                         dependencies=completeSegmentsTask))

    finishVcf(segFiles.gvcf, self.paths.getGvcfOutputPath(), "gVCF")

    # remove the temporary directory once the finalize tasks are done
    self.addTask(preJoin(taskPrefix, "cleanTmpDir"),
                 "rm -rf " + tmpGraphDir,
                 dependencies=finishTasks,
                 isForceLocal=True)

    nextStepWait = finishTasks

    return nextStepWait
Exemple #5
0
 def getGenomeSegmentGroups(params):
     """
     Iterate segment groups and 'clump' small contigs together.

     params -- run parameters handed to getNextGenomeSegment()

     Yields lists of genome segments in iteration order. A segment joins
     the current group while the group's summed gseg.size() stays at or
     below minSegmentGroupSize; otherwise the current group is yielded and
     the segment starts a new group. A segment larger than the threshold
     therefore ends up in a group of its own.
     """
     minSegmentGroupSize = 200000
     group = []
     groupSize = 0
     # Iterate from the 'params' argument: this function defines no 'self'.
     for gseg in getNextGenomeSegment(params):
         segSize = gseg.size()
         if groupSize + segSize <= minSegmentGroupSize:
             group.append(gseg)
             groupSize += segSize
         else:
             if group:
                 yield group
             group = [gseg]
             groupSize = segSize
     # flush the last, possibly undersized, group
     if group:
         yield group
Exemple #6
0
 def getGenomeSegmentGroups(params):
     """
     Iterate segment groups and 'clump' small contigs together.

     params -- run parameters handed to getNextGenomeSegment()

     Yields lists of genome segments in iteration order. Segments are
     appended to the current group as long as the group's total
     gseg.size() does not exceed minSegmentGroupSize. A segment that would
     exceed it closes the current group and opens the next one.
     """
     minSegmentGroupSize = 200000
     group = []
     headSize = 0
     # Read segments from the 'params' argument; 'self' is not defined in
     # this function, so the argument is the only source available here.
     for gseg in getNextGenomeSegment(params):
         if headSize + gseg.size() <= minSegmentGroupSize:
             group.append(gseg)
             headSize += gseg.size()
             continue
         # group is full: emit it (if any) and start over with this segment
         if group:
             yield group
         group = [gseg]
         headSize = gseg.size()
     # emit whatever remains after the last segment
     if group:
         yield group
Exemple #7
0
def runLocusGraph(self,taskPrefix="",dependencies=None):
    """
    Schedule the tasks that build the full SV locus graph.

    One partial graph task is created per genome segment group, writing to
    a file from self.paths.getTmpGraphFile(). The list of partial graph
    files is written through listFileWorkflow() and used to merge them
    into self.paths.getGraphPath(). Three tasks depend on the merge: the
    graph check, the temporary directory removal (skipped when
    self.params.isRetainTempFiles is set) and the graph statistics.

    taskPrefix   -- prefix combined with each task name through preJoin()
    dependencies -- dependencies given to the temporary directory task

    Returns a set holding only the graph check task.

    Raises Exception when there are no segment groups.
    """

    def label(name):
        # full task label for one step of this workflow
        return preJoin(taskPrefix, name)

    alignStatsFile = self.paths.getStatsPath()
    mergedGraphFile = self.paths.getGraphPath()
    mergedGraphStatsFile = self.paths.getGraphStatsPath()
    scratchDir = self.paths.getTmpGraphDir()

    mkdirCmd = getMkdirCmd() + [scratchDir]
    mkdirTask = self.addTask(label("makeGraphTmpDir"),
                             mkdirCmd,
                             dependencies=dependencies,
                             isForceLocal=True)

    partialGraphFiles = []
    partialGraphTasks = set()

    for segGroup in getGenomeSegmentGroups(getNextGenomeSegment(self.params)):
        assert len(segGroup) > 0

        # a group is named after its first segment, or "first_to_last"
        firstId = segGroup[0].id
        if len(segGroup) > 1:
            groupId = firstId + "_to_" + segGroup[-1].id
        else:
            groupId = firstId

        partialGraphFile = self.paths.getTmpGraphFile(groupId)
        partialGraphFiles.append(partialGraphFile)

        cmd = [self.params.mantaGraphBin,
               "--output-file", partialGraphFile,
               "--align-stats", alignStatsFile]
        for segment in segGroup:
            cmd += ["--region", segment.bamRegion]
        cmd += ["--min-candidate-sv-size", self.params.minCandidateVariantSize,
                "--min-edge-observations", self.params.minEdgeObservations,
                "--ref", self.params.referenceFasta]
        for bam in self.params.normalBamList:
            cmd += ["--align-file", bam]
        for bam in self.params.tumorBamList:
            cmd += ["--tumor-align-file", bam]

        if self.params.isHighDepthFilter:
            cmd += ["--chrom-depth", self.paths.getChromDepth()]
        if self.params.isIgnoreAnomProperPair:
            cmd.append("--ignore-anom-proper-pair")
        if self.params.isRNA:
            cmd.append("--rna")

        partialGraphTasks.add(
            self.addTask(label("makeLocusGraph_" + groupId),
                         cmd,
                         dependencies=mkdirTask,
                         memMb=self.params.estimateMemMb))

    if not partialGraphFiles:
        raise Exception("No SV Locus graphs to create. Possible target region parse error.")

    # the merge reads its inputs from a list file written by a sub-workflow
    graphListFile = self.paths.getTmpGraphFileListPath()
    graphListTask = label("mergeLocusGraphInputList")
    self.addWorkflowTask(graphListTask,
                         listFileWorkflow(graphListFile, partialGraphFiles),
                         dependencies=partialGraphTasks)

    mergeCmd = [self.params.mantaGraphMergeBin,
                "--output-file", mergedGraphFile,
                "--graph-file-list", graphListFile]
    mergeTask = self.addTask(label("mergeLocusGraph"),
                             mergeCmd,
                             dependencies=graphListTask,
                             memMb=self.params.mergeMemMb)

    # A separate process rigorously checks the merged graph. The sv
    # candidate generators check as well, but this makes the check clearer.
    checkCmd = [self.params.mantaGraphCheckBin, "--graph-file", mergedGraphFile]
    checkTask = self.addTask(label("checkLocusGraph"),
                             checkCmd,
                             dependencies=mergeTask,
                             memMb=self.params.mergeMemMb)

    if not self.params.isRetainTempFiles:
        removeCmd = getRmdirCmd() + [scratchDir]
        self.addTask(label("removeTmpDir"), removeCmd, dependencies=mergeTask)

    statsCmd = [self.params.mantaGraphStatsBin, "--global",
                "--graph-file", mergedGraphFile,
                "--output-file", mergedGraphStatsFile]
    self.addTask(label("locusGraphStats"),
                 statsCmd,
                 dependencies=mergeTask,
                 memMb=self.params.mergeMemMb)

    # only the check task is reported to the caller
    return set([checkTask])
Exemple #8
0
def runLocusGraph(self,taskPrefix="",dependencies=None):
    """
    Schedule the tasks that build the full SV locus graph.

    One partial graph task is created per genome segment, writing into a
    temporary directory under self.params.workDir. The partial graphs are
    merged into self.paths.getGraphPath(). Three tasks depend on the
    merge: the graph check, the temporary directory removal and the graph
    statistics, whose output is redirected to
    self.paths.getGraphStatsPath().

    taskPrefix   -- prefix combined with each task name through preJoin()
    dependencies -- dependencies given to the temporary directory task

    Returns a set holding only the graph check task.

    Raises Exception when there are no genome segments.
    """

    def label(name):
        # full task label for one step of this workflow
        return preJoin(taskPrefix, name)

    alignStatsFile = self.paths.getStatsPath()
    mergedGraphFile = self.paths.getGraphPath()
    mergedGraphStatsFile = self.paths.getGraphStatsPath()

    # partial graphs live in "<workDir>/<graph file name>.tmpdir"
    graphName = os.path.basename(mergedGraphFile)
    scratchDir = os.path.join(self.params.workDir, graphName + ".tmpdir")
    mkdirTask = self.addTask(label("makeTmpDir"),
                             "mkdir -p " + scratchDir,
                             dependencies=dependencies,
                             isForceLocal=True)

    partialGraphFiles = []
    partialGraphTasks = set()

    for segment in getNextGenomeSegment(self.params):
        partialGraphFile = os.path.join(scratchDir,
                                        graphName + "." + segment.id + ".bin")
        partialGraphFiles.append(partialGraphFile)

        cmd = [self.params.mantaGraphBin,
               "--output-file", partialGraphFile,
               "--align-stats", alignStatsFile,
               "--region", segment.bamRegion,
               "--min-candidate-sv-size", self.params.minCandidateVariantSize,
               "--min-edge-observations", self.params.minEdgeObservations,
               "--ref", self.params.referenceFasta]
        for bam in self.params.normalBamList:
            cmd += ["--align-file", bam]
        for bam in self.params.tumorBamList:
            cmd += ["--tumor-align-file", bam]

        if self.params.isHighDepthFilter:
            cmd += ["--chrom-depth", self.paths.getChromDepth()]
        if self.params.isIgnoreAnomProperPair:
            cmd.append("--ignore-anom-proper-pair")
        if self.params.isRNA:
            cmd.append("--rna")

        partialGraphTasks.add(
            self.addTask(label("makeLocusGraph_" + segment.pyflowId),
                         cmd,
                         dependencies=mkdirTask,
                         memMb=self.params.estimateMemMb))

    if not partialGraphFiles:
        raise Exception("No SV Locus graphs to create. Possible target region parse error.")

    # every partial graph is named on the merge command line
    mergeCmd = [self.params.mantaGraphMergeBin, "--output-file", mergedGraphFile]
    for partialGraphFile in partialGraphFiles:
        mergeCmd += ["--graph-file", partialGraphFile]

    mergeTask = self.addTask(label("mergeLocusGraph"),
                             mergeCmd,
                             dependencies=partialGraphTasks,
                             memMb=self.params.mergeMemMb)

    # A separate process rigorously checks the merged graph. The sv
    # candidate generators check as well, but this makes the check clearer.
    checkCmd = [self.params.mantaGraphCheckBin, "--graph-file", mergedGraphFile]
    checkTask = self.addTask(label("checkLocusGraph"),
                             checkCmd,
                             dependencies=mergeTask,
                             memMb=self.params.mergeMemMb)

    removeCmd = "rm -rf " + scratchDir
    self.addTask(label("rmTmpDir"), removeCmd, dependencies=mergeTask)

    # string command: the statistics output is redirected by the shell
    statsCmd = " ".join([self.params.mantaGraphStatsBin,
                         "--global",
                         "--graph-file", mergedGraphFile,
                         ">|", mergedGraphStatsFile])
    self.addTask(label("locusGraphStats"),
                 statsCmd,
                 dependencies=mergeTask,
                 memMb=self.params.mergeMemMb)

    # only the check task is reported to the caller
    return set([checkTask])
def runLocusGraph(self,taskPrefix="",dependencies=None):
    """
    Schedule the tasks that build the full SV locus graph.

    Each genome segment gets its own partial graph task, with output files
    placed in a temporary directory under self.params.workDir. A merge
    task combines the partial graphs into self.paths.getGraphPath().
    The graph check, the temporary directory removal and the graph
    statistics task (output redirected to self.paths.getGraphStatsPath())
    all depend on the merge task.

    taskPrefix   -- prefix combined with each task name through preJoin()
    dependencies -- dependencies given to the temporary directory task

    Returns a set holding only the graph check task.

    Raises Exception when there are no genome segments.
    """

    params = self.params

    statsFile = self.paths.getStatsPath()
    graphFile = self.paths.getGraphPath()
    graphStatsFile = self.paths.getGraphStatsPath()

    # partial graphs live in "<workDir>/<graph file name>.tmpdir"
    graphBaseName = os.path.basename(graphFile)
    workSubDir = os.path.join(params.workDir, graphBaseName + ".tmpdir")
    makeDirTask = self.addTask(preJoin(taskPrefix, "makeTmpDir"),
                               "mkdir -p " + workSubDir,
                               dependencies=dependencies,
                               isForceLocal=True)

    def segmentGraphCmd(outputFile, bamRegion):
        # command line building the partial locus graph of one segment
        cmd = [params.mantaGraphBin]
        cmd += ["--output-file", outputFile]
        cmd += ["--align-stats", statsFile]
        cmd += ["--region", bamRegion]
        cmd += ["--min-candidate-sv-size", params.minCandidateVariantSize]
        cmd += ["--min-edge-observations", params.minEdgeObservations]
        cmd += ["--ref", params.referenceFasta]
        for normalBam in params.normalBamList:
            cmd += ["--align-file", normalBam]
        for tumorBam in params.tumorBamList:
            cmd += ["--tumor-align-file", tumorBam]
        if params.isHighDepthFilter:
            cmd += ["--chrom-depth", self.paths.getChromDepth()]
        if params.isIgnoreAnomProperPair:
            cmd.append("--ignore-anom-proper-pair")
        if params.isRNA:
            cmd.append("--rna")
        return cmd

    segmentGraphFiles = []
    segmentGraphTasks = set()

    for gseg in getNextGenomeSegment(params):
        segmentGraphFile = os.path.join(workSubDir,
                                        graphBaseName + "." + gseg.id + ".bin")
        segmentGraphFiles.append(segmentGraphFile)

        segmentCmd = segmentGraphCmd(segmentGraphFile, gseg.bamRegion)
        segmentLabel = preJoin(taskPrefix, "makeLocusGraph_" + gseg.pyflowId)
        segmentGraphTasks.add(self.addTask(segmentLabel,
                                           segmentCmd,
                                           dependencies=makeDirTask,
                                           memMb=params.estimateMemMb))

    if not segmentGraphFiles:
        raise Exception("No SV Locus graphs to create. Possible target region parse error.")

    # every partial graph is named on the merge command line
    mergeCmd = [params.mantaGraphMergeBin, "--output-file", graphFile]
    for segmentGraphFile in segmentGraphFiles:
        mergeCmd += ["--graph-file", segmentGraphFile]

    mergeTask = self.addTask(preJoin(taskPrefix, "mergeLocusGraph"),
                             mergeCmd,
                             dependencies=segmentGraphTasks,
                             memMb=params.mergeMemMb)

    # A separate process rigorously checks the merged graph. The sv
    # candidate generators check as well, but this makes the check clearer.
    checkCmd = [params.mantaGraphCheckBin, "--graph-file", graphFile]
    checkTask = self.addTask(preJoin(taskPrefix, "checkLocusGraph"),
                             checkCmd,
                             dependencies=mergeTask,
                             memMb=params.mergeMemMb)

    removeCmd = "rm -rf " + workSubDir
    self.addTask(preJoin(taskPrefix, "rmGraphTmp"),
                 removeCmd,
                 dependencies=mergeTask)

    # string command: the statistics output is redirected by the shell
    statsCmd = " ".join([params.mantaGraphStatsBin,
                         "--global",
                         "--graph-file", graphFile,
                         ">|", graphStatsFile])
    self.addTask(preJoin(taskPrefix, "locusGraphStats"),
                 statsCmd,
                 dependencies=mergeTask,
                 memMb=params.mergeMemMb)

    # only the check task is reported to the caller
    return set([checkTask])
Exemple #10
0
def runLocusGraph(self, taskPrefix="", dependencies=None):
    """
    Schedule the tasks that build the full SV locus graph.

    Genome segments are grouped by getGenomeSegmentGroups() and each group
    gets one partial graph task. The partial graph file names are written
    to a list file through listFileWorkflow(), and the merge task reads
    that list to produce self.paths.getGraphPath(). The graph check, the
    temporary directory removal (skipped when
    self.params.isRetainTempFiles is set) and the graph statistics task
    all depend on the merge task.

    taskPrefix   -- prefix combined with each task name through preJoin()
    dependencies -- dependencies given to the temporary directory task

    Returns a set holding only the graph check task.

    Raises Exception when there are no segment groups.
    """

    params = self.params
    paths = self.paths

    statsFile = paths.getStatsPath()
    graphFile = paths.getGraphPath()
    graphStatsFile = paths.getGraphStatsPath()
    workSubDir = paths.getTmpGraphDir()

    def groupGraphCmd(outputFile, segments):
        # command line building the partial locus graph of one segment group
        cmd = [params.mantaGraphBin]
        cmd += ["--output-file", outputFile]
        cmd += ["--align-stats", statsFile]
        for segment in segments:
            cmd += ["--region", segment.bamRegion]
        cmd += ["--min-candidate-sv-size", params.minCandidateVariantSize]
        cmd += ["--min-edge-observations", params.minEdgeObservations]
        cmd += ["--ref", params.referenceFasta]
        for normalBam in params.normalBamList:
            cmd += ["--align-file", normalBam]
        for tumorBam in params.tumorBamList:
            cmd += ["--tumor-align-file", tumorBam]
        if params.isHighDepthFilter:
            cmd += ["--chrom-depth", paths.getChromDepth()]
        if params.isIgnoreAnomProperPair:
            cmd.append("--ignore-anom-proper-pair")
        if params.isRNA:
            cmd.append("--rna")
        return cmd

    makeDirCmd = getMkdirCmd() + [workSubDir]
    makeDirTask = self.addTask(preJoin(taskPrefix, "makeGraphTmpDir"),
                               makeDirCmd,
                               dependencies=dependencies,
                               isForceLocal=True)

    groupGraphFiles = []
    groupGraphTasks = set()

    for group in getGenomeSegmentGroups(getNextGenomeSegment(params)):
        assert len(group) > 0

        # a group is named after its first segment, or "first_to_last"
        groupId = group[0].id
        if len(group) > 1:
            groupId = groupId + "_to_" + group[-1].id

        groupGraphFile = paths.getTmpGraphFile(groupId)
        groupGraphFiles.append(groupGraphFile)

        groupCmd = groupGraphCmd(groupGraphFile, group)
        groupLabel = preJoin(taskPrefix, "makeLocusGraph_" + groupId)
        groupGraphTasks.add(self.addTask(groupLabel,
                                         groupCmd,
                                         dependencies=makeDirTask,
                                         memMb=params.estimateMemMb))

    if not groupGraphFiles:
        raise Exception(
            "No SV Locus graphs to create. Possible target region parse error."
        )

    # the merge reads its inputs from a list file written by a sub-workflow
    listFile = paths.getTmpGraphFileListPath()
    listTask = preJoin(taskPrefix, "mergeLocusGraphInputList")
    self.addWorkflowTask(listTask,
                         listFileWorkflow(listFile, groupGraphFiles),
                         dependencies=groupGraphTasks)

    mergeCmd = [params.mantaGraphMergeBin,
                "--output-file", graphFile,
                "--graph-file-list", listFile]
    mergeTask = self.addTask(preJoin(taskPrefix, "mergeLocusGraph"),
                             mergeCmd,
                             dependencies=listTask,
                             memMb=params.mergeMemMb)

    # A separate process rigorously checks the merged graph. The sv
    # candidate generators check as well, but this makes the check clearer.
    checkCmd = [params.mantaGraphCheckBin, "--graph-file", graphFile]
    checkTask = self.addTask(preJoin(taskPrefix, "checkLocusGraph"),
                             checkCmd,
                             dependencies=mergeTask,
                             memMb=params.mergeMemMb)

    if not params.isRetainTempFiles:
        removeCmd = getRmdirCmd() + [workSubDir]
        self.addTask(preJoin(taskPrefix, "removeTmpDir"),
                     removeCmd,
                     dependencies=mergeTask)

    statsCmd = [params.mantaGraphStatsBin, "--global",
                "--graph-file", graphFile,
                "--output-file", graphStatsFile]
    self.addTask(preJoin(taskPrefix, "locusGraphStats"),
                 statsCmd,
                 dependencies=mergeTask,
                 memMb=params.mergeMemMb)

    # only the check task is reported to the caller
    return set([checkTask])