Example #1
0
def runStats(self, taskPrefix="", dependencies=None):
    """
    Schedule alignment stats generation for every input BAM, then merge and summarize.

    A temporary directory (the stats path plus ".tmpdir") is created first. One
    stats task is added for each entry of normalBamList + tumorBamList, each
    writing an xml file into that directory. The per-BAM files are then merged
    into the final stats path. Unless isRetainTempFiles is set, the temporary
    directory is removed after the merge. A summary of the merged stats is
    written to the stats summary path.

    taskPrefix -- prefix combined with each task label through preJoin
    dependencies -- dependencies of the initial directory creation task

    Returns a set holding only the merge task; the cleanup and summary tasks
    are scheduled but are not part of the returned set.
    """

    statsPath = self.paths.getStatsPath()
    statsFilename = os.path.basename(statsPath)

    tmpStatsDir = statsPath + ".tmpdir"

    makeTmpStatsDirCmd = getMkdirCmd() + [tmpStatsDir]
    dirTask = self.addTask(preJoin(taskPrefix, "makeTmpDir"),
                           makeTmpStatsDirCmd,
                           dependencies=dependencies,
                           isForceLocal=True)

    tmpStatsFiles = []
    statsTasks = set()

    # normal samples are indexed first, tumor samples follow
    for (bamIndex, bamPath) in enumerate(self.params.normalBamList +
                                         self.params.tumorBamList):
        indexStr = str(bamIndex).zfill(3)
        tmpStatsFile = os.path.join(tmpStatsDir,
                                    statsFilename + "." + indexStr + ".xml")
        tmpStatsFiles.append(tmpStatsFile)

        cmd = [self.params.mantaStatsBin]
        cmd.extend(["--output-file", tmpStatsFile])
        cmd.extend(["--align-file", bamPath])

        statsTasks.add(
            self.addTask(preJoin(taskPrefix, "generateStats_" + indexStr),
                         cmd,
                         dependencies=dirTask))

    cmd = [self.params.mantaMergeStatsBin]
    cmd.extend(["--output-file", statsPath])
    for tmpStatsFile in tmpStatsFiles:
        cmd.extend(["--align-stats-file", tmpStatsFile])

    mergeTask = self.addTask(preJoin(taskPrefix, "mergeStats"),
                             cmd,
                             dependencies=statsTasks,
                             isForceLocal=True)

    nextStepWait = set()
    nextStepWait.add(mergeTask)

    if not self.params.isRetainTempFiles:
        rmStatsTmpCmd = getRmdirCmd() + [tmpStatsDir]
        self.addTask(preJoin(taskPrefix, "rmTmpDir"),
                     rmStatsTmpCmd,
                     dependencies=mergeTask,
                     isForceLocal=True)

    # summarize stats in format that's easier for human review
    cmd = [self.params.mantaStatsSummaryBin]
    # The option name must not carry trailing whitespace: presumably each list
    # entry reaches the binary as its own argument, so a stray space would
    # become part of the option name.
    cmd.extend(["--align-stats", statsPath])
    cmd.extend(["--output-file", self.paths.getStatsSummaryPath()])
    self.addTask(preJoin(taskPrefix, "summarizeStats"),
                 cmd,
                 dependencies=mergeTask)

    return nextStepWait
def getSequenceErrorEstimates(self, taskPrefix="", dependencies=None):
    """
    Count sequence errors and use these to estimate error parameters

    A temporary error estimation directory is created, then one
    EstimateSequenceErrorWorkflowForSample sub-workflow is scheduled per entry
    of bamList. Unless isRetainTempFiles is set, the temporary directory is
    removed once all sample workflows are done.

    Returns the set of per-sample workflow tasks.
    """

    mkDirLabel = preJoin(taskPrefix, "makeTmpDir")
    errorEstimationTmpDir = self.paths.getTmpErrorEstimationDir()
    self.addTask(mkDirLabel,
                 getMkdirCmd() + [errorEstimationTmpDir],
                 dependencies=dependencies,
                 isForceLocal=True)

    intervals = getErrorEstimationIntervals(self.params)
    assert len(intervals) > 0

    # Counting and estimation are handled independently for each sample
    perSampleTasks = set()
    sampleCount = len(self.params.bamList)
    for sampleIndex in range(sampleCount):
        sampleLabel = preJoin(taskPrefix, "Sample" + str(sampleIndex).zfill(3))
        sampleWorkflow = EstimateSequenceErrorWorkflowForSample(
            self.params, self.paths, intervals, sampleIndex)
        # the directory task is referenced through its label here
        perSampleTasks.add(
            self.addWorkflowTask(sampleLabel,
                                 sampleWorkflow,
                                 dependencies=mkDirLabel))

    if not self.params.isRetainTempFiles:
        self.addTask(preJoin(taskPrefix, "removeTmpDir"),
                     getRmdirCmd() + [errorEstimationTmpDir],
                     dependencies=perSampleTasks,
                     isForceLocal=True)

    return perSampleTasks
def callGenome(self, taskPrefix="", dependencies=None):
    """
    run variant caller on all genome segments

    Per-segment tasks are scheduled after a temporary segment directory is
    created. Once every segment is complete, the denovo VCF is concatenated,
    the run stats are merged and, if isOutputCallableRegions is set, the
    callable regions BED is concatenated. Unless isRetainTempFiles is set, the
    temporary directory is removed after those finalization tasks.

    Returns the set of finalization tasks.
    Raises Exception if no segment tasks were scheduled.
    """

    segmentDir = self.paths.getTmpSegmentDir()
    mkdirLabel = preJoin(taskPrefix, "makeTmpDir")
    mkdirTask = self.addTask(mkdirLabel,
                             getMkdirCmd() + [segmentDir],
                             dependencies=dependencies,
                             isForceLocal=True)

    tempFiles = TempSegmentFiles()
    perSegmentTasks = set()
    for genomeSegment in getNextGenomeSegment(self.params):
        perSegmentTasks |= callGenomeSegment(self,
                                             genomeSegment,
                                             tempFiles,
                                             dependencies=mkdirTask)

    if not perSegmentTasks:
        raise Exception(
            "No genome regions to analyze. Possible target region parse error."
        )

    # single checkpoint task which depends on every segment task
    checkpointLabel = preJoin(taskPrefix, "completedAllGenomeSegments")
    checkpointTask = self.addTask(checkpointLabel,
                                  dependencies=perSegmentTasks)

    finalTasks = set()

    denovoTask = self.concatIndexVcf(taskPrefix, checkpointTask,
                                     tempFiles.denovo,
                                     self.paths.getDenovoOutputPath(),
                                     "denovo")
    finalTasks.add(denovoTask)

    # combine the stats reported by each segment
    statsTask = self.mergeRunStats(taskPrefix, checkpointTask, tempFiles.stats)
    finalTasks.add(statsTask)

    if self.params.isOutputCallableRegions:
        regionTask = self.concatIndexBed(taskPrefix, checkpointTask,
                                         tempFiles.callable,
                                         self.paths.getRegionOutputPath(),
                                         "callableRegions")
        finalTasks.add(regionTask)

    if not self.params.isRetainTempFiles:
        self.addTask(preJoin(taskPrefix, "rmTmpDir"),
                     getRmdirCmd() + [segmentDir],
                     dependencies=finalTasks,
                     isForceLocal=True)

    return finalTasks
def callGenome(self, taskPrefix="", dependencies=None):
    """
    run counter on all genome segments

    Per-segment tasks are scheduled after a temporary segment directory is
    created. Once every segment is complete, the sequence error counts are
    merged and, if isReportObservedIndels is set, the observed indel BED is
    concatenated. Unless isRetainTempFiles is set, the temporary directory is
    removed after those finalization tasks.

    Returns the set of finalization tasks.
    Raises Exception if no segment tasks were scheduled.
    """

    segmentDir = self.paths.getTmpSegmentDir()
    mkdirLabel = preJoin(taskPrefix, "makeTmpDir")
    mkdirTask = self.addTask(mkdirLabel,
                             getMkdirCmd() + [segmentDir],
                             dependencies=dependencies,
                             isForceLocal=True)

    tempFiles = TempSegmentFiles()
    perSegmentTasks = set()
    for genomeSegment in getNextGenomeSegment(self.params):
        perSegmentTasks |= callGenomeSegment(self,
                                             genomeSegment,
                                             tempFiles,
                                             dependencies=mkdirTask)

    if not perSegmentTasks:
        raise Exception(
            "No genome regions to analyze. Possible target region parse error."
        )

    # single checkpoint task which depends on every segment task
    checkpointLabel = preJoin(taskPrefix, "completedAllGenomeSegments")
    checkpointTask = self.addTask(checkpointLabel,
                                  dependencies=perSegmentTasks)

    finalTasks = set()

    # combine the counts reported by each segment
    countsTask = mergeSequenceErrorCounts(self, taskPrefix, checkpointTask,
                                          tempFiles.counts)
    finalTasks.add(countsTask)

    if self.params.isReportObservedIndels:
        indelBedTask = self.concatIndexBed(
            taskPrefix, checkpointTask, tempFiles.observedIndelBed,
            self.paths.getObservedIndelBedPath(), "observedIndels")
        finalTasks.add(indelBedTask)

    if not self.params.isRetainTempFiles:
        self.addTask(preJoin(taskPrefix, "rmTmpDir"),
                     getRmdirCmd() + [segmentDir],
                     dependencies=finalTasks,
                     isForceLocal=True)

    return finalTasks
def callGenome(self, taskPrefix="", dependencies=None):
    """
    run strelka on all genome segments

    Per-segment-group tasks are scheduled after a temporary segment directory
    is created. Once every group is complete, the SNV and indel VCFs are
    concatenated and the run stats are merged. Callable regions and realigned
    normal/tumor BAMs are finalized as well when the matching parameters are
    set. Unless isRetainTempFiles is set, the temporary directory is removed
    after those finalization tasks.

    Returns the set of finalization tasks.
    Raises Exception if no segment tasks were scheduled.
    """

    segmentDir = self.paths.getTmpSegmentDir()
    mkdirTask = self.addTask(preJoin(taskPrefix, "makeTmpDir"),
                             getMkdirCmd() + [segmentDir],
                             dependencies=dependencies,
                             isForceLocal=True)

    tempFiles = TempVariantCallingSegmentFiles()
    perSegmentTasks = set()
    for segmentGroup in self.getStrelkaGenomeSegmentGroupIterator():
        perSegmentTasks |= callGenomeSegment(self,
                                             segmentGroup,
                                             tempFiles,
                                             dependencies=mkdirTask)

    if not perSegmentTasks:
        raise Exception("No genome regions to analyze. Possible target region parse error.")

    # single checkpoint task which depends on every segment task
    checkpointTask = self.addTask(preJoin(taskPrefix, "completedAllGenomeSegments"),
                                  dependencies=perSegmentTasks)

    finalTasks = set()

    snvTask = self.concatIndexVcf(taskPrefix, checkpointTask, tempFiles.snv,
                                  self.paths.getSnvOutputPath(), "SNV")
    finalTasks.add(snvTask)

    indelTask = self.concatIndexVcf(taskPrefix, checkpointTask, tempFiles.indel,
                                    self.paths.getIndelOutputPath(), "Indel")
    finalTasks.add(indelTask)

    # combine the stats reported by each segment
    finalTasks.add(self.mergeRunStats(taskPrefix, checkpointTask, tempFiles.stats))

    if self.params.isOutputCallableRegions:
        regionTask = self.concatIndexBed(taskPrefix, checkpointTask, tempFiles.callable,
                                         self.paths.getRegionOutputPath(), "callableRegions")
        finalTasks.add(regionTask)

    if self.params.isWriteRealignedBam:
        # concatenate the realigned segment BAMs, normal sample first
        realignedInputs = (("normal", tempFiles.normalRealign),
                           ("tumor", tempFiles.tumorRealign))
        for (sampleLabel, segmentBams) in realignedInputs:
            realignedBamPath = self.paths.getRealignedBamPath(sampleLabel)
            catCmd = bamListCatCmd(self.params.samtoolsBin, segmentBams, realignedBamPath)
            catLabel = preJoin(taskPrefix, "realignedBamCat_" + sampleLabel)
            finalTasks.add(self.addTask(catLabel, catCmd, dependencies=checkpointTask))

    if not self.params.isRetainTempFiles:
        self.addTask(preJoin(taskPrefix, "removeTmpDir"),
                     getRmdirCmd() + [segmentDir],
                     dependencies=finalTasks,
                     isForceLocal=True)

    return finalTasks
Example #6
0
def runStats(self,taskPrefix="",dependencies=None) :
    """
    Schedule alignment stats generation for every input BAM, then merge and summarize.

    A temporary directory (the stats path plus ".tmpdir") is created first. One
    stats task is added for each entry of normalBamList + tumorBamList, each
    writing an xml file into that directory. The per-BAM files are then merged
    into the final stats path. Unless isRetainTempFiles is set, the temporary
    directory is removed after the merge. A summary of the merged stats is
    written to the stats summary path.

    taskPrefix -- prefix combined with each task label through preJoin
    dependencies -- dependencies of the initial directory creation task

    Returns a set holding only the merge task; the cleanup and summary tasks
    are scheduled but are not part of the returned set.
    """

    statsPath=self.paths.getStatsPath()
    statsFilename=os.path.basename(statsPath)

    tmpStatsDir=statsPath+".tmpdir"

    makeTmpStatsDirCmd = getMkdirCmd() + [tmpStatsDir]
    dirTask=self.addTask(preJoin(taskPrefix,"makeTmpDir"), makeTmpStatsDirCmd, dependencies=dependencies, isForceLocal=True)

    tmpStatsFiles = []
    statsTasks = set()

    # normal samples are indexed first, tumor samples follow
    for (bamIndex,bamPath) in enumerate(self.params.normalBamList + self.params.tumorBamList) :
        indexStr = str(bamIndex).zfill(3)
        tmpStatsFile = os.path.join(tmpStatsDir,statsFilename+"."+ indexStr +".xml")
        tmpStatsFiles.append(tmpStatsFile)

        cmd = [ self.params.mantaStatsBin ]
        cmd.extend(["--output-file",tmpStatsFile])
        cmd.extend(["--align-file",bamPath])

        statsTasks.add(self.addTask(preJoin(taskPrefix,"generateStats_"+indexStr),cmd,dependencies=dirTask))

    cmd = [ self.params.mantaMergeStatsBin ]
    cmd.extend(["--output-file",statsPath])
    for tmpStatsFile in tmpStatsFiles :
        cmd.extend(["--align-stats-file",tmpStatsFile])

    mergeTask = self.addTask(preJoin(taskPrefix,"mergeStats"),cmd,dependencies=statsTasks,isForceLocal=True)

    nextStepWait = set()
    nextStepWait.add(mergeTask)

    if not self.params.isRetainTempFiles :
        rmStatsTmpCmd = getRmdirCmd() + [tmpStatsDir]
        self.addTask(preJoin(taskPrefix,"rmTmpDir"),rmStatsTmpCmd,dependencies=mergeTask, isForceLocal=True)

    # summarize stats in format that's easier for human review
    cmd = [self.params.mantaStatsSummaryBin]
    # The option name must not carry trailing whitespace: presumably each list
    # entry reaches the binary as its own argument, so a stray space would
    # become part of the option name.
    cmd.extend(["--align-stats", statsPath])
    cmd.extend(["--output-file", self.paths.getStatsSummaryPath()])
    self.addTask(preJoin(taskPrefix,"summarizeStats"),cmd,dependencies=mergeTask)

    return nextStepWait
Example #7
0
def callGenome(self, taskPrefix="", dependencies=None):
    """
    run variant caller on all genome segments

    Per-segment-group tasks are scheduled after a temporary segment directory
    is created. Once every group is complete, the variants VCF and one gVCF
    per sample are concatenated and the run stats are merged. For the first
    sample, symbolic links with the legacy gVCF filename are added next to the
    gVCF output. The realigned BAM is finalized when isWriteRealignedBam is
    set. Unless isRetainTempFiles is set, the temporary directory is removed
    after the finalization tasks.

    Returns the set of finalization tasks; the legacy link task is not part of
    that set.
    Raises Exception if no segment tasks were scheduled.
    """

    segmentDir = self.paths.getTmpSegmentDir()
    mkdirTask = self.addTask(preJoin(taskPrefix, "makeTmpDir"),
                             getMkdirCmd() + [segmentDir],
                             dependencies=dependencies,
                             isForceLocal=True)

    sampleCount = len(self.params.bamList)
    tempFiles = TempVariantCallingSegmentFiles(sampleCount)

    perSegmentTasks = set()
    segmentGroups = self.getStrelkaGenomeSegmentGroupIterator(
        contigsExcludedFromGrouping=self.params.callContinuousVf)
    for segmentGroup in segmentGroups:
        perSegmentTasks |= callGenomeSegment(self,
                                             segmentGroup,
                                             tempFiles,
                                             dependencies=mkdirTask)

    if not perSegmentTasks:
        raise Exception(
            "No genome regions to analyze. Possible target region parse error."
        )

    # single checkpoint task which depends on every segment task
    checkpointTask = self.addTask(
        preJoin(taskPrefix, "completedAllGenomeSegments"),
        dependencies=perSegmentTasks)

    finalTasks = set()

    # concatenate the segment VCFs into the final outputs
    variantsTask = self.concatIndexVcf(taskPrefix, checkpointTask,
                                       tempFiles.variants,
                                       self.paths.getVariantsOutputPath(),
                                       "variants")
    finalTasks.add(variantsTask)

    for sampleIndex in range(sampleCount):
        gvcfTask = self.concatIndexVcf(
            taskPrefix, checkpointTask,
            tempFiles.sample[sampleIndex].gvcf,
            self.paths.getGvcfOutputPath(sampleIndex),
            gvcfSampleLabel(sampleIndex))
        finalTasks.add(gvcfTask)

        if sampleIndex != 0:
            continue

        # only the first sample gets the legacy-named links
        gvcfPath = self.paths.getGvcfOutputPath(sampleIndex)
        (gvcfDir, gvcfName) = os.path.split(gvcfPath)

        def legacyLinkCmd(extension):
            linkTarget = quote(gvcfName + extension)
            linkName = quote(self.paths.getGvcfLegacyFilename() + extension)
            return "ln -s " + linkTarget + " " + linkName

        # link the gVCF itself and its ".tbi" companion; run from the output directory
        linkCmd = " && ".join([legacyLinkCmd(""), legacyLinkCmd(".tbi")])
        self.addTask(preJoin(taskPrefix, "addLegacyOutputLink"),
                     linkCmd,
                     dependencies=gvcfTask,
                     isForceLocal=True,
                     cwd=gvcfDir)

    # combine the stats reported by each segment
    finalTasks.add(
        self.mergeRunStats(taskPrefix, checkpointTask, tempFiles.stats))

    if self.params.isWriteRealignedBam:
        bamLabel = "realigned"
        bamCatCmd = bamListCatCmd(self.params.samtoolsBin,
                                  tempFiles.bamRealign,
                                  self.paths.getRealignedBamPath())
        bamTask = self.addTask(preJoin(taskPrefix, bamLabel + "_finalizeBAM"),
                               bamCatCmd,
                               dependencies=checkpointTask)
        finalTasks.add(bamTask)

    if not self.params.isRetainTempFiles:
        self.addTask(preJoin(taskPrefix, "removeTmpDir"),
                     getRmdirCmd() + [segmentDir],
                     dependencies=finalTasks,
                     isForceLocal=True)

    return finalTasks
Example #8
0
def runLocusGraph(self, taskPrefix="", dependencies=None):
    """
    Create the full SV locus graph

    One graph task is scheduled per genome segment group, each writing to a
    temporary graph file. The list of temporary files is written by a
    sub-workflow, the files are merged into the final graph path, and the
    merged graph is then checked. Unless isRetainTempFiles is set, the
    temporary graph directory is removed after the merge. Global graph stats
    are written to the graph stats path.

    Returns a set holding only the graph check task.
    Raises Exception if no segment group produced a graph task.
    """

    params = self.params
    paths = self.paths

    statsPath = paths.getStatsPath()
    graphPath = paths.getGraphPath()
    graphStatsPath = paths.getGraphStatsPath()
    tmpGraphDir = paths.getTmpGraphDir()

    dirTask = self.addTask(preJoin(taskPrefix, "makeGraphTmpDir"),
                           getMkdirCmd() + [tmpGraphDir],
                           dependencies=dependencies,
                           isForceLocal=True)

    groupGraphFiles = []
    groupGraphTasks = set()

    for segmentGroup in getGenomeSegmentGroups(getNextGenomeSegment(params)):
        assert len(segmentGroup) > 0

        # the group id is built from the first (and, if different, last) segment id
        groupId = segmentGroup[0].id
        if len(segmentGroup) > 1:
            groupId = groupId + "_to_" + segmentGroup[-1].id

        groupGraphFile = paths.getTmpGraphFile(groupId)
        groupGraphFiles.append(groupGraphFile)

        cmd = [params.mantaGraphBin,
               "--output-file", groupGraphFile,
               "--align-stats", statsPath]
        for segment in segmentGroup:
            cmd += ["--region", segment.bamRegion]
        cmd += ["--min-candidate-sv-size", params.minCandidateVariantSize]
        cmd += ["--min-edge-observations", params.minEdgeObservations]
        cmd += ["--ref", params.referenceFasta]
        for normalBam in params.normalBamList:
            cmd += ["--align-file", normalBam]
        for tumorBam in params.tumorBamList:
            cmd += ["--tumor-align-file", tumorBam]

        if params.isHighDepthFilter:
            cmd += ["--chrom-depth", paths.getChromDepth()]
        if params.isIgnoreAnomProperPair:
            cmd.append("--ignore-anom-proper-pair")
        if params.isRNA:
            cmd.append("--rna")

        groupLabel = preJoin(taskPrefix, "makeLocusGraph_" + groupId)
        groupGraphTasks.add(self.addTask(groupLabel,
                                         cmd,
                                         dependencies=dirTask,
                                         memMb=params.estimateMemMb))

    if not groupGraphFiles:
        raise Exception("No SV Locus graphs to create. Possible target region parse error.")

    # the merge step reads its inputs from a list file instead of one option per file
    graphFileList = paths.getTmpGraphFileListPath()
    graphFileListLabel = preJoin(taskPrefix, "mergeLocusGraphInputList")
    self.addWorkflowTask(graphFileListLabel,
                         listFileWorkflow(graphFileList, groupGraphFiles),
                         dependencies=groupGraphTasks)

    mergeCmd = [params.mantaGraphMergeBin,
                "--output-file", graphPath,
                "--graph-file-list", graphFileList]
    mergeTask = self.addTask(preJoin(taskPrefix, "mergeLocusGraph"),
                             mergeCmd,
                             dependencies=graphFileListLabel,
                             memMb=params.mergeMemMb)

    # A separate process rigorously checks that the final graph is valid. The sv
    # candidate generators will check as well, but this makes the check much clearer.
    checkCmd = [params.mantaGraphCheckBin, "--graph-file", graphPath]
    checkTask = self.addTask(preJoin(taskPrefix, "checkLocusGraph"),
                             checkCmd,
                             dependencies=mergeTask,
                             memMb=params.mergeMemMb)

    if not params.isRetainTempFiles:
        self.addTask(preJoin(taskPrefix, "removeTmpDir"),
                     getRmdirCmd() + [tmpGraphDir],
                     dependencies=mergeTask)

    statsCmd = [params.mantaGraphStatsBin, "--global",
                "--graph-file", graphPath,
                "--output-file", graphStatsPath]
    self.addTask(preJoin(taskPrefix, "locusGraphStats"),
                 statsCmd,
                 dependencies=mergeTask,
                 memMb=params.mergeMemMb)

    return set([checkTask])
Example #9
0
def runLocusGraph(self, taskPrefix="", dependencies=None):
    """
    Create the full SV locus graph

    Schedules one graph-building task for each genome segment group, writes
    the list of resulting temporary graph files through a sub-workflow, merges
    them into the final graph path and checks the merged graph. The temporary
    graph directory is removed after the merge unless isRetainTempFiles is
    set, and global graph stats are written to the graph stats path.

    Returns a set holding only the graph check task.
    Raises Exception if no segment group produced a graph task.
    """

    alignStatsPath = self.paths.getStatsPath()
    mergedGraphPath = self.paths.getGraphPath()
    mergedGraphStatsPath = self.paths.getGraphStatsPath()
    graphTmpDir = self.paths.getTmpGraphDir()

    mkdirTask = self.addTask(preJoin(taskPrefix, "makeGraphTmpDir"),
                             getMkdirCmd() + [graphTmpDir],
                             dependencies=dependencies,
                             isForceLocal=True)

    partialGraphFiles = []
    partialGraphTasks = set()

    segmentGroups = getGenomeSegmentGroups(getNextGenomeSegment(self.params))
    for segments in segmentGroups:
        assert len(segments) > 0

        # label the group by its first segment id, plus the last one if there are several
        groupId = segments[0].id
        if len(segments) > 1:
            groupId = groupId + "_to_" + segments[-1].id

        partialGraphFile = self.paths.getTmpGraphFile(groupId)
        partialGraphFiles.append(partialGraphFile)

        graphCmd = [self.params.mantaGraphBin]
        graphCmd += ["--output-file", partialGraphFile]
        graphCmd += ["--align-stats", alignStatsPath]
        for segment in segments:
            graphCmd += ["--region", segment.bamRegion]
        graphCmd += [
            "--min-candidate-sv-size", self.params.minCandidateVariantSize,
            "--min-edge-observations", self.params.minEdgeObservations,
            "--ref", self.params.referenceFasta,
        ]
        for normalBamPath in self.params.normalBamList:
            graphCmd += ["--align-file", normalBamPath]
        for tumorBamPath in self.params.tumorBamList:
            graphCmd += ["--tumor-align-file", tumorBamPath]

        if self.params.isHighDepthFilter:
            graphCmd += ["--chrom-depth", self.paths.getChromDepth()]

        if self.params.isIgnoreAnomProperPair:
            graphCmd.append("--ignore-anom-proper-pair")
        if self.params.isRNA:
            graphCmd.append("--rna")

        partialGraphTask = self.addTask(
            preJoin(taskPrefix, "makeLocusGraph_" + groupId),
            graphCmd,
            dependencies=mkdirTask,
            memMb=self.params.estimateMemMb)
        partialGraphTasks.add(partialGraphTask)

    if not partialGraphFiles:
        raise Exception(
            "No SV Locus graphs to create. Possible target region parse error."
        )

    # the merge command takes a file listing its inputs
    listFilePath = self.paths.getTmpGraphFileListPath()
    listFileLabel = preJoin(taskPrefix, "mergeLocusGraphInputList")
    self.addWorkflowTask(listFileLabel,
                         listFileWorkflow(listFilePath, partialGraphFiles),
                         dependencies=partialGraphTasks)

    mergeCmd = [self.params.mantaGraphMergeBin]
    mergeCmd += ["--output-file", mergedGraphPath]
    mergeCmd += ["--graph-file-list", listFilePath]
    mergeTask = self.addTask(preJoin(taskPrefix, "mergeLocusGraph"),
                             mergeCmd,
                             dependencies=listFileLabel,
                             memMb=self.params.mergeMemMb)

    # The final graph is validated in its own process. The sv candidate
    # generators check it too, but a dedicated task makes the check explicit.
    checkCmd = [self.params.mantaGraphCheckBin]
    checkCmd += ["--graph-file", mergedGraphPath]
    checkTask = self.addTask(preJoin(taskPrefix, "checkLocusGraph"),
                             checkCmd,
                             dependencies=mergeTask,
                             memMb=self.params.mergeMemMb)

    if not self.params.isRetainTempFiles:
        self.addTask(preJoin(taskPrefix, "removeTmpDir"),
                     getRmdirCmd() + [graphTmpDir],
                     dependencies=mergeTask)

    graphStatsCmd = [self.params.mantaGraphStatsBin, "--global"]
    graphStatsCmd += ["--graph-file", mergedGraphPath]
    graphStatsCmd += ["--output-file", mergedGraphStatsPath]
    self.addTask(preJoin(taskPrefix, "locusGraphStats"),
                 graphStatsCmd,
                 dependencies=mergeTask,
                 memMb=self.params.mergeMemMb)

    waitTasks = set()
    waitTasks.add(checkTask)
    return waitTasks
Example #10
0
def runLocusGraph(self, taskPrefix="", dependencies=None):
    """
    Create the full SV locus graph

    Genome segments are clumped into groups, and one graph task is scheduled
    per group, each writing a ".bin" file into a temporary directory under
    workDir. The partial graphs are merged into the final graph path and the
    merged graph is checked. The temporary directory is removed after the
    merge, and global graph stats are written to the graph stats path.

    Returns a set holding only the graph check task.
    Raises Exception if no segment group produced a graph task.
    """

    statsPath = self.paths.getStatsPath()
    graphPath = self.paths.getGraphPath()
    graphStatsPath = self.paths.getGraphStatsPath()

    graphFilename = os.path.basename(graphPath)
    tmpGraphDir = os.path.join(self.params.workDir, graphFilename + ".tmpdir")

    mkdirTask = self.addTask(preJoin(taskPrefix, "makeTmpDir"),
                             getMkdirCmd() + [tmpGraphDir],
                             dependencies=dependencies,
                             isForceLocal=True)

    def iterSegmentGroups():
        """
        Iterate segment groups and 'clump' small contigs together
        """
        minSegmentGroupSize = 200000
        pending = []
        pendingSize = 0
        for segment in getNextGenomeSegment(self.params):
            segmentSize = segment.size()
            if pendingSize + segmentSize <= minSegmentGroupSize:
                pending.append(segment)
                pendingSize += segmentSize
                continue
            # this segment does not fit: emit what is pending and start a new group
            if pending:
                yield pending
            pending = [segment]
            pendingSize = segmentSize
        if pending:
            yield pending

    partialGraphFiles = []
    partialGraphTasks = set()

    for segmentGroup in iterSegmentGroups():
        assert len(segmentGroup) > 0

        # label the group by its first segment id, plus the last one if there are several
        groupId = segmentGroup[0].id
        if len(segmentGroup) > 1:
            groupId = groupId + "_to_" + segmentGroup[-1].id

        partialGraphFile = os.path.join(tmpGraphDir,
                                        graphFilename + "." + groupId + ".bin")
        partialGraphFiles.append(partialGraphFile)

        graphCmd = [self.params.mantaGraphBin,
                    "--output-file", partialGraphFile,
                    "--align-stats", statsPath]
        for segment in segmentGroup:
            graphCmd += ["--region", segment.bamRegion]
        graphCmd += ["--min-candidate-sv-size", self.params.minCandidateVariantSize]
        graphCmd += ["--min-edge-observations", self.params.minEdgeObservations]
        graphCmd += ["--ref", self.params.referenceFasta]
        for normalBamPath in self.params.normalBamList:
            graphCmd += ["--align-file", normalBamPath]
        for tumorBamPath in self.params.tumorBamList:
            graphCmd += ["--tumor-align-file", tumorBamPath]

        if self.params.isHighDepthFilter:
            graphCmd += ["--chrom-depth", self.paths.getChromDepth()]

        if self.params.isIgnoreAnomProperPair:
            graphCmd.append("--ignore-anom-proper-pair")
        if self.params.isRNA:
            graphCmd.append("--rna")

        groupLabel = preJoin(taskPrefix, "makeLocusGraph_" + groupId)
        partialGraphTasks.add(self.addTask(groupLabel,
                                           graphCmd,
                                           dependencies=mkdirTask,
                                           memMb=self.params.estimateMemMb))

    if not partialGraphFiles:
        raise Exception("No SV Locus graphs to create. Possible target region parse error.")

    # every partial graph is handed to the merge command as its own option
    mergeCmd = [self.params.mantaGraphMergeBin, "--output-file", graphPath]
    for partialGraphFile in partialGraphFiles:
        mergeCmd += ["--graph-file", partialGraphFile]

    mergeTask = self.addTask(preJoin(taskPrefix, "mergeLocusGraph"),
                             mergeCmd,
                             dependencies=partialGraphTasks,
                             memMb=self.params.mergeMemMb)

    # The final graph is validated in its own process. The sv candidate
    # generators check it too, but a dedicated task makes the check explicit.
    checkCmd = [self.params.mantaGraphCheckBin, "--graph-file", graphPath]
    checkTask = self.addTask(preJoin(taskPrefix, "checkLocusGraph"),
                             checkCmd,
                             dependencies=mergeTask,
                             memMb=self.params.mergeMemMb)

    # the temporary directory is always removed in this version
    self.addTask(preJoin(taskPrefix, "rmTmpDir"),
                 getRmdirCmd() + [tmpGraphDir],
                 dependencies=mergeTask)

    graphStatsCmd = [self.params.mantaGraphStatsBin, "--global",
                     "--graph-file", graphPath,
                     "--output-file", graphStatsPath]
    self.addTask(preJoin(taskPrefix, "locusGraphStats"),
                 graphStatsCmd,
                 dependencies=mergeTask,
                 memMb=self.params.mergeMemMb)

    waitTasks = set()
    waitTasks.add(checkTask)
    return waitTasks