def runStats(self, taskPrefix="", dependencies=None):
    """
    Schedule alignment stats generation for every input BAM, merge the
    per-BAM results into one stats file and summarize the merged file

    One "generateStats_NNN" task is added per entry of
    normalBamList + tumorBamList (NNN is the zero-padded list index). Each
    is given an output file inside a temporary directory named after the
    final stats path. That directory is removed after the merge unless
    self.params.isRetainTempFiles is set.

    @param taskPrefix: prefix combined with every task label via preJoin
    @param dependencies: passed as the dependencies of the first task
                         (creation of the temporary directory)

    @return: set holding only the value returned by addTask for the merge
             task; the cleanup and summary tasks are not included
    """
    statsPath = self.paths.getStatsPath()
    statsFilename = os.path.basename(statsPath)
    tmpStatsDir = statsPath + ".tmpdir"

    makeTmpStatsDirCmd = getMkdirCmd() + [tmpStatsDir]
    dirTask = self.addTask(preJoin(taskPrefix, "makeTmpDir"), makeTmpStatsDirCmd,
                           dependencies=dependencies, isForceLocal=True)

    tmpStatsFiles = []
    statsTasks = set()
    for (bamIndex, bamPath) in enumerate(self.params.normalBamList + self.params.tumorBamList):
        indexStr = str(bamIndex).zfill(3)
        tmpStatsFile = os.path.join(tmpStatsDir, statsFilename + "." + indexStr + ".xml")
        tmpStatsFiles.append(tmpStatsFile)

        cmd = [self.params.mantaStatsBin,
               "--output-file", tmpStatsFile,
               "--align-file", bamPath]
        statsTasks.add(self.addTask(preJoin(taskPrefix, "generateStats_" + indexStr),
                                    cmd, dependencies=dirTask))

    # merge takes every per-BAM file, in the same order the BAMs were listed
    cmd = [self.params.mantaMergeStatsBin, "--output-file", statsPath]
    for tmpStatsFile in tmpStatsFiles:
        cmd.extend(["--align-stats-file", tmpStatsFile])
    mergeTask = self.addTask(preJoin(taskPrefix, "mergeStats"), cmd,
                             dependencies=statsTasks, isForceLocal=True)

    if not self.params.isRetainTempFiles:
        rmStatsTmpCmd = getRmdirCmd() + [tmpStatsDir]
        self.addTask(preJoin(taskPrefix, "rmTmpDir"), rmStatsTmpCmd,
                     dependencies=mergeTask, isForceLocal=True)

    # summarize stats in format that's easier for human review
    #
    # The option name is written without surrounding whitespace: every
    # element of an argument list is a single argument, so a trailing space
    # would become part of the option name unless the task runner strips it.
    cmd = [self.params.mantaStatsSummaryBin,
           "--align-stats", statsPath,
           "--output-file", self.paths.getStatsSummaryPath()]
    self.addTask(preJoin(taskPrefix, "summarizeStats"), cmd, dependencies=mergeTask)

    return set([mergeTask])
def getSequenceErrorEstimates(self, taskPrefix="", dependencies=None):
    """
    Count sequence errors and use these to estimate error parameters

    A temporary directory task is scheduled first, then one
    EstimateSequenceErrorWorkflowForSample sub-workflow per entry of
    self.params.bamList. The temporary directory is removed once all
    sample sub-workflows are done, unless self.params.isRetainTempFiles
    is set.

    @param taskPrefix: prefix combined with every task label via preJoin
    @param dependencies: passed as the dependencies of the directory task

    @return: set of the values returned by addWorkflowTask for each sample
    """
    tmpErrorEstimationDir = self.paths.getTmpErrorEstimationDir()

    # the directory task label is reused below as the dependency of every
    # per-sample workflow
    mkDirTask = preJoin(taskPrefix, "makeTmpDir")
    self.addTask(mkDirTask, getMkdirCmd() + [tmpErrorEstimationDir],
                 dependencies=dependencies, isForceLocal=True)

    estimationIntervals = getErrorEstimationIntervals(self.params)
    assert len(estimationIntervals) != 0

    # The count and estimation processes are currently independent for each sample
    sampleTasks = set()
    sampleCount = len(self.params.bamList)
    for sampleIndex in range(sampleCount):
        sampleLabel = preJoin(taskPrefix, "Sample" + str(sampleIndex).zfill(3))
        sampleWorkflow = EstimateSequenceErrorWorkflowForSample(
            self.params, self.paths, estimationIntervals, sampleIndex)
        sampleTasks.add(self.addWorkflowTask(sampleLabel, sampleWorkflow, dependencies=mkDirTask))

    if not self.params.isRetainTempFiles:
        rmTmpCmd = getRmdirCmd() + [tmpErrorEstimationDir]
        self.addTask(preJoin(taskPrefix, "removeTmpDir"), rmTmpCmd,
                     dependencies=sampleTasks, isForceLocal=True)

    return sampleTasks
def callGenome(self, taskPrefix="", dependencies=None):
    """
    Schedule the variant caller on all genome segments, followed by the
    tasks which combine the per-segment results

    @param taskPrefix: prefix combined with every task label via preJoin
    @param dependencies: passed as the dependencies of the first task
                         (creation of the temporary segment directory)

    @return: set of finishing tasks (denovo VCF concatenation, run stats
             merge and, when enabled, callable region BED concatenation)

    Raises Exception when no segment task was produced.
    """
    tmpSegmentDir = self.paths.getTmpSegmentDir()
    makeDirLabel = preJoin(taskPrefix, "makeTmpDir")
    dirTask = self.addTask(makeDirLabel, getMkdirCmd() + [tmpSegmentDir],
                           dependencies=dependencies, isForceLocal=True)

    segFiles = TempSegmentFiles()
    segmentTasks = set()
    for gseg in getNextGenomeSegment(self.params):
        segmentTasks |= callGenomeSegment(self, gseg, segFiles, dependencies=dirTask)

    if not segmentTasks:
        raise Exception("No genome regions to analyze. Possible target region parse error.")

    # command-less task acting as a single checkpoint for all segments:
    checkpointLabel = preJoin(taskPrefix, "completedAllGenomeSegments")
    completeSegmentsTask = self.addTask(checkpointLabel, dependencies=segmentTasks)

    denovoTask = self.concatIndexVcf(taskPrefix, completeSegmentsTask, segFiles.denovo,
                                     self.paths.getDenovoOutputPath(), "denovo")

    # merge segment stats:
    statsTask = self.mergeRunStats(taskPrefix, completeSegmentsTask, segFiles.stats)

    finishTasks = set([denovoTask, statsTask])

    if self.params.isOutputCallableRegions:
        regionTask = self.concatIndexBed(taskPrefix, completeSegmentsTask, segFiles.callable,
                                         self.paths.getRegionOutputPath(), "callableRegions")
        finishTasks.add(regionTask)

    if not self.params.isRetainTempFiles:
        # temporary segment files are only removed once every finishing
        # task has consumed them
        rmTmpCmd = getRmdirCmd() + [tmpSegmentDir]
        self.addTask(preJoin(taskPrefix, "rmTmpDir"), rmTmpCmd,
                     dependencies=finishTasks, isForceLocal=True)

    return finishTasks
def callGenome(self, taskPrefix="", dependencies=None):
    """
    Schedule the counter on all genome segments, followed by the tasks
    which combine the per-segment results

    @param taskPrefix: prefix combined with every task label via preJoin
    @param dependencies: passed as the dependencies of the first task
                         (creation of the temporary segment directory)

    @return: set of finishing tasks (sequence error count merge and, when
             enabled, observed indel BED concatenation)

    Raises Exception when no segment task was produced.
    """
    tmpSegmentDir = self.paths.getTmpSegmentDir()
    makeDirLabel = preJoin(taskPrefix, "makeTmpDir")
    dirTask = self.addTask(makeDirLabel, getMkdirCmd() + [tmpSegmentDir],
                           dependencies=dependencies, isForceLocal=True)

    segFiles = TempSegmentFiles()
    segmentTasks = set()
    for gseg in getNextGenomeSegment(self.params):
        segmentTasks |= callGenomeSegment(self, gseg, segFiles, dependencies=dirTask)

    if not segmentTasks:
        raise Exception("No genome regions to analyze. Possible target region parse error.")

    # command-less task acting as a single checkpoint for all segments:
    checkpointLabel = preJoin(taskPrefix, "completedAllGenomeSegments")
    completeSegmentsTask = self.addTask(checkpointLabel, dependencies=segmentTasks)

    # merge segment stats:
    countsTask = mergeSequenceErrorCounts(self, taskPrefix, completeSegmentsTask, segFiles.counts)
    finishTasks = set([countsTask])

    if self.params.isReportObservedIndels:
        indelBedTask = self.concatIndexBed(taskPrefix, completeSegmentsTask,
                                           segFiles.observedIndelBed,
                                           self.paths.getObservedIndelBedPath(),
                                           "observedIndels")
        finishTasks.add(indelBedTask)

    if not self.params.isRetainTempFiles:
        # temporary segment files are only removed once every finishing
        # task has consumed them
        rmTmpCmd = getRmdirCmd() + [tmpSegmentDir]
        self.addTask(preJoin(taskPrefix, "rmTmpDir"), rmTmpCmd,
                     dependencies=finishTasks, isForceLocal=True)

    return finishTasks
def callGenome(self, taskPrefix="", dependencies=None):
    """
    Schedule strelka on all genome segments, followed by the tasks which
    combine the per-segment results

    @param taskPrefix: prefix combined with every task label via preJoin
    @param dependencies: passed as the dependencies of the first task
                         (creation of the temporary segment directory)

    @return: set of finishing tasks (SNV and indel VCF concatenation, run
             stats merge and, when enabled, callable region BED
             concatenation and realigned BAM concatenation)

    Raises Exception when no segment task was produced.
    """
    tmpSegmentDir = self.paths.getTmpSegmentDir()
    makeDirLabel = preJoin(taskPrefix, "makeTmpDir")
    dirTask = self.addTask(makeDirLabel, getMkdirCmd() + [tmpSegmentDir],
                           dependencies=dependencies, isForceLocal=True)

    segFiles = TempVariantCallingSegmentFiles()
    segmentTasks = set()
    for gsegGroup in self.getStrelkaGenomeSegmentGroupIterator():
        segmentTasks |= callGenomeSegment(self, gsegGroup, segFiles, dependencies=dirTask)

    if not segmentTasks:
        raise Exception("No genome regions to analyze. Possible target region parse error.")

    # command-less task acting as a single checkpoint for all segments:
    checkpointLabel = preJoin(taskPrefix, "completedAllGenomeSegments")
    completeSegmentsTask = self.addTask(checkpointLabel, dependencies=segmentTasks)

    snvTask = self.concatIndexVcf(taskPrefix, completeSegmentsTask, segFiles.snv,
                                  self.paths.getSnvOutputPath(), "SNV")
    indelTask = self.concatIndexVcf(taskPrefix, completeSegmentsTask, segFiles.indel,
                                    self.paths.getIndelOutputPath(), "Indel")

    # merge segment stats:
    statsTask = self.mergeRunStats(taskPrefix, completeSegmentsTask, segFiles.stats)

    finishTasks = set([snvTask, indelTask, statsTask])

    if self.params.isOutputCallableRegions:
        regionTask = self.concatIndexBed(taskPrefix, completeSegmentsTask, segFiles.callable,
                                         self.paths.getRegionOutputPath(), "callableRegions")
        finishTasks.add(regionTask)

    if self.params.isWriteRealignedBam:
        # one concatenation task per sample label, normal first
        realignedInputs = (("normal", segFiles.normalRealign),
                           ("tumor", segFiles.tumorRealign))
        for (label, segmentList) in realignedInputs:
            output = self.paths.getRealignedBamPath(label)
            bamCatCmd = bamListCatCmd(self.params.samtoolsBin, segmentList, output)
            bamCatTaskLabel = preJoin(taskPrefix, "realignedBamCat_" + label)
            finishTasks.add(self.addTask(bamCatTaskLabel, bamCatCmd,
                                         dependencies=completeSegmentsTask))

    if not self.params.isRetainTempFiles:
        # temporary segment files are only removed once every finishing
        # task has consumed them
        rmTmpCmd = getRmdirCmd() + [tmpSegmentDir]
        self.addTask(preJoin(taskPrefix, "removeTmpDir"), rmTmpCmd,
                     dependencies=finishTasks, isForceLocal=True)

    return finishTasks
def runStats(self,taskPrefix="",dependencies=None) :
    """
    Schedule alignment stats generation for every input BAM, merge the
    per-BAM results into one stats file and summarize the merged file

    One "generateStats_NNN" task is added per entry of
    normalBamList + tumorBamList (NNN is the zero-padded list index). Each
    is given an output file inside a temporary directory named after the
    final stats path. That directory is removed after the merge unless
    self.params.isRetainTempFiles is set.

    @param taskPrefix: prefix combined with every task label via preJoin
    @param dependencies: passed as the dependencies of the first task
                         (creation of the temporary directory)

    @return: set holding only the value returned by addTask for the merge
             task; the cleanup and summary tasks are not included
    """

    statsPath=self.paths.getStatsPath()
    statsFilename=os.path.basename(statsPath)
    tmpStatsDir=statsPath+".tmpdir"

    makeTmpStatsDirCmd = getMkdirCmd() + [tmpStatsDir]
    dirTask=self.addTask(preJoin(taskPrefix,"makeTmpDir"), makeTmpStatsDirCmd, dependencies=dependencies, isForceLocal=True)

    tmpStatsFiles = []
    statsTasks = set()
    for (bamIndex,bamPath) in enumerate(self.params.normalBamList + self.params.tumorBamList) :
        indexStr = str(bamIndex).zfill(3)
        tmpStatsFile = os.path.join(tmpStatsDir,statsFilename+"."+ indexStr +".xml")
        tmpStatsFiles.append(tmpStatsFile)

        cmd = [ self.params.mantaStatsBin ]
        cmd.extend(["--output-file",tmpStatsFile])
        cmd.extend(["--align-file",bamPath])
        statsTasks.add(self.addTask(preJoin(taskPrefix,"generateStats_"+indexStr),cmd,dependencies=dirTask))

    # merge takes every per-BAM file, in the same order the BAMs were listed
    cmd = [ self.params.mantaMergeStatsBin ]
    cmd.extend(["--output-file",statsPath])
    for tmpStatsFile in tmpStatsFiles :
        cmd.extend(["--align-stats-file",tmpStatsFile])
    mergeTask = self.addTask(preJoin(taskPrefix,"mergeStats"),cmd,dependencies=statsTasks,isForceLocal=True)

    nextStepWait = set()
    nextStepWait.add(mergeTask)

    if not self.params.isRetainTempFiles :
        rmStatsTmpCmd = getRmdirCmd() + [tmpStatsDir]
        self.addTask(preJoin(taskPrefix,"rmTmpDir"),rmStatsTmpCmd,dependencies=mergeTask, isForceLocal=True)

    # summarize stats in format that's easier for human review
    #
    # The option name is written without surrounding whitespace: every
    # element of an argument list is a single argument, so a trailing space
    # would become part of the option name unless the task runner strips it.
    cmd = [self.params.mantaStatsSummaryBin]
    cmd.extend(["--align-stats", statsPath])
    cmd.extend(["--output-file", self.paths.getStatsSummaryPath()])
    self.addTask(preJoin(taskPrefix,"summarizeStats"),cmd,dependencies=mergeTask)

    return nextStepWait
def callGenome(self, taskPrefix="", dependencies=None):
    """
    Schedule the variant caller on all genome segments, followed by the
    tasks which combine the per-segment results

    In addition to the merged variants VCF, one gVCF is concatenated per
    sample. For the first sample only, a task creating symlinks under the
    legacy gVCF filename (the file itself and its ".tbi" companion) is
    scheduled; that link task is not part of the returned set.

    @param taskPrefix: prefix combined with every task label via preJoin
    @param dependencies: passed as the dependencies of the first task
                         (creation of the temporary segment directory)

    @return: set of finishing tasks (variants VCF and per-sample gVCF
             concatenation, run stats merge and, when enabled, realigned
             BAM finalization)

    Raises Exception when no segment task was produced.
    """
    tmpSegmentDir = self.paths.getTmpSegmentDir()
    makeDirLabel = preJoin(taskPrefix, "makeTmpDir")
    dirTask = self.addTask(makeDirLabel, getMkdirCmd() + [tmpSegmentDir],
                           dependencies=dependencies, isForceLocal=True)

    segmentTasks = set()
    sampleCount = len(self.params.bamList)
    segFiles = TempVariantCallingSegmentFiles(sampleCount)

    segmentGroups = self.getStrelkaGenomeSegmentGroupIterator(
        contigsExcludedFromGrouping=self.params.callContinuousVf)
    for gsegGroup in segmentGroups:
        segmentTasks |= callGenomeSegment(self, gsegGroup, segFiles, dependencies=dirTask)

    if not segmentTasks:
        raise Exception("No genome regions to analyze. Possible target region parse error.")

    # command-less task acting as a single checkpoint for all segments:
    checkpointLabel = preJoin(taskPrefix, "completedAllGenomeSegments")
    completeSegmentsTask = self.addTask(checkpointLabel, dependencies=segmentTasks)

    finishTasks = set()

    # merge various VCF outputs
    variantsTask = self.concatIndexVcf(taskPrefix, completeSegmentsTask, segFiles.variants,
                                       self.paths.getVariantsOutputPath(), "variants")
    finishTasks.add(variantsTask)

    for sampleIndex in range(sampleCount):
        concatTask = self.concatIndexVcf(taskPrefix, completeSegmentsTask,
                                         segFiles.sample[sampleIndex].gvcf,
                                         self.paths.getGvcfOutputPath(sampleIndex),
                                         gvcfSampleLabel(sampleIndex))
        finishTasks.add(concatTask)

        if sampleIndex != 0:
            continue

        # legacy-name links are created for the first sample only; the link
        # command runs with the output directory as its working directory,
        # so both link source and link name are plain filenames
        outputPath = self.paths.getGvcfOutputPath(sampleIndex)
        outputDirname = os.path.dirname(outputPath)
        outputBasename = os.path.basename(outputPath)

        def linkLegacy(extension):
            linkSource = quote(outputBasename + extension)
            linkName = quote(self.paths.getGvcfLegacyFilename() + extension)
            return "ln -s " + linkSource + " " + linkName

        linkCmd = " && ".join([linkLegacy(""), linkLegacy(".tbi")])
        self.addTask(preJoin(taskPrefix, "addLegacyOutputLink"), linkCmd,
                     dependencies=concatTask, isForceLocal=True, cwd=outputDirname)

    # merge segment stats:
    finishTasks.add(self.mergeRunStats(taskPrefix, completeSegmentsTask, segFiles.stats))

    if self.params.isWriteRealignedBam:
        realignedLabel = "realigned"
        finalizeCmd = bamListCatCmd(self.params.samtoolsBin, segFiles.bamRealign,
                                    self.paths.getRealignedBamPath())
        finishTasks.add(self.addTask(preJoin(taskPrefix, realignedLabel + "_finalizeBAM"),
                                     finalizeCmd, dependencies=completeSegmentsTask))

    if not self.params.isRetainTempFiles:
        # temporary segment files are only removed once every finishing
        # task has consumed them
        rmTmpCmd = getRmdirCmd() + [tmpSegmentDir]
        self.addTask(preJoin(taskPrefix, "removeTmpDir"), rmTmpCmd,
                     dependencies=finishTasks, isForceLocal=True)

    return finishTasks
def runLocusGraph(self,taskPrefix="",dependencies=None):
    """
    Create the full SV locus graph

    One "makeLocusGraph_<gid>" task is scheduled per genome segment group.
    The per-group graph files are listed in a file-list written by a
    sub-workflow, merged into the final graph, and the merged graph is then
    checked and summarized by separate tasks.

    @param taskPrefix: prefix combined with every task label via preJoin
    @param dependencies: passed as the dependencies of the first task
                         (creation of the temporary graph directory)

    @return: set holding only the value returned by addTask for the graph
             check task

    Raises Exception when no segment group was found.
    """

    statsPath = self.paths.getStatsPath()
    graphPath = self.paths.getGraphPath()
    graphStatsPath = self.paths.getGraphStatsPath()
    tmpGraphDir = self.paths.getTmpGraphDir()

    dirTask = self.addTask(preJoin(taskPrefix,"makeGraphTmpDir"), getMkdirCmd() + [tmpGraphDir],
                           dependencies=dependencies, isForceLocal=True)

    tmpGraphFiles = []
    graphTasks = set()

    for gsegGroup in getGenomeSegmentGroups(getNextGenomeSegment(self.params)) :
        assert len(gsegGroup) != 0

        # group id is the first segment id, extended with the last segment
        # id when the group holds more than one segment
        gid = gsegGroup[0].id
        if len(gsegGroup) > 1 :
            gid += "_to_" + gsegGroup[-1].id

        tmpGraphFile = self.paths.getTmpGraphFile(gid)
        tmpGraphFiles.append(tmpGraphFile)

        graphCmd = [ self.params.mantaGraphBin,
                     "--output-file", tmpGraphFile,
                     "--align-stats", statsPath ]
        for gseg in gsegGroup :
            graphCmd += ["--region", gseg.bamRegion]
        graphCmd += ["--min-candidate-sv-size", self.params.minCandidateVariantSize,
                     "--min-edge-observations", self.params.minEdgeObservations,
                     "--ref", self.params.referenceFasta]
        for bamPath in self.params.normalBamList :
            graphCmd += ["--align-file", bamPath]
        for bamPath in self.params.tumorBamList :
            graphCmd += ["--tumor-align-file", bamPath]

        # optional arguments
        if self.params.isHighDepthFilter :
            graphCmd += ["--chrom-depth", self.paths.getChromDepth()]
        if self.params.isIgnoreAnomProperPair :
            graphCmd.append("--ignore-anom-proper-pair")
        if self.params.isRNA :
            graphCmd.append("--rna")

        graphTaskLabel = preJoin(taskPrefix,"makeLocusGraph_"+gid)
        graphTasks.add(self.addTask(graphTaskLabel, graphCmd, dependencies=dirTask,
                                    memMb=self.params.estimateMemMb))

    if not tmpGraphFiles :
        raise Exception("No SV Locus graphs to create. Possible target region parse error.")

    # the merge reads its inputs from a file-list rather than from the
    # command line
    tmpGraphFileList = self.paths.getTmpGraphFileListPath()
    tmpGraphFileListTask = preJoin(taskPrefix,"mergeLocusGraphInputList")
    self.addWorkflowTask(tmpGraphFileListTask, listFileWorkflow(tmpGraphFileList,tmpGraphFiles),
                         dependencies=graphTasks)

    mergeCmd = [ self.params.mantaGraphMergeBin,
                 "--output-file", graphPath,
                 "--graph-file-list", tmpGraphFileList ]
    mergeTask = self.addTask(preJoin(taskPrefix,"mergeLocusGraph"), mergeCmd,
                             dependencies=tmpGraphFileListTask, memMb=self.params.mergeMemMb)

    # Run a separate process to rigorously check that the final graph is valid, the sv candidate generators will check as well, but
    # this makes the check much more clear:
    checkCmd = [ self.params.mantaGraphCheckBin, "--graph-file", graphPath ]
    checkTask = self.addTask(preJoin(taskPrefix,"checkLocusGraph"), checkCmd,
                             dependencies=mergeTask, memMb=self.params.mergeMemMb)

    if not self.params.isRetainTempFiles :
        rmGraphTmpCmd = getRmdirCmd() + [tmpGraphDir]
        self.addTask(preJoin(taskPrefix,"removeTmpDir"), rmGraphTmpCmd, dependencies=mergeTask)

    graphStatsCmd = [ self.params.mantaGraphStatsBin, "--global",
                      "--graph-file", graphPath,
                      "--output-file", graphStatsPath ]
    self.addTask(preJoin(taskPrefix,"locusGraphStats"), graphStatsCmd,
                 dependencies=mergeTask, memMb=self.params.mergeMemMb)

    return set([checkTask])
def runLocusGraph(self, taskPrefix="", dependencies=None):
    """
    Create the full SV locus graph

    One "makeLocusGraph_<gid>" task is scheduled per genome segment group.
    The per-group graph files are listed in a file-list written by a
    sub-workflow, merged into the final graph, and the merged graph is then
    checked and summarized by separate tasks.

    @param taskPrefix: prefix combined with every task label via preJoin
    @param dependencies: passed as the dependencies of the first task
                         (creation of the temporary graph directory)

    @return: set holding only the value returned by addTask for the graph
             check task

    Raises Exception when no segment group was found.
    """
    statsPath = self.paths.getStatsPath()
    graphPath = self.paths.getGraphPath()
    graphStatsPath = self.paths.getGraphStatsPath()
    tmpGraphDir = self.paths.getTmpGraphDir()

    makeDirLabel = preJoin(taskPrefix, "makeGraphTmpDir")
    dirTask = self.addTask(makeDirLabel, getMkdirCmd() + [tmpGraphDir],
                           dependencies=dependencies, isForceLocal=True)

    tmpGraphFiles = []
    graphTasks = set()

    for gsegGroup in getGenomeSegmentGroups(getNextGenomeSegment(self.params)):
        assert len(gsegGroup) != 0

        # group id is the first segment id, extended with the last segment
        # id when the group holds more than one segment
        gid = gsegGroup[0].id
        if len(gsegGroup) > 1:
            gid += "_to_" + gsegGroup[-1].id

        groupGraphFile = self.paths.getTmpGraphFile(gid)
        tmpGraphFiles.append(groupGraphFile)

        graphCmd = [self.params.mantaGraphBin]
        graphCmd.extend(["--output-file", groupGraphFile,
                         "--align-stats", statsPath])
        for gseg in gsegGroup:
            graphCmd.extend(["--region", gseg.bamRegion])
        graphCmd.extend(["--min-candidate-sv-size", self.params.minCandidateVariantSize,
                         "--min-edge-observations", self.params.minEdgeObservations,
                         "--ref", self.params.referenceFasta])
        for bamPath in self.params.normalBamList:
            graphCmd.extend(["--align-file", bamPath])
        for bamPath in self.params.tumorBamList:
            graphCmd.extend(["--tumor-align-file", bamPath])

        # optional arguments
        if self.params.isHighDepthFilter:
            graphCmd.extend(["--chrom-depth", self.paths.getChromDepth()])
        if self.params.isIgnoreAnomProperPair:
            graphCmd.append("--ignore-anom-proper-pair")
        if self.params.isRNA:
            graphCmd.append("--rna")

        graphTasks.add(self.addTask(preJoin(taskPrefix, "makeLocusGraph_" + gid), graphCmd,
                                    dependencies=dirTask, memMb=self.params.estimateMemMb))

    if not tmpGraphFiles:
        raise Exception("No SV Locus graphs to create. Possible target region parse error.")

    # the merge reads its inputs from a file-list rather than from the
    # command line
    tmpGraphFileList = self.paths.getTmpGraphFileListPath()
    tmpGraphFileListTask = preJoin(taskPrefix, "mergeLocusGraphInputList")
    fileListWorkflow = listFileWorkflow(tmpGraphFileList, tmpGraphFiles)
    self.addWorkflowTask(tmpGraphFileListTask, fileListWorkflow, dependencies=graphTasks)

    mergeCmd = [self.params.mantaGraphMergeBin,
                "--output-file", graphPath,
                "--graph-file-list", tmpGraphFileList]
    mergeTask = self.addTask(preJoin(taskPrefix, "mergeLocusGraph"), mergeCmd,
                             dependencies=tmpGraphFileListTask, memMb=self.params.mergeMemMb)

    # Run a separate process to rigorously check that the final graph is valid, the sv candidate generators will check as well, but
    # this makes the check much more clear:
    checkCmd = [self.params.mantaGraphCheckBin, "--graph-file", graphPath]
    checkTask = self.addTask(preJoin(taskPrefix, "checkLocusGraph"), checkCmd,
                             dependencies=mergeTask, memMb=self.params.mergeMemMb)

    if not self.params.isRetainTempFiles:
        rmGraphTmpCmd = getRmdirCmd() + [tmpGraphDir]
        self.addTask(preJoin(taskPrefix, "removeTmpDir"), rmGraphTmpCmd, dependencies=mergeTask)

    graphStatsCmd = [self.params.mantaGraphStatsBin, "--global",
                     "--graph-file", graphPath,
                     "--output-file", graphStatsPath]
    self.addTask(preJoin(taskPrefix, "locusGraphStats"), graphStatsCmd,
                 dependencies=mergeTask, memMb=self.params.mergeMemMb)

    return set([checkTask])
def runLocusGraph(self,taskPrefix="",dependencies=None):
    """
    Create the full SV locus graph

    Genome segments are first clumped into groups, then one
    "makeLocusGraph_<gid>" task is scheduled per group. The per-group graph
    files are passed directly to the merge command, and the merged graph is
    then checked and summarized by separate tasks. The temporary graph
    directory is always removed after the merge.

    @param taskPrefix: prefix combined with every task label via preJoin
    @param dependencies: passed as the dependencies of the first task
                         (creation of the temporary graph directory)

    @return: set holding only the value returned by addTask for the graph
             check task

    Raises Exception when no segment group was found.
    """

    statsPath = self.paths.getStatsPath()
    graphPath = self.paths.getGraphPath()
    graphStatsPath = self.paths.getGraphStatsPath()

    graphFilename = os.path.basename(graphPath)
    tmpGraphDir = os.path.join(self.params.workDir, graphFilename+".tmpdir")

    dirTask = self.addTask(preJoin(taskPrefix,"makeTmpDir"), getMkdirCmd() + [tmpGraphDir],
                           dependencies=dependencies, isForceLocal=True)

    tmpGraphFiles = []
    graphTasks = set()

    def getGenomeSegmentGroups(params) :
        """
        Iterate segment groups and 'clump' small contigs together

        Consecutive segments are accumulated into one group for as long as
        the summed segment size stays within minSegmentGroupSize; a segment
        which would exceed it starts the next group.
        """
        minSegmentGroupSize = 200000
        pending = []
        pendingSize = 0
        for gseg in getNextGenomeSegment(params) :
            segSize = gseg.size()
            if pendingSize + segSize > minSegmentGroupSize :
                if pending :
                    yield pending
                pending = [gseg]
                pendingSize = segSize
            else :
                pending.append(gseg)
                pendingSize += segSize
        if pending :
            yield pending

    for gsegGroup in getGenomeSegmentGroups(self.params) :
        assert len(gsegGroup) != 0

        # group id is the first segment id, extended with the last segment
        # id when the group holds more than one segment
        gid = gsegGroup[0].id
        if len(gsegGroup) > 1 :
            gid += "_to_" + gsegGroup[-1].id

        groupGraphFile = os.path.join(tmpGraphDir, graphFilename+"."+gid+".bin")
        tmpGraphFiles.append(groupGraphFile)

        graphCmd = [ self.params.mantaGraphBin,
                     "--output-file", groupGraphFile,
                     "--align-stats", statsPath ]
        for gseg in gsegGroup :
            graphCmd += ["--region", gseg.bamRegion]
        graphCmd += ["--min-candidate-sv-size", self.params.minCandidateVariantSize,
                     "--min-edge-observations", self.params.minEdgeObservations,
                     "--ref", self.params.referenceFasta]
        for bamPath in self.params.normalBamList :
            graphCmd += ["--align-file", bamPath]
        for bamPath in self.params.tumorBamList :
            graphCmd += ["--tumor-align-file", bamPath]

        # optional arguments
        if self.params.isHighDepthFilter :
            graphCmd += ["--chrom-depth", self.paths.getChromDepth()]
        if self.params.isIgnoreAnomProperPair :
            graphCmd.append("--ignore-anom-proper-pair")
        if self.params.isRNA :
            graphCmd.append("--rna")

        graphTaskLabel = preJoin(taskPrefix,"makeLocusGraph_"+gid)
        graphTasks.add(self.addTask(graphTaskLabel, graphCmd, dependencies=dirTask,
                                    memMb=self.params.estimateMemMb))

    if not tmpGraphFiles :
        raise Exception("No SV Locus graphs to create. Possible target region parse error.")

    mergeCmd = [ self.params.mantaGraphMergeBin, "--output-file", graphPath ]
    for gfile in tmpGraphFiles :
        mergeCmd += ["--graph-file", gfile]
    mergeTask = self.addTask(preJoin(taskPrefix,"mergeLocusGraph"), mergeCmd,
                             dependencies=graphTasks, memMb=self.params.mergeMemMb)

    # Run a separate process to rigorously check that the final graph is valid, the sv candidate generators will check as well, but
    # this makes the check much more clear:
    checkCmd = [ self.params.mantaGraphCheckBin, "--graph-file", graphPath ]
    checkTask = self.addTask(preJoin(taskPrefix,"checkLocusGraph"), checkCmd,
                             dependencies=mergeTask, memMb=self.params.mergeMemMb)

    rmGraphTmpCmd = getRmdirCmd() + [tmpGraphDir]
    self.addTask(preJoin(taskPrefix,"rmTmpDir"), rmGraphTmpCmd, dependencies=mergeTask)

    graphStatsCmd = [ self.params.mantaGraphStatsBin, "--global",
                      "--graph-file", graphPath,
                      "--output-file", graphStatsPath ]
    self.addTask(preJoin(taskPrefix,"locusGraphStats"), graphStatsCmd,
                 dependencies=mergeTask, memMb=self.params.mergeMemMb)

    return set([checkTask])