Example #1
0
def _getDepthShared(self,taskPrefix, dependencies, bamList, outputPath, depthFunc) :
    """
    Schedule the tasks which estimate chromosome depth for every BAM in bamList.

    A scratch directory named outputPath + ".tmpdir" is created first. depthFunc
    is then invoked once per BAM as
    depthFunc(self, sampleTaskPrefix, dirTask, bamFile, sampleOutputFile),
    and the per-sample files are combined into outputPath by the
    self.params.mergeChromDepth command. Unless self.params.isRetainTempFiles
    is set, a final task removes the scratch directory after the merge.

    @return a set holding the merge task, for downstream steps to wait on
    """

    baseName = os.path.basename(outputPath)
    scratchDir = outputPath + ".tmpdir"

    mkdirTask = self.addTask(preJoin(taskPrefix, "makeTmpDir"),
                             getMkdirCmd() + [scratchDir],
                             dependencies=dependencies,
                             isForceLocal=True)

    sampleFiles = []
    sampleTasks = set()
    for (sampleIndex, bamFile) in enumerate(bamList) :
        # zero-padded sample index, used in both the file name and the task label
        sampleTag = "%03d" % (sampleIndex)
        sampleFile = os.path.join(scratchDir, baseName + "." + sampleTag + ".txt")
        sampleFiles.append(sampleFile)
        sampleTasks |= setzer(depthFunc(self, taskPrefix + "_sample" + sampleTag,
                                        mkdirTask, bamFile, sampleFile))

    mergeCmd = [self.params.mergeChromDepth, "--out", outputPath]
    for sampleFile in sampleFiles :
        mergeCmd += ["--in", sampleFile]

    mergeTask = self.addTask(preJoin(taskPrefix, "mergeChromDepth"),
                             mergeCmd,
                             dependencies=sampleTasks,
                             isForceLocal=True)

    if not self.params.isRetainTempFiles :
        # cleanup is scheduled but deliberately not part of the returned wait set
        self.addTask(preJoin(taskPrefix, "removeTmpDir"),
                     getRmdirCmd() + [scratchDir],
                     dependencies=mergeTask,
                     isForceLocal=True)

    return set([mergeTask])
Example #2
0
def mergeSupportBams(self, mergeBamTasks, taskPrefix="", isNormal=True, bamIdx=0, dependencies=None) :
    """
    Schedule a merge task and an index task for the evidence BAM of each sample.

    The sample list is self.params.normalBamList when isNormal is true,
    otherwise self.params.tumorBamList. Every task created here is added to
    the caller-supplied mergeBamTasks set.

    @param bamIdx index assigned to the first sample; incremented once per sample
    @return the index following the last sample processed
    """

    sampleBams = self.params.normalBamList if isNormal else self.params.tumorBamList

    sampleIdx = bamIdx
    for bamPath in sampleBams :
        evidenceBam = self.paths.getFinalSupportBamPath(bamPath, sampleIdx)

        # merge support bams
        mergeLabel = preJoin(taskPrefix, "merge_evidenceBam_%s" % (sampleIdx))
        mergeArgs = [ sys.executable,
                      self.params.mantaMergeBam,
                      self.params.samtoolsBin,
                      self.paths.getSortedSupportBamMask(sampleIdx),
                      evidenceBam,
                      self.paths.getSupportBamListPath(sampleIdx) ]
        mergeTask = self.addTask(mergeLabel, mergeArgs, dependencies=dependencies)
        mergeBamTasks.add(mergeTask)

        # index the filtered bam
        ### TODO still needs to handle the case where the evidence bam does not exist
        indexLabel = preJoin(taskPrefix, "index_evidenceBam_%s" % (sampleIdx))
        indexArgs = [ self.params.samtoolsBin, "index", evidenceBam ]
        indexTask = self.addTask(indexLabel, indexArgs, dependencies=mergeTask)
        mergeBamTasks.add(indexTask)

        sampleIdx += 1

    return sampleIdx
Example #3
0
def runLocusGraph(self,taskPrefix="",dependencies=None):
    """
    Create the full SV locus graph

    One graph-building task is scheduled per genome segment produced by
    getNextGenomeSegment(). The per-segment graph files are merged into the
    final graph file, which is then validated by a separate check task and
    summarized by a stats task.

    @param taskPrefix   prefix joined onto every task label via preJoin()
    @param dependencies upstream task(s) the temporary-directory task waits on
    @return a set holding the graph check task, for downstream steps to wait on
    """

    statsPath=self.paths.getStatsPath()
    graphPath=self.paths.getGraphPath()
    graphStatsPath=self.paths.getGraphStatsPath()

    graphFilename=os.path.basename(graphPath)
    tmpGraphDir=os.path.join(self.params.workDir,graphFilename+".tmpdir")

    # Argument-list form, so the directory path is never word-split by a shell.
    makeTmpDirCmd = ["mkdir", "-p", tmpGraphDir]
    dirTask=self.addTask(preJoin(taskPrefix,"makeTmpDir"), makeTmpDirCmd, dependencies=dependencies, isForceLocal=True)

    tmpGraphFiles = []
    graphTasks = set()

    for gseg in getNextGenomeSegment(self.params) :

        tmpGraphFiles.append(os.path.join(tmpGraphDir,graphFilename+"."+gseg.id+".bin"))
        graphCmd = [ self.params.mantaGraphBin ]
        graphCmd.extend(["--output-file", tmpGraphFiles[-1]])
        graphCmd.extend(["--align-stats",statsPath])
        graphCmd.extend(["--region",gseg.bamRegion])
        graphCmd.extend(["--min-candidate-sv-size", self.params.minCandidateVariantSize])
        for bamPath in self.params.normalBamList :
            graphCmd.extend(["--align-file",bamPath])
        for bamPath in self.params.tumorBamList :
            graphCmd.extend(["--tumor-align-file",bamPath])

        graphTaskLabel=preJoin(taskPrefix,"makeLocusGraph_"+gseg.id)
        graphTasks.add(self.addTask(graphTaskLabel,graphCmd,dependencies=dirTask,memMb=self.params.estimateMemMb))

    mergeCmd = [ self.params.mantaGraphMergeBin ]
    mergeCmd.extend(["--output-file", graphPath])
    for gfile in tmpGraphFiles :
        mergeCmd.extend(["--graph-file", gfile])

    mergeTask = self.addTask(preJoin(taskPrefix,"mergeLocusGraph"),mergeCmd,dependencies=graphTasks,memMb=self.params.mergeMemMb)

    # Run a separate process to rigorously check that the final graph is valid, the sv candidate generators will check as well, but
    # this makes the check much more clear:

    checkCmd = [ self.params.mantaGraphCheckBin ]
    checkCmd.extend(["--graph-file", graphPath])
    checkTask = self.addTask(preJoin(taskPrefix,"checkLocusGraph"),checkCmd,dependencies=mergeTask,memMb=self.params.mergeMemMb)

    # NOTE: no task removes tmpGraphDir; the per-segment graph files are left in place.

    # The stats command needs a shell for the output redirect, so it stays a
    # single string; every path is double-quoted so whitespace cannot split it.
    graphStatsCmd = "\"%s\" --global --graph-file \"%s\" >| \"%s\"" % (self.params.mantaGraphStatsBin, graphPath, graphStatsPath)

    # downstream steps do not wait on the stats summary
    self.addTask(preJoin(taskPrefix,"locusGraphStats"),graphStatsCmd,dependencies=mergeTask,memMb=self.params.mergeMemMb)

    nextStepWait = set()
    nextStepWait.add(checkTask)
    return nextStepWait
Example #4
0
    def depthFunc(self,taskPrefix,dependencies,bamFile,outFile) :
        """
        Schedule the depth estimation of one BAM file, scattered over contig groups.

        One self.params.getChromDepthBin task is created per contig group; the
        per-group outputs are concatenated into outFile by self.params.catScript.

        @return a set holding the concatenation task
        """
        depthFileName = os.path.basename(outFile)

        scratchDir = os.path.join(outFile + ".tmpdir")
        mkdirTask = self.addTask(preJoin(taskPrefix, "makeTmpDir"),
                                 getMkdirCmd() + [scratchDir],
                                 dependencies=dependencies,
                                 isForceLocal=True)

        def getChromosomeGroups(params) :
            """
            Generator yielding successive lists of (chromIndex, chromLabel) tuples.

            Contigs are visited in params.chromOrder order, skipping any listed in
            params.chromIsSkipped. Consecutive contigs share a group while their
            summed size stays at or below 200000; a contig which would exceed that
            total starts a new group.
            """
            minSize = 200000

            chromCount = len(params.chromSizes)
            assert(len(params.chromOrder) == chromCount)

            pending = []
            pendingSize = 0
            for chromIndex in range(chromCount) :
                chromLabel = params.chromOrder[chromIndex]
                if chromLabel in params.chromIsSkipped :
                    continue

                chromSize = params.chromSizes[chromLabel]
                if pendingSize + chromSize > minSize :
                    # flush the current group before this contig opens a new one
                    if pending :
                        yield pending
                    pending = []
                    pendingSize = 0

                pending.append((chromIndex, chromLabel))
                pendingSize += chromSize

            if pending :
                yield pending

        groupFiles = []
        groupTasks = set()

        for chromGroup in getChromosomeGroups(self.params) :
            assert(len(chromGroup) > 0)

            # the group id names the first contig, plus the last one for multi-contig groups
            (firstIndex, firstLabel) = chromGroup[0]
            cid = getRobustChromId(firstIndex, firstLabel)
            if len(chromGroup) > 1 :
                (lastIndex, lastLabel) = chromGroup[-1]
                cid += "_to_" + getRobustChromId(lastIndex, lastLabel)

            groupFile = os.path.join(scratchDir, depthFileName + "_" + cid)
            groupFiles.append(groupFile)

            depthCmd = [self.params.getChromDepthBin,
                        "--ref", self.params.referenceFasta,
                        "--align-file", bamFile,
                        "--output", groupFile]
            for (_, chromLabel) in chromGroup :
                depthCmd += ["--chrom", chromLabel]

            groupTasks.add(self.addTask(preJoin(taskPrefix, "estimateChromDepth_" + cid),
                                        depthCmd,
                                        dependencies=mkdirTask,
                                        memMb=self.params.estimateMemMb))

        assert(len(groupFiles) != 0)

        catCmd = [self.params.catScript, "--output", outFile] + groupFiles
        catTask = self.addTask(preJoin(taskPrefix, "catChromDepth"),
                               catCmd,
                               dependencies=groupTasks,
                               isForceLocal=True)

        return set([catTask])
Example #5
0
def runLocusGraph(self,taskPrefix="",dependencies=None):
    """
    Create the full SV locus graph

    One graph-building task is scheduled per genome segment produced by
    getNextGenomeSegment(). The per-segment graph files are merged into the
    final graph file, and a stats task summarizes the merged graph.

    @param taskPrefix   prefix joined onto every task label via preJoin()
    @param dependencies upstream task(s) the temporary-directory task waits on
    @return a set holding the graph merge task, for downstream steps to wait on
    """

    statsPath=self.paths.getStatsPath()
    graphPath=self.paths.getGraphPath()
    graphStatsPath=self.paths.getGraphStatsPath()

    graphFilename=os.path.basename(graphPath)
    tmpGraphDir=os.path.join(self.params.workDir,graphFilename+".tmpdir")

    # Argument-list form, so the directory path is never word-split by a shell.
    makeTmpDirCmd = ["mkdir", "-p", tmpGraphDir]
    dirTask=self.addTask(preJoin(taskPrefix,"makeTmpDir"), makeTmpDirCmd, dependencies=dependencies, isForceLocal=True)

    tmpGraphFiles = []
    graphTasks = set()

    for gseg in getNextGenomeSegment(self.params) :

        tmpGraphFiles.append(os.path.join(tmpGraphDir,graphFilename+"."+gseg.id+".bin"))
        graphCmd = [ self.params.mantaGraphBin ]
        graphCmd.extend(["--output-file", tmpGraphFiles[-1]])
        graphCmd.extend(["--align-stats",statsPath])
        graphCmd.extend(["--region",gseg.bamRegion])
        for bamPath in self.params.normalBamList :
            graphCmd.extend(["--align-file",bamPath])
        for bamPath in self.params.tumorBamList :
            graphCmd.extend(["--tumor-align-file",bamPath])

        graphTaskLabel=preJoin(taskPrefix,"makeLocusGraph_"+gseg.id)
        graphTasks.add(self.addTask(graphTaskLabel,graphCmd,dependencies=dirTask))

    mergeCmd = [ self.params.mantaGraphMergeBin ]
    mergeCmd.extend(["--output-file", graphPath])
    for gfile in tmpGraphFiles :
        mergeCmd.extend(["--graph-file", gfile])

    mergeTask = self.addTask(preJoin(taskPrefix,"mergeLocusGraph"),mergeCmd,dependencies=graphTasks)

    # NOTE: no task removes tmpGraphDir; the per-segment graph files are left in place.

    # The stats command needs a shell for the output redirect, so it stays a
    # single string; every path is double-quoted so whitespace cannot split it.
    graphStatsCmd = "\"%s\" --global --graph-file \"%s\" >| \"%s\"" % (self.params.mantaGraphStatsBin, graphPath, graphStatsPath)

    # downstream steps do not wait on the stats summary
    self.addTask(preJoin(taskPrefix,"locusGraphStats"),graphStatsCmd,dependencies=mergeTask)

    nextStepWait = set()
    nextStepWait.add(mergeTask)
    return nextStepWait
Example #6
0
    def extractSmall(inPath, outPath) :
        """
        Schedule extraction of small indels from inPath into outPath.

        The size limit is self.params.minScoredVariantSize - 1; nothing is
        scheduled when that limit is below 1. The new task is added to the
        enclosing scope's nextStepWait set.
        """
        sizeLimit = int(self.params.minScoredVariantSize) - 1
        if sizeLimit >= 1 :
            extractTask = self.addTask(preJoin(taskPrefix,"extractSmallIndels"),
                                       getExtractSmallCmd(sizeLimit, inPath, outPath),
                                       dependencies=candSortTask,
                                       isForceLocal=True)
            nextStepWait.add(extractTask)
Example #7
0
def runStats(self,taskPrefix="",dependencies=None) :
    """
    Schedule alignment stats generation for every normal and tumor BAM.

    A stats file is generated per BAM inside a scratch directory
    (stats path + ".tmpdir"), the per-BAM files are merged into the final
    stats file, and a human-readable summary is produced from the merged file.
    Unless self.params.isRetainTempFiles is set, the scratch directory is
    removed after the merge.

    @param taskPrefix   prefix joined onto every task label via preJoin()
    @param dependencies upstream task(s) the temporary-directory task waits on
    @return a set holding the merge task; the cleanup and summary tasks are
            not included, so downstream steps do not wait on them
    """

    statsPath=self.paths.getStatsPath()
    statsFilename=os.path.basename(statsPath)

    tmpStatsDir=statsPath+".tmpdir"

    makeTmpStatsDirCmd = getMkdirCmd() + [tmpStatsDir]
    dirTask=self.addTask(preJoin(taskPrefix,"makeTmpDir"), makeTmpStatsDirCmd, dependencies=dependencies, isForceLocal=True)

    tmpStatsFiles = []
    statsTasks = set()

    for (bamIndex,bamPath) in enumerate(self.params.normalBamList + self.params.tumorBamList) :
        indexStr = str(bamIndex).zfill(3)
        tmpStatsFiles.append(os.path.join(tmpStatsDir,statsFilename+"."+ indexStr +".xml"))

        statsCmd = [ self.params.mantaStatsBin ]
        statsCmd.extend(["--output-file",tmpStatsFiles[-1]])
        statsCmd.extend(["--align-file",bamPath])

        statsTasks.add(self.addTask(preJoin(taskPrefix,"generateStats_"+indexStr),statsCmd,dependencies=dirTask))

    mergeCmd = [ self.params.mantaMergeStatsBin ]
    mergeCmd.extend(["--output-file",statsPath])
    for tmpStatsFile in tmpStatsFiles :
        mergeCmd.extend(["--align-stats-file",tmpStatsFile])

    mergeTask = self.addTask(preJoin(taskPrefix,"mergeStats"),mergeCmd,dependencies=statsTasks,isForceLocal=True)

    nextStepWait = set()
    nextStepWait.add(mergeTask)

    if not self.params.isRetainTempFiles :
        rmStatsTmpCmd = getRmdirCmd() + [tmpStatsDir]
        self.addTask(preJoin(taskPrefix,"rmTmpDir"),rmStatsTmpCmd,dependencies=mergeTask, isForceLocal=True)

    # summarize stats in format that's easier for human review
    # (the option name is passed without trailing whitespace so the argument
    # list is correct even if the task runner does not normalize arguments)
    summaryCmd = [self.params.mantaStatsSummaryBin]
    summaryCmd.extend(["--align-stats", statsPath])
    summaryCmd.extend(["--output-file", self.paths.getStatsSummaryPath()])
    self.addTask(preJoin(taskPrefix,"summarizeStats"),summaryCmd,dependencies=mergeTask)

    return nextStepWait
Example #8
0
    def sortVcfs(pathList, outPath, label, isDiploid=False) :
        """
        Schedule header fix-up, sorting and tabix indexing for a list of VCFs.

        The first file in pathList is rewritten in place through
        self.params.vcfCmdlineSwapper before the sort runs. The tabix task is
        added to the enclosing scope's nextStepWait set.

        @return the sort task, or an empty set when pathList is empty
        """
        if not pathList :
            return set()

        # make header modifications to first vcf in list of files to be sorted:
        firstVcf = pathList[0]
        reheaderTmp = firstVcf + ".reheader.tmp"
        headerFixCmd = " ".join([
            '"%s" -E "%s"' % (sys.executable, self.params.vcfCmdlineSwapper),
            '"' + " ".join(self.params.configCommandLine) + '"',
            '< "%s" > "%s"' % (firstVcf, reheaderTmp),
            "&&",
            " ".join(getMvCmd()),
            '"%s" "%s"' % (reheaderTmp, firstVcf),
        ])

        headerFixTask = preJoin(taskPrefix, "fixVcfHeader_" + label)
        self.addTask(headerFixTask, headerFixCmd, dependencies=hygenTasks, isForceLocal=True)

        sortTask = self.addTask(preJoin(taskPrefix, "sort_" + label),
                                getVcfSortCmd(pathList, outPath, isDiploid),
                                dependencies=headerFixTask)

        tabixTask = self.addTask(preJoin(taskPrefix, "tabix_" + label),
                                 getVcfTabixCmd(outPath),
                                 dependencies=sortTask,
                                 isForceLocal=True)
        nextStepWait.add(tabixTask)

        return sortTask
Example #9
0
        def catRealignedBam(label, segmentList):
            """
            Schedule concatenation of the per-segment realigned BAMs in
            segmentList into the realigned BAM path for label.

            The task waits on completeSegmentsTask and is added to the
            enclosing scope's finishTasks set.
            """
            mergedBam = self.paths.getRealignedBamPath(label)
            catTask = self.addTask(
                preJoin(taskPrefix, "realignedBamCat_" + label),
                bamListCatCmd(self.params.samtoolsBin, segmentList, mergedBam),
                dependencies=completeSegmentsTask)
            finishTasks.add(catTask)
Example #10
0
def _runDepthShared(self,taskPrefix,dependencies, depthFunc) :
    """
    estimate chrom depth using the specified depthFunc to compute per-sample depth

    The normal BAM list is used when it is non-empty, otherwise the tumor BAM
    list; when both are empty no task is scheduled. depthFunc is invoked once
    per BAM as depthFunc(self, sampleTaskPrefix, dirTask, bamFile, sampleOutputFile)
    and the per-sample files are merged into self.paths.getChromDepth().

    @param taskPrefix   prefix joined onto every task label via preJoin()
    @param dependencies upstream task(s) the temporary-directory task waits on
    @return a set holding the merge task (empty when there are no BAMs); the
            cleanup task is not included
    """

    if self.params.normalBamList :
        bamList = self.params.normalBamList
    elif self.params.tumorBamList :
        bamList = self.params.tumorBamList
    else :
        return set()

    outputPath=self.paths.getChromDepth()
    outputFilename=os.path.basename(outputPath)

    tmpDir=outputPath+".tmpdir"

    # Argument-list form, so the directory path is never word-split by a shell.
    makeTmpDirCmd = ["mkdir", "-p", tmpDir]
    dirTask=self.addTask(preJoin(taskPrefix,"makeTmpDir"), makeTmpDirCmd, dependencies=dependencies, isForceLocal=True)

    tmpFiles = []
    scatterTasks = set()

    for (bamIndex, bamFile) in enumerate(bamList) :
        indexStr = str(bamIndex).zfill(3)
        tmpFiles.append(os.path.join(tmpDir,outputFilename+"."+ indexStr +".txt"))
        scatterTasks |= setzer(depthFunc(self,taskPrefix+"_sample"+indexStr,dirTask,bamFile,tmpFiles[-1]))

    cmd = [ self.params.mergeChromDepth ]
    cmd.extend(["--out",outputPath])
    for tmpFile in tmpFiles :
        cmd.extend(["--in",tmpFile])

    mergeTask = self.addTask(preJoin(taskPrefix,"mergeChromDepth"),cmd,dependencies=scatterTasks,isForceLocal=True)

    nextStepWait = set()
    nextStepWait.add(mergeTask)

    # cleanup runs after the merge; downstream steps do not wait on it
    rmTmpCmd = ["rm", "-rf", tmpDir]
    self.addTask(preJoin(taskPrefix,"rmTmpDir"),rmTmpCmd,dependencies=mergeTask, isForceLocal=True)

    return nextStepWait
Example #11
0
        def sortRealignBam(label, sortList) :
            """
            Schedule a samtools sort of the unsorted realigned BAM for this
            segment and label, deleting the unsorted file on success.

            The sorted BAM path is appended to sortList and the sort task label
            is added to the enclosing scope's nextStepWait set.
            """
            unsortedBam = self.paths.getTmpUnsortRealignBamPath(gid, label)
            sortedBam = self.paths.getTmpRealignBamPath(gid, label)
            sortList.append(sortedBam)

            # the sort command is handed the output path minus its last four
            # characters (the ".bam" suffix)
            sortedPrefix = sortedBam[:-4]
            sortCmd = '"%s" sort "%s" "%s" && rm -f "%s"' % (
                self.params.samtoolsBin, unsortedBam, sortedPrefix, unsortedBam)

            sortTaskLabel = preJoin(taskPrefix, "sortRealignedSegment_" + label + "_" + gid)
            self.addTask(sortTaskLabel, sortCmd, dependencies=callTask, memMb=self.params.callMemMb)
            nextStepWait.add(sortTaskLabel)
Example #12
0
def estimateParametersFromErrorCounts(self, sampleIndex, taskPrefix="", dependencies=None) :
    """
    Estimate variant error parameters from sequencing error count data

    Schedules a single self.params.estimateVariantErrorRatesBin task which
    reads the error counts file of sampleIndex and writes that sample's indel
    error model file.

    @return the value returned by addTask for the estimation task
    """

    taskLabel = preJoin(taskPrefix, "estimateVariantErrorRates")
    estimateCmd = [
        self.params.estimateVariantErrorRatesBin,
        "--counts-file", self.paths.getErrorCountsOutputPath(sampleIndex),
        "--theta-file", self.params.thetaParamFile,
        "--output-file", self.paths.getIndelErrorModelPath(sampleIndex),
        "--fallback-file", self.params.indelErrorRateDefault,
    ]
    return self.addTask(taskLabel, estimateCmd, dependencies=dependencies, isForceLocal=True)
Example #13
0
def summarizeStats(self, taskPrefix="", dependencies=None) :
    """
    Schedule a task which summarizes the alignment stats file in a format
    that's easier for human review.

    @param taskPrefix   prefix joined onto the task label via preJoin()
    @param dependencies upstream task(s) the summary task waits on
    @return a set holding the summary task
    """
    statsPath=self.paths.getStatsPath()

    # The option name is passed without trailing whitespace so the argument
    # list is correct even if the task runner does not normalize arguments.
    cmd = [self.params.mantaStatsSummaryBin]
    cmd.extend(["--align-stats", statsPath])
    cmd.extend(["--output-file", self.paths.getStatsSummaryPath()])
    summarizeTask = self.addTask(preJoin(taskPrefix,"summarizeStats"),cmd,dependencies=dependencies, isForceLocal=True)

    summaryTasks = set()
    summaryTasks.add(summarizeTask)

    return summaryTasks
Example #14
0
    def finishVcf(tmpList, output, label) :
        """
        Schedule the task which produces the final VCF and its tabix index.

        Several segment files are concatenated with self.params.bgcatBin; a
        single segment file is simply moved into place. The task waits on
        completeSegmentsTask and is added to the enclosing scope's finishTasks.

        @param tmpList non-empty list of per-segment VCF paths
        @param output  path of the final VCF
        @param label   prefix of the task label
        """
        assert(len(tmpList) > 0)

        def quote(path) :
            # the command runs through a shell; double-quote each path so
            # whitespace in it cannot split the argument
            return "\"%s\"" % (path)

        if len(tmpList) > 1 :
            catArgs = [quote(self.params.bgcatBin), "-o", quote(output)]
            catArgs.extend([quote(tmpFile) for tmpFile in tmpList])
            catCmd = " ".join(catArgs)
        else :
            catCmd = "mv -f %s %s" % (quote(tmpList[0]), quote(output))

        catCmd += " && %s -p vcf %s" % (quote(self.params.tabixBin), quote(output))
        finishTasks.add(self.addTask(preJoin(taskPrefix,label+"_finalizeVCF"), catCmd, dependencies=completeSegmentsTask))
Example #15
0
def callGenome(self,taskPrefix="",dependencies=None):
    """
    run variant caller on all genome segments

    A temporary segment directory is created, callGenomeSegment() schedules the
    work for every segment produced by getNextGenomeSegment(), and once all
    segments are complete the per-segment gVCF files are combined into the
    final gVCF. The temporary directory is removed after the finish tasks.

    @param taskPrefix   prefix joined onto every task label via preJoin()
    @param dependencies upstream task(s) the temporary-directory task waits on
    @return the set of finish tasks; the cleanup task is not included
    """

    tmpGraphDir=self.paths.getTmpSegmentDir()

    # Argument-list form, so the directory path is never word-split by a shell.
    dirTask=self.addTask(preJoin(taskPrefix,"makeTmpDir"), ["mkdir", "-p", tmpGraphDir], dependencies=dependencies, isForceLocal=True)

    graphTasks = set()

    segFiles = TempSegmentFiles()
    for gseg in getNextGenomeSegment(self.params) :

        graphTasks |= callGenomeSegment(self, gseg, segFiles, dependencies=dirTask)

    # create a checkpoint for all segments:
    completeSegmentsTask = self.addTask(preJoin(taskPrefix,"completedAllGenomeSegments"),dependencies=graphTasks)

    finishTasks = set()

    def quote(path) :
        # these commands run through a shell; double-quote each path so
        # whitespace in it cannot split the argument
        return "\"%s\"" % (path)

    def finishVcf(tmpList, output, label) :
        """
        Concatenate (or move, for a single file) the segment VCFs in tmpList
        into output and tabix-index the result.
        """
        assert(len(tmpList) > 0)

        if len(tmpList) > 1 :
            catArgs = [quote(self.params.bgcatBin), "-o", quote(output)]
            catArgs.extend([quote(tmpFile) for tmpFile in tmpList])
            catCmd = " ".join(catArgs)
        else :
            catCmd = "mv -f %s %s" % (quote(tmpList[0]), quote(output))

        catCmd += " && %s -p vcf %s" % (quote(self.params.tabixBin), quote(output))
        finishTasks.add(self.addTask(preJoin(taskPrefix,label+"_finalizeVCF"), catCmd, dependencies=completeSegmentsTask))

    finishVcf(segFiles.gvcf, self.paths.getGvcfOutputPath(),"gVCF")

    # cleanup runs after the finish tasks; downstream steps do not wait on it
    self.addTask(preJoin(taskPrefix,"cleanTmpDir"), ["rm", "-rf", tmpGraphDir], dependencies=finishTasks, isForceLocal=True)

    nextStepWait = finishTasks

    return nextStepWait
def countGenomeSegment(self,
                       sampleIndex,
                       gseg,
                       segFiles,
                       taskPrefix="",
                       dependencies=None):
    """
    Extract sequencing error count data from the genome segment specified by gseg.bamRegion

    The paths of the segment's counts file and non-empty-site-count file are
    appended to segFiles.counts and segFiles.nonEmptySiteCounts respectively.

    @return the label of the scheduled counting task
    """

    segmentId = gseg.id

    countCmd = [
        self.params.getCountsBin,
        "--region", gseg.bamRegion,
        "--ref", self.params.referenceFasta,
        "-genome-size", str(self.params.totalKnownReferenceSize),
        "-max-indel-size", "50",
    ]

    countsPath = self.paths.getTmpSegmentErrorCountsPath(sampleIndex, segmentId)
    segFiles.counts.append(countsPath)
    countCmd += ["--counts-file", countsPath]

    nonEmptyPath = self.paths.getTmpSegmentNonemptySiteCountsPath(sampleIndex, segmentId)
    segFiles.nonEmptySiteCounts.append(nonEmptyPath)
    countCmd += ["--nonempty-site-count-file", nonEmptyPath]

    countCmd += ["--align-file", self.params.bamList[sampleIndex]]

    if self.params.isHighDepthFilter:
        countCmd += ["--chrom-depth-file", self.paths.getChromDepth()]

    # optional VCF inputs: each list may be None, in which case it is skipped
    vcfOptions = (
        (self.params.indelCandidatesList, '--candidate-indel-input-vcf'),
        (self.params.forcedGTList, '--force-output-vcf'),
    )
    for (vcfList, optionName) in vcfOptions:
        if vcfList is None:
            continue
        for vcfPath in vcfList:
            countCmd += [optionName, vcfPath]

    taskLabel = preJoin(taskPrefix, "countErrors_" + gseg.id)
    self.addTask(taskLabel,
                 countCmd,
                 dependencies=dependencies,
                 memMb=self.params.callMemMb)

    return taskLabel
def mergeSequenceErrorCounts(self, taskPrefix, dependencies, runStatsLogPaths):
    """
    Schedule the task which merges the per-segment error counts files listed
    in runStatsLogPaths into the error counts output file.

    @return the value returned by addTask for the merge task
    """
    mergeCmd = [self.params.mergeCountsBin]
    for countsPath in runStatsLogPaths:
        mergeCmd += ["--counts-file", countsPath]
    mergeCmd += ["--output-file", self.paths.getErrorCountsOutputPath()]

    return self.addTask(preJoin(taskPrefix, "mergeCounts"),
                        mergeCmd,
                        dependencies=dependencies,
                        isForceLocal=True)
        def catRealignedBam(sampleIndex):
            """
            Schedule concatenation of the per-segment realigned BAMs of
            sampleIndex into that sample's realigned BAM path.

            The task waits on completeSegmentsTask and is added to the
            enclosing scope's finishTasks set.
            """
            mergedBam = self.paths.getRealignedBamPath(sampleIndex)
            segmentBams = segFiles.sample[sampleIndex].bamRealign

            catLabel = preJoin(
                taskPrefix,
                "realignedBamCat_" + self.paths.sampleLabel(sampleIndex))
            catTask = self.addTask(
                catLabel,
                bamListCatCmd(self.params.samtoolsBin, segmentBams, mergedBam),
                dependencies=completeSegmentsTask)
            finishTasks.add(catTask)
Example #19
0
def mergeSupportBams(self,
                     mergeBamTasks,
                     taskPrefix="",
                     isNormal=True,
                     bamIdx=0,
                     dependencies=None):
    """
    Schedule a merge task and an index task for the evidence BAM of each sample.

    Samples come from self.params.normalBamList when isNormal is true and from
    self.params.tumorBamList otherwise. Each scheduled task is added to the
    caller-supplied mergeBamTasks set.

    @param bamIdx index assigned to the first sample; incremented once per sample
    @return the index following the last sample processed
    """

    sampleBams = (self.params.normalBamList
                  if isNormal else self.params.tumorBamList)

    nextIdx = bamIdx
    for bamPath in sampleBams:
        evidenceBam = self.paths.getFinalSupportBamPath(bamPath, nextIdx)

        # merge support bams
        mergeTask = self.addTask(
            preJoin(taskPrefix, "merge_evidenceBam_%s" % (nextIdx)),
            [
                sys.executable,
                self.params.mantaMergeBam,
                self.params.samtoolsBin,
                self.paths.getSortedSupportBamMask(nextIdx),
                evidenceBam,
                self.paths.getSupportBamListPath(nextIdx),
            ],
            dependencies=dependencies)
        mergeBamTasks.add(mergeTask)

        # index the filtered bam
        ### TODO still needs to handle the case where the evidence bam does not exist
        indexTask = self.addTask(
            preJoin(taskPrefix, "index_evidenceBam_%s" % (nextIdx)),
            [self.params.samtoolsBin, "index", evidenceBam],
            dependencies=mergeTask)
        mergeBamTasks.add(indexTask)

        nextIdx += 1

    return nextIdx
Example #20
0
 def extractSmall(inPath, outPath):
     """
     Schedule extraction of small indels from inPath into outPath, followed
     by tabix indexing of outPath.

     The size limit is self.params.minScoredVariantSize - 1; nothing is
     scheduled when that limit is below 1. The tabix task is added to the
     enclosing scope's nextStepWait set.
     """
     sizeLimit = int(self.params.minScoredVariantSize) - 1
     if sizeLimit >= 1:
         extractTask = self.addTask(
             preJoin(taskPrefix, "extractSmallIndels"),
             getExtractSmallCmd(sizeLimit, inPath, outPath),
             dependencies=candSortTask,
             isForceLocal=True)
         tabixTask = self.addTask(extractTask + "_tabix",
                                  getVcfTabixCmd(outPath),
                                  dependencies=extractTask,
                                  isForceLocal=True)
         nextStepWait.add(tabixTask)
Example #21
0
    def sortVcfs(pathList, outPath, label, isDiploid=False, isCandidate=False):
        """
        Schedule header fix-up, sorting and tabix indexing for a list of VCFs.

        The first file in pathList is rewritten in place through
        self.params.vcfCmdlineSwapper, the input paths are written to a list
        file by a listFileWorkflow sub-workflow, and the sort reads that list
        file. The tabix task is added to the enclosing scope's nextStepWait.

        @return the sort task, or an empty set when pathList is empty
        """
        if not pathList:
            return set()

        # make header modifications to first vcf in list of files to be sorted:
        firstVcf = pathList[0]
        reheaderTmp = firstVcf + ".reheader.tmp"
        headerFixCmd = " ".join([
            '"%s" "%s"' % (sys.executable, self.params.vcfCmdlineSwapper),
            '"' + " ".join(self.params.configCommandLine) + '"',
            '< "%s" > "%s"' % (firstVcf, reheaderTmp),
            "&&",
            " ".join(getMvCmd()),
            '"%s" "%s"' % (reheaderTmp, firstVcf),
        ])

        headerFixTask = preJoin(taskPrefix, "fixVcfHeader_" + label)
        self.addTask(headerFixTask,
                     headerFixCmd,
                     dependencies=dependencies,
                     isForceLocal=True)

        vcfListFile = self.paths.getVcfListPath(label)
        listTask = self.addWorkflowTask(
            preJoin(taskPrefix, label + "InputList"),
            listFileWorkflow(vcfListFile, pathList),
            dependencies=headerFixTask)

        sortTask = self.addTask(
            preJoin(taskPrefix, "sort_" + label),
            getVcfSortCmd(vcfListFile, outPath, isDiploid, isCandidate),
            dependencies=listTask)

        tabixTask = self.addTask(preJoin(taskPrefix, "tabix_" + label),
                                 getVcfTabixCmd(outPath),
                                 dependencies=sortTask,
                                 isForceLocal=True)
        nextStepWait.add(tabixTask)

        return sortTask
Example #22
0
def runStats(self,taskPrefix="",dependencies=None) :
    """
    Schedule alignment stats generation over all normal and tumor BAMs, plus
    a human-readable summary of the resulting stats file.

    @param taskPrefix   prefix joined onto every task label via preJoin()
    @param dependencies upstream task(s) the stats task waits on
    @return a set holding the stats generation task; the summary task is not
            included, so downstream steps do not wait on it
    """

    statsPath=self.paths.getStatsPath()

    cmd = [ self.params.mantaStatsBin ]
    cmd.extend(["--output-file",statsPath])
    for bamPath in self.params.normalBamList :
        cmd.extend(["--align-file",bamPath])
    for bamPath in self.params.tumorBamList :
        cmd.extend(["--tumor-align-file",bamPath])

    statsTask = self.addTask(preJoin(taskPrefix,"generateStats"),cmd,dependencies=dependencies)

    nextStepWait = set()
    nextStepWait.add(statsTask)

    # summarize stats for humans, no need for follow-up tasks to wait for this:
    # The redirect requires a shell, so this command stays a single string;
    # every path is double-quoted so whitespace cannot split it.
    summaryCmd = "\"%s\" --align-stats \"%s\" > \"%s\"" % (self.params.mantaStatsSummaryBin, statsPath, self.paths.getStatsSummaryPath())
    self.addTask(preJoin(taskPrefix,"summarizeStats"),summaryCmd,dependencies=statsTask)

    return nextStepWait
Example #23
0
def runStats(self,taskPrefix="",dependencies=None) :
    """
    Schedule alignment stats generation over all normal and tumor BAMs, plus
    a human-readable summary of the resulting stats file.

    @param taskPrefix   prefix joined onto every task label via preJoin()
    @param dependencies upstream task(s) the stats task waits on
    @return a set holding the stats generation task; the summary task is not
            included, so downstream steps do not wait on it
    """

    statsPath=self.paths.getStatsPath()

    cmd = [ self.params.mantaStatsBin ]
    cmd.extend(["--output-file",statsPath])
    for bamPath in self.params.normalBamList :
        cmd.extend(["--align-file",bamPath])
    for bamPath in self.params.tumorBamList :
        cmd.extend(["--tumor-align-file",bamPath])

    statsTask = self.addTask(preJoin(taskPrefix,"generateStats"),cmd,dependencies=dependencies)

    nextStepWait = set()
    nextStepWait.add(statsTask)

    # summarize stats for humans, no need for follow-up tasks to wait for this:
    # The redirect requires a shell, so this command stays a single string;
    # every path is double-quoted so whitespace cannot split it.
    summaryCmd = "\"%s\" --align-stats \"%s\" > \"%s\"" % (self.params.mantaStatsSummaryBin, statsPath, self.paths.getStatsSummaryPath())
    self.addTask(preJoin(taskPrefix,"summarizeStats"),summaryCmd,dependencies=statsTask)

    return nextStepWait
Example #24
0
    def depthFunc(self,taskPrefix,dependencies,bamFile,outFile) :
        """
        Schedule the depth estimation of one BAM file, scattered per chromosome.

        One self.params.mantaGetChromDepthBin task is created for every entry
        of self.params.chromOrder; the per-chromosome outputs are concatenated
        into outFile in that order.

        @return a set holding the concatenation task
        """
        outputPath=outFile
        outputFilename=os.path.basename(outputPath)

        tmpDir=os.path.join(outputPath+".tmpdir")

        # Argument-list form, so the directory path is never word-split by a shell.
        makeTmpDirCmd = ["mkdir", "-p", tmpDir]
        dirTask=self.addTask(preJoin(taskPrefix,"makeTmpDir"), makeTmpDirCmd, dependencies=dependencies, isForceLocal=True)

        tmpFiles = []
        scatterTasks = set()

        for (chromIndex, chromLabel) in enumerate(self.params.chromOrder) :
            cid = getRobustChromId(chromIndex, chromLabel)
            tmpFiles.append(os.path.join(tmpDir,outputFilename+"_"+cid))
            cmd = [self.params.mantaGetChromDepthBin,"--align-file",bamFile,"--chrom",chromLabel,"--output",tmpFiles[-1]]
            scatterTasks.add(self.addTask(preJoin(taskPrefix,"estimateChromDepth_"+cid),cmd,dependencies=dirTask))

        # the redirect requires a shell; each path is single-quoted
        catCmd = "cat " + " ".join(["'%s'" % (x) for x in tmpFiles]) + " > '%s'" % (outputPath)
        catTask = self.addTask(preJoin(taskPrefix,"catChromDepth"),catCmd,dependencies=scatterTasks, isForceLocal=True)

        nextStepWait = set()
        nextStepWait.add(catTask)

        return nextStepWait
def callGenome(self,taskPrefix="",dependencies=None):
    """
    run counter on all genome segments

    After the temporary segment directory is created, callGenomeSegment()
    schedules the work for every segment produced by getNextGenomeSegment().
    Once all segments are complete the per-segment allele counts are merged
    and, when self.params.isReportObservedIndels is set, the observed indel
    BED files are concatenated and indexed. Unless
    self.params.isRetainTempFiles is set, the temporary directory is removed
    after those finish tasks.

    @return the set of finish tasks; the cleanup task is not included
    @raise Exception when no segment tasks were scheduled
    """

    segmentDir = self.paths.getTmpSegmentDir()
    mkdirTask = self.addTask(preJoin(taskPrefix, "makeTmpDir"),
                             getMkdirCmd() + [segmentDir],
                             dependencies=dependencies,
                             isForceLocal=True)

    segFiles = TempSegmentFiles()
    perSegmentTasks = set()
    for gseg in getNextGenomeSegment(self.params) :
        perSegmentTasks |= callGenomeSegment(self, gseg, segFiles, dependencies=mkdirTask)

    if not perSegmentTasks :
        raise Exception("No genome regions to analyze. Possible target region parse error.")

    # create a checkpoint for all segments:
    checkpointTask = self.addTask(preJoin(taskPrefix, "completedAllGenomeSegments"),
                                  dependencies=perSegmentTasks)

    finishTasks = set()

    # merge segment stats:
    countsTask = mergeSequenceAlleleCounts(self, taskPrefix, checkpointTask, segFiles.counts)
    finishTasks.add(countsTask)

    if self.params.isReportObservedIndels :
        bedTask = self.concatIndexBed(taskPrefix,
                                      checkpointTask,
                                      segFiles.observedIndelBed,
                                      self.paths.getObservedIndelBedPath(),
                                      "observedIndels")
        finishTasks.add(bedTask)

    if not self.params.isRetainTempFiles :
        self.addTask(preJoin(taskPrefix, "rmTmpDir"),
                     getRmdirCmd() + [segmentDir],
                     dependencies=finishTasks,
                     isForceLocal=True)

    return finishTasks
Example #26
0
def summarizeStats(self, taskPrefix="", dependencies=None):
    """
    Schedule a task which summarizes the alignment stats file in a format
    that's easier for human review.

    @param taskPrefix   prefix joined onto the task label via preJoin()
    @param dependencies upstream task(s) the summary task waits on
    @return a set holding the summary task
    """
    statsPath = self.paths.getStatsPath()

    # The option name is passed without trailing whitespace so the argument
    # list is correct even if the task runner does not normalize arguments.
    cmd = [self.params.mantaStatsSummaryBin]
    cmd.extend(["--align-stats", statsPath])
    cmd.extend(["--output-file", self.paths.getStatsSummaryPath()])
    summarizeTask = self.addTask(preJoin(taskPrefix, "summarizeStats"),
                                 cmd,
                                 dependencies=dependencies,
                                 isForceLocal=True)

    summaryTasks = set()
    summaryTasks.add(summarizeTask)

    return summaryTasks
Example #27
0
def runStats(self,taskPrefix="",dependencies=None):
    """
    Add a task which generates alignment stats from all normal and tumor bam files

    Returns a set containing the value returned by addTask for the stats task
    """
    outputPath = self.paths.getStatsPath()

    statsCmd = [ self.params.mantaStatsBin, "--output-file", outputPath ]

    # every bam is passed with the option matching the list it came from
    for normalBam in self.params.normalBamList :
        statsCmd += ["--align-file", normalBam]
    for tumorBam in self.params.tumorBamList :
        statsCmd += ["--tumor-align-file", tumorBam]

    statsTask = self.addTask(preJoin(taskPrefix,"generateStats"), statsCmd, dependencies=dependencies)

    return set([statsTask])
        def sortRealignBam(sampleIndex) :
            """
            Add a task which sorts the realigned bam written for one sample of this genome segment

            The sorted bam path is recorded in the sample's bamRealign list, and the unsorted
            bam is removed once sorting has succeeded.
            """
            realignList = segFiles.sample[sampleIndex].bamRealign

            unsortedPath = self.paths.getTmpUnsortRealignBamPath(genomeSegmentLabel, sampleIndex)
            sortedPath = self.paths.getTmpSortRealignBamPath(genomeSegmentLabel, sampleIndex)
            realignList.append(sortedPath)

            sortCmd = "\"%s\" sort \"%s\" -o \"%s\" && rm -f \"%s\"" % (
                self.params.samtoolsBin, unsortedPath, sortedPath, unsortedPath)

            segmentSuffix = genomeSegmentLabel + "_" + self.paths.sampleLabel(sampleIndex)
            sortTaskLabel = preJoin(taskPrefix, "sortRealignedSegment_" + segmentSuffix)

            # sorting can only start once the segment call task has written the unsorted bam
            self.addTask(sortTaskLabel, sortCmd, dependencies=segTaskLabel, memMb=self.params.callMemMb)
            nextStepWait.add(sortTaskLabel)
 def mergeRunStats(self, taskPrefix, dependencies, runStatsLogPaths):
     """
     Add a task which merges the run stats files listed in runStatsLogPaths

     Each listed file is passed to the stats merge binary as a --stats-file input. The merged
     stats and the report are written to the run stats path and the run stats report path
     provided by self.paths.

     Returns the value returned by addTask for the merge task
     """
     mergeLabel = preJoin(taskPrefix, "mergeRunStats")

     mergeCmd = [self.params.statsMergeBin]
     for statsPath in runStatsLogPaths:
         mergeCmd += ["--stats-file", statsPath]
     mergeCmd += ["--output-file", self.paths.getRunStatsPath()]
     mergeCmd += ["--report-file", self.paths.getRunStatsReportPath()]

     return self.addTask(mergeLabel, mergeCmd, dependencies=dependencies, isForceLocal=True)
Example #30
0
        def sortRealignBam(label, sortList):
            """
            Add a task which sorts the realigned bam of this genome segment for the given label

            The sorted bam path is appended to sortList, and the unsorted bam is removed once
            sorting has succeeded.
            """
            unsortedPath = self.paths.getTmpUnsortRealignBamPath(genomeSegmentLabel, label)
            sortedPath = self.paths.getTmpSortRealignBamPath(genomeSegmentLabel, label)
            sortList.append(sortedPath)

            sortCmd = "\"%s\" sort \"%s\" -o \"%s\" && rm -f \"%s\"" % (
                self.params.samtoolsBin, unsortedPath, sortedPath, unsortedPath)

            segmentSuffix = genomeSegmentLabel + "_" + label
            sortTaskLabel = preJoin(taskPrefix, "sortRealignedSegment_" + segmentSuffix)

            # sorting can only start once the call task has written the unsorted bam
            self.addTask(sortTaskLabel, sortCmd, dependencies=callTask, memMb=self.params.callMemMb)
            nextStepWait.add(sortTaskLabel)
def getSequenceErrorEstimatesForSample(self,
                                       estimationIntervals,
                                       sampleIndex,
                                       taskPrefix="",
                                       dependencies=None):
    """
    Add the tasks which count sequencing errors in one sample and estimate the sample's error
    parameters from those counts

    Returns a set containing the parameter estimation task
    """

    segFiles = TempSequenceAlleleCountsSegmentFiles()

    # either count over the full data set, or keep launching tasks until the required counts are found
    if self.params.isErrorEstimationFromAllData:
        countEvidence = countAllEligibleSequenceEvidence
    else:
        countEvidence = countSequenceEvidenceUntilTargetIsReached

    segmentTasks = set()
    segmentTasks |= countEvidence(self, estimationIntervals, sampleIndex,
                                  segFiles, taskPrefix, dependencies)

    # checkpoint task which completes once every segment task has completed
    checkpointLabel = preJoin(taskPrefix, "completedAllGenomeSegments")
    completeSegmentsTask = self.addTask(checkpointLabel, dependencies=segmentTasks)

    # merge the per-segment counts, then estimate error parameters from the merged counts
    mergeCountsTask = mergeSequenceAlleleCounts(self,
                                                sampleIndex,
                                                segFiles.counts,
                                                taskPrefix=taskPrefix,
                                                dependencies=completeSegmentsTask)
    estimateTask = estimateParametersFromAlleleCounts(self,
                                                      sampleIndex,
                                                      taskPrefix=taskPrefix,
                                                      dependencies=mergeCountsTask)

    return set([estimateTask])
Example #32
0
def callGenomeSegment(self, gseg, segFiles, taskPrefix="", dependencies=None) :
    """
    Add a task which runs the snoise binary on one genome segment and bgzip-compresses its output

    The compressed output path of the segment is appended to segFiles.gvcf. Every segment other
    than the first one (detected by segFiles.gvcf being empty on entry) is run with
    --skip-vcf-header.

    @param gseg: genome segment; its id, chromLabel, beginPos and endPos attributes are read
    @param segFiles: object whose 'gvcf' list accumulates the per-segment output paths
    @param taskPrefix: prefix joined onto the task label via preJoin
    @param dependencies: task(s) which must complete before the segment task runs

    @return set containing the label of the segment task
    """

    isFirstSegment = (len(segFiles.gvcf) == 0)

    segStr = str(gseg.id)

    segCmd = [ self.params.snoiseBin ]

    segCmd.extend(["--region", gseg.chromLabel + ":" + str(gseg.beginPos) + "-" + str(gseg.endPos)])
    # the argument list is joined into one shell string below, so every entry has to be a string:
    segCmd.extend(["-min-mapping-quality", str(self.params.minMapq)])
    segCmd.extend(["--ref", self.params.referenceFasta ])
    segCmd.extend(["-max-window-mismatch", "2", "20" ])
    segCmd.extend(["-genome-size", str(self.params.totalKnownReferenceSize)] )
    segCmd.extend(["-max-indel-size", "50"] )

    segCmd.extend(['-min-qscore','17'])
    segCmd.extend(['-bsnp-ssd-no-mismatch', '0.35'])
    segCmd.extend(['-bsnp-ssd-one-mismatch', '0.6'])
    segCmd.extend(['-min-vexp', '0.25'])

    for bamPath in self.params.bamList :
        segCmd.extend(["--align-file",bamPath])

    if not isFirstSegment :
        segCmd.append("--skip-vcf-header")

    if self.params.indelCandidates is not None :
        segCmd.extend(['--candidate-indel-input-vcf', self.params.indelCandidates])

    # vcf is written to stdout so we need shell features:
    segCmd = " ".join(segCmd)

    segFiles.gvcf.append(self.paths.getTmpSegmentGvcfPath(segStr))
    segCmd += " | %s -c >| %s" % (self.params.bgzip9Bin, segFiles.gvcf[-1])

    nextStepWait = set()

    # use the stringified id so that the label can be built even if the id is not a string
    setTaskLabel=preJoin(taskPrefix,"callGenomeSegment_"+segStr)
    self.addTask(setTaskLabel,segCmd,dependencies=dependencies,memMb=self.params.callMemMb)
    nextStepWait.add(setTaskLabel)

    return nextStepWait
def mergeSequenceAlleleCounts(self,
                              sampleIndex,
                              segmentAlleleCountsFiles,
                              taskPrefix="",
                              dependencies=None):
    """
    Add a task which merges the sequencing error counts generated from multiple genome regions
    into a single error count set for the sample

    Returns the value returned by addTask for the merge task
    """

    mergeLabel = preJoin(taskPrefix, "mergeCounts")

    mergeCmd = [self.params.mergeCountsBin,
                "--output-file",
                self.paths.getAlleleCountsOutputPath(sampleIndex)]
    for countsPath in segmentAlleleCountsFiles:
        mergeCmd += ["--counts-file", countsPath]

    return self.addTask(mergeLabel, mergeCmd, dependencies=dependencies, isForceLocal=True)
    def compressRawVcf(rawVcfFilename, label) :
        """
        Add a task which bgzip-compresses one raw vcf file and return the compressed file path

        For the first segment the vcf is also piped through the vcf command line swapper
        script before compression.
        """

        compressedVariantsPath = rawVcfFilename +".gz"

        # stages of the shell pipeline, in order
        pipeline = ["cat "+quote(rawVcfFilename)]

        if isFirstSegment :
            headerFixCmd  = "\"%s\" -E \"%s\"" % (sys.executable, self.params.vcfCmdlineSwapper)
            headerFixCmd += ' "' + " ".join(self.params.configCommandLine) + '"'
            pipeline.append(headerFixCmd)

        pipeline.append("\"%s\" -c >| \"%s\"" % (self.params.bgzip9Bin, compressedVariantsPath))
        compressCmd = " | ".join(pipeline)

        segmentSuffix = genomeSegmentLabel+"_"+label
        compressTaskLabel=preJoin(taskPrefix,"compressGenomeSegment_"+segmentSuffix)
        self.addTask(compressTaskLabel, compressCmd, dependencies=segTaskLabel, memMb=self.params.callMemMb)
        nextStepWait.add(compressTaskLabel)
        return compressedVariantsPath
Example #35
0
def runDepth(self,taskPrefix="",dependencies=None) :
    """
    Add a task which estimates chrom depth from a single bam file

    The first normal bam is used when there is one, otherwise the first tumor bam. When there
    are no bam files at all, no task is added and an empty set is returned.
    """

    normalBams = self.params.normalBamList
    if len(normalBams) :
        bamFile = normalBams[0]
    else :
        tumorBams = self.params.tumorBamList
        if not len(tumorBams) :
            return set()
        bamFile = tumorBams[0]

    # depth is written to stdout, so the task is a shell string with a redirect
    cmdParts = [
        "%s -E %s" % (sys.executable, self.params.getChromDepth),
        " --bam '%s'" % (bamFile),
        " > %s" % (self.paths.getChromDepth()),
    ]
    depthCmd = "".join(cmdParts)

    depthTask = self.addTask(preJoin(taskPrefix,"estimateChromDepth"), depthCmd, dependencies=dependencies)

    return set([depthTask])
Example #36
0
def sortEvidenceBams(self,
                     sortBamTasks,
                     taskPrefix="",
                     binStr="",
                     dependencies=None):
    """
    Add one evidence bam sorting task per input bam (normal bams first, then tumor bams)

    The value returned by addTask for each sorting task is added to sortBamTasks.
    """

    bamCount = len(self.params.normalBamList + self.params.tumorBamList)

    for bamIdx in range(bamCount):
        unsortedBam = self.paths.getSupportBamPath(bamIdx, binStr)
        sortedBam = self.paths.getSortedSupportBamPath(bamIdx, binStr)

        # first check the existence of the supporting bam
        # then sort the bam only if it exists
        sortCmd = [sys.executable, self.params.mantaSortBam]
        sortCmd += [self.params.samtoolsBin, unsortedBam, sortedBam]

        sortLabel = preJoin(taskPrefix, "sortEvidenceBam_%s_%s" % (binStr, bamIdx))
        sortTask = self.addTask(sortLabel, sortCmd, dependencies=dependencies)
        sortBamTasks.add(sortTask)
Example #37
0
def sortBams(self, sortBamTasks, taskPrefix="", binStr="", isNormal=True, bamIdx=0, dependencies=None):
    """
    Add one evidence bam sorting task per normal bam (isNormal) or per tumor bam (not isNormal)

    Bam indices are assigned consecutively starting from bamIdx. The value returned by addTask
    for each sorting task is added to sortBamTasks.

    Returns the first bam index which has not been used
    """

    bamList = self.params.normalBamList if isNormal else self.params.tumorBamList

    firstIdx = bamIdx
    endIdx = firstIdx + len(bamList)

    for idx in range(firstIdx, endIdx):
        unsortedBam = self.paths.getSupportBamPath(idx, binStr)
        sortedBam = self.paths.getSortedSupportBamPath(idx, binStr)

        # first check the existence of the supporting bam
        # then sort the bam only if it exists
        sortCmd = [ sys.executable, self.params.mantaSortBam ]
        sortCmd += [ self.params.samtoolsBin, unsortedBam, sortedBam ]

        sortLabel = preJoin(taskPrefix, "sortEvidenceBam_%s_%s" % (binStr, idx))
        sortTask = self.addTask(sortLabel, sortCmd, dependencies=dependencies)
        sortBamTasks.add(sortTask)

    return endIdx
Example #38
0
    def launchNextTask() :
        """
        Launch the next counting task in queue for this sample, followed by an ephemeral
        workflow task which updates the completed task tracker for that counting task

        Return False if there are no more jobs to launch, True otherwise
        """
        # the number of tasks launched so far is also the index of the next interval to process
        nextIndex = len(allTasks)
        if nextIndex >= len(estimationIntervals) :
            return False

        nextSegment = estimationIntervals[nextIndex]
        countTask = countGenomeSegment(self, sampleIndex, nextSegment, segFiles,
                                       taskPrefix=taskPrefix, dependencies=dependencies)

        allTasks.add(countTask)
        taskByIndex.append(countTask)

        # the tracker update reads the site counts entry most recently added to segFiles
        trackerWorkflow = UpdateCompletedTaskTrackerWorkflow(nextIndex,
                                                             segFiles.nonEmptySiteCounts[-1],
                                                             completedTaskTracker)
        trackerLabel = preJoin(taskPrefix,"trackCounts_"+nextSegment.id)
        self.addWorkflowTask(trackerLabel, trackerWorkflow, dependencies=countTask, isEphemeral=True)

        return True
Example #39
0
def callGenomeSegment(self, gsegGroup, segFiles, taskPrefix="", dependencies=None) :
    """
    Add the tasks which run the somatic variant caller on one group of genome segments

    A call task is added for the segment group, followed by a vcf header fix task (first
    segment group only), a task which bgzip-compresses the segment outputs and, when realigned
    bam output is enabled, one sort task each for the normal and tumor realigned bams. The paths
    of the per-segment outputs are appended to the corresponding lists in segFiles.

    @param gsegGroup: non-empty sequence of genome segments; the bamRegion of every segment and
                      the id of the first and last segment are read
    @param segFiles: accumulator of per-segment output paths (its snv, indel, callable, stats,
                     normalRealign and tumorRealign lists are appended to)
    @param taskPrefix: prefix joined onto every task label via preJoin
    @param dependencies: task(s) which must complete before the call task runs

    @return set of task labels which later steps have to wait on
    """

    assert(len(gsegGroup) != 0)
    gid=gsegGroup[0].id
    if len(gsegGroup) > 1 :
        gid += "_to_"+gsegGroup[-1].id

    # no snv output registered yet means that this is the first segment group being set up
    isFirstSegment = (len(segFiles.snv) == 0)

    segCmd = [ self.params.strelkaSomaticBin ]
    for gseg in gsegGroup :
        segCmd.extend(["--region", gseg.bamRegion])

    segCmd.append("-filter-unanchored")
    segCmd.extend(["-min-mapping-quality",str(self.params.minTier1Mapq)])
    segCmd.extend(["-min-qscore","0"])
    segCmd.extend(["--ref", self.params.referenceFasta ])
    segCmd.extend(["-max-window-mismatch", "3", "20" ])
    segCmd.extend(["-genome-size", str(self.params.knownSize)] )
    segCmd.extend(["-max-indel-size", "50"] )
    segCmd.extend(["-indel-nonsite-match-prob", "0.5"] )
    segCmd.extend(["--somatic-snv-rate", str(self.params.ssnvPrior) ] )
    segCmd.extend(["--shared-site-error-rate", str(self.params.ssnvNoise) ] )
    segCmd.extend(["--shared-site-error-strand-bias-fraction", str(self.params.ssnvNoiseStrandBiasFrac) ] )
    segCmd.extend(["--somatic-indel-rate", str(self.params.sindelPrior) ] )
    segCmd.extend(["--shared-indel-error-factor", str(self.params.sindelNoiseFactor)])
    segCmd.extend(["--tier2-min-mapping-quality", str(self.params.minTier2Mapq) ] )
    segCmd.extend(["--tier2-mismatch-density-filter-count", "10"] )
    segCmd.append("--tier2-no-filter-unanchored")
    segCmd.extend(["--tier2-indel-nonsite-match-prob", "0.25"] )
    segCmd.append("--tier2-include-singleton")
    segCmd.append("--tier2-include-anomalous")

    segCmd.extend(["--strelka-snv-max-filtered-basecall-frac", str(self.params.snvMaxFilteredBasecallFrac)])
    segCmd.extend(["--strelka-snv-max-spanning-deletion-frac", str(self.params.snvMaxSpanningDeletionFrac)])
    segCmd.extend(["--strelka-snv-min-qss-ref", str(self.params.ssnvQuality_LowerBound)])

    segCmd.extend(["--strelka-indel-max-window-filtered-basecall-frac", str(self.params.indelMaxWindowFilteredBasecallFrac)])
    segCmd.extend(["--strelka-indel-min-qsi-ref", str(self.params.sindelQuality_LowerBound)])

    if self.params.indelErrorModelName is not None :
        segCmd.extend(['--indel-error-model-name',self.params.indelErrorModelName])
    if self.params.inputIndelErrorModelsFile is not None :
        segCmd.extend(['--indel-error-models-file', self.params.inputIndelErrorModelsFile])

    segCmd.extend(["--ssnv-contam-tolerance", str(self.params.ssnvContamTolerance) ] )
    segCmd.extend(["--indel-contam-tolerance", str(self.params.indelContamTolerance) ] )

    if self.params.isEVS :
        if self.params.somaticSnvScoringModelFile is not None :
            segCmd.extend(['--somatic-snv-scoring-model-file', self.params.somaticSnvScoringModelFile])
        if self.params.somaticIndelScoringModelFile is not None :
            segCmd.extend(['--somatic-indel-scoring-model-file', self.params.somaticIndelScoringModelFile])

    if self.params.isReportEVSFeatures :
        segCmd.append("--report-evs-features")

    for bamPath in self.params.normalBamList :
        segCmd.extend(["--normal-align-file", bamPath])
    for bamPath in self.params.tumorBamList :
        segCmd.extend(["--tumor-align-file", bamPath])

    # the caller writes uncompressed files; the ".gz" paths registered in segFiles are produced
    # by the compress task added further below
    tmpSnvPath = self.paths.getTmpSegmentSnvPath(gid)
    segFiles.snv.append(tmpSnvPath+".gz")
    # option names are given exactly, with no surrounding whitespace, because the command is
    # handed over as an argument list rather than as a shell string
    segCmd.extend(["--somatic-snv-file", tmpSnvPath ] )

    tmpIndelPath = self.paths.getTmpSegmentIndelPath(gid)
    segFiles.indel.append(tmpIndelPath+".gz")
    segCmd.extend(["--somatic-indel-file", tmpIndelPath ] )

    if self.params.isOutputCallableRegions :
        tmpCallablePath = self.paths.getTmpSegmentRegionPath(gid)
        segFiles.callable.append(tmpCallablePath+".gz")
        segCmd.extend(["--somatic-callable-regions-file", tmpCallablePath ])

    if self.params.isWriteRealignedBam :
        segCmd.extend(["-realigned-read-file", self.paths.getTmpUnsortRealignBamPath(gid, "normal")])
        segCmd.extend(["--tumor-realigned-read-file",self.paths.getTmpUnsortRealignBamPath(gid, "tumor")])

    def addListCmdOption(optList,arg) :
        """
        Add 'arg val' to the segment command for every val in optList (optList may be None)
        """
        if optList is None : return
        for val in optList :
            segCmd.extend([arg, val])

    addListCmdOption(self.params.indelCandidatesList, '--candidate-indel-input-vcf')
    addListCmdOption(self.params.forcedGTList, '--force-output-vcf')
    addListCmdOption(self.params.noiseVcfList, '--noise-vcf')

    segFiles.stats.append(self.paths.getTmpRunStatsPath(gid))
    segCmd.extend(["--stats-file", segFiles.stats[-1]])

    if not isFirstSegment :
        segCmd.append("--strelka-skip-header")

    if self.params.isHighDepthFilter :
        segCmd.extend(["--strelka-chrom-depth-file", self.paths.getChromDepth()])
        # converted to a string like every other numeric option value above
        segCmd.extend(["--strelka-max-depth-factor", str(self.params.depthFilterMultiple)])

    if self.params.extraVariantCallerArguments is not None :
        for arg in self.params.extraVariantCallerArguments.strip().split() :
            segCmd.append(arg)


    nextStepWait = set()

    callTask=preJoin(taskPrefix,"callGenomeSegment_"+gid)
    self.addTask(callTask,segCmd,dependencies=dependencies,memMb=self.params.callMemMb)

    # fix vcf header to use parent pyflow cmdline instead of random segment command:
    compressWaitFor=callTask
    if isFirstSegment :
        headerFixTask=preJoin(taskPrefix,"fixVcfHeader_"+gid)
        def getHeaderFixCmd(fileName) :
            """
            Shell command which rewrites fileName via a temporary file, then moves it back in place
            """
            tmpName=fileName+".reheader.tmp"
            cmd  = "\"%s\" -E \"%s\"" % (sys.executable, self.params.vcfCmdlineSwapper)
            cmd += ' "' + " ".join(self.params.configCommandLine) + '"'
            cmd += " < \"%s\" > \"%s\" && mv \"%s\" \"%s\"" % (fileName,tmpName,
                                                               tmpName, fileName)
            return cmd

        headerFixCmd  = getHeaderFixCmd(tmpSnvPath)
        headerFixCmd += " && "
        headerFixCmd += getHeaderFixCmd(tmpIndelPath)

        self.addTask(headerFixTask, headerFixCmd, dependencies=callTask, isForceLocal=True)
        # compression must wait until the header has been rewritten
        compressWaitFor=headerFixTask

    compressTask=preJoin(taskPrefix,"compressSegmentOutput_"+gid)
    compressCmd="\"%s\" \"%s\" && \"%s\" \"%s\"" % (self.params.bgzipBin, tmpSnvPath, self.params.bgzipBin, tmpIndelPath)
    if self.params.isOutputCallableRegions :
        compressCmd += " && \"%s\" \"%s\"" % (self.params.bgzipBin, self.paths.getTmpSegmentRegionPath(gid))

    self.addTask(compressTask, compressCmd, dependencies=compressWaitFor, isForceLocal=True)
    nextStepWait.add(compressTask)

    if self.params.isWriteRealignedBam :
        def sortRealignBam(label, sortList) :
            """
            Add a task which sorts the realigned bam of this segment group for the given label
            """
            unsortedBam = self.paths.getTmpUnsortRealignBamPath(gid, label)
            sortedBam   = self.paths.getTmpRealignBamPath(gid, label)
            sortList.append(sortedBam)

            # the sort command is given the sorted path with the ".bam" suffix removed
            sortedPrefix = sortedBam[:-4]
            sortCmd="\"%s\" sort \"%s\" \"%s\" && rm -f \"%s\"" % (self.params.samtoolsBin,unsortedBam,sortedPrefix,unsortedBam)

            sortTaskLabel=preJoin(taskPrefix,"sortRealignedSegment_"+label+"_"+gid)
            self.addTask(sortTaskLabel,sortCmd,dependencies=callTask,memMb=self.params.callMemMb)
            nextStepWait.add(sortTaskLabel)

        sortRealignBam("normal", segFiles.normalRealign)
        sortRealignBam("tumor", segFiles.tumorRealign)

    return nextStepWait
Example #40
0
    def depthFunc(self, taskPrefix, dependencies, bamFile, outFile):
        """
        Add the tasks which estimate chromosome depth for one bam file

        Depth is estimated separately for each chromosome group, with the per-group results
        written into a temporary directory named after outFile. The per-group results are then
        combined into outFile by the cat script.

        Returns a set containing the value returned by addTask for the final cat task
        """
        outputBasename = os.path.basename(outFile)

        scratchDir = os.path.join(outFile + ".tmpdir")
        dirTask = self.addTask(preJoin(taskPrefix, "makeTmpDir"),
                               getMkdirCmd() + [scratchDir],
                               dependencies=dependencies,
                               isForceLocal=True)

        def getChromosomeGroups(params):
            """
            Generator yielding successive groups of (chromIndex, chromLabel) pairs

            Consecutive chromosomes/contigs are collected into one group for as long as their
            combined size stays within a fixed limit. Skipped chromosomes are left out.
            """
            sizeLimit = 200000
            pending = []
            pendingSize = 0

            chromCount = len(params.chromSizes)
            assert (len(params.chromOrder) == chromCount)
            for chromIndex in range(chromCount):
                chromLabel = params.chromOrder[chromIndex]
                if chromLabel in params.chromIsSkipped:
                    continue

                chromSize = params.chromSizes[chromLabel]
                if pendingSize + chromSize > sizeLimit:
                    # this chromosome does not fit: emit what has been collected and start afresh
                    if pending:
                        yield (pending)
                    pending = [(chromIndex, chromLabel)]
                    pendingSize = chromSize
                else:
                    pending.append((chromIndex, chromLabel))
                    pendingSize += chromSize
            if pending:
                yield (pending)

        groupFiles = []
        groupTasks = set()

        for chromGroup in getChromosomeGroups(self.params):
            assert (len(chromGroup) > 0)

            # the group id is built from the first (and, for larger groups, the last) member
            (firstIndex, firstLabel) = chromGroup[0]
            cid = getRobustChromId(firstIndex, firstLabel)
            if len(chromGroup) > 1:
                (lastIndex, lastLabel) = chromGroup[-1]
                cid += "_to_" + getRobustChromId(lastIndex, lastLabel)

            groupFile = os.path.join(scratchDir, outputBasename + "_" + cid)
            groupFiles.append(groupFile)

            depthCmd = [self.params.getChromDepthBin]
            depthCmd += ["--align-file", bamFile]
            depthCmd += ["--output", groupFile]
            for (_, chromLabel) in chromGroup:
                depthCmd += ["--chrom", chromLabel]

            depthLabel = preJoin(taskPrefix, "estimateChromDepth_" + cid)
            groupTasks.add(self.addTask(depthLabel, depthCmd, dependencies=dirTask))

        catCmd = [self.params.catScript, "--output", outFile] + groupFiles
        catTask = self.addTask(preJoin(taskPrefix, "catChromDepth"),
                               catCmd,
                               dependencies=groupTasks,
                               isForceLocal=True)

        return set([catTask])
Example #41
0
def callGenomeSegment(self,
                      gsegGroup,
                      segFiles,
                      taskPrefix="",
                      dependencies=None):
    """
    Add the tasks which run the germline variant caller on one group of genome segments

    A call task is added for the segment group, followed by one compression task for the
    variants vcf and one per sample gvcf and, when realigned bam output is enabled, a task
    which sorts the realigned bam. The paths of the per-segment outputs are appended to the
    corresponding lists in segFiles.

    @param gsegGroup: non-empty sequence of genome segments; the chromLabel of every segment
                      and the id of the first and last segment are read
    @param segFiles: accumulator of per-segment output paths (its variants, stats, bamRealign
                     and per-sample gvcf lists are appended to)
    @param taskPrefix: prefix joined onto every task label via preJoin
    @param dependencies: task(s) which must complete before the call task runs

    @return set of task labels which later steps have to wait on
    """

    assert (len(gsegGroup) != 0)
    gid = gsegGroup[0].id
    if len(gsegGroup) > 1:
        gid += "_to_" + gsegGroup[-1].id

    # no variants output registered yet means that this is the first segment group being set up
    isFirstSegment = (len(segFiles.variants) == 0)

    segCmd = [self.params.strelkaGermlineBin]

    self.appendCommonGenomeSegmentCommandOptions(gsegGroup, segCmd)

    segCmd.extend(["-min-mapping-quality", self.params.minMapq])
    segCmd.extend(["-max-window-mismatch", "2", "20"])

    segCmd.extend(
        ["--gvcf-output-prefix",
         self.paths.getTmpSegmentGvcfPrefix(gid)])
    segCmd.extend(['--gvcf-min-gqx', '15'])
    segCmd.extend(['--gvcf-min-homref-gqx', '15'])
    segCmd.extend(['--gvcf-max-snv-strand-bias', '10'])
    segCmd.extend(['-min-qscore', '17'])
    segCmd.extend(['-bsnp-ssd-no-mismatch', '0.35'])
    segCmd.extend(['-bsnp-ssd-one-mismatch', '0.6'])
    segCmd.extend(['-min-vexp', '0.25'])
    segCmd.extend(['--enable-read-backed-phasing'])

    segFiles.stats.append(self.paths.getTmpRunStatsPath(gid))
    segCmd.extend(["--stats-file", segFiles.stats[-1]])

    if self.params.isRNA:
        segCmd.extend(['-bsnp-diploid-het-bias', '0.45'])
        segCmd.extend(['--use-rna-scoring'])
        segCmd.extend(['--retain-optimal-soft-clipping'])

    # Empirical Variant Scoring(EVS):
    if self.params.isEVS:
        if self.params.snvScoringModelFile is not None:
            segCmd.extend(
                ['--snv-scoring-model-file', self.params.snvScoringModelFile])
        if self.params.indelScoringModelFile is not None:
            segCmd.extend([
                '--indel-scoring-model-file', self.params.indelScoringModelFile
            ])

    for bamPath in self.params.bamList:
        segCmd.extend(["--align-file", bamPath])

    if not isFirstSegment:
        segCmd.append("--gvcf-skip-header")
    elif self.params.callContinuousVf:
        # truth test instead of len(): callContinuousVf is treated as possibly None further
        # below, and len(None) would raise here on the first segment
        segCmd.extend(["--gvcf-include-header", "VF"])

    if self.params.isHighDepthFilter:
        segCmd.extend(["--chrom-depth-file", self.paths.getChromDepth()])

    # TODO STREL-125 come up with new solution for outbams
    if self.params.isWriteRealignedBam:
        segCmd.extend([
            "-realigned-read-file",
            self.paths.getTmpUnsortRealignBamPath(gid)
        ])

    if self.params.noCompressBed is not None:
        segCmd.extend(['--nocompress-bed', self.params.noCompressBed])

    if self.params.ploidyFilename is not None:
        segCmd.extend(['--ploidy-region-vcf', self.params.ploidyFilename])

    for gseg in gsegGroup:
        # we have special logic to prevent the continuousVF targets from being grouped, the assertion here
        # verifies that this is working as expected:
        if self.params.callContinuousVf is not None and gseg.chromLabel in self.params.callContinuousVf:
            assert (len(gsegGroup) == 1)
            segCmd.append('--call-continuous-vf')

    if self.params.isEstimateSequenceError:
        # one estimated indel error model file per input bam
        for bamIndex in range(len(self.params.bamList)):
            segCmd.extend([
                '--indel-error-models-file',
                self.paths.getIndelErrorModelPath(bamIndex)
            ])
    else:
        segCmd.extend(
            ['--indel-error-models-file', self.params.indelErrorRateDefault])

    segCmd.extend(['--theta-file', self.params.thetaParamFile])

    segTaskLabel = preJoin(taskPrefix, "callGenomeSegment_" + gid)
    self.addTask(segTaskLabel,
                 segCmd,
                 dependencies=dependencies,
                 memMb=self.params.callMemMb)

    # clean up and compress genome segment files:
    nextStepWait = set()

    def compressRawVcf(rawVcfFilename, label):
        """
        process each raw vcf file with header modifications and bgzip compression

        Returns the path of the compressed vcf file
        """

        compressedVariantsPath = rawVcfFilename + ".gz"
        compressCmd = "cat " + quote(rawVcfFilename)

        if isFirstSegment:

            def getHeaderFixCmd():
                cmd = "\"%s\" -E \"%s\"" % (sys.executable,
                                            self.params.vcfCmdlineSwapper)
                cmd += ' "' + " ".join(self.params.configCommandLine) + '"'
                return cmd

            compressCmd += " | " + getHeaderFixCmd()

        compressCmd += " | \"%s\" -c >| \"%s\"" % (self.params.bgzip9Bin,
                                                   compressedVariantsPath)

        compressTaskLabel = preJoin(
            taskPrefix, "compressGenomeSegment_" + gid + "_" + label)
        self.addTask(compressTaskLabel,
                     compressCmd,
                     dependencies=segTaskLabel,
                     memMb=self.params.callMemMb)
        nextStepWait.add(compressTaskLabel)
        return compressedVariantsPath

    rawVariantsPath = self.paths.getTmpSegmentVariantsPath(gid)
    compressedVariantsPath = compressRawVcf(rawVariantsPath, "variants")
    segFiles.variants.append(compressedVariantsPath)

    sampleCount = len(self.params.bamList)
    for sampleIndex in range(sampleCount):
        rawVariantsPath = self.paths.getTmpSegmentGvcfPath(gid, sampleIndex)
        compressedVariantsPath = compressRawVcf(rawVariantsPath,
                                                gvcfSampleLabel(sampleIndex))
        segFiles.sample[sampleIndex].gvcf.append(compressedVariantsPath)

    if self.params.isWriteRealignedBam:

        def sortRealignBam(sortList):
            """
            Add a task which sorts the realigned bam of this segment group
            """
            unsortedBam = self.paths.getTmpUnsortRealignBamPath(gid)
            sortedBam = self.paths.getTmpRealignBamPath(gid)
            sortList.append(sortedBam)

            # the sort command is given the sorted path with the ".bam" suffix removed
            sortedPrefix = sortedBam[:-4]
            sortCmd = "\"%s\" sort \"%s\" \"%s\" && rm -f \"%s\"" % (
                self.params.samtoolsBin, unsortedBam, sortedPrefix, unsortedBam)

            sortTaskLabel = preJoin(taskPrefix, "sortRealignedSegment_" + gid)
            self.addTask(sortTaskLabel,
                         sortCmd,
                         dependencies=segTaskLabel,
                         memMb=self.params.callMemMb)
            nextStepWait.add(sortTaskLabel)

        sortRealignBam(segFiles.bamRealign)

    return nextStepWait
Example #42
0
 def finishBam(tmpList, output, label):
     """
     Add a task running the command which bamListCatCmd builds from tmpList and output

     The task waits for all genome segments to complete and is recorded in finishTasks.
     """
     catCmd = bamListCatCmd(self.params.samtoolsBin, tmpList, output)
     catLabel = preJoin(taskPrefix, label + "_finalizeBAM")
     catTask = self.addTask(catLabel, catCmd, dependencies=completeSegmentsTask)
     finishTasks.add(catTask)
Example #43
0
def callGenome(self, taskPrefix="", dependencies=None):
    """
    Add the tasks which run the variant caller on all genome segments and merge the results

    Segment tasks write into a temporary segment directory. Once all of them are complete the
    per-segment vcfs, gvcfs, run stats and (optionally) realigned bams are merged, and the
    temporary directory is removed unless temp files are retained.

    Returns the set of merge/finish tasks which later steps have to wait on
    """

    tmpSegmentDir = self.paths.getTmpSegmentDir()
    makeDirCmd = getMkdirCmd() + [tmpSegmentDir]
    dirTask = self.addTask(preJoin(taskPrefix, "makeTmpDir"),
                           makeDirCmd,
                           dependencies=dependencies,
                           isForceLocal=True)

    sampleCount = len(self.params.bamList)
    segFiles = TempVariantCallingSegmentFiles(sampleCount)

    segmentTasks = set()
    segmentGroups = self.getStrelkaGenomeSegmentGroupIterator(
        contigsExcludedFromGrouping=self.params.callContinuousVf)
    for gsegGroup in segmentGroups:
        segmentTasks |= callGenomeSegment(self, gsegGroup, segFiles, dependencies=dirTask)

    if not segmentTasks:
        raise Exception(
            "No genome regions to analyze. Possible target region parse error."
        )

    # checkpoint task which completes once every segment task has completed
    checkpointLabel = preJoin(taskPrefix, "completedAllGenomeSegments")
    completeSegmentsTask = self.addTask(checkpointLabel, dependencies=segmentTasks)

    finishTasks = set()

    # merge the variants vcf, then the gvcf of every sample
    variantsTask = self.concatIndexVcf(taskPrefix, completeSegmentsTask,
                                       segFiles.variants,
                                       self.paths.getVariantsOutputPath(),
                                       "variants")
    finishTasks.add(variantsTask)

    for sampleIndex in range(sampleCount):
        concatTask = self.concatIndexVcf(taskPrefix, completeSegmentsTask,
                                         segFiles.sample[sampleIndex].gvcf,
                                         self.paths.getGvcfOutputPath(sampleIndex),
                                         gvcfSampleLabel(sampleIndex))
        finishTasks.add(concatTask)

        if sampleIndex != 0:
            continue

        # the gvcf of the first sample (and its .tbi file) is also linked under the legacy
        # filename, from within the directory holding the gvcf output
        outputPath = self.paths.getGvcfOutputPath(sampleIndex)
        outputDirname = os.path.dirname(outputPath)
        outputBasename = os.path.basename(outputPath)

        linkCmds = []
        for extension in ("", ".tbi"):
            linkCmds.append("ln -s " + quote(outputBasename + extension) + " " +
                            quote(self.paths.getGvcfLegacyFilename() + extension))
        linkCmd = " && ".join(linkCmds)

        self.addTask(preJoin(taskPrefix, "addLegacyOutputLink"),
                     linkCmd,
                     dependencies=concatTask,
                     isForceLocal=True,
                     cwd=outputDirname)

    # merge segment stats:
    statsTask = self.mergeRunStats(taskPrefix, completeSegmentsTask, segFiles.stats)
    finishTasks.add(statsTask)

    if self.params.isWriteRealignedBam:
        bamLabel = "realigned"
        bamCmd = bamListCatCmd(self.params.samtoolsBin,
                               segFiles.bamRealign,
                               self.paths.getRealignedBamPath())
        bamTask = self.addTask(preJoin(taskPrefix, bamLabel + "_finalizeBAM"),
                               bamCmd,
                               dependencies=completeSegmentsTask)
        finishTasks.add(bamTask)

    if not self.params.isRetainTempFiles:
        # the temporary directory may only be removed after every merge task is done with it
        self.addTask(preJoin(taskPrefix, "removeTmpDir"),
                     getRmdirCmd() + [tmpSegmentDir],
                     dependencies=finishTasks,
                     isForceLocal=True)

    return finishTasks
def callGenomeSegment(self, gseg, segFiles, taskPrefix="", dependencies=None):
    """
    Add the counting task (and optional compression task) for one genome segment

    The command line for self.params.getCountsBin is assembled for the region
    described by gseg, and the per-segment output paths are appended to the
    lists held by segFiles so that the caller can merge them afterwards.

    @param gseg genome segment; its id, chromLabel, beginPos and endPos are read
    @param segFiles accumulator whose 'counts' list (and 'observedIndelBed' list,
                    when observed indels are reported) is appended to
    @param taskPrefix prefix applied (via preJoin) to every task label added here
    @param dependencies tasks which the counting task waits on

    @return set of the task labels added for this segment
    """

    # gseg.id is converted once here, and the string form is used for both the
    # temporary file paths and the task labels below
    segStr = str(gseg.id)

    segCmd = [self.params.getCountsBin]

    segCmd.extend([
        "--region",
        gseg.chromLabel + ":" + str(gseg.beginPos) + "-" + str(gseg.endPos)
    ])
    segCmd.extend(["--ref", self.params.referenceFasta])
    # NOTE(review): the next two options use a single leading dash, unlike
    # every other option in this command -- confirm this is what the binary expects
    segCmd.extend(["-genome-size", str(self.params.knownSize)])
    segCmd.extend(["-max-indel-size", "50"])

    segFiles.counts.append(self.paths.getTmpSegmentCountsPath(segStr))
    segCmd.extend(["--counts-file", segFiles.counts[-1]])

    for bamPath in self.params.bamList:
        segCmd.extend(["--align-file", bamPath])

    if self.params.isHighDepthFilter:
        segCmd.extend(["--chrom-depth-file", self.paths.getChromDepth()])

    def addListCmdOption(optList, arg):
        """Repeat option 'arg' once per value in optList (None means no values)"""
        if optList is None: return
        for val in optList:
            segCmd.extend([arg, val])

    addListCmdOption(self.params.indelCandidatesList,
                     '--candidate-indel-input-vcf')
    addListCmdOption(self.params.forcedGTList, '--force-output-vcf')

    addListCmdOption(self.params.excludedRegions,
                     "--excluded-regions-bed-file")
    if self.params.knownVariants is not None:
        segCmd.extend(["--known-variants-vcf-file", self.params.knownVariants])

    if self.params.isReportObservedIndels:
        tmpObservedIndelBedPath = self.paths.getTmpObservedIndelBedPath(segStr)
        # the ".gz" path is recorded because the bed file is compressed by the
        # bgzip task added at the end of this function
        segFiles.observedIndelBed.append(tmpObservedIndelBedPath + ".gz")
        segCmd.extend(['--observation-bed-file', tmpObservedIndelBedPath])

    if self.params.extraCountsArguments is not None:
        # whitespace-separated pass-through arguments
        for arg in self.params.extraCountsArguments.strip().split():
            segCmd.append(arg)

    nextStepWait = set()

    setTaskLabel = preJoin(taskPrefix, "countGenomeSegment_" + segStr)
    self.addTask(setTaskLabel,
                 segCmd,
                 dependencies=dependencies,
                 memMb=self.params.callMemMb)
    nextStepWait.add(setTaskLabel)

    if self.params.isReportObservedIndels:
        compressTask = preJoin(taskPrefix, "compressSegmentOutput_" + segStr)
        compressCmd = "\"%s\" \"%s\"" % (self.params.bgzipBin,
                                         tmpObservedIndelBedPath)
        self.addTask(compressTask,
                     compressCmd,
                     dependencies=setTaskLabel,
                     isForceLocal=True)
        nextStepWait.add(compressTask)

    return nextStepWait
Example #45
0
def runHyGen(self, taskPrefix="", dependencies=None) :
    """
    Run hypothesis generation on each SV locus

    One "generateCandidateSV" task is added per work bin
    (self.params.nonlocalWorkBins). Further tasks then sort the per-bin VCFs
    into single outputs, extract small indels from the sorted candidate VCF,
    sort the per-bin edge runtime logs and merge the per-bin edge stats.

    @param taskPrefix prefix applied (via preJoin) to every task label added here
    @param dependencies tasks which the directory creation task waits on

    @return set of the candidate generation, VCF sort and small indel extraction
            tasks; the edge log sort and edge stats merge tasks are not included
    """

    import copy

    statsPath=self.paths.getStatsPath()
    graphPath=self.paths.getGraphPath()
    hygenDir=self.paths.getHyGenDir()

    dirTask=self.addTask(preJoin(taskPrefix,"makeHyGenDir"), "mkdir -p "+ hygenDir, dependencies=dependencies, isForceLocal=True)

    # both flags are only ever used for their truth value
    isSomatic = (len(self.params.normalBamList) and len(self.params.tumorBamList))
    isTumorOnly = ((not isSomatic) and len(self.params.tumorBamList))

    hyGenMemMb = self.params.hyGenLocalMemMb
    if self.getRunMode() == "sge" :
        hyGenMemMb = self.params.hyGenSGEMemMb

    hygenTasks=set()
    candidateVcfPaths = []
    diploidVcfPaths = []
    somaticVcfPaths = []
    tumorVcfPaths = []

    edgeRuntimeLogPaths = []
    edgeStatsLogPaths = []

    for binId in range(self.params.nonlocalWorkBins) :
        binStr = str(binId).zfill(4)
        candidateVcfPaths.append(self.paths.getHyGenCandidatePath(binStr))
        if isTumorOnly :
            tumorVcfPaths.append(self.paths.getHyGenTumorPath(binStr))
        else:
            diploidVcfPaths.append(self.paths.getHyGenDiploidPath(binStr))
            if isSomatic :
                somaticVcfPaths.append(self.paths.getHyGenSomaticPath(binStr))

        hygenCmd = [ self.params.mantaHyGenBin ]
        hygenCmd.extend(["--align-stats",statsPath])
        hygenCmd.extend(["--graph-file",graphPath])
        hygenCmd.extend(["--bin-index", str(binId)])
        hygenCmd.extend(["--bin-count", str(self.params.nonlocalWorkBins)])
        hygenCmd.extend(["--min-candidate-sv-size", self.params.minCandidateVariantSize])
        hygenCmd.extend(["--min-candidate-spanning-count", self.params.minCandidateSpanningCount])
        hygenCmd.extend(["--min-scored-sv-size", self.params.minScoredVariantSize])
        hygenCmd.extend(["--ref",self.params.referenceFasta])
        hygenCmd.extend(["--candidate-output-file", candidateVcfPaths[-1]])

        # tumor-only mode
        if isTumorOnly :
            hygenCmd.extend(["--tumor-output-file", tumorVcfPaths[-1]])
        else:
            hygenCmd.extend(["--diploid-output-file", diploidVcfPaths[-1]])
            hygenCmd.extend(["--min-qual-score", self.params.minDiploidVariantScore])
            hygenCmd.extend(["--min-pass-qual-score", self.params.minPassDiploidVariantScore])
            hygenCmd.extend(["--min-pass-gt-score", self.params.minPassDiploidGTScore])
            # tumor/normal mode
            if isSomatic :
                hygenCmd.extend(["--somatic-output-file", somaticVcfPaths[-1]])
                hygenCmd.extend(["--min-somatic-score", self.params.minSomaticScore])
                hygenCmd.extend(["--min-pass-somatic-score", self.params.minPassSomaticScore])

                # temporary fix for FFPE:
                hygenCmd.append("--skip-remote-reads")

        if self.params.isHighDepthFilter :
            hygenCmd.extend(["--chrom-depth", self.paths.getChromDepth()])

        edgeRuntimeLogPaths.append(self.paths.getHyGenEdgeRuntimeLogPath(binStr))
        hygenCmd.extend(["--edge-runtime-log", edgeRuntimeLogPaths[-1]])

        edgeStatsLogPaths.append(self.paths.getHyGenEdgeStatsPath(binStr))
        hygenCmd.extend(["--edge-stats-log", edgeStatsLogPaths[-1]])

        for bamPath in self.params.normalBamList :
            hygenCmd.extend(["--align-file", bamPath])
        for bamPath in self.params.tumorBamList :
            hygenCmd.extend(["--tumor-align-file", bamPath])

        if self.params.isIgnoreAnomProperPair :
            hygenCmd.append("--ignore-anom-proper-pair")
        if self.params.isRNA :
            hygenCmd.append("--rna")
        if self.params.isUnstrandedRNA :
            hygenCmd.append("--unstranded")

        hygenTaskLabel=preJoin(taskPrefix,"generateCandidateSV_"+binStr)
        hygenTasks.add(self.addTask(hygenTaskLabel,hygenCmd,dependencies=dirTask, memMb=hyGenMemMb))

    # copied so that the sort tasks added to nextStepWait below do not become
    # dependencies of the tasks which wait on hygenTasks
    nextStepWait = copy.deepcopy(hygenTasks)

    def getVcfSortCmd(vcfPaths, outPath, isDiploid) :
        """Build the shell command which sorts vcfPaths into a bgzipped, tabix-indexed outPath"""
        cmd  = "%s -E %s -u " % (sys.executable,self.params.mantaSortVcf)
        cmd += " ".join(vcfPaths)

        # apply the ploidy filter to diploid variants
        if isDiploid:
            tempVcf = self.paths.getTempDiploidPath()
            cmd += " > %s" % (tempVcf)
            cmd += " && %s %s" % (self.params.mantaPloidyFilter, tempVcf)

        cmd += " | %s -c > %s && %s -p vcf %s" % (self.params.bgzipBin, outPath, self.params.tabixBin, outPath)

        if isDiploid:
            cmd += " && rm -f %s" % (self.paths.getTempDiploidPath())
        return cmd

    def sortVcfs(pathList, outPath, label, isDiploid=False) :
        """
        Add the sort task for pathList; returns the task label, or an empty
        set (usable as an empty dependency list) when there is nothing to sort
        """
        if len(pathList) == 0 : return set()
        sortCmd = getVcfSortCmd(pathList, outPath, isDiploid)
        sortLabel=preJoin(taskPrefix,label)
        nextStepWait.add(self.addTask(sortLabel,sortCmd,dependencies=hygenTasks))
        return sortLabel

    candSortTask = sortVcfs(candidateVcfPaths,
                            self.paths.getSortedCandidatePath(),
                            "sortCandidateSV")
    sortVcfs(diploidVcfPaths,
             self.paths.getSortedDiploidPath(),
             "sortDiploidSV",
             isDiploid=True)
    sortVcfs(somaticVcfPaths,
             self.paths.getSortedSomaticPath(),
             "sortSomaticSV")
    sortVcfs(tumorVcfPaths,
             self.paths.getSortedTumorPath(),
             "sortTumorSV")

    def getExtractSmallCmd(maxSize, inPath, outPath) :
        """Build the shell command which writes the small (<= maxSize) records of inPath to outPath"""
        cmd  = "%s -dc %s" % (self.params.bgzipBin, inPath)
        cmd += " | %s -E %s --maxSize %i" % (sys.executable, self.params.mantaExtraSmallVcf, maxSize)
        cmd += " | %s -c > %s" % (self.params.bgzipBin, outPath)
        cmd += " && %s -p vcf %s" % (self.params.tabixBin, outPath)
        return cmd

    def extractSmall(inPath, outPath) :
        """Add the small indel extraction task, unless the size cutoff leaves nothing to extract"""
        maxSize = int(self.params.minScoredVariantSize) - 1
        if maxSize < 1 : return

        smallCmd = getExtractSmallCmd(maxSize, inPath, outPath)
        smallLabel=preJoin(taskPrefix,"extractSmallIndels")
        nextStepWait.add(self.addTask(smallLabel, smallCmd, dependencies=candSortTask, isForceLocal=True))

    extractSmall(self.paths.getSortedCandidatePath(), self.paths.getSortedCandidateSmallIndelsPath())

    # sort edge logs:
    edgeSortLabel=preJoin(taskPrefix,"sortEdgeRuntimeLogs")
    edgeSortCmd="sort -rnk2 " + " ".join(edgeRuntimeLogPaths) + " >| " + self.paths.getSortedEdgeRuntimeLogPath()
    self.addTask(edgeSortLabel, edgeSortCmd, dependencies=hygenTasks, isForceLocal=True)

    # merge edge stats:
    edgeStatsMergeLabel=preJoin(taskPrefix,"mergeEdgeStats")
    edgeStatsMergeCmd=[self.params.mantaStatsMergeBin]
    for statsFile in edgeStatsLogPaths :
        edgeStatsMergeCmd.extend(["--stats-file",statsFile])
    edgeStatsMergeCmd.extend(["--output-file",self.paths.getFinalEdgeStatsPath()])
    edgeStatsMergeCmd.extend(["--report-file",self.paths.getFinalEdgeStatsReportPath()])
    self.addTask(edgeStatsMergeLabel, edgeStatsMergeCmd, dependencies=hygenTasks, isForceLocal=True)

    return nextStepWait
Example #46
0
def runHyGen(self, taskPrefix="", dependencies=None) :
    """
    Run hypothesis generation on each SV locus

    One "generateCandidateSV" task is added per work bin
    (self.params.nonlocalWorkBins). The per-bin VCFs are then sorted via
    sortAllVcfs, the per-bin edge runtime logs are sorted and the per-bin edge
    stats are merged. When self.params.isGenerateSupportBam is set, the
    supporting-evidence BAMs are additionally sorted per bin and merged per sample.

    The per-bin VCF path lists are stored on self (candidateVcfPaths,
    diploidVcfPaths, somaticVcfPaths, tumorVcfPaths, rnaVcfPaths).

    @param taskPrefix prefix applied (via preJoin) to every task label added here
    @param dependencies tasks which the directory creation task waits on

    @return set of the candidate generation tasks, plus the support BAM
            sort/merge tasks and VCF sort tasks when support BAMs are generated
    """

    import copy

    statsPath=self.paths.getStatsPath()
    graphPath=self.paths.getGraphPath()
    hygenDir=self.paths.getHyGenDir()

    makeHyGenDirCmd = getMkdirCmd() + [hygenDir]
    dirTask = self.addTask(preJoin(taskPrefix,"makeHyGenDir"), makeHyGenDirCmd, dependencies=dependencies, isForceLocal=True)

    # both flags are only ever used for their truth value
    isTumorNormal = (len(self.params.normalBamList) and len(self.params.tumorBamList))
    isTumorOnly = ((not isTumorNormal) and len(self.params.tumorBamList))

    hygenTasks=set()
    if self.params.isGenerateSupportBam :
        sortBamVcfTasks = set()

    self.candidateVcfPaths = []
    self.diploidVcfPaths = []
    self.somaticVcfPaths = []
    self.tumorVcfPaths = []
    self.rnaVcfPaths = []

    edgeRuntimeLogPaths = []
    edgeStatsLogPaths = []

    # Setup remote read retrieval for insertions:
    # (does not depend on the bin, so it is defined once outside of the loop)
    def isEnableRemoteReadRetrieval() :
        if isTumorOnly or isTumorNormal :
            return self.params.enableRemoteReadRetrievalForInsertionsInCancerCallingModes
        else :
            return self.params.enableRemoteReadRetrievalForInsertionsInGermlineCallingModes

    for binId in range(self.params.nonlocalWorkBins) :
        binStr = str(binId).zfill(4)
        self.candidateVcfPaths.append(self.paths.getHyGenCandidatePath(binStr))
        if isTumorOnly :
            self.tumorVcfPaths.append(self.paths.getHyGenTumorPath(binStr))
        elif self.params.isRNA:
            self.rnaVcfPaths.append(self.paths.getHyGenRnaPath(binStr))
        else:
            self.diploidVcfPaths.append(self.paths.getHyGenDiploidPath(binStr))
            if isTumorNormal :
                self.somaticVcfPaths.append(self.paths.getHyGenSomaticPath(binStr))

        hygenCmd = [ self.params.mantaHyGenBin ]
        hygenCmd.extend(["--align-stats",statsPath])
        hygenCmd.extend(["--graph-file",graphPath])
        hygenCmd.extend(["--bin-index", str(binId)])
        hygenCmd.extend(["--bin-count", str(self.params.nonlocalWorkBins)])
        hygenCmd.extend(["--max-edge-count", str(self.params.graphNodeMaxEdgeCount)])
        hygenCmd.extend(["--min-candidate-sv-size", self.params.minCandidateVariantSize])
        hygenCmd.extend(["--min-candidate-spanning-count", self.params.minCandidateSpanningCount])
        hygenCmd.extend(["--min-scored-sv-size", self.params.minScoredVariantSize])
        hygenCmd.extend(["--ref",self.params.referenceFasta])
        hygenCmd.extend(["--candidate-output-file", self.candidateVcfPaths[-1]])

        # tumor-only mode
        if isTumorOnly :
            hygenCmd.extend(["--tumor-output-file", self.tumorVcfPaths[-1]])
        elif self.params.isRNA:
            hygenCmd.extend(["--rna-output-file", self.rnaVcfPaths[-1]])
        else:
            hygenCmd.extend(["--diploid-output-file", self.diploidVcfPaths[-1]])
            hygenCmd.extend(["--min-qual-score", self.params.minDiploidVariantScore])
            hygenCmd.extend(["--min-pass-qual-score", self.params.minPassDiploidVariantScore])
            hygenCmd.extend(["--min-pass-gt-score", self.params.minPassDiploidGTScore])
            # tumor/normal mode
            if isTumorNormal :
                hygenCmd.extend(["--somatic-output-file", self.somaticVcfPaths[-1]])
                hygenCmd.extend(["--min-somatic-score", self.params.minSomaticScore])
                hygenCmd.extend(["--min-pass-somatic-score", self.params.minPassSomaticScore])

        if isEnableRemoteReadRetrieval() :
            hygenCmd.append("--enable-remote-read-retrieval")

        if self.params.isHighDepthFilter :
            hygenCmd.extend(["--chrom-depth", self.paths.getChromDepth()])

        edgeRuntimeLogPaths.append(self.paths.getHyGenEdgeRuntimeLogPath(binStr))
        hygenCmd.extend(["--edge-runtime-log", edgeRuntimeLogPaths[-1]])

        edgeStatsLogPaths.append(self.paths.getHyGenEdgeStatsPath(binStr))
        hygenCmd.extend(["--edge-stats-log", edgeStatsLogPaths[-1]])

        if self.params.isGenerateSupportBam :
            hygenCmd.extend(["--evidence-bam-stub", self.paths.getSupportBamStub(binStr)])

        for bamPath in self.params.normalBamList :
            hygenCmd.extend(["--align-file", bamPath])
        for bamPath in self.params.tumorBamList :
            hygenCmd.extend(["--tumor-align-file", bamPath])

        if self.params.isIgnoreAnomProperPair :
            hygenCmd.append("--ignore-anom-proper-pair")

        if self.params.useOverlapPairEvidence:
            hygenCmd.append("--use-overlapping-pair")

        if self.params.isRNA :
            hygenCmd.append("--rna")
            if self.params.isUnstrandedRNA :
                hygenCmd.append("--unstranded")

        if self.params.isOutputContig :
            hygenCmd.append("--output-contigs")

        hygenTask = preJoin(taskPrefix,"generateCandidateSV_"+binStr)
        hygenTasks.add(self.addTask(hygenTask,hygenCmd,dependencies=dirTask, memMb=self.params.hyGenMemMb))

        # TODO: if the bam is large, for efficiency, consider
        # 1) filtering the bin-specific bam first w.r.t. the final candidate vcf
        # 2) then sort the bin-specific bam and merge them
        # This would require moving the filter/sort bam jobs outside the hygen loop
        if self.params.isGenerateSupportBam :
            bamIndex = 0
            # sort supporting bams extracted from normal samples
            bamIndex  = sortBams(self, sortBamVcfTasks,
                                 taskPrefix=taskPrefix, binStr=binStr,
                                 isNormal=True, bamIdx=bamIndex,
                                 dependencies=hygenTask)
            # sort supporting bams extracted from tumor samples
            bamIndex = sortBams(self, sortBamVcfTasks,
                                taskPrefix=taskPrefix, binStr=binStr,
                                isNormal=False, bamIdx=bamIndex,
                                dependencies=hygenTask)

    vcfTasks = sortAllVcfs(self,taskPrefix=taskPrefix,dependencies=hygenTasks)
    nextStepWait = copy.deepcopy(hygenTasks)

    if self.params.isGenerateSupportBam :
        # set.union() returns a new set and leaves its operand untouched, so the
        # VCF sort tasks have to be merged in-place for the support bam merge
        # tasks below to actually wait on them
        sortBamVcfTasks.update(vcfTasks)
        mergeBamTasks = set()
        bamCount = 0
        # merge supporting bams for each normal sample
        bamCount = mergeSupportBams(self, mergeBamTasks, taskPrefix=taskPrefix,
                                    isNormal=True, bamIdx=bamCount,
                                    dependencies=sortBamVcfTasks)

        # merge supporting bams for each tumor sample
        bamCount = mergeSupportBams(self, mergeBamTasks, taskPrefix=taskPrefix,
                                    isNormal=False, bamIdx=bamCount,
                                    dependencies=sortBamVcfTasks)

        nextStepWait = nextStepWait.union(sortBamVcfTasks)
        nextStepWait = nextStepWait.union(mergeBamTasks)

    #
    # sort the edge runtime logs
    #
    # the per-bin log paths are passed through a list file rather than on the
    # command line
    logListFile = self.paths.getEdgeRuntimeLogListPath()
    logListTask = preJoin(taskPrefix,"sortEdgeRuntimeLogsInputList")
    self.addWorkflowTask(logListTask,listFileWorkflow(logListFile,edgeRuntimeLogPaths),dependencies=hygenTasks)

    def getEdgeLogSortCmd(logListFile, outPath) :
        cmd  = [sys.executable, self.params.mantaSortEdgeLogs,"-f", logListFile,"-o",outPath]
        return cmd

    edgeSortCmd=getEdgeLogSortCmd(logListFile,self.paths.getSortedEdgeRuntimeLogPath())
    self.addTask(preJoin(taskPrefix,"sortEdgeRuntimeLogs"), edgeSortCmd, dependencies=logListTask, isForceLocal=True)

    #
    # merge all edge stats
    #
    statsFileList = self.paths.getStatsFileListPath()
    statsListTask = preJoin(taskPrefix,"mergeEdgeStatsInputList")
    self.addWorkflowTask(statsListTask,listFileWorkflow(statsFileList,edgeStatsLogPaths),dependencies=hygenTasks)

    edgeStatsMergeTask=preJoin(taskPrefix,"mergeEdgeStats")
    edgeStatsMergeCmd=[self.params.mantaStatsMergeBin]
    edgeStatsMergeCmd.extend(["--stats-file-list",statsFileList])
    edgeStatsMergeCmd.extend(["--output-file",self.paths.getFinalEdgeStatsPath()])
    edgeStatsMergeCmd.extend(["--report-file",self.paths.getFinalEdgeStatsReportPath()])
    self.addTask(edgeStatsMergeTask, edgeStatsMergeCmd, dependencies=statsListTask, isForceLocal=True)

    if not self.params.isRetainTempFiles :
        # we could delete the temp hygenDir directory here, but it is used for debug so frequently it doesn't seem worth it at present.
        # rmDirCmd = getRmdirCmd() + [hygenDir]
        # rmDirTask=self.addTask(preJoin(taskPrefix,"removeTmpDir"),rmDirCmd,dependencies=TBD_XXX_MANY)
        pass

    return nextStepWait
Example #47
0
def runHyGen(self, taskPrefix="", dependencies=None) :
    """
    Run hypothesis generation on each SV locus

    A candidate generation task is added for every work bin, after which the
    per-bin VCFs are sorted into single candidate/diploid/somatic outputs and
    the per-bin edge logs are sorted into one file.

    Returns the set of candidate generation and VCF sort tasks; the edge log
    sort task is not part of the returned set.
    """

    import copy

    statsPath = self.paths.getStatsPath()
    graphPath = self.paths.getGraphPath()
    hygenDir = self.paths.getHyGenDir()

    mkdirLabel = preJoin(taskPrefix,"makeHyGenDir")
    dirTask = self.addTask(mkdirLabel, "mkdir -p "+ hygenDir, dependencies=dependencies, isForceLocal=True)

    # somatic mode requires at least one normal and one tumor sample
    isSomatic = False
    if len(self.params.normalBamList) :
        isSomatic = (len(self.params.tumorBamList) != 0)

    # sge runs get their own memory setting
    localMemMb = self.params.hyGenLocalMemMb
    binMemMb = self.params.hyGenSGEMemMb if self.getRunMode() == "sge" else localMemMb

    hygenTasks = set()
    candidateVcfPaths = []
    diploidVcfPaths = []
    somaticVcfPaths = []
    edgeLogPaths = []

    for binId in range(self.params.nonlocalWorkBins) :
        binStr = "%04d" % binId

        # per-bin output locations:
        candidatePath = self.paths.getHyGenCandidatePath(binStr)
        candidateVcfPaths.append(candidatePath)
        diploidPath = self.paths.getHyGenDiploidPath(binStr)
        diploidVcfPaths.append(diploidPath)
        if isSomatic :
            somaticPath = self.paths.getHyGenSomaticPath(binStr)
            somaticVcfPaths.append(somaticPath)
        edgeLogPath = self.paths.getHyGenEdgeLogPath(binStr)
        edgeLogPaths.append(edgeLogPath)

        hygenCmd = [
            self.params.mantaHyGenBin,
            "--align-stats", statsPath,
            "--graph-file", graphPath,
            "--bin-index", str(binId),
            "--bin-count", str(self.params.nonlocalWorkBins),
            "--min-candidate-sv-size", self.params.minCandidateVariantSize,
            "--min-scored-sv-size", self.params.minScoredVariantSize,
            "--ref", self.params.referenceFasta,
            "--candidate-output-file", candidatePath,
            "--diploid-output-file", diploidPath,
            "--min-qual-score", self.params.minDiploidVariantScore,
            "--min-pass-gt-score", self.params.minPassGTScore,
        ]

        if isSomatic :
            hygenCmd += [
                "--somatic-output-file", somaticPath,
                "--min-somatic-score", self.params.minSomaticScore,
                "--min-pass-somatic-score", self.params.minPassSomaticScore,
            ]

        if self.params.isHighDepthFilter :
            hygenCmd += ["--chrom-depth", self.paths.getChromDepth()]

        hygenCmd += ["--edge-runtime-log", edgeLogPath]

        for normalBam in self.params.normalBamList :
            hygenCmd += ["--align-file", normalBam]
        for tumorBam in self.params.tumorBamList :
            hygenCmd += ["--tumor-align-file", tumorBam]

        if self.params.isIgnoreAnomProperPair :
            hygenCmd.append("--ignore-anom-proper-pair")
        if self.params.isRNA :
            hygenCmd.append("--rna")

        binLabel = preJoin(taskPrefix,"generateCandidateSV_"+binStr)
        binTask = self.addTask(binLabel,hygenCmd,dependencies=dirTask, memMb=binMemMb)
        hygenTasks.add(binTask)

    # the returned set starts as a copy so that the sort tasks added to it
    # below are not themselves treated as dependencies of later sort tasks
    nextStepWait = copy.deepcopy(hygenTasks)

    def sortVcfs(pathList, outPath, label) :
        """Add a task sorting pathList into a bgzipped, tabix-indexed outPath (no-op for an empty list)"""
        if len(pathList) == 0 :
            return

        sortCmd = "%s -E %s -u %s | %s -c > %s && %s -p vcf %s" % (
            sys.executable, self.params.mantaSortVcf, " ".join(pathList),
            self.params.bgzipBin, outPath, self.params.tabixBin, outPath)
        sortTask = self.addTask(preJoin(taskPrefix,label),sortCmd,dependencies=hygenTasks)
        nextStepWait.add(sortTask)

    sortVcfs(candidateVcfPaths, self.paths.getSortedCandidatePath(), "sortCandidateSV")
    sortVcfs(diploidVcfPaths, self.paths.getSortedDiploidPath(), "sortDiploidSV")
    sortVcfs(somaticVcfPaths, self.paths.getSortedSomaticPath(), "sortSomaticSV")

    # sort edge logs:
    edgeSortLabel = preJoin(taskPrefix,"sortEdgeLogs")
    joinedEdgeLogs = " ".join(edgeLogPaths)
    sortedEdgeLogPath = self.paths.getSortedEdgeLogPath()
    edgeSortCmd = "sort -rnk2 " + joinedEdgeLogs + " >| " + sortedEdgeLogPath
    self.addTask(edgeSortLabel, edgeSortCmd, dependencies=hygenTasks, isForceLocal=True)

    return nextStepWait
Example #48
0
def runLocusGraph(self,taskPrefix="",dependencies=None):
    """
    Create the full SV locus graph

    A partial graph is built for each genome segment group, the partial graphs
    are merged into the final graph file, and the merged graph is then checked
    and summarized. Returns a set holding only the graph check task.
    """

    statsPath = self.paths.getStatsPath()
    graphPath = self.paths.getGraphPath()
    graphStatsPath = self.paths.getGraphStatsPath()
    tmpGraphDir = self.paths.getTmpGraphDir()

    mkdirCmd = getMkdirCmd() + [tmpGraphDir]
    mkdirLabel = preJoin(taskPrefix,"makeGraphTmpDir")
    dirTask = self.addTask(mkdirLabel, mkdirCmd, dependencies=dependencies, isForceLocal=True)

    segmentGraphFiles = []
    segmentGraphTasks = set()

    for gsegGroup in getGenomeSegmentGroups(getNextGenomeSegment(self.params)) :
        assert len(gsegGroup) != 0

        # label the group by its first segment, or by its first and last
        # segment when the group holds more than one
        groupId = gsegGroup[0].id
        if len(gsegGroup) > 1 :
            groupId = groupId + "_to_" + gsegGroup[-1].id

        segmentGraphFile = self.paths.getTmpGraphFile(groupId)
        segmentGraphFiles.append(segmentGraphFile)

        graphCmd = [
            self.params.mantaGraphBin,
            "--output-file", segmentGraphFile,
            "--align-stats", statsPath,
        ]
        for gseg in gsegGroup :
            graphCmd += ["--region", gseg.bamRegion]
        graphCmd += [
            "--min-candidate-sv-size", self.params.minCandidateVariantSize,
            "--min-edge-observations", self.params.minEdgeObservations,
            "--ref", self.params.referenceFasta,
        ]
        for normalBam in self.params.normalBamList :
            graphCmd += ["--align-file", normalBam]
        for tumorBam in self.params.tumorBamList :
            graphCmd += ["--tumor-align-file", tumorBam]

        if self.params.isHighDepthFilter :
            graphCmd += ["--chrom-depth", self.paths.getChromDepth()]

        if self.params.isIgnoreAnomProperPair :
            graphCmd.append("--ignore-anom-proper-pair")
        if self.params.isRNA :
            graphCmd.append("--rna")

        segmentLabel = preJoin(taskPrefix,"makeLocusGraph_"+groupId)
        segmentTask = self.addTask(segmentLabel,graphCmd,dependencies=dirTask,memMb=self.params.estimateMemMb)
        segmentGraphTasks.add(segmentTask)

    if len(segmentGraphFiles) == 0 :
        raise Exception("No SV Locus graphs to create. Possible target region parse error.")

    # the partial graph paths are handed to the merge step through a list file
    graphListFile = self.paths.getTmpGraphFileListPath()
    graphListTask = preJoin(taskPrefix,"mergeLocusGraphInputList")
    self.addWorkflowTask(graphListTask,listFileWorkflow(graphListFile,segmentGraphFiles),dependencies=segmentGraphTasks)

    mergeCmd = [
        self.params.mantaGraphMergeBin,
        "--output-file", graphPath,
        "--graph-file-list", graphListFile,
    ]
    mergeTask = self.addTask(preJoin(taskPrefix,"mergeLocusGraph"),mergeCmd,dependencies=graphListTask,memMb=self.params.mergeMemMb)

    # The final graph is validated by a dedicated process; the sv candidate
    # generators check it as well, but a separate task makes a failure much
    # easier to attribute:
    checkCmd = [ self.params.mantaGraphCheckBin, "--graph-file", graphPath ]
    checkTask = self.addTask(preJoin(taskPrefix,"checkLocusGraph"),checkCmd,dependencies=mergeTask,memMb=self.params.mergeMemMb)

    if not self.params.isRetainTempFiles :
        # the partial graphs are no longer needed once the merge has completed
        rmTmpDirCmd = getRmdirCmd() + [tmpGraphDir]
        self.addTask(preJoin(taskPrefix,"removeTmpDir"),rmTmpDirCmd,dependencies=mergeTask)

    graphStatsCmd = [
        self.params.mantaGraphStatsBin, "--global",
        "--graph-file", graphPath,
        "--output-file", graphStatsPath,
    ]
    self.addTask(preJoin(taskPrefix,"locusGraphStats"),graphStatsCmd,dependencies=mergeTask,memMb=self.params.mergeMemMb)

    # only the graph check gates the next workflow step
    return set([checkTask])
Example #49
0
 def sortVcfs(pathList, outPath, label, isDiploid=False) :
     """
     Add a sort task for pathList followed by a tabix index task for outPath

     Only the tabix task is added to nextStepWait. Returns the sort task, or
     an empty set when pathList is empty.
     """
     if len(pathList) == 0 :
         return set()

     vcfSortCmd = getVcfSortCmd(pathList, outPath, isDiploid)
     sortTask = self.addTask(preJoin(taskPrefix,"sort_"+label),vcfSortCmd,dependencies=hygenTasks)

     # indexing runs locally once the sorted file is complete
     tabixLabel = preJoin(taskPrefix,"tabix_"+label)
     tabixTask = self.addTask(tabixLabel,getVcfTabixCmd(outPath),dependencies=sortTask,isForceLocal=True)
     nextStepWait.add(tabixTask)

     return sortTask
Example #50
0
def runHyGen(self, taskPrefix="", dependencies=None) :
    """
    Run hypothesis generation on each SV locus

    One "generateCandidateSV" task is added per work bin
    (self.params.nonlocalWorkBins), followed by tasks which sort the per-bin
    candidate VCFs (and somatic VCFs, when both normal and tumor samples are
    present) into single bgzipped, tabix-indexed outputs.

    @param taskPrefix prefix applied (via preJoin) to every task label added here
    @param dependencies tasks which the directory creation task waits on

    @return set of the candidate generation and VCF sort tasks
    """

    statsPath=self.paths.getStatsPath()
    graphPath=self.paths.getGraphPath()
    hygenDir=self.paths.getHyGenDir()

    dirTask=self.addTask(preJoin(taskPrefix,"makeHyGenDir"), "mkdir -p "+ hygenDir, dependencies=dependencies, isForceLocal=True)

    # only ever used for its truth value
    isSomatic = (len(self.params.normalBamList) and len(self.params.tumorBamList))

    hygenTasks=set()
    candidateVcfPaths = []
    somaticVcfPaths = []

    for binId in range(self.params.nonlocalWorkBins) :
        binStr = str(binId).zfill(4)
        candidateVcfPaths.append(self.paths.getHyGenCandidatePath(binStr))
        if isSomatic :
            somaticVcfPaths.append(self.paths.getHyGenSomaticPath(binStr))

        hygenCmd = [ self.params.mantaHyGenBin ]
        hygenCmd.extend(["--align-stats",statsPath])
        hygenCmd.extend(["--graph-file",graphPath])
        hygenCmd.extend(["--bin-index", str(binId)])
        hygenCmd.extend(["--bin-count", str(self.params.nonlocalWorkBins)])
        hygenCmd.extend(["--min-candidate-sv-size", self.params.minCandidateVariantSize])
        hygenCmd.extend(["--min-scored-sv-size", self.params.minScoredVariantSize])
        hygenCmd.extend(["--ref",self.params.referenceFasta])
        hygenCmd.extend(["--candidate-output-file", candidateVcfPaths[-1]])
        if isSomatic :
            hygenCmd.extend(["--somatic-output-file", somaticVcfPaths[-1]])

        if not self.params.isExome :
            hygenCmd.extend(["--chrom-depth", self.paths.getChromDepth()])


        for bamPath in self.params.normalBamList :
            hygenCmd.extend(["--align-file", bamPath])
        for bamPath in self.params.tumorBamList :
            hygenCmd.extend(["--tumor-align-file", bamPath])

        hygenTaskLabel=preJoin(taskPrefix,"generateCandidateSV_"+binStr)
        hygenTasks.add(self.addTask(hygenTaskLabel,hygenCmd,dependencies=dirTask, memMb=self.params.hyGenMemMb))

    # Take a copy instead of aliasing hygenTasks: the sort tasks are added to
    # nextStepWait below, and with an alias the candidate sort task would leak
    # into the dependency set handed to the somatic sort task.
    nextStepWait = set(hygenTasks)


    def getVcfSortCmd(vcfPaths, outPath) :
        """Build the shell command which sorts vcfPaths into a bgzipped, tabix-indexed outPath"""
        cmd  = "%s -E %s " % (sys.executable,self.params.mantaSortVcf)
        cmd += " ".join(vcfPaths)
        cmd += " | %s -c > %s && %s -p vcf %s" % (self.params.bgzipBin, outPath, self.params.tabixBin, outPath)
        return cmd

    # consolidate output:
    if len(candidateVcfPaths) :
        outPath = self.paths.getSortedCandidatePath()
        candSortCmd = getVcfSortCmd(candidateVcfPaths,outPath)
        candSortLabel=preJoin(taskPrefix,"sortCandidateSV")
        nextStepWait.add(self.addTask(candSortLabel,candSortCmd,dependencies=hygenTasks))

    if len(somaticVcfPaths) :
        outPath = self.paths.getSortedSomaticPath()
        candSortCmd = getVcfSortCmd(somaticVcfPaths,outPath)
        candSortLabel=preJoin(taskPrefix,"sortSomaticSV")
        nextStepWait.add(self.addTask(candSortLabel,candSortCmd,dependencies=hygenTasks))

    return nextStepWait
Example #51
0
 def sortVcfs(pathList, outPath, label, isDiploid=False) :
     """
     Add a task which sorts pathList into outPath, and add it to nextStepWait

     Returns the label of the sort task, or an empty set when pathList is empty.
     """
     if len(pathList) == 0 :
         return set()

     vcfSortCmd = getVcfSortCmd(pathList, outPath, isDiploid)
     taskLabel = preJoin(taskPrefix,label)
     sortTask = self.addTask(taskLabel,vcfSortCmd,dependencies=hygenTasks)
     nextStepWait.add(sortTask)

     # callers get the label rather than the value returned by addTask
     return taskLabel
Example #52
0
def runHyGen(self, taskPrefix="", dependencies=None) :
    """
    Run hypothesis generation on each SV locus

    One generation task is scheduled per work bin. Afterwards the vcf sorting
    (via sortAllVcfs), the edge runtime log sort and the edge stats merge are
    scheduled behind the generation tasks.

    The per-bin output vcf paths are recorded on self (candidateVcfPaths,
    diploidVcfPaths, somaticVcfPaths, tumorVcfPaths).

    Returns a copy of the set of per-bin generation tasks; the tasks scheduled
    afterwards are not added to the returned set.
    """

    import copy

    statsPath = self.paths.getStatsPath()
    graphPath = self.paths.getGraphPath()
    hygenDir = self.paths.getHyGenDir()

    mkdirCmd = getMkdirCmd() + [hygenDir]
    dirTask = self.addTask(preJoin(taskPrefix,"makeHyGenDir"),
                           mkdirCmd,
                           dependencies=dependencies,
                           isForceLocal=True)

    # somatic: both normal and tumor samples given; tumor-only: tumor samples without any normal
    normalCount = len(self.params.normalBamList)
    isSomatic = (normalCount > 0) and (len(self.params.tumorBamList) > 0)
    isTumorOnly = (not isSomatic) and (len(self.params.tumorBamList) > 0)

    # memory request depends on where the tasks are run
    hyGenMemMb = self.params.hyGenLocalMemMb
    if self.getRunMode() == "sge" :
        hyGenMemMb = self.params.hyGenSGEMemMb

    hygenTasks = set()
    self.candidateVcfPaths = []
    self.diploidVcfPaths = []
    self.somaticVcfPaths = []
    self.tumorVcfPaths = []

    runtimeLogPaths = []
    statsLogPaths = []

    for binId in range(self.params.nonlocalWorkBins) :
        binStr = str(binId).zfill(4)

        # register this bin's output files
        self.candidateVcfPaths.append(self.paths.getHyGenCandidatePath(binStr))
        if isTumorOnly :
            self.tumorVcfPaths.append(self.paths.getHyGenTumorPath(binStr))
        else :
            self.diploidVcfPaths.append(self.paths.getHyGenDiploidPath(binStr))
            if isSomatic :
                self.somaticVcfPaths.append(self.paths.getHyGenSomaticPath(binStr))

        hygenCmd = [ self.params.mantaHyGenBin,
                     "--align-stats", statsPath,
                     "--graph-file", graphPath,
                     "--bin-index", str(binId),
                     "--bin-count", str(self.params.nonlocalWorkBins),
                     "--min-candidate-sv-size", self.params.minCandidateVariantSize,
                     "--min-candidate-spanning-count", self.params.minCandidateSpanningCount,
                     "--min-scored-sv-size", self.params.minScoredVariantSize,
                     "--ref", self.params.referenceFasta,
                     "--candidate-output-file", self.candidateVcfPaths[-1] ]

        if isTumorOnly :
            # tumor-only mode
            hygenCmd += ["--tumor-output-file", self.tumorVcfPaths[-1]]
        else :
            hygenCmd += ["--diploid-output-file", self.diploidVcfPaths[-1],
                         "--min-qual-score", self.params.minDiploidVariantScore,
                         "--min-pass-qual-score", self.params.minPassDiploidVariantScore,
                         "--min-pass-gt-score", self.params.minPassDiploidGTScore]
            if isSomatic :
                # tumor/normal mode
                hygenCmd += ["--somatic-output-file", self.somaticVcfPaths[-1],
                             "--min-somatic-score", self.params.minSomaticScore,
                             "--min-pass-somatic-score", self.params.minPassSomaticScore]

                # temporary fix for FFPE:
                hygenCmd += ["--skip-remote-reads"]

        if self.params.isHighDepthFilter :
            hygenCmd += ["--chrom-depth", self.paths.getChromDepth()]

        runtimeLogPaths.append(self.paths.getHyGenEdgeRuntimeLogPath(binStr))
        hygenCmd += ["--edge-runtime-log", runtimeLogPaths[-1]]

        statsLogPaths.append(self.paths.getHyGenEdgeStatsPath(binStr))
        hygenCmd += ["--edge-stats-log", statsLogPaths[-1]]

        for normalBam in self.params.normalBamList :
            hygenCmd += ["--align-file", normalBam]
        for tumorBam in self.params.tumorBamList :
            hygenCmd += ["--tumor-align-file", tumorBam]

        # optional boolean switches
        optionalFlags = ( (self.params.isIgnoreAnomProperPair, "--ignore-anom-proper-pair"),
                          (self.params.isRNA, "--rna"),
                          (self.params.isUnstrandedRNA, "--unstranded") )
        for (isEnabled, flag) in optionalFlags :
            if isEnabled :
                hygenCmd.append(flag)

        binTaskLabel = preJoin(taskPrefix,"generateCandidateSV_"+binStr)
        binTask = self.addTask(binTaskLabel, hygenCmd, dependencies=dirTask, memMb=hyGenMemMb)
        hygenTasks.add(binTask)

    nextStepWait = copy.deepcopy(hygenTasks)

    sortAllVcfs(self, taskPrefix=taskPrefix, dependencies=hygenTasks)

    #
    # sort the edge runtime logs
    #
    runtimeLogListFile = self.paths.getEdgeRuntimeLogListPath()
    runtimeLogListTask = preJoin(taskPrefix,"sortEdgeRuntimeLogsInputList")
    self.addWorkflowTask(runtimeLogListTask,
                         listFileWorkflow(runtimeLogListFile, runtimeLogPaths),
                         dependencies=hygenTasks)

    sortedRuntimeLogPath = self.paths.getSortedEdgeRuntimeLogPath()
    edgeSortCmd = [ sys.executable, "-E", self.params.mantaSortEdgeLogs,
                    "-f", runtimeLogListFile,
                    "-o", sortedRuntimeLogPath ]
    self.addTask(preJoin(taskPrefix,"sortEdgeRuntimeLogs"),
                 edgeSortCmd,
                 dependencies=runtimeLogListTask,
                 isForceLocal=True)

    #
    # merge all edge stats
    #
    statsListFile = self.paths.getStatsFileListPath()
    statsListTask = preJoin(taskPrefix,"mergeEdgeStatsInputList")
    self.addWorkflowTask(statsListTask,
                         listFileWorkflow(statsListFile, statsLogPaths),
                         dependencies=hygenTasks)

    statsMergeTask = preJoin(taskPrefix,"mergeEdgeStats")
    statsMergeCmd = [ self.params.mantaStatsMergeBin,
                      "--stats-file-list", statsListFile,
                      "--output-file", self.paths.getFinalEdgeStatsPath(),
                      "--report-file", self.paths.getFinalEdgeStatsReportPath() ]
    self.addTask(statsMergeTask, statsMergeCmd, dependencies=statsListTask, isForceLocal=True)

    if not self.params.isRetainTempFiles :
        # we could delete the temp hygenDir directory here, but it is used for debug so frequently it doesn't seem worth it at present.
        # rmDirCmd = getRmdirCmd() + [hygenDir]
        # rmDirTask=self.addTask(preJoin(taskPrefix,"rmTmpDir"),rmDirCmd,dependencies=TBD_XXX_MANY)
        pass

    return nextStepWait
Example #53
0
def runLocusGraph(self,taskPrefix="",dependencies=None):
    """
    Create the full SV locus graph

    A partial graph is built for each group of genome segments inside a
    temporary directory, the partial graphs are merged into the final graph
    file, and the merged graph is then checked and summarized. Removal of the
    temporary directory is scheduled behind the merge.

    taskPrefix -- prefix joined (via preJoin) onto every task label created here
    dependencies -- dependencies of the first task (temporary directory creation)

    Returns a set holding only the graph check task; the graph stats and
    temporary directory removal tasks are scheduled but not returned.

    Raises Exception if no genome segment groups are produced.
    """

    statsPath=self.paths.getStatsPath()
    graphPath=self.paths.getGraphPath()
    graphStatsPath=self.paths.getGraphStatsPath()

    graphFilename=os.path.basename(graphPath)
    tmpGraphDir=os.path.join(self.params.workDir,graphFilename+".tmpdir")

    makeTmpDirCmd = getMkdirCmd() + [tmpGraphDir]
    dirTask=self.addTask(preJoin(taskPrefix,"makeTmpDir"), makeTmpDirCmd, dependencies=dependencies, isForceLocal=True)

    tmpGraphFiles = []
    graphTasks = set()

    def getGenomeSegmentGroups(params) :
        """
        Iterate segment groups and 'clump' small contigs together

        Consecutive segments are accumulated into one group for as long as the
        group's total size stays within minSegmentGroupSize; a segment which
        would exceed that limit starts a new group.
        """
        minSegmentGroupSize=200000
        group = []
        headSize = 0
        # use the params argument rather than silently closing over self.params
        for gseg in getNextGenomeSegment(params) :
            if headSize+gseg.size() <= minSegmentGroupSize :
                group.append(gseg)
                headSize += gseg.size()
            else :
                if len(group) != 0 : yield(group)
                group = [gseg]
                headSize = gseg.size()
        # flush the final, possibly partial, group
        if len(group) != 0 : yield(group)

    for gsegGroup in getGenomeSegmentGroups(self.params) :
        assert(len(gsegGroup) != 0)

        # label the group by its first segment, or by its first and last segment
        gid=gsegGroup[0].id
        if len(gsegGroup) > 1 :
            gid += "_to_"+gsegGroup[-1].id

        tmpGraphFiles.append(os.path.join(tmpGraphDir,graphFilename+"."+gid+".bin"))
        graphCmd = [ self.params.mantaGraphBin ]
        graphCmd.extend(["--output-file", tmpGraphFiles[-1]])
        graphCmd.extend(["--align-stats",statsPath])
        for gseg in gsegGroup :
            graphCmd.extend(["--region",gseg.bamRegion])
        graphCmd.extend(["--min-candidate-sv-size", self.params.minCandidateVariantSize])
        graphCmd.extend(["--min-edge-observations", self.params.minEdgeObservations])
        graphCmd.extend(["--ref",self.params.referenceFasta])
        for bamPath in self.params.normalBamList :
            graphCmd.extend(["--align-file",bamPath])
        for bamPath in self.params.tumorBamList :
            graphCmd.extend(["--tumor-align-file",bamPath])

        if self.params.isHighDepthFilter :
            graphCmd.extend(["--chrom-depth", self.paths.getChromDepth()])

        if self.params.isIgnoreAnomProperPair :
            graphCmd.append("--ignore-anom-proper-pair")
        if self.params.isRNA :
            graphCmd.append("--rna")

        graphTaskLabel=preJoin(taskPrefix,"makeLocusGraph_"+gid)
        graphTasks.add(self.addTask(graphTaskLabel,graphCmd,dependencies=dirTask,memMb=self.params.estimateMemMb))

    if len(tmpGraphFiles) == 0 :
        raise Exception("No SV Locus graphs to create. Possible target region parse error.")

    mergeCmd = [ self.params.mantaGraphMergeBin ]
    mergeCmd.extend(["--output-file", graphPath])
    for gfile in tmpGraphFiles :
        mergeCmd.extend(["--graph-file", gfile])

    mergeTask = self.addTask(preJoin(taskPrefix,"mergeLocusGraph"),mergeCmd,dependencies=graphTasks,memMb=self.params.mergeMemMb)

    # Run a separate process to rigorously check that the final graph is valid, the sv candidate generators will check as well, but
    # this makes the check much more clear:

    checkCmd = [ self.params.mantaGraphCheckBin ]
    checkCmd.extend(["--graph-file", graphPath])
    checkTask = self.addTask(preJoin(taskPrefix,"checkLocusGraph"),checkCmd,dependencies=mergeTask,memMb=self.params.mergeMemMb)

    # the partial graphs are only inputs to the merge, so cleanup waits on the merge alone
    rmGraphTmpCmd = getRmdirCmd() + [tmpGraphDir]
    self.addTask(preJoin(taskPrefix,"rmTmpDir"),rmGraphTmpCmd,dependencies=mergeTask)

    graphStatsCmd  = [self.params.mantaGraphStatsBin,"--global"]
    graphStatsCmd.extend(["--graph-file",graphPath])
    graphStatsCmd.extend(["--output-file",graphStatsPath])

    self.addTask(preJoin(taskPrefix,"locusGraphStats"),graphStatsCmd,dependencies=mergeTask,memMb=self.params.mergeMemMb)

    nextStepWait = set()
    nextStepWait.add(checkTask)
    return nextStepWait
Example #54
0
 def depthFunc(self,taskPrefix,dependencies,bamFile,outFile) :
     """
     Schedule a task which runs the chrom depth script (self.params.getChromDepth)
     on bamFile, with the script's stdout redirected to outFile.

     Every path placed in the shell command is single-quoted, including the
     interpreter and the redirect target, so paths containing whitespace
     do not split the command.

     Returns the value returned by self.addTask for the new task.
     """
     cmd  = "'%s' -E '%s'" % (sys.executable, self.params.getChromDepth)
     cmd += " --bam '%s'" % (bamFile)
     cmd += " > '%s'" % (outFile)
     return self.addTask(preJoin(taskPrefix,"estimateChromDepth"),cmd,dependencies=dependencies)
Example #55
0
def runLocusGraph(self,taskPrefix="",dependencies=None):
    """
    Create the full SV locus graph

    A partial graph is built for each genome segment inside a temporary
    directory, the partial graphs are merged into the final graph file, and
    the merged graph is then checked and summarized. Removal of the temporary
    directory is scheduled behind the merge.

    taskPrefix -- prefix joined (via preJoin) onto every task label created here
    dependencies -- dependencies of the first task (temporary directory creation)

    Returns a set holding only the graph check task; the graph stats and
    temporary directory removal tasks are scheduled but not returned.

    Raises Exception if no genome segments are produced.
    """

    statsPath=self.paths.getStatsPath()
    graphPath=self.paths.getGraphPath()
    graphStatsPath=self.paths.getGraphStatsPath()

    graphFilename=os.path.basename(graphPath)
    tmpGraphDir=os.path.join(self.params.workDir,graphFilename+".tmpdir")

    # argument-list form (as used for the other commands below) so that a path
    # containing whitespace is passed through as a single argument
    makeTmpDirCmd = ["mkdir","-p",tmpGraphDir]
    dirTask=self.addTask(preJoin(taskPrefix,"makeTmpDir"), makeTmpDirCmd, dependencies=dependencies, isForceLocal=True)

    tmpGraphFiles = []
    graphTasks = set()

    for gseg in getNextGenomeSegment(self.params) :

        tmpGraphFiles.append(os.path.join(tmpGraphDir,graphFilename+"."+gseg.id+".bin"))
        graphCmd = [ self.params.mantaGraphBin ]
        graphCmd.extend(["--output-file", tmpGraphFiles[-1]])
        graphCmd.extend(["--align-stats",statsPath])
        graphCmd.extend(["--region",gseg.bamRegion])
        graphCmd.extend(["--min-candidate-sv-size", self.params.minCandidateVariantSize])
        graphCmd.extend(["--min-edge-observations", self.params.minEdgeObservations])
        graphCmd.extend(["--ref",self.params.referenceFasta])
        for bamPath in self.params.normalBamList :
            graphCmd.extend(["--align-file",bamPath])
        for bamPath in self.params.tumorBamList :
            graphCmd.extend(["--tumor-align-file",bamPath])

        if self.params.isHighDepthFilter :
            graphCmd.extend(["--chrom-depth", self.paths.getChromDepth()])

        if self.params.isIgnoreAnomProperPair :
            graphCmd.append("--ignore-anom-proper-pair")
        if self.params.isRNA :
            graphCmd.append("--rna")

        graphTaskLabel=preJoin(taskPrefix,"makeLocusGraph_"+gseg.pyflowId)
        graphTasks.add(self.addTask(graphTaskLabel,graphCmd,dependencies=dirTask,memMb=self.params.estimateMemMb))

    if len(tmpGraphFiles) == 0 :
        raise Exception("No SV Locus graphs to create. Possible target region parse error.")

    mergeCmd = [ self.params.mantaGraphMergeBin ]
    mergeCmd.extend(["--output-file", graphPath])
    for gfile in tmpGraphFiles :
        mergeCmd.extend(["--graph-file", gfile])

    mergeTask = self.addTask(preJoin(taskPrefix,"mergeLocusGraph"),mergeCmd,dependencies=graphTasks,memMb=self.params.mergeMemMb)

    # Run a separate process to rigorously check that the final graph is valid, the sv candidate generators will check as well, but
    # this makes the check much more clear:

    checkCmd = [ self.params.mantaGraphCheckBin ]
    checkCmd.extend(["--graph-file", graphPath])
    checkTask = self.addTask(preJoin(taskPrefix,"checkLocusGraph"),checkCmd,dependencies=mergeTask,memMb=self.params.mergeMemMb)

    # the partial graphs are only inputs to the merge, so cleanup waits on the merge alone
    rmGraphTmpCmd = ["rm","-rf",tmpGraphDir]
    self.addTask(preJoin(taskPrefix,"rmTmpDir"),rmGraphTmpCmd,dependencies=mergeTask)

    # this command needs shell redirection, so it stays a single string; every
    # path is double-quoted to keep whitespace from splitting it
    graphStatsCmd  = "\"%s\"" % (self.params.mantaGraphStatsBin)
    graphStatsCmd += " --global"
    graphStatsCmd += " --graph-file \"%s\"" % (graphPath)
    graphStatsCmd += " >| \"%s\"" % (graphStatsPath)

    self.addTask(preJoin(taskPrefix,"locusGraphStats"),graphStatsCmd,dependencies=mergeTask,memMb=self.params.mergeMemMb)

    nextStepWait = set()
    nextStepWait.add(checkTask)
    return nextStepWait
Example #56
0
def runHyGen(self, taskPrefix="", dependencies=None) :
    """
    Run hypothesis generation on each SV locus

    One generation task is scheduled per work bin. The per-bin vcf outputs are
    then header-fixed, sorted, compressed and tabix-indexed, small indels are
    extracted from the sorted candidate vcf, the edge runtime logs are sorted
    and the edge stats are merged.

    Returns a set containing the per-bin generation tasks together with the
    tabix tasks of every sorted/extracted vcf. The edge log sort and edge stats
    merge tasks are scheduled but not included in the returned set.
    """

    import copy

    statsPath = self.paths.getStatsPath()
    graphPath = self.paths.getGraphPath()
    hygenDir = self.paths.getHyGenDir()

    mkdirCmd = getMkdirCmd() + [hygenDir]
    dirTask = self.addTask(preJoin(taskPrefix,"makeHyGenDir"),
                           mkdirCmd,
                           dependencies=dependencies,
                           isForceLocal=True)

    # somatic: both normal and tumor samples given; tumor-only: tumor samples without any normal
    normalCount = len(self.params.normalBamList)
    isSomatic = (normalCount > 0) and (len(self.params.tumorBamList) > 0)
    isTumorOnly = (not isSomatic) and (len(self.params.tumorBamList) > 0)

    # memory request depends on where the tasks are run
    hyGenMemMb = self.params.hyGenLocalMemMb
    if self.getRunMode() == "sge" :
        hyGenMemMb = self.params.hyGenSGEMemMb

    hygenTasks = set()
    candidateVcfPaths = []
    diploidVcfPaths = []
    somaticVcfPaths = []
    tumorVcfPaths = []

    runtimeLogPaths = []
    statsLogPaths = []

    for binId in range(self.params.nonlocalWorkBins) :
        binStr = str(binId).zfill(4)

        # register this bin's output files
        candidateVcfPaths.append(self.paths.getHyGenCandidatePath(binStr))
        if isTumorOnly :
            tumorVcfPaths.append(self.paths.getHyGenTumorPath(binStr))
        else :
            diploidVcfPaths.append(self.paths.getHyGenDiploidPath(binStr))
            if isSomatic :
                somaticVcfPaths.append(self.paths.getHyGenSomaticPath(binStr))

        hygenCmd = [ self.params.mantaHyGenBin,
                     "--align-stats", statsPath,
                     "--graph-file", graphPath,
                     "--bin-index", str(binId),
                     "--bin-count", str(self.params.nonlocalWorkBins),
                     "--min-candidate-sv-size", self.params.minCandidateVariantSize,
                     "--min-candidate-spanning-count", self.params.minCandidateSpanningCount,
                     "--min-scored-sv-size", self.params.minScoredVariantSize,
                     "--ref", self.params.referenceFasta,
                     "--candidate-output-file", candidateVcfPaths[-1] ]

        if isTumorOnly :
            # tumor-only mode
            hygenCmd += ["--tumor-output-file", tumorVcfPaths[-1]]
        else :
            hygenCmd += ["--diploid-output-file", diploidVcfPaths[-1],
                         "--min-qual-score", self.params.minDiploidVariantScore,
                         "--min-pass-qual-score", self.params.minPassDiploidVariantScore,
                         "--min-pass-gt-score", self.params.minPassDiploidGTScore]
            if isSomatic :
                # tumor/normal mode
                hygenCmd += ["--somatic-output-file", somaticVcfPaths[-1],
                             "--min-somatic-score", self.params.minSomaticScore,
                             "--min-pass-somatic-score", self.params.minPassSomaticScore]

                # temporary fix for FFPE:
                hygenCmd += ["--skip-remote-reads"]

        if self.params.isHighDepthFilter :
            hygenCmd += ["--chrom-depth", self.paths.getChromDepth()]

        runtimeLogPaths.append(self.paths.getHyGenEdgeRuntimeLogPath(binStr))
        hygenCmd += ["--edge-runtime-log", runtimeLogPaths[-1]]

        statsLogPaths.append(self.paths.getHyGenEdgeStatsPath(binStr))
        hygenCmd += ["--edge-stats-log", statsLogPaths[-1]]

        for normalBam in self.params.normalBamList :
            hygenCmd += ["--align-file", normalBam]
        for tumorBam in self.params.tumorBamList :
            hygenCmd += ["--tumor-align-file", tumorBam]

        # optional boolean switches
        optionalFlags = ( (self.params.isIgnoreAnomProperPair, "--ignore-anom-proper-pair"),
                          (self.params.isRNA, "--rna"),
                          (self.params.isUnstrandedRNA, "--unstranded") )
        for (isEnabled, flag) in optionalFlags :
            if isEnabled :
                hygenCmd.append(flag)

        binTaskLabel = preJoin(taskPrefix,"generateCandidateSV_"+binStr)
        binTask = self.addTask(binTaskLabel, hygenCmd, dependencies=dirTask, memMb=hyGenMemMb)
        hygenTasks.add(binTask)

    nextStepWait = copy.deepcopy(hygenTasks)

    def getVcfSortCmd(vcfPaths, outPath, isDiploid) :
        """
        Build the shell command which sorts vcfPaths and bgzips the result into outPath
        """
        parts = [ '"%s" -E "%s" -u ' % (sys.executable, self.params.mantaSortVcf),
                  " ".join(quoteStringList(vcfPaths)) ]

        # apply the ploidy filter to diploid variants
        if isDiploid :
            tempVcf = self.paths.getTempDiploidPath()
            parts.append(' > "%s"' % (tempVcf))
            parts.append(' && "%s" -E "%s" "%s"' % (sys.executable, self.params.mantaPloidyFilter, tempVcf))

        parts.append(' | "%s" -c > "%s"' % (self.params.bgzipBin, outPath))

        # the intermediate sorted vcf is only needed as ploidy filter input
        if isDiploid :
            parts.append(" && " + " ".join(getRmCmd()) + ' "%s"' % (self.paths.getTempDiploidPath()))

        return "".join(parts)

    def getVcfTabixCmd(vcfPath) :
        """
        Build the tabix indexing command for vcfPath
        """
        return [self.params.tabixBin, "-f", "-p", "vcf", vcfPath]

    def getHeaderFixCmd(fileName) :
        """
        Build the shell command which pipes fileName through the vcf cmdline
        swapper script and moves the result back over fileName
        """
        tmpName = fileName + ".reheader.tmp"
        parts = [ '"%s" -E "%s"' % (sys.executable, self.params.vcfCmdlineSwapper),
                  ' "' + " ".join(self.params.configCommandLine) + '"',
                  ' < "%s" > "%s"' % (fileName, tmpName),
                  " && " + " ".join(getMvCmd()) + ' "%s" "%s"' % (tmpName, fileName) ]
        return "".join(parts)

    def sortVcfs(pathList, outPath, label, isDiploid=False) :
        """
        Schedule header fix, sort and tabix tasks for pathList; the tabix task
        is added to nextStepWait and the sort task is returned (an empty set is
        returned when pathList is empty)
        """
        if not len(pathList) :
            return set()

        # make header modifications to first vcf in list of files to be sorted:
        headerFixTask = preJoin(taskPrefix,"fixVcfHeader_"+label)
        self.addTask(headerFixTask,
                     getHeaderFixCmd(pathList[0]),
                     dependencies=hygenTasks,
                     isForceLocal=True)

        sortCmd = getVcfSortCmd(pathList, outPath, isDiploid)
        sortTask = self.addTask(preJoin(taskPrefix,"sort_"+label), sortCmd, dependencies=headerFixTask)

        tabixTask = self.addTask(preJoin(taskPrefix,"tabix_"+label),
                                 getVcfTabixCmd(outPath),
                                 dependencies=sortTask,
                                 isForceLocal=True)
        nextStepWait.add(tabixTask)
        return sortTask

    candSortTask = sortVcfs(candidateVcfPaths, self.paths.getSortedCandidatePath(), "sortCandidateSV")
    sortVcfs(diploidVcfPaths, self.paths.getSortedDiploidPath(), "sortDiploidSV", isDiploid=True)
    sortVcfs(somaticVcfPaths, self.paths.getSortedSomaticPath(), "sortSomaticSV")
    sortVcfs(tumorVcfPaths, self.paths.getSortedTumorPath(), "sortTumorSV")

    def getExtractSmallCmd(maxSize, inPath, outPath) :
        """
        Build the shell command which decompresses inPath, pipes it through the
        small vcf extraction script with --maxSize, and bgzips the result into outPath
        """
        stages = [ '"%s" -dc "%s"' % (self.params.bgzipBin, inPath),
                   ' | "%s" -E "%s" --maxSize %i' % (sys.executable, self.params.mantaExtraSmallVcf, maxSize),
                   ' | "%s" -c > "%s"' % (self.params.bgzipBin, outPath) ]
        return "".join(stages)

    def extractSmall(inPath, outPath) :
        """
        Schedule small indel extraction (plus tabix indexing) behind the candidate sort
        """
        maxSize = int(self.params.minScoredVariantSize) - 1
        if maxSize < 1 :
            return

        smallCmd = getExtractSmallCmd(maxSize, inPath, outPath)
        smallLabel = self.addTask(preJoin(taskPrefix,"extractSmallIndels"),
                                  smallCmd,
                                  dependencies=candSortTask,
                                  isForceLocal=True)
        smallTabixTask = self.addTask(smallLabel+"_tabix",
                                      getVcfTabixCmd(outPath),
                                      dependencies=smallLabel,
                                      isForceLocal=True)
        nextStepWait.add(smallTabixTask)

    extractSmall(self.paths.getSortedCandidatePath(), self.paths.getSortedCandidateSmallIndelsPath())

    # sort edge logs:
    edgeSortLabel = preJoin(taskPrefix,"sortEdgeRuntimeLogs")
    sortedRuntimeLogPath = self.paths.getSortedEdgeRuntimeLogPath()
    edgeSortCmd = [sys.executable, "-E", self.params.mantaSortEdgeLogs, "-o", sortedRuntimeLogPath]
    edgeSortCmd.extend(runtimeLogPaths)
    self.addTask(edgeSortLabel, edgeSortCmd, dependencies=hygenTasks, isForceLocal=True)

    # merge edge stats:
    edgeStatsMergeLabel = preJoin(taskPrefix,"mergeEdgeStats")
    edgeStatsMergeCmd = [self.params.mantaStatsMergeBin]
    for statsFile in statsLogPaths :
        edgeStatsMergeCmd += ["--stats-file", statsFile]
    edgeStatsMergeCmd += ["--output-file", self.paths.getFinalEdgeStatsPath()]
    edgeStatsMergeCmd += ["--report-file", self.paths.getFinalEdgeStatsReportPath()]
    self.addTask(edgeStatsMergeLabel, edgeStatsMergeCmd, dependencies=hygenTasks, isForceLocal=True)

    return nextStepWait