def validateAndSanitizeExistingOptions(self, options):
    """
    Normalize option values which point at existing files and expand
    region arguments.

    Mutates ``options`` in place:
    - makes runDir absolute
    - validates the reference fasta path and requires a samtools ".fai"
      index next to it
    - validates tabix-indexed candidate-indel and forced-genotype vcf lists
    - expands regionStrList into genomeRegionList (or None when no
      regions were given)

    Raises OptParseException when the fasta index is missing.
    """
    # NOTE(review): runDir is not None-checked before abspath here,
    # unlike sibling implementations — presumably guaranteed upstream.
    options.runDir = os.path.abspath(options.runDir)

    options.referenceFasta = validateFixExistingFileArg(
        options.referenceFasta, "reference")

    # the reference fasta must come with a samtools ".fai" index:
    if options.referenceFasta is not None:
        fastaIndexPath = options.referenceFasta + ".fai"
        if not os.path.isfile(fastaIndexPath):
            raise OptParseException(
                "Can't find expected fasta index file: '%s'" % (fastaIndexPath))

    checkFixTabixListOption(options.indelCandidatesList, "candidate indel vcf")
    checkFixTabixListOption(options.forcedGTList, "forced genotype vcf")

    if not options.regionStrList:
        options.genomeRegionList = None
    else:
        # a single argument may pack several regions joined by '+'
        parsedRegions = []
        for regionArg in options.regionStrList:
            for regionToken in regionArg.split("+"):
                parsedRegions.append(parseGenomeRegion(regionToken))
        options.genomeRegionList = parsedRegions
def validateAndSanitizeOptions(self, options):
    """
    Validate tumor/normal alignment-file options on top of the shared
    workflow validation.

    Requires at least one tumor BAM/CRAM, allows at most one BAM/CRAM
    per sample (normal input is optional), and runs the combined BAM
    set through BamSetChecker against the reference.

    Raises OptParseException on any violation.
    """
    StrelkaSharedWorkflowOptionsBase.validateAndSanitizeOptions(self, options)

    checkFixTabixListOption(options.noiseVcfList, "noise vcf")

    groomBamList(options.normalBamList, "normal sample")
    groomBamList(options.tumorBamList, "tumor sample")

    def requireNonEmpty(bamList, label):
        # at least one alignment file must be provided for this sample
        if not bamList:
            raise OptParseException(
                "No %s sample BAM/CRAM files specified" % (label))

    # only the tumor sample is mandatory; normal may be absent
    requireNonEmpty(options.tumorBamList, "tumor")

    bamSetChecker = BamSetChecker()

    def appendSingleBam(bamList, label):
        # each sample contributes at most one alignment file
        if bamList is None:
            return
        if len(bamList) > 1:
            raise OptParseException(
                "More than one %s sample BAM/CRAM files specified" % (label))
        bamSetChecker.appendBams(bamList, label)

    appendSingleBam(options.normalBamList, "normal")
    appendSingleBam(options.tumorBamList, "tumor")
    bamSetChecker.check(options.htsfileBin, options.referenceFasta)
def validateAndSanitizeOptions(self, options):
    """
    Validate and normalize workflow configuration options.

    Mutates ``options`` in place: resolves runDir, validates the
    reference fasta (and requires its ".fai" index), optionally disables
    dynamic sequence-error estimation when the reference is too small or
    fragmented, validates tabix-indexed vcf/bed inputs, expands region
    arguments into genomeRegionList, and validates the empirical scoring
    model file paths.

    Raises OptParseException when a required option is missing or an
    expected file cannot be found.
    """
    assertOptionExists(options.runDir, "run directory")
    options.runDir = os.path.abspath(options.runDir)

    assertOptionExists(options.referenceFasta, "reference fasta file")
    options.referenceFasta = validateFixExistingFileArg(
        options.referenceFasta, "reference fasta file")

    # the reference fasta must come with a samtools ".fai" index:
    referenceFastaIndex = options.referenceFasta + ".fai"
    if not os.path.isfile(referenceFastaIndex):
        raise OptParseException(
            "Can't find expected fasta index file: '%s'" % (referenceFastaIndex))

    if options.isEstimateSequenceError:
        # Dynamic error estimation is only feasible on a sufficiently
        # large reference:
        # - only contigs of at least errorEstimationMinChromMb count
        # - their combined length must reach errorEstimationMinTotalMb
        class Constants:
            Megabase = 1000000
            minChromSize = options.errorEstimationMinChromMb * Megabase
            minTotalSize = options.errorEstimationMinTotalMb * Megabase

        # contig sizes come from the fasta index file
        (_, chromSizes) = getFastaChromOrderSize(referenceFastaIndex)
        totalEstimationSize = sum(
            size for size in chromSizes.values()
            if size >= Constants.minChromSize)

        if totalEstimationSize < Constants.minTotalSize:
            sys.stderr.write(
                "WARNING: Cannot estimate sequence errors from data due to small or overly fragmented reference sequence. "
                "Sequence error estimation disabled.\n")
            options.isEstimateSequenceError = False

    checkFixTabixListOption(options.indelCandidatesList, "candidate indel vcf")
    checkFixTabixListOption(options.forcedGTList, "forced genotype vcf")

    options.callRegionsBed = checkFixTabixIndexedFileOption(
        options.callRegionsBed, "call-regions bed")

    if not options.regionStrList:
        options.genomeRegionList = None
    else:
        # a single argument may pack several regions joined by '+'
        options.genomeRegionList = [
            parseGenomeRegion(regionToken)
            for regionArg in options.regionStrList
            for regionToken in regionArg.split("+")]

    options.snvScoringModelFile = validateFixExistingFileArg(
        options.snvScoringModelFile, "SNV empirical scoring model file")
    options.indelScoringModelFile = validateFixExistingFileArg(
        options.indelScoringModelFile, "Indel empirical scoring model file")
def validateAndSanitizeExistingOptions(self, options):
    """
    Extend the shared existing-options validation with somatic-specific
    inputs.

    Mutates ``options`` in place: grooms the normal/tumor BAM lists,
    validates the tabix-indexed noise vcf list, and validates the
    somatic SNV/indel empirical scoring model file paths.
    """
    StrelkaSharedWorkflowOptionsBase.validateAndSanitizeExistingOptions(
        self, options)

    groomBamList(options.normalBamList, "normal sample")
    groomBamList(options.tumorBamList, "tumor sample")

    checkFixTabixListOption(options.noiseVcfList, "noise vcf")

    options.somaticSnvScoringModelFile = validateFixExistingFileArg(
        options.somaticSnvScoringModelFile,
        "Somatic SNV empirical scoring file")
    options.somaticIndelScoringModelFile = validateFixExistingFileArg(
        options.somaticIndelScoringModelFile,
        "Somatic indel empirical scoring file")
def validateAndSanitizeOptions(self, options):
    """
    Validate and normalize workflow configuration options.

    Mutates ``options`` in place: resolves runDir (refusing to reuse a
    directory that already holds a workflow script), validates the
    reference fasta (and requires its ".fai" index), optionally disables
    dynamic sequence-error estimation when the reference is too small or
    fragmented, validates tabix-indexed vcf/bed inputs, expands region
    arguments into genomeRegionList, checks that every chromosome label
    from region arguments and the call-regions bed exists in the
    reference, and validates the empirical scoring model file paths.

    Raises OptParseException when a required option is missing, an
    expected file cannot be found, or a chromosome label is unknown.
    """
    assertOptionExists(options.runDir, "run directory")
    options.runDir = os.path.abspath(options.runDir)

    # refuse to configure twice into the same run directory:
    workflowScriptPath = os.path.join(options.runDir, options.workflowScriptName)
    if os.path.exists(workflowScriptPath):
        raise OptParseException(
            "Run directory already contains workflow script file '%s'. Each analysis must be configured in a separate directory." % (workflowScriptPath))

    assertOptionExists(options.referenceFasta, "reference fasta file")
    options.referenceFasta = validateFixExistingFileArg(
        options.referenceFasta, "reference fasta file")

    # the reference fasta must come with a samtools ".fai" index:
    referenceFastaIndex = options.referenceFasta + ".fai"
    if not os.path.isfile(referenceFastaIndex):
        raise OptParseException(
            "Can't find expected fasta index file: '%s'" % (referenceFastaIndex))

    if options.isEstimateSequenceError:
        # Dynamic error estimation is only feasible on a sufficiently
        # large reference:
        # - only contigs of at least errorEstimationMinChromMb count
        # - their combined length must reach errorEstimationMinTotalMb
        class Constants:
            Megabase = 1000000
            minChromSize = options.errorEstimationMinChromMb * Megabase
            minTotalSize = options.errorEstimationMinTotalMb * Megabase

        # contig sizes come from the fasta index file
        (_, chromSizes) = getFastaChromOrderSize(referenceFastaIndex)
        totalEstimationSize = sum(
            size for size in chromSizes.values()
            if size >= Constants.minChromSize)

        if totalEstimationSize < Constants.minTotalSize:
            sys.stderr.write(
                "WARNING: Cannot estimate sequence errors from data due to small or overly fragmented reference sequence. "
                "Sequence error estimation disabled.\n")
            options.isEstimateSequenceError = False

    checkFixTabixListOption(options.indelCandidatesList, "candidate indel vcf")
    checkFixTabixListOption(options.forcedGTList, "forced genotype vcf")

    options.callRegionsBed = checkFixTabixIndexedFileOption(
        options.callRegionsBed, "call-regions bed")

    if not options.regionStrList:
        options.genomeRegionList = None
        expandedRegionStrs = []
    else:
        # '+' inside one argument is an (intentionally undocumented,
        # possibly deprecated) way to pack multiple regions into a
        # single entry; keep the expanded strings so that error
        # messages below can echo the argument that produced a region
        expandedRegionStrs = [
            regionToken
            for regionArg in options.regionStrList
            for regionToken in regionArg.split("+")]
        options.genomeRegionList = [
            parseGenomeRegion(regionToken)
            for regionToken in expandedRegionStrs]

    # verify that chromosome labels appearing in region arguments and the
    # call-regions bed file exist in the reference genome:
    if (options.callRegionsBed is not None) or (options.genomeRegionList is not None):
        refChromInfo = getFastaInfo(options.referenceFasta)

        if options.callRegionsBed is not None:
            for chrom in getTabixChromSet(options.tabixBin, options.callRegionsBed):
                if chrom not in refChromInfo:
                    raise OptParseException(
                        "Chromosome label '%s', in call regions bed file '%s', not found in reference genome." % (chrom, options.callRegionsBed))

        if options.genomeRegionList is not None:
            for (regionIndex, genomeRegion) in enumerate(options.genomeRegionList):
                chrom = genomeRegion["chrom"]
                if chrom not in refChromInfo:
                    raise OptParseException(
                        "Chromosome label '%s', parsed from region argument '%s', not found in reference genome." % (chrom, expandedRegionStrs[regionIndex]))

    options.snvScoringModelFile = validateFixExistingFileArg(
        options.snvScoringModelFile, "SNV empirical scoring model file")
    options.indelScoringModelFile = validateFixExistingFileArg(
        options.indelScoringModelFile, "Indel empirical scoring model file")