예제 #1
0
파일: _bwa.py 프로젝트: AkiOhtani/biopython
    def __init__(self, cmd="bwa", **kwargs):
        """Describe the ``bwa index`` command line and initialise the wrapper.

        ``cmd`` is stored as ``program_name``; ``kwargs`` are forwarded
        unchanged to ``AbstractCommandline.__init__``.
        """
        self.program_name = cmd

        def _known_algorithm(value):
            # Only these two index construction algorithms are accepted.
            return value in ["is", "bwtsw"]

        subcommand = _StaticArgument("index")
        algorithm = _Option(
            ["-a", "a", "algorithm"],
            """Algorithm for constructing BWT index.

                            Available options are:
                             - is:    IS linear-time algorithm for constructing suffix array.
                                      It requires 5.37N memory where N is the size of the database.
                                      IS is moderately fast, but does not work with database larger
                                      than 2GB. IS is the default algorithm due to its simplicity.
                             - bwtsw: Algorithm implemented in BWT-SW. This method works with the
                                      whole human genome, but it does not work with database
                                      smaller than 10MB and it is usually slower than IS.""",
            checker_function=_known_algorithm,
            equate=False,
            is_required=True,
        )
        prefix = _Option(
            ["-p", "p", "prefix"],
            "Prefix of the output database [same as db filename]",
            equate=False,
            is_required=False,
        )
        infile = _Argument(
            ["infile"], "Input file name", filename=True, is_required=True
        )
        colour_space = _Switch(
            ["-c", "c"],
            "Build color-space index. The input fasta should be in nucleotide space.",
        )

        # The list order is kept exactly as declared above.
        self.parameters = [subcommand, algorithm, prefix, infile, colour_space]
        AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #2
0
    def __init__(self, cmd="samtools", **kwargs):
        """Set up the ``samtools rmdup`` parameters and initialise the wrapper.

        ``cmd`` is stored as ``program_name``; ``kwargs`` are forwarded
        unchanged to ``AbstractCommandline.__init__``.
        """
        self.program_name = cmd

        subcommand = _StaticArgument("rmdup")
        single_end = _Switch(["-s", "s"], """Remove duplicates for single-end reads.

                    By default, the command works for paired-end
                    reads only""")
        paired_as_single = _Switch(["-S", "S"], """Treat paired-end reads
                                    as single-end reads""")
        # Both positional file arguments are mandatory.
        bam_in = _Argument(
            ["in_bam", "sorted_bam", "input_bam", "input", "input_file"],
            "Name Sorted Alignment File ",
            filename=True,
            is_required=True,
        )
        bam_out = _Argument(
            ["out_bam", "output_bam", "output", "output_file"],
            "Output file",
            filename=True,
            is_required=True,
        )

        self.parameters = [subcommand, single_end, paired_as_single, bam_in, bam_out]
        AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #3
0
파일: _samtools.py 프로젝트: bow/biopython
    def __init__(self, cmd="samtools", **kwargs):
        """Build the ``samtools rmdup`` parameter list, then run the base initialiser.

        ``cmd`` is stored as ``program_name``; ``kwargs`` are passed on to
        ``AbstractCommandline.__init__`` untouched.
        """
        self.program_name = cmd

        params = [_StaticArgument("rmdup")]

        # Flags controlling how duplicates are detected.
        params.append(
            _Switch(
                ["-s", "s"],
                """Remove duplicates for single-end reads.

                    By default, the command works for paired-end
                    reads only""",
            )
        )
        params.append(
            _Switch(
                ["-S", "S"],
                """Treat paired-end reads
                                    as single-end reads""",
            )
        )

        # Required input and output file names, in that order.
        input_names = ["in_bam", "sorted_bam", "input_bam", "input", "input_file"]
        output_names = ["out_bam", "output_bam", "output", "output_file"]
        params.append(
            _Argument(input_names, "Name Sorted Alignment File ", filename=True, is_required=True)
        )
        params.append(
            _Argument(output_names, "Output file", filename=True, is_required=True)
        )

        self.parameters = params
        AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #4
0
    def __init__(self, cmd="bwa", **kwargs):
        """Describe the ``bwa sampe`` command line and initialise the wrapper.

        ``cmd`` is stored as ``program_name``; ``kwargs`` are forwarded
        unchanged to ``AbstractCommandline.__init__``.
        """
        self.program_name = cmd

        def _is_int(value):
            return isinstance(value, int)

        subcommand = _StaticArgument("sampe")

        # Five mandatory file arguments that share the same settings.
        required_files = [
            _Argument([name], text, filename=True, is_required=True)
            for name, text in (
                ("reference", "Reference file name"),
                ("sai_file1", "Sai file 1"),
                ("sai_file2", "Sai file 2"),
                ("read_file1", "Read  file 1"),
                ("read_file2", "Read  file 2"),
            )
        ]

        max_insert = _Option(
            ["-a", "a"],
            """Maximum insert size for a read pair to be considered being mapped properly [500].

                    Since 0.4.5, this option is only used when there are not enough
                    good alignments to infer the distribution of insert sizes.""",
            checker_function=_is_int,
            equate=False,
        )
        max_occurrences = _Option(
            ["-o", "o"],
            """Maximum occurrences of a read for pairing [100000].

                        A read with more occurrences will be treated as a single-end read.
                        Reducing this parameter helps faster pairing.""",
            checker_function=_is_int,
            equate=False,
        )
        xa_proper = _Option(
            ["-n", "n"],
            """Maximum number of alignments to output in the XA tag for reads paired properly [3].

                    If a read has more than INT hits, the XA tag will not be written.""",
            checker_function=_is_int,
            equate=False,
        )
        xa_discordant = _Option(
            ["-N", "N"],
            """Maximum number of alignments to output in the XA tag for disconcordant read pairs (excluding singletons) [10].

                    If a read has more than INT hits, the XA tag will not be written.""",
            checker_function=_is_int,
            equate=False,
        )
        # NOTE(review): ``basestring`` is not a Python 3 builtin; presumably the
        # module supplies it through a compatibility import -- confirm.
        read_group = _Option(
            ["-r", "r"],
            "Specify the read group in a format like '@RG\tID:foo\tSM:bar'. [null]",
            checker_function=lambda x: isinstance(x, basestring),
            equate=False,
        )

        self.parameters = (
            [subcommand]
            + required_files
            + [max_insert, max_occurrences, xa_proper, xa_discordant, read_group]
        )
        AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #5
0
    def __init__(self, cmd="samtools", **kwargs):
        """Describe the ``samtools sort`` (1.x style) command line.

        ``cmd`` is stored as ``program_name``; ``kwargs`` are forwarded
        unchanged to ``AbstractCommandline.__init__``.

        The compression-level option is emitted as ``-l`` (lowercase L), which
        is the flag ``samtools sort`` documents; the previous ``-I`` spelling is
        not a ``samtools sort`` option.  ``I`` is kept as the last name of the
        option so code that sets the ``I`` parameter keeps working.
        """
        self.program_name = cmd

        # options for version samtools 1.3.1
        self.parameters = [
            _StaticArgument("sort"),
            _Switch(["-n", "n"], """Sort by read names rather
                                    than by chromosomal coordinates"""),
            _Option(["-o", "o"], """(file) Write the final sorted output to FILE,
                    rather than to standard output""",
                    equate=False, checker_function=lambda x: isinstance(x, str)),
            _Option(["-O", "O"], """(FORMAT) Write the final output as sam, bam, or cram""",
                    equate=False, checker_function=lambda x: isinstance(x, str)),
            _Option(["-T", "T"], """(PREFIX) Write temporary files to PREFIX.nnnn.bam, or if the specified PREFIX
                    is an existing directory, to PREFIX/samtools.mmm.mmm.tmp.nnnn.bam,
                    where mmm is unique to this invocation of the sort command""",
                    equate=False, checker_function=lambda x: isinstance(x, str)),
            # The level is documented as INT, so integers are accepted as well
            # as the strings the original checker allowed.
            _Option(["-l", "l", "I"], """(INT) Set the desired compression level for the final output file,
                    ranging from 0 (uncompressed) or 1 (fastest but minimal compression)
                    to 9 (best compression but slowest to write), similarly to gzip(1)'s compression level setting.""",
                    equate=False, checker_function=lambda x: isinstance(x, (int, str))),
            _Option(["-m", "m"], "Approximately the maximum required memory",
                    equate=False,
                    checker_function=lambda x: isinstance(x, int)),
            _Argument(["input"], "Input SAM/BAM/CRAM file",
                      filename=True, is_required=True),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #6
0
    def __init__(self, cmd="samtools", **kwargs):
        """Describe the ``samtools targetcut`` command line and initialise the wrapper.

        ``cmd`` is stored as ``program_name``; ``kwargs`` are forwarded
        unchanged to ``AbstractCommandline.__init__``.
        """
        self.program_name = cmd

        def _is_int(value):
            return isinstance(value, int)

        def _is_str(value):
            return isinstance(value, str)

        params = [_StaticArgument("targetcut")]
        params.append(
            _Option(["-Q", "Q"], "Minimum Base Quality ",
                    equate=False, checker_function=_is_int)
        )
        params.append(
            _Option(["-i", "i"], "Insertion Penalty",
                    equate=False, checker_function=_is_int)
        )
        params.append(
            _Option(["-f", "f"], "Reference Filename",
                    filename=True, equate=False, checker_function=_is_str)
        )
        # The three "em" options are validated as strings.
        params.append(
            _Option(["-0", "em0"], "em0", equate=False, checker_function=_is_str)
        )
        params.append(
            _Option(["-1", "em1"], "em1", equate=False, checker_function=_is_str)
        )
        params.append(
            _Option(["-2", "em2"], "em2", equate=False, checker_function=_is_str)
        )
        params.append(
            _Argument(["input", "input_bam", "in_bam"], "Input file",
                      filename=True, is_required=True)
        )

        self.parameters = params
        AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #7
0
 def __init__(self, cmd="samtools", **kwargs):
     """Set up the ``samtools merge`` parameters and initialise the wrapper.

     ``cmd`` is stored as ``program_name``; ``kwargs`` are forwarded
     unchanged to ``AbstractCommandline.__init__``.
     """
     self.program_name = cmd

     def _is_str(value):
         return isinstance(value, str)

     subcommand = _StaticArgument("merge")

     # Plain on/off flags.
     switches = [
         _Switch(
             ["-n", "n"],
             """The input alignments are sorted by read names
                 rather than by chromosomal coordinates""",
         ),
         _Switch(
             ["-r", "r"],
             """Attach an RG tag to each alignment.
                 The tag value is inferred from file names""",
         ),
         _Switch(["-u", "u"], "Uncompressed BAM output"),
         _Switch(
             ["-1", "fast_bam"],
             """Use zlib compression level 1
                                        to compress the output""",
         ),
         _Switch(
             ["-f", "f"],
             """Force to overwrite the
                                 output file if present""",
         ),
     ]

     # Options that take a string value.
     header_file = _Option(
         ["-h", "h"],
         """Use the lines of FILE as '@'
                                 headers to be copied to out.bam""",
         filename=True,
         equate=False,
         checker_function=_is_str,
     )
     region = _Option(
         ["-R", "R"],
         "Merge files in the specified region indicated by STR",
         equate=False,
         checker_function=_is_str,
     )

     # Required output file followed by the list of input files.
     merged_bam = _Argument(
         ["output_bam", "out_bam", "out", "output"],
         "Output BAM file",
         filename=True,
         is_required=True,
     )
     source_bams = _ArgumentList(
         ["input_bam", "in_bam", "input", "bam"],
         "Input BAM",
         filename=True,
         is_required=True,
     )

     self.parameters = (
         [subcommand] + switches + [header_file, region, merged_bam, source_bams]
     )
     AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #8
0
 def __init__(self, cmd="samtools", **kwargs):
     """Set up the ``samtools phase`` parameters and initialise the wrapper.

     ``cmd`` is stored as ``program_name``; ``kwargs`` are forwarded
     unchanged to ``AbstractCommandline.__init__``.
     """
     self.program_name = cmd

     def _is_int(value):
         return isinstance(value, int)

     subcommand = _StaticArgument("phase")
     bam_in = _Argument(
         ["input", "input_bam", "in_bam"],
         "Input file",
         filename=True,
         is_required=True,
     )
     drop_ambiguous = _Switch(["-A", "A"], "Drop reads with ambiguous phase")
     out_prefix = _Option(
         ["-b", "b"],
         "Prefix of BAM output",
         filename=True,
         equate=False,
         checker_function=lambda x: isinstance(x, str),
     )
     keep_chimeras = _Switch(["-F", "F"], "Do not attempt to fix chimeric reads")
     # The remaining options are all validated as integers.
     max_length = _Option(
         ["-k", "k"],
         "Maximum length for local phasing",
         equate=False,
         checker_function=_is_int,
     )
     min_lod = _Option(
         ["-q", "q"],
         """Minimum Phred-scaled LOD to
                 call a heterozygote""",
         equate=False,
         checker_function=_is_int,
     )
     min_base_quality = _Option(
         ["-Q", "Q"],
         """Minimum base quality to be
                 used in het calling""",
         equate=False,
         checker_function=_is_int,
     )

     self.parameters = [
         subcommand,
         bam_in,
         drop_ambiguous,
         out_prefix,
         keep_chimeras,
         max_length,
         min_lod,
         min_base_quality,
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
 def __init__(self, cmd="samtools", **kwargs):
     """Set up the ``samtools cat`` parameters and initialise the wrapper.

     ``cmd`` is stored as ``program_name``; ``kwargs`` are forwarded
     unchanged to ``AbstractCommandline.__init__``.
     """
     self.program_name = cmd

     def _is_str(value):
         return isinstance(value, str)

     def _file_option(names, text):
         # Both options are file names passed as a separate token (equate=False).
         return _Option(names, text, filename=True, equate=False,
                        checker_function=_is_str)

     subcommand = _StaticArgument("cat")
     header = _file_option(["-h", "h"], "Header SAM file")
     output = _file_option(["-o", "o"], "Output SAM file")
     inputs = _ArgumentList(["input", "input_bam", "bams"], "Input BAM files",
                            filename=True, is_required=True)

     self.parameters = [subcommand, header, output, inputs]
     AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #10
0
파일: _samtools.py 프로젝트: bow/biopython
 def __init__(self, cmd="samtools", **kwargs):
     """Set up the ``samtools idxstats`` parameters and initialise the wrapper.

     ``cmd`` is stored as ``program_name``; ``kwargs`` are forwarded
     unchanged to ``AbstractCommandline.__init__``.
     """
     self.program_name = cmd
     subcommand = _StaticArgument("idxstats")
     # NOTE(review): the description says "to be indexed" although the
     # sub-command is idxstats -- possibly copied from the index wrapper; confirm.
     bam_file = _Argument(
         ["input", "in_bam", "input_bam"],
         "BAM file to be indexed",
     )
     self.parameters = [subcommand, bam_file]
     AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #11
0
    def __init__(self, cmd="bwa", **kwargs):
        """Describe the ``bwa samse`` command line and initialise the wrapper.

        ``cmd`` is stored as ``program_name``; ``kwargs`` are forwarded
        unchanged to ``AbstractCommandline.__init__``.

        The ``-r`` (read group) option is validated as a string: its value is
        a header line such as '@RG<TAB>ID:foo<TAB>SM:bar', so the previous
        integer check rejected every meaningful value.
        """
        self.program_name = cmd
        self.parameters = [
            _StaticArgument("samse"),
            _Argument(["reference"],
                      "Reference file name",
                      filename=True,
                      is_required=True),
            _Argument(["sai_file"],
                      "Sai file name",
                      filename=True,
                      is_required=True),
            _Argument(["read_file"],
                      "Read  file name",
                      filename=True,
                      is_required=True),
            _Option(
                ["-n", "n"],
                """Maximum number of alignments to output in the XA tag for reads paired properly.

                    If a read has more than INT hits, the XA tag will not be written. [3]""",
                checker_function=lambda x: isinstance(x, int),
                equate=False),
            _Option(
                ["-r", "r"],
                "Specify the read group in a format like '@RG\tID:foo\tSM:bar'. [null]",
                # A read group is text, not a number.
                checker_function=lambda x: isinstance(x, str),
                equate=False),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #12
0
    def __init__(self, cmd="bwa", **kwargs):
        """Set up the ``bwa index`` parameters and initialise the wrapper.

        ``cmd`` is stored as ``program_name``; ``kwargs`` are forwarded
        unchanged to ``AbstractCommandline.__init__``.
        """
        self.program_name = cmd

        params = []
        params.append(_StaticArgument("index"))
        # The algorithm is mandatory and restricted to the two listed values.
        params.append(
            _Option(
                ["-a", "a", "algorithm"],
                """Algorithm for constructing BWT index.

                    Available options are:
                        - is:    IS linear-time algorithm for constructing suffix array.
                          It requires 5.37N memory where N is the size of the database.
                          IS is moderately fast, but does not work with database larger
                          than 2GB. IS is the default algorithm due to its simplicity.
                        - bwtsw: Algorithm implemented in BWT-SW. This method works with the
                          whole human genome, but it does not work with database
                          smaller than 10MB and it is usually slower than IS.""",
                checker_function=lambda x: x in ["is", "bwtsw"],
                equate=False,
                is_required=True,
            )
        )
        params.append(
            _Option(
                ["-p", "p", "prefix"],
                "Prefix of the output database [same as db filename]",
                equate=False,
                is_required=False,
            )
        )
        params.append(
            _Argument(["infile"], "Input file name", filename=True, is_required=True)
        )
        params.append(
            _Switch(
                ["-c", "c"],
                "Build color-space index. The input fasta should be in nucleotide space.",
            )
        )

        self.parameters = params
        AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #13
0
    def __init__(self, cmd="samtools", **kwargs):
        """Set up the legacy ``samtools sort`` parameters and initialise the wrapper.

        ``cmd`` is stored as ``program_name``; ``kwargs`` are forwarded
        unchanged to ``AbstractCommandline.__init__``.
        """
        self.program_name = cmd

        # options for version samtools 0.0.19
        subcommand = _StaticArgument("sort")
        to_stdout = _Switch(["-o", "o"], """Output the final alignment
                                    to the standard output""")
        by_name = _Switch(["-n", "n"], """Sort by read names rather
                                    than by chromosomal coordinates""")
        max_memory = _Option(
            ["-m", "m"],
            "Approximately the maximum required memory",
            equate=False,
            checker_function=lambda x: isinstance(x, int),
        )
        # Two required positionals: the input file, then the output prefix.
        bam_in = _Argument(["input"], "Input BAM file",
                           filename=True, is_required=True)
        out_prefix = _Argument(["out_prefix"], "Output prefix",
                               filename=True, is_required=True)

        self.parameters = [
            subcommand, to_stdout, by_name, max_memory, bam_in, out_prefix,
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #14
0
 def __init__(self, cmd="samtools", **kwargs):
     """Build the ``samtools phase`` parameter list, then run the base initialiser.

     ``cmd`` is stored as ``program_name``; ``kwargs`` are passed on to
     ``AbstractCommandline.__init__`` untouched.
     """
     self.program_name = cmd

     def _int_option(names, text):
         # Integer-valued option passed as a separate token (equate=False).
         return _Option(names, text, equate=False,
                        checker_function=lambda x: isinstance(x, int))

     params = [_StaticArgument("phase")]
     params.append(
         _Argument(["input", "input_bam", "in_bam"], "Input file",
                   filename=True, is_required=True)
     )
     params.append(_Switch(["-A", "A"], "Drop reads with ambiguous phase"))
     params.append(
         _Option(["-b", "b"], "Prefix of BAM output",
                 filename=True, equate=False,
                 checker_function=lambda x: isinstance(x, str))
     )
     params.append(_Switch(["-F", "F"], "Do not attempt to fix chimeric reads"))
     params.append(_int_option(["-k", "k"], "Maximum length for local phasing"))
     params.append(_int_option(["-q", "q"], """Minimum Phred-scaled LOD to
                 call a heterozygote"""))
     params.append(_int_option(["-Q", "Q"], """Minimum base quality to be
                 used in het calling"""))

     self.parameters = params
     AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #15
0
 def __init__(self, cmd="samtools", **kwargs):
     """Build the ``samtools merge`` parameter list, then run the base initialiser.

     ``cmd`` is stored as ``program_name``; ``kwargs`` are passed on to
     ``AbstractCommandline.__init__`` untouched.
     """
     self.program_name = cmd

     def _is_str(value):
         return isinstance(value, str)

     params = [_StaticArgument("merge")]

     # On/off flags, appended in declaration order.
     params.append(
         _Switch(
             ["-n", "n"],
             """The input alignments are sorted by read names
                 rather than by chromosomal coordinates""",
         )
     )
     params.append(
         _Switch(
             ["-r", "r"],
             """Attach an RG tag to each alignment.
                 The tag value is inferred from file names""",
         )
     )
     params.append(_Switch(["-u", "u"], "Uncompressed BAM output"))
     params.append(
         _Switch(
             ["-1", "fast_bam"],
             """Use zlib compression level 1
                                        to compress the output""",
         )
     )
     params.append(
         _Switch(
             ["-f", "f"],
             """Force to overwrite the
                                 output file if present""",
         )
     )

     # String-valued options.
     params.append(
         _Option(
             ["-h", "h"],
             """Use the lines of FILE as '@'
                                 headers to be copied to out.bam""",
             filename=True,
             equate=False,
             checker_function=_is_str,
         )
     )
     params.append(
         _Option(
             ["-R", "R"],
             "Merge files in the specified region indicated by STR",
             equate=False,
             checker_function=_is_str,
         )
     )

     # Required output file, then the input file list.
     params.append(
         _Argument(
             ["output_bam", "out_bam", "out", "output"],
             "Output BAM file",
             filename=True,
             is_required=True,
         )
     )
     params.append(
         _ArgumentList(
             ["input_bam", "in_bam", "input", "bam"],
             "Input BAM",
             filename=True,
             is_required=True,
         )
     )

     self.parameters = params
     AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #16
0
 def __init__(self, cmd="samtools", **kwargs):
     """Build the ``samtools targetcut`` parameter list, then run the base initialiser.

     ``cmd`` is stored as ``program_name``; ``kwargs`` are passed on to
     ``AbstractCommandline.__init__`` untouched.
     """
     self.program_name = cmd

     def _is_int(value):
         return isinstance(value, int)

     def _is_str(value):
         return isinstance(value, str)

     subcommand = _StaticArgument("targetcut")
     min_base_quality = _Option(["-Q", "Q"], "Minimum Base Quality ",
                                equate=False, checker_function=_is_int)
     insertion_penalty = _Option(["-i", "i"], "Insertion Penalty",
                                 equate=False, checker_function=_is_int)
     reference = _Option(["-f", "f"], "Reference Filename",
                         filename=True, equate=False, checker_function=_is_str)

     # "-0"/"-1"/"-2" share one shape: flag "-N", name and description "emN".
     em_options = []
     for digit in ("0", "1", "2"):
         label = "em" + digit
         em_options.append(
             _Option(["-" + digit, label], label,
                     equate=False, checker_function=_is_str)
         )

     bam_in = _Argument(["input", "input_bam", "in_bam"], "Input file",
                        filename=True, is_required=True)

     self.parameters = (
         [subcommand, min_base_quality, insertion_penalty, reference]
         + em_options
         + [bam_in]
     )
     AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #17
0
파일: _bwa.py 프로젝트: AkiOhtani/biopython
    def __init__(self, cmd="bwa", **kwargs):
        """Describe the ``bwa bwasw`` command line and initialise the wrapper.

        ``cmd`` is stored as ``program_name``; ``kwargs`` are forwarded
        unchanged to ``AbstractCommandline.__init__``.
        """
        self.program_name = cmd

        def _is_int(value):
            return isinstance(value, int)

        def _int_option(letter, text):
            # Integer option named "-<letter>" / "<letter>", passed as its own token.
            return _Option(["-" + letter, letter], text,
                           checker_function=_is_int, equate=False)

        subcommand = _StaticArgument("bwasw")
        reference = _Argument(["reference"], "Reference file name",
                              filename=True, is_required=True)
        reads = _Argument(["read_file"], "Read file",
                          filename=True, is_required=True)
        # The mate file is the only optional positional.
        mates = _Argument(["mate_file"], "Mate file",
                          filename=True, is_required=False)

        scoring = [
            _int_option("a", "Score of a match [1]"),
            _int_option("b", "Mismatch penalty [3]"),
            _int_option("q", "Gap open penalty [5]"),
            _int_option("r", "Gap extension penalty. The penalty for a contiguous gap of size k is q+k*r. [2]"),
            _int_option("t", "Number of threads in the multi-threading mode [1]"),
            _int_option("w", "Band width in the banded alignment [33]"),
            _int_option("T", "Minimum score threshold divided by a [37]"),
        ]
        # The coefficient is the one option validated as a float.
        coefficient = _Option(
            ["-c", "c"],
            """Coefficient for threshold adjustment according to query length [5.5].

                            Given an l-long query, the threshold for a hit to be retained is
                            a*max{T,c*log(l)}.""",
            checker_function=lambda x: isinstance(x, float),
            equate=False,
        )
        heuristics = [
            _int_option("z", "Z-best heuristics. Higher -z increases accuracy at the cost of speed. [1]"),
            _int_option("s", """Maximum SA interval size for initiating a seed [3].

                            Higher -s increases accuracy at the cost of speed."""),
            _int_option("N", "Minimum number of seeds supporting the resultant alignment to skip reverse alignment. [5]"),
        ]

        self.parameters = (
            [subcommand, reference, reads, mates]
            + scoring
            + [coefficient]
            + heuristics
        )
        AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #18
0
파일: _bwa.py 프로젝트: zellera93/biopython
    def __init__(self, cmd="bwa", **kwargs):
        """Build the ``bwa bwasw`` parameter list, then run the base initialiser.

        ``cmd`` is stored as ``program_name``; ``kwargs`` are passed on to
        ``AbstractCommandline.__init__`` untouched.
        """
        self.program_name = cmd

        def _is_int(value):
            return isinstance(value, int)

        def _is_float(value):
            return isinstance(value, float)

        params = [_StaticArgument("bwasw")]

        # Positional files: reference and reads are required, mates are optional.
        params.append(_Argument(["reference"], "Reference file name",
                                filename=True, is_required=True))
        params.append(_Argument(["read_file"], "Read file",
                                filename=True, is_required=True))
        params.append(_Argument(["mate_file"], "Mate file",
                                filename=True, is_required=False))

        # (names, description, checker) for every option, in command-line order.
        option_table = (
            (["-a", "a"], "Score of a match [1]", _is_int),
            (["-b", "b"], "Mismatch penalty [3]", _is_int),
            (["-q", "q"], "Gap open penalty [5]", _is_int),
            (["-r", "r"],
             "Gap extension penalty. The penalty for a contiguous gap of size k is q+k*r. [2]",
             _is_int),
            (["-t", "t"], "Number of threads in the multi-threading mode [1]", _is_int),
            (["-w", "w"], "Band width in the banded alignment [33]", _is_int),
            (["-T", "T"], "Minimum score threshold divided by a [37]", _is_int),
            (["-c", "c"],
             """Coefficient for threshold adjustment according to query length [5.5].

                            Given an l-long query, the threshold for a hit to be retained is
                            a*max{T,c*log(l)}.""",
             _is_float),
            (["-z", "z"],
             "Z-best heuristics. Higher -z increases accuracy at the cost of speed. [1]",
             _is_int),
            (["-s", "s"],
             """Maximum SA interval size for initiating a seed [3].

                            Higher -s increases accuracy at the cost of speed.""",
             _is_int),
            (["-N", "N"],
             "Minimum number of seeds supporting the resultant alignment to skip reverse alignment. [5]",
             _is_int),
        )
        for names, text, checker in option_table:
            params.append(
                _Option(names, text, checker_function=checker, equate=False)
            )

        self.parameters = params
        AbstractCommandline.__init__(self, cmd, **kwargs)
 def __init__(self, cmd="samtools", **kwargs):
     """Set up the ``samtools index`` parameters and initialise the wrapper.

     ``cmd`` is stored as ``program_name``; ``kwargs`` are forwarded
     unchanged to ``AbstractCommandline.__init__``.
     """
     self.program_name = cmd
     subcommand = _StaticArgument("index")
     bam_file = _Argument(
         ["input", "in_bam", "input_bam"],
         "BAM file to be indexed",
     )
     self.parameters = [subcommand, bam_file]
     AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #20
0
 def __init__(self, cmd="samtools", **kwargs):
     """Build the ``samtools idxstats`` parameter list, then run the base initialiser.

     ``cmd`` is stored as ``program_name``; ``kwargs`` are passed on to
     ``AbstractCommandline.__init__`` untouched.
     """
     self.program_name = cmd
     params = [_StaticArgument("idxstats")]
     # NOTE(review): the description mentions indexing although the
     # sub-command is idxstats -- possibly copied from the index wrapper; confirm.
     params.append(
         _Argument(["input", "in_bam", "input_bam"], "BAM file to be indexed")
     )
     self.parameters = params
     AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #21
0
    def __init__(self, cmd="samtools", **kwargs):
        """Set up the ``samtools faidx`` parameters and initialise the wrapper.

        ``cmd`` is stored as ``program_name``; ``kwargs`` are forwarded
        unchanged to ``AbstractCommandline.__init__``.
        """
        self.program_name = cmd
        subcommand = _StaticArgument("faidx")
        # The reference FASTA is the single, mandatory positional argument.
        reference = _Argument(
            ["reference", "reference_fasta", "ref"],
            "Reference FASTA to be indexed",
            filename=True,
            is_required=True,
        )
        self.parameters = [subcommand, reference]
        AbstractCommandline.__init__(self, cmd, **kwargs)
    def __init__(self, cmd="samtools", **kwargs):
        """Set up the ``samtools calmd`` parameters and initialise the wrapper.

        ``cmd`` is stored as ``program_name``; ``kwargs`` are forwarded
        unchanged to ``AbstractCommandline.__init__``.
        """
        self.program_name = cmd

        subcommand = _StaticArgument("calmd")

        # On/off flags, in the order they are declared on the command line.
        extended_baq = _Switch(["-E", "E"], """Extended BAQ calculation.
                    This option trades specificity for sensitivity,
                    though the effect is minor.""")
        equal_bases = _Switch(["-e", "e"], """Convert the read base to = if it is
                    identical to the aligned reference base.

                    Indel caller does not support the = bases
                    at the moment.""")
        uncompressed = _Switch(["-u", "u"], "Output uncompressed BAM")
        compressed = _Switch(["-b", "b"], "Output compressed BAM ")
        sam_input = _Switch(["-S", "S"], "The input is SAM with header lines ")
        bq_tag = _Switch(["-r", "r"], """Compute the BQ tag (without -A)
                    or cap base quality by BAQ (with -A).""")
        overwrite_quality = _Switch(["-A", "A"], """When used jointly with -r this option overwrites
                    the original base quality""")

        # The only valued option; validated as an integer.
        cap_coefficient = _Option(
            ["-C", "C"],
            """Coefficient to cap mapping quality
                    of poorly mapped reads.

                    See the pileup command for details.""",
            equate=False,
            checker_function=lambda x: isinstance(x, int),
        )

        # Required positionals: the alignment file, then the reference.
        bam_in = _Argument(
            ["input", "input_file", "in_bam", "infile", "input_bam"],
            "Input BAM",
            filename=True,
            is_required=True,
        )
        reference = _Argument(
            ["reference", "reference_fasta", "ref"],
            "Reference FASTA to be indexed",
            filename=True,
            is_required=True,
        )

        self.parameters = [
            subcommand,
            extended_baq,
            equal_bases,
            uncompressed,
            compressed,
            sam_input,
            bq_tag,
            overwrite_quality,
            cap_coefficient,
            bam_in,
            reference,
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #23
0
    def __init__(self, cmd="samtools", **kwargs):
        """Initialize the class.

        Declares the parameters of the ``samtools sort`` sub-command
        (options as of samtools 1.3.1) and passes ``cmd`` together with any
        keyword arguments on to ``AbstractCommandline.__init__``.

        Arguments:
         - cmd - name of the samtools executable (default ``"samtools"``).
         - kwargs - initial parameter values, keyed by the names declared
           in the parameter list below.
        """
        self.program_name = cmd

        # options for version samtools 1.3.1
        self.parameters = [
            _StaticArgument("sort"),
            _Switch(
                ["-n", "n"],
                """Sort by read names rather
                                    than by chromosomal coordinates""",
            ),
            _Option(
                ["-o", "o"],
                """(file) Write the final sorted output to FILE,
                    rather than to standard output""",
                equate=False,
                checker_function=lambda x: isinstance(x, str),
            ),
            _Option(
                ["-O", "O"],
                """(FORMAT) Write the final output as sam, bam, or cram""",
                equate=False,
                checker_function=lambda x: isinstance(x, str),
            ),
            _Option(
                ["-T", "T"],
                """(PREFIX) Write temporary files to PREFIX.nnnn.bam, or if the specified PREFIX
                    is an existing directory, to PREFIX/samtools.mmm.mmm.tmp.nnnn.bam,
                    where mmm is unique to this invocation of the sort command""",
                equate=False,
                checker_function=lambda x: isinstance(x, str),
            ),
            _Option(
                # samtools sort spells the compression-level flag "-l"
                # (lower-case L); "-I" is not one of its options. The old
                # keyword ``I`` stays in the name list, and stays last, so
                # existing callers are unaffected.
                # NOTE(review): assumes AbstractCommandline emits the first
                # name as the flag and exposes the last as the attribute
                # name - confirm.
                ["-l", "l", "I"],
                """(INT) Set the desired compression level for the final output file,
                    ranging from 0 (uncompressed) or 1 (fastest but minimal compression)
                    to 9 (best compression but slowest to write), similarly to gzip(1)'s compression level setting.""",
                equate=False,
                # The value is documented as INT, so integers are accepted
                # as well as the strings the previous checker required.
                checker_function=lambda x: isinstance(x, (int, str)),
            ),
            _Option(
                ["-m", "m"],
                "Approximately the maximum required memory",
                equate=False,
                checker_function=lambda x: isinstance(x, int),
            ),
            _Argument(["input"],
                      "Input SAM/BAM/CRAM file",
                      filename=True,
                      is_required=True),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #24
0
 def __init__(self, cmd="samtools", **kwargs):
     """Set up the parameter list for ``samtools reheader``.

     After the static ``reheader`` sub-command, two required file-name
     arguments are declared: the SAM file carrying the header, then the
     BAM file. ``cmd`` and ``kwargs`` are passed on to
     ``AbstractCommandline.__init__``.
     """
     self.program_name = cmd

     subcommand = _StaticArgument("reheader")
     header_sam = _Argument(
         ["input_header", "header_sam", "sam_file"],
         "Sam file with header",
         is_required=True,
         filename=True,
     )
     target_bam = _Argument(
         ["input_bam", "input_file", "bam_file"],
         "BAM file for writing header to",
         is_required=True,
         filename=True,
     )

     self.parameters = [subcommand, header_sam, target_bam]
     AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #25
0
 def __init__(self, cmd="samtools", **kwargs):
     """Set up the parameter list for ``samtools fixmate``.

     After the static ``fixmate`` sub-command, two required file-name
     arguments are declared: the name-sorted input alignment, then the
     output file. ``cmd`` and ``kwargs`` are passed on to
     ``AbstractCommandline.__init__``.
     """
     self.program_name = cmd

     subcommand = _StaticArgument("fixmate")
     name_sorted_input = _Argument(
         ["in_bam", "sorted_bam", "input_bam", "input", "input_file"],
         "Name Sorted Alignment File ",
         is_required=True,
         filename=True,
     )
     fixed_output = _Argument(
         ["out_bam", "output_bam", "output", "output_file"],
         "Output file",
         is_required=True,
         filename=True,
     )

     self.parameters = [subcommand, name_sorted_input, fixed_output]
     AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #26
0
파일: _samtools.py 프로젝트: bow/biopython
    def __init__(self, cmd="samtools", **kwargs):
        """Set up the parameter list for ``samtools calmd``.

        The list is assembled in command-line order: the static ``calmd``
        sub-command, the boolean switches, the ``-C`` option, and finally
        the two required file-name arguments (input BAM, reference FASTA).
        ``cmd`` and ``kwargs`` are passed on to
        ``AbstractCommandline.__init__``.
        """
        self.program_name = cmd

        def is_int(value):
            # Validator for the one integer-valued option (-C).
            return isinstance(value, int)

        subcommand = [_StaticArgument("calmd")]

        # Flags that take no value. The multi-line descriptions keep their
        # original embedded whitespace.
        switches = [
            _Switch(["-E", "E"], """Extended BAQ calculation.
                    This option trades specificity for sensitivity,
                    though the effect is minor."""),
            _Switch(["-e", "e"], """Convert the read base to = if it is
                    identical to the aligned reference base.

                    Indel caller does not support the = bases
                    at the moment."""),
            _Switch(["-u", "u"], "Output uncompressed BAM"),
            _Switch(["-b", "b"], "Output compressed BAM "),
            _Switch(["-S", "S"], "The input is SAM with header lines "),
            _Switch(["-r", "r"], """Compute the BQ tag (without -A)
                    or cap base quality by BAQ (with -A)."""),
            _Switch(["-A", "A"], """When used jointly with -r this option overwrites
                    the original base quality"""),
        ]

        # Options that carry a value.
        valued_options = [
            _Option(["-C", "C"], """Coefficient to cap mapping quality
                    of poorly mapped reads.

                    See the pileup command for details.""",
                    checker_function=is_int,
                    equate=False),
        ]

        # Required positional file names, in the order they are declared.
        positionals = [
            _Argument(["input", "input_file", "in_bam", "infile", "input_bam"],
                      "Input BAM",
                      is_required=True,
                      filename=True),
            _Argument(["reference", "reference_fasta", "ref"],
                      "Reference FASTA to be indexed",
                      is_required=True,
                      filename=True),
        ]

        self.parameters = subcommand + switches + valued_options + positionals
        AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #27
0
    def __init__(self, cmd="samtools", **kwargs):
        """Set up the parameter list for ``samtools cat``.

        Declares the static ``cat`` sub-command, the ``-h`` and ``-o``
        file-name options (both validated as strings), and a required list
        of input BAM files. ``cmd`` and ``kwargs`` are passed on to
        ``AbstractCommandline.__init__``.
        """
        self.program_name = cmd

        def is_str(value):
            # Shared validator for the two file-name options.
            return isinstance(value, str)

        self.parameters = [
            _StaticArgument("cat"),
            _Option(
                ["-h", "h"],
                "Header SAM file",
                checker_function=is_str,
                equate=False,
                filename=True,
            ),
            _Option(
                ["-o", "o"],
                "Output SAM file",
                checker_function=is_str,
                equate=False,
                filename=True,
            ),
            _ArgumentList(
                ["input", "input_bam", "bams"],
                "Input BAM files",
                is_required=True,
                filename=True,
            ),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #28
0
 def __init__(self, cmd="samtools", **kwargs):
     """Set up the parameter list for the prefix-style ``samtools sort``.

     Declares the static ``sort`` sub-command, the ``-o`` and ``-n``
     switches, the integer ``-m`` option, and two required file-name
     arguments (input BAM, output prefix). ``cmd`` and ``kwargs`` are
     passed on to ``AbstractCommandline.__init__``.
     """
     self.program_name = cmd

     def is_int(value):
         # Validator for the -m memory option.
         return isinstance(value, int)

     self.parameters = [
         _StaticArgument("sort"),
         # The multi-line descriptions keep their original embedded
         # whitespace.
         _Switch(
             ["-o", "o"],
             """Output the final alignment
                                 to the standard output""",
         ),
         _Switch(
             ["-n", "n"],
             """Sort by read names rather
                                 than by chromosomal coordinates""",
         ),
         _Option(
             ["-m", "m"],
             "Approximately the maximum required memory",
             checker_function=is_int,
             equate=False,
         ),
         _Argument(
             ["input_bam"],
             "Input BAM file",
             is_required=True,
             filename=True,
         ),
         _Argument(
             ["out_prefix"],
             "Output prefix",
             is_required=True,
             filename=True,
         ),
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #29
0
    def __init__(self, cmd="gt", **kwargs):
        """Build the parameter table for genometools ``merge``.

        The table is assembled in declaration order: the static ``merge``
        sub-command, the parsing switches, the output controls, the
        informational switches, and the required list of input files.
        ``cmd`` and ``kwargs`` are then passed to the parent initializer.

        Example
        >>> x = Merge(infiles=["test.gff3"], tidy=True)
        >>> print(x)
        gt merge -tidy test.gff3
        """
        self.program_name = f"{cmd} merge"

        params = [_StaticArgument("merge")]

        # Parsing behaviour.
        params.append(
            _Switch(
                ["-tidy", "tidy"],
                "Try to tidy the GFF3 files up during parsing.",
            )
        )
        params.append(
            _Switch(
                ["-retainids", "retainids"],
                "when available, use the original IDs provided in the "
                "source file",
            )
        )

        # Output destination and compression.
        params.append(
            _Option(
                ["-o", "outfile"],
                "redirect output to specified file",
                equate=False,
                filename=True,
                checker_function=check_is_str,
            )
        )
        params.append(
            _Switch(["-gzip", "gzip"], "write gzip compressed output file.")
        )
        params.append(
            _Switch(["-bzip2", "bzip2"], "write bzip2 compressed output file.")
        )
        params.append(
            _Switch(["-force", "force"], "force writing to output file")
        )

        # Informational switches.
        params.append(_Switch(["-help", "help"], "Show help and exit"))
        params.append(
            _Switch(
                ["-version", "version"],
                "display version information and exit",
            )
        )

        # Required input files.
        params.append(
            _ArgumentList(
                ["infiles"],
                "The GFF3 files to operate on.",
                is_required=True,
                filename=True,
                checker_function=check_is_list_of_str,
            )
        )

        self.parameters = params
        super().__init__(cmd, **kwargs)
예제 #30
0
파일: _bwa.py 프로젝트: lennax/biopython
    def __init__(self, cmd="bwa", **kwargs):
        """Initialize the ``bwa samse`` command line wrapper.

        Declares the static ``samse`` sub-command, three required file-name
        arguments (reference, sai file, read file) and the ``-n`` and ``-r``
        options, then passes ``cmd`` and ``kwargs`` on to
        ``AbstractCommandline.__init__``.

        Arguments:
         - cmd - name of the bwa executable (default ``"bwa"``).
         - kwargs - initial parameter values, keyed by the names declared
           in the parameter list below.
        """
        self.program_name = cmd
        self.parameters = [
            _StaticArgument("samse"),
            _Argument(["reference"], "Reference file name", filename=True, is_required=True),
            _Argument(["sai_file"], "Sai file name", filename=True, is_required=True),
            _Argument(["read_file"], "Read  file name", filename=True, is_required=True),
            _Option(["-n", "n"],
                    """Maximum number of alignments to output in the XA tag for reads paired properly.

                    If a read has more than INT hits, the XA tag will not be written. [3]""",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Option(["-r", "r"],
                    # The backslashes are escaped so the help text shows the
                    # two-character sequence "\t" rather than real TABs.
                    "Specify the read group in a format like '@RG\\tID:foo\\tSM:bar'. [null]",
                    # A read group is a header string; the previous int
                    # check rejected every valid value.
                    checker_function=lambda x: isinstance(x, str),
                    equate=False),
            ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #31
0
파일: _bwa.py 프로젝트: AkiOhtani/biopython
    def __init__(self, cmd="bwa", **kwargs):
        """Set up the parameter list for ``bwa sampe``.

        Declares the static ``sampe`` sub-command, five required file-name
        arguments (reference, two sai files, two read files), four
        integer-valued options (``-a``, ``-o``, ``-n``, ``-N``) and the
        ``-r`` read-group option. ``cmd`` and ``kwargs`` are passed on to
        ``AbstractCommandline.__init__``.
        """
        self.program_name = cmd

        def is_int(value):
            # Shared validator for the integer-valued options.
            return isinstance(value, int)

        def is_text(value):
            # Validator for the read-group string (-r).
            return isinstance(value, basestring)

        self.parameters = [
            _StaticArgument("sampe"),
            # Required positional file names.
            _Argument(["reference"], "Reference file name",
                      is_required=True, filename=True),
            _Argument(["sai_file1"], "Sai file 1",
                      is_required=True, filename=True),
            _Argument(["sai_file2"], "Sai file 2",
                      is_required=True, filename=True),
            _Argument(["read_file1"], "Read  file 1",
                      is_required=True, filename=True),
            _Argument(["read_file2"], "Read  file 2",
                      is_required=True, filename=True),
            # Valued options. The multi-line descriptions keep their
            # original embedded whitespace.
            _Option(
                ["-a", "a"],
                """Maximum insert size for a read pair to be considered being mapped properly [500].

                            Since 0.4.5, this option is only used when there are not enough
                            good alignments to infer the distribution of insert sizes.""",
                equate=False,
                checker_function=is_int,
            ),
            _Option(
                ["-o", "o"],
                """Maximum occurrences of a read for pairing [100000].

                            A read with more occurrences will be treated as a single-end read.
                            Reducing this parameter helps faster pairing.""",
                equate=False,
                checker_function=is_int,
            ),
            _Option(
                ["-n", "n"],
                """Maximum number of alignments to output in the XA tag for reads paired properly [3].

                            If a read has more than INT hits, the XA tag will not be written.""",
                equate=False,
                checker_function=is_int,
            ),
            _Option(
                ["-N", "N"],
                """Maximum number of alignments to output in the XA tag for disconcordant read pairs (excluding singletons) [10].

                         .  If a read has more than INT hits, the XA tag will not be written.""",
                equate=False,
                checker_function=is_int,
            ),
            _Option(
                ["-r", "r"],
                "Specify the read group in a format like '@RG\tID:foo\tSM:bar'. [null]",
                equate=False,
                checker_function=is_text,
            ),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #32
0
파일: boobook.py 프로젝트: MDU-PHL/boobook
 def __init__(self, cmd="bwa", **kwargs):
     """Set up the parameter list for ``bwa index``.

     Declares the static ``index`` sub-command, the ``-a``, ``-p`` and
     ``-b`` options, the ``-6`` switch and the required input FASTA
     argument. ``cmd`` and ``kwargs`` are passed on to
     ``AbstractCommandline.__init__``.
     """
     params = [_StaticArgument("index")]

     # Valued options. None of them declares a checker function.
     params.append(
         _Option(
             ["-a", "algorithm"],
             "BWT construction algorithm: bwtsw or is [auto]",
             equate=False,
         )
     )
     params.append(
         _Option(
             ["-p", "prefix"],
             "prefix of the index [same as fasta name]",
             equate=False,
         )
     )
     # The backslash continuation, and the indentation it folds into the
     # description text, is kept exactly as written.
     params.append(
         _Option(
             ["-b", "block_size"],
             "block size for the bwtsw algorithm \
             (effective with -a bwtsw) [10000000]",
             equate=False,
         )
     )

     # Flag and positional input.
     params.append(
         _Switch(
             ["-6", "index_64"],
             "index files named as <in.fasta>.64.* instead of <in.fasta>.*",
         )
     )
     params.append(
         _Argument(
             ["in_fasta"],
             "Input FASTA file",
             is_required=True,
             filename=True,
         )
     )

     self.parameters = params
     AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #33
0
파일: _bwa.py 프로젝트: zellera93/biopython
    def __init__(self, cmd="bwa", **kwargs):
        """Initialize the ``bwa aln`` command line wrapper.

        Declares the static ``aln`` sub-command, the two required file-name
        arguments (reference, read file), the valued options and the boolean
        switches, then passes ``cmd`` and ``kwargs`` on to
        ``AbstractCommandline.__init__``.

        Arguments:
         - cmd - name of the bwa executable (default ``"bwa"``).
         - kwargs - initial parameter values, keyed by the names declared
           in the parameter list below.
        """
        self.program_name = cmd
        self.parameters = [
            _StaticArgument("aln"),
            _Argument(["reference"], "Reference file name",
                      filename=True, is_required=True),
            _Argument(["read_file"], "Read file name",
                      filename=True, is_required=True),
            _Option(["-n", "n"],
                    "Maximum edit distance if the value is INT, or the fraction of missing alignments given 2% uniform base error rate if FLOAT. In the latter case, the maximum edit distance is automatically chosen for different read lengths. [0.04]",
                    checker_function=lambda x: isinstance(x, (int, float)),
                    equate=False),
            _Option(["-o", "o"],
                    # This description used to be a copy of the -n text;
                    # bwa aln documents -o as the gap-open limit. The
                    # checker is left as permissive as before so existing
                    # callers are not newly rejected.
                    "Maximum number of gap opens [1]",
                    checker_function=lambda x: isinstance(x, (int, float)),
                    equate=False),
            _Option(["-e", "e"],
                    "Maximum number of gap extensions, -1 for k-difference mode (disallowing long gaps) [-1]",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Option(["-d", "d"],
                    "Disallow a long deletion within INT bp towards the 3-end [16]",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Option(["-i", "i"],
                    "Disallow an indel within INT bp towards the ends [5]",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Option(["-l", "l"],
                    """Take the first INT subsequence as seed.

                            If INT is larger than the query sequence, seeding will be disabled.
                            For long reads, this option is typically ranged from 25 to 35 for
                            -k 2. [inf]""",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Option(["-k", "k"], "Maximum edit distance in the seed [2]",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Option(["-t", "t"], "Number of threads (multi-threading mode) [1]",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Option(["-M", "M"],
                    "Mismatch penalty. BWA will not search for suboptimal hits with a score lower than (bestScore-misMsc). [3]",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Option(["-O", "O"], "Gap open penalty [11]",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Option(["-E", "E"], "Gap extension penalty [4]",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Option(["-R", "R"],
                    """Proceed with suboptimal alignments if there are no more than INT equally best hits.

                            This option only affects paired-end mapping. Increasing this threshold helps
                            to improve the pairing accuracy at the cost of speed, especially for short
                            reads (~32bp).""",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Option(["-q", "q"],
                    # "\\sum" yields the same text as the former "\sum" but
                    # without an invalid-escape-sequence warning.
                    """Parameter for read trimming [0].

                            BWA trims a read down to argmax_x{\\sum_{i=x+1}^l(INT-q_i)} if q_l<INT
                            where l is the original read length.""",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Option(["-B", "B"],
                    "Length of barcode starting from the 5-end. When INT is positive, the barcode of each read will be trimmed before mapping and will be written at the BC SAM tag. For paired-end reads, the barcode from both ends are concatenated. [0]",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Switch(["-c", "c"],
                    "Reverse query but not complement it, which is required for alignment in the color space."),
            _Switch(["-N", "N"],
                    "Disable iterative search. All hits with no more than maxDiff differences will be found. This mode is much slower than the default."),
            _Switch(["-I", "I"],
                    "The input is in the Illumina 1.3+ read format (quality equals ASCII-64)."),
            _Switch(["-b", "b"],
                    "Specify the input read sequence file is the BAM format"),
            _Switch(["-b1", "b1"],
                    "When -b is specified, only use the first read in a read pair in mapping (skip single-end reads and the second reads)."),
            _Switch(["-b2", "b2"],
                    "When -b is specified, only use the second read in a read pair in mapping."),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #34
0
    def __init__(self, cmd="gt", **kwargs):
        """Build the parameter table for genometools ``gff3``.

        The table is declared in a single list, in command-line order: the
        static ``gff3`` sub-command, the sorting and parsing switches, the
        feature-transformation and validation options, the output controls,
        the informational switches, and the required input file. ``cmd``
        and ``kwargs`` are then passed to the parent initializer.

        Example
        >>> x = GFF3(infile="test.gff3", sort=True)
        >>> print(x)
        gt gff3 -sort test.gff3
        """
        self.program_name = f"{cmd} gff3"

        self.parameters = [
            _StaticArgument("gff3"),
            # --- sorting ---
            _Switch(
                ["-sort", "sort"],
                "Sort the GFF3 features (memory consumption is "
                "proportional to the input file size(s))",
            ),
            _Switch(
                ["-sortlines", "sortlines"],
                "sort the GFF3 features on a strict line basis "
                "(not sorted asdefined by GenomeTools).",
            ),
            _Switch(
                ["-sortnum", "sortnum"],
                "enable natural numeric sorting for sequence regions "
                "(not sorted as defined by GenomeTools)",
            ),
            # --- parsing and repair ---
            _Switch(
                ["-tidy", "tidy"],
                "Try to tidy the GFF3 files up during parsing.",
            ),
            _Switch(
                ["-retainids", "retainids"],
                "when available, use the original IDs provided in the "
                "source file",
            ),
            _Switch(
                ["-checkids", "checkids"],
                "make sure the ID attributes are unique within the "
                "scope of each GFF3_file, as required by GFF3 "
                "specification. If features with the same Parent "
                "attribute are not separated by a # line the GFF3 "
                "parser tries to treat them as a multi-line feature. "
                "This requires at least matching sequence IDs and types",
            ),
            _Switch(
                ["-addids", "dont_addids"],
                "add missing '##sequence-region' lines automatically",
            ),
            _Switch(
                ["-fixregionboundaries", "fixregionboundaries"],
                'automatically adjust "##sequence-region" lines to '
                "contain all their features",
            ),
            _Switch(
                ["-addintrons", "addintrons"],
                "add intron features between existing exon features",
            ),
            # --- feature transformation ---
            _Option(
                ["-offset", "offset"],
                "transform all features by the given offset",
                equate=False,
                checker_function=check_is_positive_int,
            ),
            _Option(
                ["-offsetfile", "offsetfile"],
                "transform all features by the offsets given in file",
                equate=False,
                filename=True,
                checker_function=check_is_str,
            ),
            _Option(
                ["-setsource", "setsource"],
                "set the source value (2nd column) of each feature",
                equate=False,
                checker_function=check_is_str,
            ),
            # --- validation (these two do not pass ``equate``) ---
            _Option(
                ["-typecheck", "typecheck"],
                "use an ontology given in an OBO file to validate "
                "parent-child relationships.  If no argument is given, "
                "the sofa.obo file from the gtdata/obo_files directory "
                "is used.  If an argument is given, it is used as an "
                "OBO filename.  In the case that such a file does not "
                "exist .obo is added to the argument and loading the "
                "resulting filename from the gtdata/obo_files "
                "directory is attempted.",
                filename=True,
                checker_function=check_is_str,
            ),
            _Option(
                ["-xrfcheck", "xrfcheck"],
                "check Dbxref and Ontology_term attributes for correct "
                "syntax according to a abbreviation definition file. "
                "If no argument is given, the GO.xrf_abbs file from the "
                "gtdata/xrf_abbr directory is used. If an argument is "
                "given, it is used as an specific filename for an "
                "abbreviation file. In the case that such a file does "
                "not exist, .xrf_abbr is added to the argument and "
                "loading the resulting filename from the "
                "gtdata/xrf_abbr directory is attempted.",
                filename=True,
                checker_function=check_is_str,
            ),
            # --- output ---
            _Switch(["-show", "noshow"], "don't show GFF3 output"),
            _Switch(["-v", "verbose"], "be verbose"),
            _Option(
                ["-width", "width"],
                "set output width for FASTA sequence printing, "
                "(0 disables formatting)",
                equate=False,
                checker_function=check_is_positive_int,
            ),
            _Option(
                ["-o", "outfile"],
                "redirect output to specified file",
                equate=False,
                filename=True,
                checker_function=check_is_str,
            ),
            _Switch(["-gzip", "gzip"], "write gzip compressed output file."),
            _Switch(["-bzip2", "bzip2"], "write bzip2 compressed output file."),
            _Switch(["-force", "force"], "force writing to output file"),
            # --- informational ---
            _Switch(["-help", "help"], "Show help and exit"),
            _Switch(
                ["-version", "version"],
                "display version information and exit",
            ),
            # --- required input ---
            _Argument(
                ["infile", "infile2"],
                "The GFF3 file to operate on.",
                is_required=True,
                filename=True,
                checker_function=check_is_str,
            ),
        ]

        super().__init__(cmd, **kwargs)
예제 #35
0
파일: _bwa.py 프로젝트: HuttonICS/biopython
    def __init__(self, cmd="bwa", **kwargs):
        """Set up the parameter list for ``bwa mem``.

        Declares the static ``mem`` sub-command, the reference and read-file
        arguments (the second read file is optional), the valued options and
        the boolean switches. ``cmd`` and ``kwargs`` are passed on to
        ``AbstractCommandline.__init__``.
        """
        self.program_name = cmd

        def is_int(value):
            # Shared validator for the integer-valued options.
            return isinstance(value, int)

        def is_number(value):
            # Validator for -r, which accepts int or float.
            return isinstance(value, (int, float))

        def is_text(value):
            # Validator for the read-group header string (-R).
            return isinstance(value, basestring)

        self.parameters = [
            _StaticArgument("mem"),
            # Positional file names.
            _Argument(
                ["reference"], "Reference file name",
                is_required=True, filename=True,
            ),
            _Argument(
                ["read_file1"], "Read 1 file name",
                is_required=True, filename=True,
            ),
            _Argument(
                ["read_file2"], "Read 2 file name",
                is_required=False, filename=True,
            ),
            # Valued options.
            _Option(
                ["-t", "t"],
                "Number of threads [1]",
                equate=False, checker_function=is_int,
            ),
            _Option(
                ["-k", "k"],
                "Minimum seed length. Matches shorter than INT will be missed. The alignment speed is usually insensitive to this value unless it significantly deviates 20. [19]",
                equate=False, checker_function=is_int,
            ),
            _Option(
                ["-w", "w"],
                "Band width. Essentially, gaps longer than INT will not be found. Note that the maximum gap length is also affected by the scoring matrix and the hit length, not solely determined by this option. [100]",
                equate=False, checker_function=is_int,
            ),
            _Option(
                ["-d", "d"],
                "Off-diagonal X-dropoff (Z-dropoff). Stop extension when the difference between the best and the current extension score is above |i-j|*A+INT, where i and j are the current positions of the query and reference, respectively, and A is the matching score. Z-dropoff is similar to BLAST\'s X-dropoff except that it doesn\'t penalize gaps in one of the sequences in the alignment. Z-dropoff not only avoids unnecessary extension, but also reduces poor alignments inside a long good alignment. [100]",
                equate=False, checker_function=is_int,
            ),
            _Option(
                ["-r", "r"],
                "Trigger re-seeding for a MEM longer than minSeedLen*FLOAT. This is a key heuristic parameter for tuning the performance. Larger value yields fewer seeds, which leads to faster alignment speed but lower accuracy. [1.5]",
                equate=False, checker_function=is_number,
            ),
            _Option(
                ["-c", "c"],
                "Discard a MEM if it has more than INT occurence in the genome. This is an insensitive parameter. [10000]",
                equate=False, checker_function=is_int,
            ),
            _Option(
                ["-A", "A"],
                "Matching score. [1]",
                equate=False, checker_function=is_int,
            ),
            _Option(
                ["-B", "B"],
                "Mismatch penalty. The sequence error rate is approximately: {.75 * exp[-log(4) * B/A]}. [4]",
                equate=False, checker_function=is_int,
            ),
            _Option(
                ["-O", "O"],
                "Gap open penalty. [6]",
                equate=False, checker_function=is_int,
            ),
            _Option(
                ["-E", "E"],
                "Gap extension penalty. A gap of length k costs O + k*E (i.e. -O is for opening a zero-length gap). [1]",
                equate=False, checker_function=is_int,
            ),
            _Option(
                ["-L", "L"],
                "Clipping penalty. When performing SW extension, BWA-MEM keeps track of the best score reaching the end of query. If this score is larger than the best SW score minus the clipping penalty, clipping will not be applied. Note that in this case, the SAM AS tag reports the best SW score; clipping penalty is not deducted. [5]",
                equate=False, checker_function=is_int,
            ),
            _Option(
                ["-U", "U"],
                "Penalty for an unpaired read pair. BWA-MEM scores an unpaired read pair as scoreRead1+scoreRead2-INT and scores a paired as scoreRead1+scoreRead2-insertPenalty. It compares these two scores to determine whether we should force pairing. [9] ",
                equate=False, checker_function=is_int,
            ),
            _Option(
                ["-R", "R"],
                "Complete read group header line. \'\\t\' can be used in STR and will be converted to a TAB in the output SAM. The read group ID will be attached to every read in the output. An example is \'@RG\tID:foo\tSM:bar\'. [null]",
                equate=False, checker_function=is_text,
            ),
            _Option(
                ["-T", "T"],
                "Don\'t output alignment with score lower than INT. This option only affects output. [30]",
                equate=False, checker_function=is_int,
            ),
            _Option(
                ["-v", "v"],
                "Control the verbose level of the output. This option has not been fully supported throughout BWA. Ideally, a value 0 for disabling all the output to stderr; 1 for outputting errors only; 2 for warnings and errors; 3 for all normal messages; 4 or higher for debugging. When this option takes value 4, the output is not SAM. [3]",
                equate=False, checker_function=is_int,
            ),
            # Boolean switches.
            _Switch(
                ["-P", "P"],
                "In the paired-end mode, perform SW to rescue missing hits only but do not try to find hits that fit a proper pair.",
            ),
            _Switch(
                ["-p", "p"],
                "Assume the first input query file is interleaved paired-end FASTA/Q. See the command description for details.",
            ),
            _Switch(
                ["-a", "a"],
                "Output all found alignments for single-end or unpaired paired-end reads. These alignments will be flagged as secondary alignments.",
            ),
            _Switch(
                ["-C", "C"],
                "Append FASTA/Q comment to SAM output. This option can be used to transfer read meta information (e.g. barcode) to the SAM output. Note that the FASTA/Q comment (the string after a space in the header line) must conform the SAM spec (e.g. BC:Z:CGTAC). Malformated comments lead to incorrect SAM output.",
            ),
            _Switch(
                ["-H", "H"],
                "Use hard clipping \'H\' in the SAM output. This option may dramatically reduce the redundancy of output when mapping long contig or BAC sequences.",
            ),
            _Switch(
                ["-M", "M"],
                "Mark shorter split hits as secondary (for Picard compatibility).",
            ),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #36
0
파일: boobook.py 프로젝트: MDU-PHL/boobook
 def __init__(self, cmd = 'bwa', **kwargs):
     """Declare the ``bwa mem`` parameter table, then run the base initialiser.

     Args:
         cmd: Name or path of the bwa executable (defaults to ``'bwa'``).
         **kwargs: Passed through untouched, together with ``cmd``, to
             ``AbstractCommandline.__init__``.

     The table lists the static ``mem`` sub-command first, then the
     algorithm, scoring and input/output flags, and finally the positional
     file arguments (``ref`` and ``in_fq1`` required, ``in_fq2`` optional).

     Long help texts are assembled from adjacent string literals: a
     backslash-newline inside a single literal would copy the source
     indentation of the following line into the help text.
     """
     self.parameters = [
         _StaticArgument("mem"),
         # Algorithm options
         _Option(["-t", "threads"],
                 "number of threads [default: 1]",
                 equate=False),
         _Option(["-k", "seed_len"],
                 "minimum seed length [default: 19]",
                 equate=False),
         _Option(["-w", "band_width"],
                 "band width for banded alignment [default: 100]",
                 equate=False),
         _Option(["-d", "off_diag"],
                 "off-diagonal X-dropoff [100]",
                 equate=False),
         _Option(["-r", "intern_seeds"],
                 "look for internal seeds inside a seed longer "
                 "than {-k} * FLOAT [1.5]",
                 equate=False),
         _Option(["-y", "seed_occur"],
                 "seed occurrence for the 3rd round seeding [20]",
                 equate=False),
         _Option(["-c", "skip_seeds"],
                 "skip seeds with more than INT occurrences [500]",
                 equate=False),
         _Option(["-D", "drop_chains"],
                 "drop chains shorter than FLOAT fraction of the longest "
                 "overlapping chain [0.50]",
                 equate=False),
         _Option(["-W", "discard_chain"],
                 "discard a chain if seeded bases shorter than INT [0]",
                 equate=False),
         _Option(["-m", "max_rounds"],
                 "perform at most INT rounds of mate rescues for each read [50]",
                 equate=False),
         _Switch(["-S", "skip_mate"],
                 "skip mate rescue"),
         _Switch(['-P', "skip_pairing"],
                 "skip pairing; mate rescue performed unless -S also in use"),
         _Switch(["-e", "discard_matches"],
                 "discard full-length exact matches"),
         # Scoring options
         _Option(["-A", "match_score"],
                 "score for a sequence match, which scales "
                 "options -TdBOELU unless overridden [1]",
                 equate=False),
         _Option(["-B", "mis_penalty"],
                 "penalty for a mismatch [4]",
                 equate=False),
         _Option(["-O", "gap_open_penal"],
                 "gap open penalties for deletions and insertions [6,6]",
                 equate=False),
         _Option(["-E", "gap_exten_penal"],
                 "gap extension penalty; a gap of size k cost "
                 "'{-O} + {-E}*k' [1,1]",
                 equate=False),
         _Option(["-L", "clip_penal"],
                 "penalty for 5'- and 3'-end clipping [5,5]",
                 equate=False),
         _Option(["-U", "unpair_penal"],
                 "penalty for an unpaired read pair [17]",
                 equate=False),
         # The presets are kept one per line so the help text stays readable.
         _Option(["-x", "read_type"],
                 "read type. Setting -x changes multiple parameters "
                 "unless overridden [null]\n"
                 "  pacbio: -k17 -W40 -r10 -A1 -B1 -O1 -E1 -L0  "
                 "(PacBio reads to ref)\n"
                 "  ont2d: -k14 -W20 -r10 -A1 -B1 -O1 -E1 -L0  "
                 "(Oxford Nanopore 2D-reads to ref)\n"
                 "  intractg: -B9 -O16 -L5  (intra-species contigs to ref)",
                 equate=False),
         # Input/output options
         _Switch(["-p", "smart_pair"],
                 "smart pairing (ignoring in2.fq)"),
         # "\\t" keeps a literal backslash-t in the help text; a bare "\t"
         # in a normal string would be rendered as a TAB character.
         _Option(["-R", "read_group"],
                 "read group header line such as '@RG\\tID:foo\\tSM:bar' [null]",
                 equate=False),
         _Option(["-H", "insert_header"],
                 "insert STR to header if it starts with @; "
                 "or insert lines in FILE [null]",
                 equate=False),
         _Switch(["-j", "treat_alt"],
                 "treat ALT contigs as part of the primary assembly "
                 "(i.e. ignore <idxbase>.alt file)"),
         _Option(["-v", "verbose_level"],
                 "verbose level: 1=error, 2=warning, 3=message, 4+=debugging [3]",
                 equate=False),
         _Option(["-T", "min_score"],
                 "minimum score to output [30]",
                 equate=False),
         _Option(["-h", "min_hit_score"],
                 "if there are <INT hits with score >80% of the max score, "
                 "output all in XA [5,200]",
                 equate=False),
         _Switch(["-a", "output_all"],
                 "output all alignments for SE or unpaired PE"),
         _Switch(["-C", "append_comment"],
                 "append FASTA/FASTQ comment to SAM output"),
         _Switch(["-V", "output_ref"],
                 "output the reference FASTA header in the XR tag"),
         _Switch(["-Y", "soft_clip"],
                 "use soft clipping for supplementary alignments"),
         _Switch(["-M", "mark_splits"],
                 "mark shorter split hits as secondary"),
         _Option(["-I", "insert_size"],
                 "FLOAT[,FLOAT[,INT[,INT]]] "
                 "specify the mean, standard deviation (10% of the mean if absent), "
                 "max (4 sigma from the mean if absent) and min of the insert "
                 "size distribution. FR orientation only. [inferred]",
                 equate=False),
         # Positional file arguments
         _Argument(["ref"],
                   "Input FASTA reference",
                   filename=True,
                   is_required=True),
         _Argument(["in_fq1"],
                   "Input FASTQ file 1",
                   filename=True,
                   is_required=True),
         _Argument(["in_fq2"],
                   "Input FASTQ file 2",
                   filename=True,
                   is_required=False),
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #37
0
 def __init__(self, cmd="bwa", **kwargs):
     """Initialize the class.

     Builds the parameter table for ``bwa mem`` and hands ``cmd`` plus any
     keyword arguments to ``AbstractCommandline.__init__``.

     Args:
         cmd: Executable name or path; stored as ``self.program_name``.
         **kwargs: Forwarded unchanged to the base initialiser.
     """

     def _is_int(value):
         """Checker for options documented as INT."""
         return isinstance(value, int)

     def _is_number(value):
         """Checker for options documented as FLOAT (ints are accepted too)."""
         return isinstance(value, (int, float))

     def _is_str(value):
         """Checker for options documented as STR."""
         return isinstance(value, str)

     self.program_name = cmd
     # NOTE(review): the positional file arguments are declared before the
     # options; if the base class emits parameters in declaration order the
     # options end up after the file names - confirm the target bwa build
     # accepts that.
     self.parameters = [
         _StaticArgument("mem"),
         _Argument(["reference"],
                   "Reference file name",
                   filename=True,
                   is_required=True),
         _Argument(["read_file1"],
                   "Read 1 file name",
                   filename=True,
                   is_required=True),
         _Argument(["read_file2"],
                   "Read 2 file name",
                   filename=True,
                   is_required=False),
         _Option(
             ["-t", "t"],
             "Number of threads [1]",
             checker_function=_is_int,
             equate=False,
         ),
         _Option(
             ["-k", "k"],
             "Minimum seed length. Matches shorter than INT will be missed. The alignment speed is usually insensitive to this value unless it significantly deviates 20. [19]",
             checker_function=_is_int,
             equate=False,
         ),
         _Option(
             ["-w", "w"],
             "Band width. Essentially, gaps longer than INT will not be found. Note that the maximum gap length is also affected by the scoring matrix and the hit length, not solely determined by this option. [100]",
             checker_function=_is_int,
             equate=False,
         ),
         _Option(
             ["-d", "d"],
             r"Off-diagonal X-dropoff (Z-dropoff). Stop extension when the difference between the best and the current extension score is above \|i-j\|*A+INT, where i and j are the current positions of the query and reference, respectively, and A is the matching score. Z-dropoff is similar to BLAST's X-dropoff except that it doesn't penalize gaps in one of the sequences in the alignment. Z-dropoff not only avoids unnecessary extension, but also reduces poor alignments inside a long good alignment. [100]",
             checker_function=_is_int,
             equate=False,
         ),
         _Option(
             ["-r", "r"],
             "Trigger re-seeding for a MEM longer than minSeedLen*FLOAT. This is a key heuristic parameter for tuning the performance. Larger value yields fewer seeds, which leads to faster alignment speed but lower accuracy. [1.5]",
             checker_function=_is_number,
             equate=False,
         ),
         _Option(
             ["-c", "c"],
             "Discard a MEM if it has more than INT occurence in the genome. This is an insensitive parameter. [10000]",
             checker_function=_is_int,
             equate=False,
         ),
         _Option(
             ["-A", "A"],
             "Matching score. [1]",
             checker_function=_is_int,
             equate=False,
         ),
         _Option(
             ["-B", "B"],
             "Mismatch penalty. The sequence error rate is approximately: {.75 * exp[-log(4) * B/A]}. [4]",
             checker_function=_is_int,
             equate=False,
         ),
         _Option(
             ["-O", "O"],
             "Gap open penalty. [6]",
             checker_function=_is_int,
             equate=False,
         ),
         _Option(
             ["-E", "E"],
             "Gap extension penalty. A gap of length k costs O + k*E (i.e. -O is for opening a zero-length gap). [1]",
             checker_function=_is_int,
             equate=False,
         ),
         _Option(
             ["-L", "L"],
             "Clipping penalty. When performing SW extension, BWA-MEM keeps track of the best score reaching the end of query. If this score is larger than the best SW score minus the clipping penalty, clipping will not be applied. Note that in this case, the SAM AS tag reports the best SW score; clipping penalty is not deducted. [5]",
             checker_function=_is_int,
             equate=False,
         ),
         _Option(
             ["-U", "U"],
             "Penalty for an unpaired read pair. BWA-MEM scores an unpaired read pair as scoreRead1+scoreRead2-INT and scores a paired as scoreRead1+scoreRead2-insertPenalty. It compares these two scores to determine whether we should force pairing. [9] ",
             checker_function=_is_int,
             equate=False,
         ),
         # The backslashes are doubled so the help text shows the literal
         # two-character sequence "\t" that the user has to type; a bare
         # "\t" in a normal string would be rendered as a TAB character.
         _Option(
             ["-R", "R"],
             "Complete read group header line. '\\t' can be used in STR and will be converted to a TAB in the output SAM. The read group ID will be attached to every read in the output. An example is '@RG\\tID:foo\\tSM:bar'. [null]",
             checker_function=_is_str,
             equate=False,
         ),
         _Option(
             ["-T", "T"],
             "Don't output alignment with score lower than INT. This option only affects output. [30]",
             checker_function=_is_int,
             equate=False,
         ),
         _Option(
             ["-v", "v"],
             "Control the verbose level of the output. This option has not been fully supported throughout BWA. Ideally, a value 0 for disabling all the output to stderr; 1 for outputting errors only; 2 for warnings and errors; 3 for all normal messages; 4 or higher for debugging. When this option takes value 4, the output is not SAM. [3]",
             checker_function=_is_int,
             equate=False,
         ),
         _Switch(
             ["-P", "P"],
             "In the paired-end mode, perform SW to rescue missing hits only but do not try to find hits that fit a proper pair.",
         ),
         _Switch(
             ["-p", "p"],
             "Assume the first input query file is interleaved paired-end FASTA/Q. See the command description for details.",
         ),
         _Switch(
             ["-a", "a"],
             "Output all found alignments for single-end or unpaired paired-end reads. These alignments will be flagged as secondary alignments.",
         ),
         _Switch(
             ["-C", "C"],
             "Append FASTA/Q comment to SAM output. This option can be used to transfer read meta information (e.g. barcode) to the SAM output. Note that the FASTA/Q comment (the string after a space in the header line) must conform the SAM spec (e.g. BC:Z:CGTAC). Malformated comments lead to incorrect SAM output.",
         ),
         _Switch(
             ["-H", "H"],
             "Use hard clipping 'H' in the SAM output. This option may dramatically reduce the redundancy of output when mapping long contig or BAC sequences.",
         ),
         _Switch(
             ["-M", "M"],
             "Mark shorter split hits as secondary (for Picard compatibility).",
         ),
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #38
0
    def __init__(self, cmd="samtools", **kwargs):
        """Describe the ``samtools view`` parameters and initialise the wrapper.

        ``cmd`` is recorded as ``self.program_name``; it and ``kwargs`` are
        then passed on to ``AbstractCommandline.__init__``.
        """

        def accepts_int(candidate):
            """Return True when *candidate* is an int."""
            return isinstance(candidate, int)

        def accepts_str(candidate):
            """Return True when *candidate* is a str."""
            return isinstance(candidate, str)

        # Continuation lines of the triple-quoted help texts are part of the
        # string values, so their leading whitespace is left as it was.
        view_parameters = [
            _StaticArgument("view"),
            _Switch(["-b", "b"], "Output in the BAM format"),
            _Switch(
                ["-c", "c"],
                """Instead of printing the alignments, only count them and
                    print the total number.

                    All filter options, such as '-f', '-F' and '-q',
                    are taken into account""",
            ),
            _Switch(["-h", "h"], "Include the header in the output"),
            _Switch(
                ["-u", "u"],
                """Output uncompressed BAM.

                    This option saves time spent on compression/decompression
                    and is thus preferred when the output is piped to
                    another samtools command""",
            ),
            _Switch(["-H", "H"], "Output the header only"),
            _Switch(
                ["-S", "S"],
                """Input is in SAM.
                    If @SQ header lines are absent,
                    the '-t' option is required.""",
            ),
            _Option(
                ["-t", "t"],
                """This file is TAB-delimited.
                    Each line must contain the reference name and the
                    length of the reference, one line for each
                    distinct reference; additional fields are ignored.

                    This file also defines the order of the reference
                    sequences in sorting.
                    If you run   'samtools faidx <ref.fa>',
                    the resultant index file <ref.fa>.fai can be used
                    as this <in.ref_list> file.""",
                checker_function=accepts_str,
                equate=False,
                filename=True,
            ),
            _Option(
                ["-o", "o"],
                "Output file",
                checker_function=accepts_str,
                equate=False,
                filename=True,
            ),
            _Option(
                ["-f", "f"],
                """Only output alignments with all bits in
                    INT present in the FLAG field""",
                checker_function=accepts_int,
                equate=False,
            ),
            _Option(
                ["-F", "F"],
                "Skip alignments with bits present in INT",
                checker_function=accepts_int,
                equate=False,
            ),
            _Option(
                ["-q", "q"],
                "Skip alignments with MAPQ smaller than INT",
                checker_function=accepts_int,
                equate=False,
            ),
            _Option(
                ["-r", "r"],
                "Only output reads in read group STR",
                checker_function=accepts_str,
                equate=False,
            ),
            _Option(
                ["-R", "R"],
                "Output reads in read groups listed in FILE",
                checker_function=accepts_str,
                equate=False,
                filename=True,
            ),
            _Option(
                ["-l", "l"],
                "Only output reads in library STR",
                checker_function=accepts_str,
                equate=False,
            ),
            _Switch(
                ["-1", "fast_bam"],
                "Use zlib compression level 1 to compress the output",
            ),
            _Argument(
                ["input", "input_file"],
                "Input File Name",
                filename=True,
                is_required=True,
            ),
            _Argument(["region"], "Region", is_required=False),
        ]

        self.program_name = cmd
        self.parameters = view_parameters
        AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #39
0
파일: _bwa.py 프로젝트: AkiOhtani/biopython
    def __init__(self, cmd="bwa", **kwargs):
        """Declare the ``bwa aln`` parameters and initialise the wrapper.

        Args:
            cmd: Executable name or path; stored as ``self.program_name``.
            **kwargs: Forwarded, together with ``cmd``, to
                ``AbstractCommandline.__init__``.
        """
        self.program_name = cmd
        self.parameters = \
                [
                    _StaticArgument("aln"),
                    _Argument(["reference"], "Reference file name",
                              filename=True, is_required=True),
                    _Argument(["read_file"], "Read file name",
                              filename=True, is_required=True),
                    _Option(["-n", "n"],
                            "Maximum edit distance if the value is INT, or the fraction of missing alignments given 2% uniform base error rate if FLOAT. In the latter case, the maximum edit distance is automatically chosen for different read lengths. [0.04]",
                            checker_function=lambda x: isinstance(x, (int, float)),
                            equate=False),
                    # The description used to be a verbatim copy of the -n
                    # text. The checker still accepts floats so that existing
                    # callers are not rejected, although the bwa manual
                    # documents -o as INT.
                    _Option(["-o", "o"],
                            "Maximum number of gap opens [1]",
                            checker_function=lambda x: isinstance(x, (int, float)),
                            equate=False),
                    _Option(["-e", "e"],
                            "Maximum number of gap extensions, -1 for k-difference mode (disallowing long gaps) [-1]",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-d", "d"],
                            "Disallow a long deletion within INT bp towards the 3-end [16]",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-i", "i"],
                            "Disallow an indel within INT bp towards the ends [5]",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-l", "l"],
                            """Take the first INT subsequence as seed.

                            If INT is larger than the query sequence, seeding will be disabled.
                            For long reads, this option is typically ranged from 25 to 35 for
                            -k 2. [inf]""",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-k", "k"], "Maximum edit distance in the seed [2]",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-t", "t"], "Number of threads (multi-threading mode) [1]",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-M", "M"],
                            "Mismatch penalty. BWA will not search for suboptimal hits with a score lower than (bestScore-misMsc). [3]",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-O", "O"], "Gap open penalty [11]",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-E", "E"], "Gap extension penalty [4]",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-R", "R"],
                            """Proceed with suboptimal alignments if there are no more than INT equally best hits.

                            This option only affects paired-end mapping. Increasing this threshold helps
                            to improve the pairing accuracy at the cost of speed, especially for short
                            reads (~32bp).""",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    # Raw string: "\s" in "\sum" is not a valid escape
                    # sequence, and newer Python versions warn about it in a
                    # normal literal. The resulting text is unchanged.
                    _Option(["-q", "q"],
                            r"""Parameter for read trimming [0].

                            BWA trims a read down to argmax_x{\sum_{i=x+1}^l(INT-q_i)} if q_l<INT
                            where l is the original read length.""",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-B", "B"],
                            "Length of barcode starting from the 5-end. When INT is positive, the barcode of each read will be trimmed before mapping and will be written at the BC SAM tag. For paired-end reads, the barcode from both ends are concatenated. [0]",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Switch(["-c", "c"],
                            "Reverse query but not complement it, which is required for alignment in the color space."),
                    _Switch(["-N", "N"],
                            "Disable iterative search. All hits with no more than maxDiff differences will be found. This mode is much slower than the default."),
                    _Switch(["-I", "I"],
                            "The input is in the Illumina 1.3+ read format (quality equals ASCII-64)."),
                    _Switch(["-b", "b"],
                            "Specify the input read sequence file is the BAM format"),
                    _Switch(["-b1", "b1"],
                            "When -b is specified, only use the first read in a read pair in mapping (skip single-end reads and the second reads)."),
                    _Switch(["-b2", "b2"],
                            "When -b is specified, only use the second read in a read pair in mapping.")
                  ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
예제 #40
0
    def __init__(self, cmd="samtools", **kwargs):
        """Describe the ``samtools mpileup`` parameters and initialise the wrapper.

        ``cmd`` is recorded as ``self.program_name``; it and ``kwargs`` are
        then passed on to ``AbstractCommandline.__init__``.
        """

        def accepts_int(candidate):
            """Return True when *candidate* is an int."""
            return isinstance(candidate, int)

        def accepts_str(candidate):
            """Return True when *candidate* is a str."""
            return isinstance(candidate, str)

        # Continuation lines of the triple-quoted help texts are part of the
        # string values, so their leading whitespace is left as it was.
        mpileup_parameters = [
            _StaticArgument("mpileup"),
            _Switch(
                ["-E", "E"],
                """Extended BAQ computation.
                    This option helps sensitivity especially
                    for MNPs, but may hurt specificity a little bit""",
            ),
            _Switch(
                ["-B", "B"],
                """Disable probabilistic realignment for the
                    computation of base alignment quality (BAQ).

                    BAQ is the Phred-scaled probability of a read base being
                    misaligned.
                    Applying this option greatly helps to reduce false SNPs
                    caused by misalignments""",
            ),
            _Switch(
                ["-g", "g"],
                """Compute genotype likelihoods and output them in the
                    binary call format (BCF)""",
            ),
            _Switch(
                ["-u", "u"],
                """Similar to -g except that the output is
                    uncompressed BCF, which is preferred for piping""",
            ),
            _Option(
                ["-C", "C"],
                """Coefficient for downgrading mapping quality for
                    reads containing excessive mismatches.

                    Given a read with a phred-scaled probability q of
                    being generated from the mapped position,
                    the new mapping quality is about sqrt((INT-q)/INT)*INT.
                    A zero value disables this functionality;
                    if enabled, the recommended value for BWA is 50""",
                checker_function=accepts_int,
                equate=False,
            ),
            _Option(
                ["-r", "r"],
                "Only generate pileup in region STR",
                checker_function=accepts_str,
                equate=False,
            ),
            _Option(
                ["-f", "f"],
                """The faidx-indexed reference file in the FASTA format.

                    The file can be optionally compressed by razip""",
                checker_function=accepts_str,
                equate=False,
                filename=True,
            ),
            _Option(
                ["-l", "l"],
                """BED or position list file containing a list of regions
                    or sites where pileup or BCF should be generated""",
                checker_function=accepts_str,
                equate=False,
                filename=True,
            ),
            _Option(
                ["-M", "M"],
                "Cap Mapping Quality at M",
                checker_function=accepts_int,
                equate=False,
            ),
            _Option(
                ["-q", "q"],
                "Minimum mapping quality for an alignment to be used",
                checker_function=accepts_int,
                equate=False,
            ),
            _Option(
                ["-Q", "Q"],
                "Minimum base quality for a base to be considered",
                checker_function=accepts_int,
                equate=False,
            ),
            _Switch(
                ["-6", "illumina_13"],
                "Assume the quality is in the Illumina 1.3+ encoding",
            ),
            _Switch(
                ["-A", "A"],
                "Do not skip anomalous read pairs in variant calling.",
            ),
            _Option(
                ["-b", "b"],
                "List of input BAM files, one file per line",
                checker_function=accepts_str,
                equate=False,
                filename=True,
            ),
            _Option(
                ["-d", "d"],
                "At a position, read maximally INT reads per input BAM",
                checker_function=accepts_int,
                equate=False,
            ),
            _Switch(["-D", "D"], "Output per-sample read depth"),
            _Switch(
                ["-S", "S"],
                """Output per-sample Phred-scaled
                                strand bias P-value""",
            ),
            _Option(
                ["-e", "e"],
                """Phred-scaled gap extension sequencing error probability.

                    Reducing INT leads to longer indels""",
                checker_function=accepts_int,
                equate=False,
            ),
            _Option(
                ["-h", "h"],
                """Coefficient for modeling homopolymer errors.

                    Given an l-long homopolymer run, the sequencing error
                    of an indel of size s is modeled as INT*s/l""",
                checker_function=accepts_int,
                equate=False,
            ),
            _Switch(["-I", "I"], "Do not perform INDEL calling"),
            _Option(
                ["-L", "L"],
                """Skip INDEL calling if the average per-sample
                    depth is above INT""",
                checker_function=accepts_int,
                equate=False,
            ),
            _Option(
                ["-o", "o"],
                """Phred-scaled gap open sequencing error probability.

                    Reducing INT leads to more indel calls.""",
                checker_function=accepts_int,
                equate=False,
            ),
            _Option(
                ["-p", "p"],
                """Comma delimited list of platforms (determined by @RG-PL)
                    from which indel candidates are obtained.

                    It is recommended to collect indel candidates from
                    sequencing technologies that have low indel error rate
                    such as ILLUMINA""",
                checker_function=accepts_str,
                equate=False,
            ),
            _ArgumentList(
                ["input_file"],
                "Input File for generating mpileup",
                filename=True,
                is_required=True,
            ),
        ]

        self.program_name = cmd
        self.parameters = mpileup_parameters
        AbstractCommandline.__init__(self, cmd, **kwargs)