Ejemplo n.º 1
0
 def __init__(self, cmd="samtools", **kwargs):
     """Describe the command line of ``samtools merge``.

     Stores ``cmd`` as the program name, builds the ordered parameter
     list (sub-command, switches, valued options, output BAM, input
     BAMs) and hands ``cmd`` and ``kwargs`` to
     ``AbstractCommandline.__init__``.
     """
     def _is_text(value):
         # Shared validator for the options checked against ``str``.
         return isinstance(value, str)

     self.program_name = cmd

     switches = [
         _Switch(
             ["-n", "n"],
             """The input alignments are sorted by read names
                 rather than by chromosomal coordinates"""),
         _Switch(
             ["-r", "r"],
             """Attach an RG tag to each alignment.
                 The tag value is inferred from file names"""),
         _Switch(["-u", "u"], "Uncompressed BAM output"),
         _Switch(
             ["-1", "fast_bam"],
             """Use zlib compression level 1
                                        to compress the output"""),
         _Switch(
             ["-f", "f"],
             """Force to overwrite the
                                 output file if present"""),
     ]
     valued_options = [
         _Option(
             ["-h", "h"],
             """Use the lines of FILE as '@'
                                 headers to be copied to out.bam""",
             checker_function=_is_text,
             equate=False,
             filename=True),
         _Option(
             ["-R", "R"],
             "Merge files in the specified region indicated by STR",
             checker_function=_is_text,
             equate=False),
     ]
     positionals = [
         _Argument(
             ["output_bam", "out_bam", "out", "output"],
             "Output BAM file",
             is_required=True,
             filename=True),
         _ArgumentList(
             ["input_bam", "in_bam", "input", "bam"],
             "Input BAM",
             is_required=True,
             filename=True),
     ]

     # Order matters: it is the order in which the list is declared.
     self.parameters = (
         [_StaticArgument("merge")] + switches + valued_options + positionals
     )
     AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 2
0
Archivo: _bwa.py Proyecto: cbirdlab/sap
    def __init__(self, cmd="bwa", **kwargs):
        """Describe the command line of ``bwa index``.

        Stores ``cmd`` as the program name, lists the sub-command, the
        ``-a`` and ``-p`` options, the input file and the ``-c`` switch in
        that order, then delegates to ``AbstractCommandline.__init__`` with
        ``cmd`` and ``kwargs``.
        """
        def _known_algorithm(name):
            # Only the two algorithm names listed in the help text pass.
            return name in ["is", "bwtsw"]

        self.program_name = cmd

        algorithm = _Option(
            ["-a", "a", "algorithm"],
            """Algorithm for constructing BWT index.

                            Available options are:
                             - is:    IS linear-time algorithm for constructing suffix array.
                                      It requires 5.37N memory where N is the size of the database.
                                      IS is moderately fast, but does not work with database larger
                                      than 2GB. IS is the default algorithm due to its simplicity.
                             - bwtsw: Algorithm implemented in BWT-SW. This method works with the
                                      whole human genome, but it does not work with database
                                      smaller than 10MB and it is usually slower than IS.""",
            is_required=True,
            equate=False,
            checker_function=_known_algorithm)
        prefix = _Option(
            ["-p", "p", "prefix"],
            "Prefix of the output database [same as db filename]",
            is_required=False,
            equate=False)
        infile = _Argument(
            ["infile"],
            "Input file name",
            is_required=True,
            filename=True)
        # NOTE(review): this switch is listed after the positional input
        # file; confirm bwa accepts options in that position.
        color_space = _Switch(
            ["-c", "c"],
            "Build color-space index. The input fasta should be in nucleotide space.")

        self.parameters = [
            _StaticArgument("index"),
            algorithm,
            prefix,
            infile,
            color_space,
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 3
0
 def __init__(self, cmd="samtools", **kwargs):
     """Describe the command line of ``samtools targetcut``.

     Stores ``cmd`` as the program name, registers the sub-command, six
     options (``-Q`` and ``-i`` validated as ``int``; ``-f``, ``-0``,
     ``-1`` and ``-2`` validated as ``str``) and the required input file,
     then delegates to ``AbstractCommandline.__init__``.
     """
     def _is_int(value):
         return isinstance(value, int)

     def _is_text(value):
         return isinstance(value, str)

     def _option(flag, keyword, description, checker, **extra):
         # Every option here is space separated (equate=False).
         return _Option([flag, keyword], description,
                        equate=False, checker_function=checker, **extra)

     self.program_name = cmd
     self.parameters = [
         _StaticArgument("targetcut"),
         _option("-Q", "Q", "Minimum Base Quality ", _is_int),
         _option("-i", "i", "Insertion Penalty", _is_int),
         _option("-f", "f", "Reference Filename", _is_text, filename=True),
         _option("-0", "em0", "em0", _is_text),
         _option("-1", "em1", "em1", _is_text),
         _option("-2", "em2", "em2", _is_text),
         _Argument(["input", "input_bam", "in_bam"], "Input file",
                   is_required=True, filename=True),
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 4
0
 def __init__(self, cmd="samtools", **kwargs):
     """Describe the command line of ``samtools phase``.

     Stores ``cmd`` as the program name and registers, in this order, the
     sub-command, the required input file, ``-A``, ``-b``, ``-F``, ``-k``,
     ``-q`` and ``-Q``, then delegates to
     ``AbstractCommandline.__init__`` with ``cmd`` and ``kwargs``.
     """
     def _is_int(value):
         return isinstance(value, int)

     def _is_text(value):
         return isinstance(value, str)

     self.program_name = cmd

     in_bam = _Argument(
         ["input", "input_bam", "in_bam"],
         "Input file",
         is_required=True,
         filename=True)
     drop_ambiguous = _Switch(["-A", "A"], "Drop reads with ambiguous phase")
     out_prefix = _Option(
         ["-b", "b"],
         "Prefix of BAM output",
         checker_function=_is_text,
         equate=False,
         filename=True)
     keep_chimeras = _Switch(
         ["-F", "F"], "Do not attempt to fix chimeric reads")
     max_length = _Option(
         ["-k", "k"],
         "Maximum length for local phasing",
         checker_function=_is_int,
         equate=False)
     min_lod = _Option(
         ["-q", "q"],
         """Minimum Phred-scaled LOD to
                 call a heterozygote""",
         checker_function=_is_int,
         equate=False)
     min_base_quality = _Option(
         ["-Q", "Q"],
         """Minimum base quality to be
                 used in het calling""",
         checker_function=_is_int,
         equate=False)

     # NOTE(review): the positional input precedes the options here;
     # the order below is kept exactly as declared originally.
     self.parameters = [
         _StaticArgument("phase"),
         in_bam,
         drop_ambiguous,
         out_prefix,
         keep_chimeras,
         max_length,
         min_lod,
         min_base_quality,
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 5
0
    def __init__(self, cmd="bwa", **kwargs):
        """Describe the command line of ``bwa index``.

        Stores ``cmd`` as the program name, lists the sub-command, the
        ``-a`` and ``-p`` options, the input file and the ``-c`` switch in
        that order, then delegates to ``AbstractCommandline.__init__`` with
        ``cmd`` and ``kwargs``.
        """
        def _known_algorithm(name):
            # Only the two algorithm names listed in the help text pass.
            return name in ["is", "bwtsw"]

        self.program_name = cmd

        algorithm = _Option(
            ["-a", "a", "algorithm"],
            """Algorithm for constructing BWT index.

                            Available options are:
                             - is:    IS linear-time algorithm for constructing suffix array.
                                      It requires 5.37N memory where N is the size of the database.
                                      IS is moderately fast, but does not work with database larger
                                      than 2GB. IS is the default algorithm due to its simplicity.
                             - bwtsw: Algorithm implemented in BWT-SW. This method works with the
                                      whole human genome, but it does not work with database
                                      smaller than 10MB and it is usually slower than IS.""",
            is_required=True,
            equate=False,
            checker_function=_known_algorithm)
        prefix = _Option(
            ["-p", "p", "prefix"],
            "Prefix of the output database [same as db filename]",
            is_required=False,
            equate=False)
        infile = _Argument(
            ["infile"],
            "Input file name",
            is_required=True,
            filename=True)
        # NOTE(review): this switch is listed after the positional input
        # file; confirm bwa accepts options in that position.
        color_space = _Switch(
            ["-c", "c"],
            "Build color-space index. The input fasta should be in nucleotide space.")

        self.parameters = [
            _StaticArgument("index"),
            algorithm,
            prefix,
            infile,
            color_space,
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 6
0
    def __init__(self, cmd="samtools", **kwargs):
        """Describe the command line of ``samtools targetcut``.

        Stores ``cmd`` as the program name, registers the sub-command, six
        options (``-Q`` and ``-i`` validated as ``int``; ``-f``, ``-0``,
        ``-1`` and ``-2`` validated as ``str``) and the required input
        file, then delegates to ``AbstractCommandline.__init__``.
        """
        def _is_int(value):
            return isinstance(value, int)

        def _is_text(value):
            return isinstance(value, str)

        def _option(flag, keyword, description, checker, **extra):
            # Every option here is space separated (equate=False).
            return _Option([flag, keyword], description,
                           equate=False, checker_function=checker, **extra)

        self.program_name = cmd
        self.parameters = [
            _StaticArgument("targetcut"),
            _option("-Q", "Q", "Minimum Base Quality ", _is_int),
            _option("-i", "i", "Insertion Penalty", _is_int),
            _option("-f", "f", "Reference Filename", _is_text,
                    filename=True),
            _option("-0", "em0", "em0", _is_text),
            _option("-1", "em1", "em1", _is_text),
            _option("-2", "em2", "em2", _is_text),
            _Argument(["input", "input_bam", "in_bam"], "Input file",
                      is_required=True, filename=True),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 7
0
 def __init__(self, cmd="samtools", **kwargs):
     """Describe the command line of ``samtools merge``.

     Stores ``cmd`` as the program name, builds the ordered parameter
     list (sub-command, switches, valued options, output BAM, input
     BAMs) and hands ``cmd`` and ``kwargs`` to
     ``AbstractCommandline.__init__``.
     """
     def _is_text(value):
         # Shared validator for the options checked against ``str``.
         return isinstance(value, str)

     self.program_name = cmd

     switches = [
         _Switch(
             ["-n", "n"],
             """The input alignments are sorted by read names
                 rather than by chromosomal coordinates"""),
         _Switch(
             ["-r", "r"],
             """Attach an RG tag to each alignment.
                 The tag value is inferred from file names"""),
         _Switch(["-u", "u"], "Uncompressed BAM output"),
         _Switch(
             ["-1", "fast_bam"],
             """Use zlib compression level 1
                                        to compress the output"""),
         _Switch(
             ["-f", "f"],
             """Force to overwrite the
                                 output file if present"""),
     ]
     valued_options = [
         _Option(
             ["-h", "h"],
             """Use the lines of FILE as '@'
                                 headers to be copied to out.bam""",
             checker_function=_is_text,
             equate=False,
             filename=True),
         _Option(
             ["-R", "R"],
             "Merge files in the specified region indicated by STR",
             checker_function=_is_text,
             equate=False),
     ]
     positionals = [
         _Argument(
             ["output_bam", "out_bam", "out", "output"],
             "Output BAM file",
             is_required=True,
             filename=True),
         _ArgumentList(
             ["input_bam", "in_bam", "input", "bam"],
             "Input BAM",
             is_required=True,
             filename=True),
     ]

     # Order matters: it is the order in which the list is declared.
     self.parameters = (
         [_StaticArgument("merge")] + switches + valued_options + positionals
     )
     AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 8
0
Archivo: _bwa.py Proyecto: cbirdlab/sap
    def __init__(self, cmd="bwa", **kwargs):
        """Describe the command line of ``bwa bwasw``.

        The parameter list holds the sub-command, the reference, read and
        optional mate files, and then the tuning options; every option is
        validated as ``int`` except ``-c``, which is validated as
        ``float``.  ``cmd`` and ``kwargs`` are finally passed to
        ``AbstractCommandline.__init__``.
        """
        def _is_int(value):
            return isinstance(value, int)

        def _is_float(value):
            return isinstance(value, float)

        def _file_argument(name, description, required):
            # Positional file name registered under a single keyword.
            return _Argument([name], description,
                             filename=True, is_required=required)

        def _int_option(flag, description):
            # Option whose keyword is the flag without its leading dash.
            return _Option([flag, flag[1:]], description,
                           checker_function=_is_int, equate=False)

        self.program_name = cmd
        self.parameters = [
            _StaticArgument("bwasw"),
            _file_argument("reference", "Reference file name", True),
            _file_argument("read_file", "Read file", True),
            _file_argument("mate_file", "Mate file", False),
            _int_option("-a", "Score of a match [1]"),
            _int_option("-b", "Mismatch penalty [3]"),
            _int_option("-q", "Gap open penalty [5]"),
            _int_option(
                "-r",
                "Gap extension penalty. The penalty for a contiguous gap of size k is q+k*r. [2]"),
            _int_option(
                "-t", "Number of threads in the multi-threading mode [1]"),
            _int_option("-w", "Band width in the banded alignment [33]"),
            _int_option("-T", "Minimum score threshold divided by a [37]"),
            _Option(
                ["-c", "c"],
                """Coefficient for threshold adjustment according to query length [5.5].

                            Given an l-long query, the threshold for a hit to be retained is
                            a*max{T,c*log(l)}.""",
                equate=False,
                checker_function=_is_float),
            _int_option(
                "-z",
                "Z-best heuristics. Higher -z increases accuracy at the cost of speed. [1]"),
            _int_option(
                "-s",
                """Maximum SA interval size for initiating a seed [3].

                            Higher -s increases accuracy at the cost of speed."""),
            _int_option(
                "-N",
                "Minimum number of seeds supporting the resultant alignment to skip reverse alignment. [5]"),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 9
0
    def __init__(self, cmd="bwa", **kwargs):
        """Describe the command line of ``bwa bwasw``.

        The parameter list holds the sub-command, the reference, read and
        optional mate files, and then the tuning options; every option is
        validated as ``int`` except ``-c``, which is validated as
        ``float``.  ``cmd`` and ``kwargs`` are finally passed to
        ``AbstractCommandline.__init__``.
        """
        def _is_int(value):
            return isinstance(value, int)

        def _is_float(value):
            return isinstance(value, float)

        def _file_argument(name, description, required):
            # Positional file name registered under a single keyword.
            return _Argument([name], description,
                             filename=True, is_required=required)

        def _int_option(flag, description):
            # Option whose keyword is the flag without its leading dash.
            return _Option([flag, flag[1:]], description,
                           checker_function=_is_int, equate=False)

        self.program_name = cmd
        self.parameters = [
            _StaticArgument("bwasw"),
            _file_argument("reference", "Reference file name", True),
            _file_argument("read_file", "Read file", True),
            _file_argument("mate_file", "Mate file", False),
            _int_option("-a", "Score of a match [1]"),
            _int_option("-b", "Mismatch penalty [3]"),
            _int_option("-q", "Gap open penalty [5]"),
            _int_option(
                "-r",
                "Gap extension penalty. The penalty for a contiguous gap of size k is q+k*r. [2]"),
            _int_option(
                "-t", "Number of threads in the multi-threading mode [1]"),
            _int_option("-w", "Band width in the banded alignment [33]"),
            _int_option("-T", "Minimum score threshold divided by a [37]"),
            _Option(
                ["-c", "c"],
                """Coefficient for threshold adjustment according to query length [5.5].

                            Given an l-long query, the threshold for a hit to be retained is
                            a*max{T,c*log(l)}.""",
                equate=False,
                checker_function=_is_float),
            _int_option(
                "-z",
                "Z-best heuristics. Higher -z increases accuracy at the cost of speed. [1]"),
            _int_option(
                "-s",
                """Maximum SA interval size for initiating a seed [3].

                            Higher -s increases accuracy at the cost of speed."""),
            _int_option(
                "-N",
                "Minimum number of seeds supporting the resultant alignment to skip reverse alignment. [5]"),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 10
0
 def __init__(self, cmd="samtools", **kwargs):
     """Describe the command line of ``samtools idxstats``.

     Stores ``cmd`` as the program name, registers the sub-command and
     the single input BAM argument, then delegates to
     ``AbstractCommandline.__init__`` with ``cmd`` and ``kwargs``.

     The input is flagged ``filename=True`` like the other file
     arguments of the samtools wrappers so that it is treated as a path
     rather than as a plain value.
     """
     self.program_name = cmd
     self.parameters = [
         _StaticArgument("idxstats"),
         # NOTE(review): the input is still optional here; consider
         # is_required=True if no caller relies on omitting it.
         _Argument(["input", "in_bam", "input_bam"],
                   "BAM file to be indexed",
                   filename=True)
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 11
0
 def __init__(self, cmd="samtools", **kwargs):
     """Describe the command line of ``samtools idxstats``.

     Stores ``cmd`` as the program name, registers the sub-command and
     the single input BAM argument, then delegates to
     ``AbstractCommandline.__init__`` with ``cmd`` and ``kwargs``.

     The input is flagged ``filename=True`` like the other file
     arguments of the samtools wrappers so that it is treated as a path
     rather than as a plain value.
     """
     self.program_name = cmd
     self.parameters = [
         _StaticArgument("idxstats"),
         # NOTE(review): the input is still optional here; consider
         # is_required=True if no caller relies on omitting it.
         _Argument(["input", "in_bam", "input_bam"],
                   "BAM file to be indexed",
                   filename=True)
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 12
0
 def __init__(self, cmd="samtools", **kwargs):
     """Describe the command line of ``samtools faidx``.

     Stores ``cmd`` as the program name, registers the sub-command and
     the required reference FASTA file, then delegates to
     ``AbstractCommandline.__init__`` with ``cmd`` and ``kwargs``.
     """
     self.program_name = cmd

     subcommand = _StaticArgument("faidx")
     reference = _Argument(
         ["reference", "reference_fasta", "ref"],
         "Reference FASTA to be indexed",
         is_required=True,
         filename=True)

     self.parameters = [subcommand, reference]
     AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 13
0
    def __init__(self, cmd="samtools", **kwargs):
        """Describe the command line of ``samtools faidx``.

        Stores ``cmd`` as the program name, registers the sub-command and
        the required reference FASTA file, then delegates to
        ``AbstractCommandline.__init__`` with ``cmd`` and ``kwargs``.
        """
        self.program_name = cmd

        subcommand = _StaticArgument("faidx")
        reference = _Argument(
            ["reference", "reference_fasta", "ref"],
            "Reference FASTA to be indexed",
            is_required=True,
            filename=True)

        self.parameters = [subcommand, reference]
        AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 14
0
 def __init__(self, cmd="samtools", **kwargs):
     """Describe the command line of ``samtools reheader``.

     Stores ``cmd`` as the program name, registers the sub-command
     followed by two required file arguments (the SAM file carrying the
     header, then the BAM file), and delegates to
     ``AbstractCommandline.__init__`` with ``cmd`` and ``kwargs``.
     """
     self.program_name = cmd

     subcommand = _StaticArgument("reheader")
     header_source = _Argument(
         ["input_header", "header_sam", "sam_file"],
         "Sam file with header",
         is_required=True,
         filename=True)
     target_bam = _Argument(
         ["input_bam", "input_file", "bam_file"],
         "BAM file for writing header to",
         is_required=True,
         filename=True)

     self.parameters = [subcommand, header_source, target_bam]
     AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 15
0
 def __init__(self, cmd="samtools", **kwargs):
     """Describe the command line of ``samtools fixmate``.

     Stores ``cmd`` as the program name, registers the sub-command
     followed by the required input and output file arguments, and
     delegates to ``AbstractCommandline.__init__`` with ``cmd`` and
     ``kwargs``.
     """
     self.program_name = cmd

     subcommand = _StaticArgument("fixmate")
     source = _Argument(
         ["in_bam", "sorted_bam", "input_bam", "input", "input_file"],
         "Name Sorted Alignment File ",
         is_required=True,
         filename=True)
     destination = _Argument(
         ["out_bam", "output_bam", "output", "output_file"],
         "Output file",
         is_required=True,
         filename=True)

     self.parameters = [subcommand, source, destination]
     AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 16
0
 def __init__(self, cmd="samtools", **kwargs):
     """Describe the command line of ``samtools reheader``.

     Stores ``cmd`` as the program name, registers the sub-command
     followed by two required file arguments (the SAM file carrying the
     header, then the BAM file), and delegates to
     ``AbstractCommandline.__init__`` with ``cmd`` and ``kwargs``.
     """
     self.program_name = cmd

     subcommand = _StaticArgument("reheader")
     header_source = _Argument(
         ["input_header", "header_sam", "sam_file"],
         "Sam file with header",
         is_required=True,
         filename=True)
     target_bam = _Argument(
         ["input_bam", "input_file", "bam_file"],
         "BAM file for writing header to",
         is_required=True,
         filename=True)

     self.parameters = [subcommand, header_source, target_bam]
     AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 17
0
 def __init__(self, cmd="samtools", **kwargs):
     """Describe the command line of ``samtools fixmate``.

     Stores ``cmd`` as the program name, registers the sub-command
     followed by the required input and output file arguments, and
     delegates to ``AbstractCommandline.__init__`` with ``cmd`` and
     ``kwargs``.
     """
     self.program_name = cmd

     subcommand = _StaticArgument("fixmate")
     source = _Argument(
         ["in_bam", "sorted_bam", "input_bam", "input", "input_file"],
         "Name Sorted Alignment File ",
         is_required=True,
         filename=True)
     destination = _Argument(
         ["out_bam", "output_bam", "output", "output_file"],
         "Output file",
         is_required=True,
         filename=True)

     self.parameters = [subcommand, source, destination]
     AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 18
0
    def __init__(self, cmd="samtools", **kwargs):
        """Describe the command line of ``samtools cat``.

        Stores ``cmd`` as the program name, registers the sub-command, the
        ``-h`` and ``-o`` file options (both validated as ``str``) and the
        required list of input BAM files, then delegates to
        ``AbstractCommandline.__init__`` with ``cmd`` and ``kwargs``.
        """
        def _is_text(value):
            return isinstance(value, str)

        def _file_option(flag, keyword, description):
            # Space-separated option whose value is a file name.
            return _Option([flag, keyword], description,
                           checker_function=_is_text,
                           equate=False, filename=True)

        self.program_name = cmd
        self.parameters = [
            _StaticArgument("cat"),
            _file_option("-h", "h", "Header SAM file"),
            _file_option("-o", "o", "Output SAM file"),
            _ArgumentList(
                ["input", "input_bam", "bams"],
                "Input BAM files",
                is_required=True,
                filename=True),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 19
0
 def __init__(self, cmd="samtools", **kwargs):
     """Describe the command line of ``samtools sort``.

     Stores ``cmd`` as the program name, registers the sub-command, the
     ``-o`` and ``-n`` switches, the ``-m`` option (validated as
     ``int``), the required input BAM and the required output prefix,
     then delegates to ``AbstractCommandline.__init__``.
     """
     def _is_int(value):
         return isinstance(value, int)

     self.program_name = cmd

     to_stdout = _Switch(
         ["-o", "o"],
         """Output the final alignment
                                 to the standard output""")
     by_name = _Switch(
         ["-n", "n"],
         """Sort by read names rather
                                 than by chromosomal coordinates""")
     max_memory = _Option(
         ["-m", "m"],
         "Approximately the maximum required memory",
         checker_function=_is_int,
         equate=False)
     in_bam = _Argument(
         ["input_bam"], "Input BAM file",
         is_required=True, filename=True)
     out_prefix = _Argument(
         ["out_prefix"], "Output prefix",
         is_required=True, filename=True)

     self.parameters = [
         _StaticArgument("sort"),
         to_stdout,
         by_name,
         max_memory,
         in_bam,
         out_prefix,
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 20
0
    def __init__(self, cmd="samtools", **kwargs):
        """Describe the command line of ``samtools rmdup``.

        Stores ``cmd`` as the program name, registers the sub-command, the
        ``-s`` and ``-S`` switches and the required input and output file
        arguments, then delegates to ``AbstractCommandline.__init__`` with
        ``cmd`` and ``kwargs``.
        """
        self.program_name = cmd

        single_end = _Switch(
            ["-s", "s"],
            """Remove duplicates for single-end reads.

                    By default, the command works for paired-end
                    reads only""")
        pairs_as_single = _Switch(
            ["-S", "S"],
            """Treat paired-end reads
                                    as single-end reads""")
        source = _Argument(
            ["in_bam", "sorted_bam", "input_bam", "input", "input_file"],
            "Name Sorted Alignment File ",
            is_required=True,
            filename=True)
        destination = _Argument(
            ["out_bam", "output_bam", "output", "output_file"],
            "Output file",
            is_required=True,
            filename=True)

        self.parameters = [
            _StaticArgument("rmdup"),
            single_end,
            pairs_as_single,
            source,
            destination,
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 21
0
Archivo: _bwa.py Proyecto: cbirdlab/sap
    def __init__(self, cmd="bwa", **kwargs):
        """Describe the command line of ``bwa samse``.

        Stores ``cmd`` as the program name, registers the sub-command, the
        three required file arguments (reference, sai file, read file) and
        the ``-n`` / ``-r`` options, then delegates to
        ``AbstractCommandline.__init__`` with ``cmd`` and ``kwargs``.

        ``-n`` is validated as an ``int``.  ``-r`` is validated as text:
        its help describes an '@RG' header line, which the former integer
        check could never accept.
        """
        try:
            text_types = basestring  # Python 2: matches str and unicode
        except NameError:
            text_types = str  # Python 3: basestring does not exist

        self.program_name = cmd
        self.parameters = [
            _StaticArgument("samse"),
            _Argument(["reference"], "Reference file name",
                      filename=True, is_required=True),
            _Argument(["sai_file"], "Sai file name",
                      filename=True, is_required=True),
            _Argument(["read_file"], "Read  file name",
                      filename=True, is_required=True),
            _Option(["-n", "n"],
                    """Maximum number of alignments to output in the XA tag for reads paired properly.

                            If a read has more than INT hits, the XA tag will not be written. [3]""",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Option(["-r", "r"],
                    "Specify the read group in a format like '@RG\tID:foo\tSM:bar'. [null]",
                    # The read group is a text line, not a number.
                    checker_function=lambda x: isinstance(x, text_types),
                    equate=False),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 22
0
    def __init__(self, cmd="bwa", **kwargs):
        """Describe the command line of ``bwa samse``.

        Stores ``cmd`` as the program name, registers the sub-command, the
        three required file arguments (reference, sai file, read file) and
        the ``-n`` / ``-r`` options, then delegates to
        ``AbstractCommandline.__init__`` with ``cmd`` and ``kwargs``.

        ``-n`` is validated as an ``int``.  ``-r`` is validated as text:
        its help describes an '@RG' header line, which the former integer
        check could never accept.
        """
        try:
            text_types = basestring  # Python 2: matches str and unicode
        except NameError:
            text_types = str  # Python 3: basestring does not exist

        self.program_name = cmd
        self.parameters = [
            _StaticArgument("samse"),
            _Argument(["reference"], "Reference file name",
                      filename=True, is_required=True),
            _Argument(["sai_file"], "Sai file name",
                      filename=True, is_required=True),
            _Argument(["read_file"], "Read  file name",
                      filename=True, is_required=True),
            _Option(["-n", "n"],
                    """Maximum number of alignments to output in the XA tag for reads paired properly.

                            If a read has more than INT hits, the XA tag will not be written. [3]""",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Option(["-r", "r"],
                    "Specify the read group in a format like '@RG\tID:foo\tSM:bar'. [null]",
                    # The read group is a text line, not a number.
                    checker_function=lambda x: isinstance(x, text_types),
                    equate=False),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 23
0
 def __init__(self, cmd="samtools", **kwargs):
     """Describe the command line of ``samtools cat``.

     Stores ``cmd`` as the program name, registers the sub-command, the
     ``-h`` and ``-o`` file options (both validated as ``str``) and the
     required list of input BAM files, then delegates to
     ``AbstractCommandline.__init__`` with ``cmd`` and ``kwargs``.
     """
     def _is_text(value):
         return isinstance(value, str)

     def _file_option(flag, keyword, description):
         # Space-separated option whose value is a file name.
         return _Option([flag, keyword], description,
                        checker_function=_is_text,
                        equate=False, filename=True)

     self.program_name = cmd
     self.parameters = [
         _StaticArgument("cat"),
         _file_option("-h", "h", "Header SAM file"),
         _file_option("-o", "o", "Output SAM file"),
         _ArgumentList(
             ["input", "input_bam", "bams"],
             "Input BAM files",
             is_required=True,
             filename=True),
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 24
0
    def __init__(self, cmd="bwa", **kwargs):
        """Describe the command line of ``bwa sampe``.

        Stores ``cmd`` as the program name, registers the sub-command, the
        five required file arguments (reference, two sai files, two read
        files) and the ``-a``, ``-o``, ``-n``, ``-N`` and ``-r`` options,
        then delegates to ``AbstractCommandline.__init__`` with ``cmd`` and
        ``kwargs``.

        All options are validated as ``int`` except ``-r``, which is
        validated as text.  The text type is resolved once here because
        ``basestring`` only exists on Python 2; referencing it from the
        checker raised ``NameError`` on Python 3.
        """
        try:
            text_types = basestring  # Python 2: matches str and unicode
        except NameError:
            text_types = str  # Python 3: basestring does not exist

        def _is_int(value):
            return isinstance(value, int)

        self.program_name = cmd
        self.parameters = [
            _StaticArgument("sampe"),
            _Argument(["reference"], "Reference file name",
                      filename=True, is_required=True),
            _Argument(["sai_file1"], "Sai file 1",
                      filename=True, is_required=True),
            _Argument(["sai_file2"], "Sai file 2",
                      filename=True, is_required=True),
            _Argument(["read_file1"], "Read  file 1",
                      filename=True, is_required=True),
            _Argument(["read_file2"], "Read  file 2",
                      filename=True, is_required=True),
            _Option(["-a", "a"],
                    """Maximum insert size for a read pair to be considered being mapped properly [500].

                            Since 0.4.5, this option is only used when there are not enough
                            good alignments to infer the distribution of insert sizes.""",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-o", "o"],
                    """Maximum occurrences of a read for pairing [100000].

                            A read with more occurrences will be treated as a single-end read.
                            Reducing this parameter helps faster pairing.""",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-n", "n"],
                    """Maximum number of alignments to output in the XA tag for reads paired properly [3].

                            If a read has more than INT hits, the XA tag will not be written.""",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-N", "N"],
                    """Maximum number of alignments to output in the XA tag for disconcordant read pairs (excluding singletons) [10].

                         .  If a read has more than INT hits, the XA tag will not be written.""",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-r", "r"],
                    "Specify the read group in a format like '@RG\tID:foo\tSM:bar'. [null]",
                    checker_function=lambda x: isinstance(x, text_types),
                    equate=False),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 25
0
Archivo: _bwa.py Proyecto: cbirdlab/sap
    def __init__(self, cmd="bwa", **kwargs):
        """Describe the command line of ``bwa sampe``.

        Stores ``cmd`` as the program name, registers the sub-command, the
        five required file arguments (reference, two sai files, two read
        files) and the ``-a``, ``-o``, ``-n``, ``-N`` and ``-r`` options,
        then delegates to ``AbstractCommandline.__init__`` with ``cmd`` and
        ``kwargs``.

        All options are validated as ``int`` except ``-r``, which is
        validated as text.  The text type is resolved once here because
        ``basestring`` only exists on Python 2; referencing it from the
        checker raised ``NameError`` on Python 3.
        """
        try:
            text_types = basestring  # Python 2: matches str and unicode
        except NameError:
            text_types = str  # Python 3: basestring does not exist

        def _is_int(value):
            return isinstance(value, int)

        self.program_name = cmd
        self.parameters = [
            _StaticArgument("sampe"),
            _Argument(["reference"], "Reference file name",
                      filename=True, is_required=True),
            _Argument(["sai_file1"], "Sai file 1",
                      filename=True, is_required=True),
            _Argument(["sai_file2"], "Sai file 2",
                      filename=True, is_required=True),
            _Argument(["read_file1"], "Read  file 1",
                      filename=True, is_required=True),
            _Argument(["read_file2"], "Read  file 2",
                      filename=True, is_required=True),
            _Option(["-a", "a"],
                    """Maximum insert size for a read pair to be considered being mapped properly [500].

                            Since 0.4.5, this option is only used when there are not enough
                            good alignments to infer the distribution of insert sizes.""",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-o", "o"],
                    """Maximum occurrences of a read for pairing [100000].

                            A read with more occurrences will be treated as a single-end read.
                            Reducing this parameter helps faster pairing.""",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-n", "n"],
                    """Maximum number of alignments to output in the XA tag for reads paired properly [3].

                            If a read has more than INT hits, the XA tag will not be written.""",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-N", "N"],
                    """Maximum number of alignments to output in the XA tag for disconcordant read pairs (excluding singletons) [10].

                         .  If a read has more than INT hits, the XA tag will not be written.""",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-r", "r"],
                    "Specify the read group in a format like '@RG\tID:foo\tSM:bar'. [null]",
                    checker_function=lambda x: isinstance(x, text_types),
                    equate=False),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 26
0
    def __init__(self, cmd="samtools", **kwargs):
        """Declare the ``rmdup`` parameters and initialise the base class.

        ``cmd`` is recorded as ``program_name``; it and any ``kwargs`` are
        passed straight through to ``AbstractCommandline.__init__``.
        """
        self.program_name = cmd

        # Sub-command name, two flags, then the two required file arguments.
        # Continuation lines inside the help strings keep their original
        # leading whitespace so the help text itself is unchanged.
        params = [_StaticArgument("rmdup")]
        params.append(_Switch(
            ["-s", "s"],
            """Remove duplicates for single-end reads.

                    By default, the command works for paired-end
                    reads only"""))
        params.append(_Switch(
            ["-S", "S"],
            """Treat paired-end reads
                                    as single-end reads"""))
        params.append(_Argument(
            ["in_bam", "sorted_bam", "input_bam", "input", "input_file"],
            "Name Sorted Alignment File ",
            filename=True, is_required=True))
        params.append(_Argument(
            ["out_bam", "output_bam", "output", "output_file"],
            "Output file",
            filename=True, is_required=True))
        self.parameters = params

        AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 27
0
 def __init__(self, cmd="samtools", **kwargs):
     """Declare the ``sort`` parameters and initialise the base class.

     ``cmd`` is recorded as ``program_name``; it and any ``kwargs`` are
     passed straight through to ``AbstractCommandline.__init__``.
     """
     def _is_int(value):
         return isinstance(value, int)

     self.program_name = cmd
     # Continuation lines inside the help strings keep their original
     # leading whitespace so the help text itself is unchanged.
     self.parameters = [
         _StaticArgument("sort"),
         _Switch(
             ["-o", "o"],
             """Output the final alignment
                                 to the standard output"""),
         _Switch(
             ["-n", "n"],
             """Sort by read names rather
                                 than by chromosomal coordinates"""),
         _Option(
             ["-m", "m"],
             "Approximately the maximum required memory",
             equate=False,
             checker_function=_is_int),
         _Argument(
             ["input_bam"], "Input BAM file",
             filename=True, is_required=True),
         _Argument(
             ["out_prefix"], "Output prefix",
             filename=True, is_required=True),
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 28
0
    def __init__(self, cmd="samtools", **kwargs):
        """Declare the ``calmd`` parameters and initialise the base class.

        ``cmd`` is recorded as ``program_name``; it and any ``kwargs`` are
        handed on unchanged to ``AbstractCommandline.__init__``.
        """
        def _is_int(value):
            return isinstance(value, int)

        self.program_name = cmd
        # Continuation lines inside the help strings keep their original
        # leading whitespace so the help text itself is unchanged.
        self.parameters = [
            _StaticArgument("calmd"),
            _Switch(
                ["-E", "E"],
                """Extended BAQ calculation.
                    This option trades specificity for sensitivity,
                    though the effect is minor."""),
            _Switch(
                ["-e", "e"],
                """Convert the read base to = if it is
                    identical to the aligned reference base.

                    Indel caller does not support the = bases
                    at the moment."""),
            _Switch(["-u", "u"],
                    "Output uncompressed BAM"),
            _Switch(["-b", "b"],
                    "Output compressed BAM "),
            _Switch(["-S", "S"],
                    "The input is SAM with header lines "),
            _Switch(
                ["-r", "r"],
                """Compute the BQ tag (without -A)
                    or cap base quality by BAQ (with -A)."""),
            _Switch(
                ["-A", "A"],
                """When used jointly with -r this option overwrites
                    the original base quality"""),
            _Option(
                ["-C", "C"],
                """Coefficient to cap mapping quality
                    of poorly mapped reads.

                    See the pileup command for details.""",
                equate=False,
                checker_function=_is_int),
            _Argument(
                ["input", "input_file", "in_bam", "infile", "input_bam"],
                "Input BAM",
                filename=True, is_required=True),
            _Argument(
                ["reference", "reference_fasta", "ref"],
                "Reference FASTA to be indexed",
                filename=True, is_required=True),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 29
0
 def __init__(self, cmd="samtools", **kwargs):
     """Declare the ``phase`` parameters and initialise the base class.

     ``cmd`` is recorded as ``program_name``; it and any ``kwargs`` are
     handed on unchanged to ``AbstractCommandline.__init__``.
     """
     def _is_int(value):
         return isinstance(value, int)

     def _is_str(value):
         return isinstance(value, str)

     self.program_name = cmd
     # Continuation lines inside the help strings keep their original
     # leading whitespace so the help text itself is unchanged.
     self.parameters = [
         _StaticArgument("phase"),
         _Argument(
             ["input", "input_bam", "in_bam"],
             "Input file",
             filename=True,
             is_required=True),
         _Switch(["-A", "A"],
                 "Drop reads with ambiguous phase"),
         _Option(
             ["-b", "b"],
             "Prefix of BAM output",
             filename=True,
             equate=False,
             checker_function=_is_str),
         _Switch(["-F", "F"],
                 "Do not attempt to fix chimeric reads"),
         _Option(
             ["-k", "k"],
             "Maximum length for local phasing",
             equate=False,
             checker_function=_is_int),
         _Option(
             ["-q", "q"],
             """Minimum Phred-scaled LOD to
                 call a heterozygote""",
             equate=False,
             checker_function=_is_int),
         _Option(
             ["-Q", "Q"],
             """Minimum base quality to be
                 used in het calling""",
             equate=False,
             checker_function=_is_int),
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 30
0
    def __init__(self, cmd="samtools", **kwargs):
        """Set up the ``calmd`` sub-command's switches, option and arguments.

        ``cmd`` becomes ``program_name`` and is forwarded, along with
        ``kwargs``, to ``AbstractCommandline.__init__``.
        """
        def _is_int(value):
            return isinstance(value, int)

        self.program_name = cmd

        # Assembled step by step in the same order as the declaration list.
        # Help-string continuation lines keep their original leading
        # whitespace so the help text itself is unchanged.
        params = [_StaticArgument("calmd")]
        params.append(_Switch(
            ["-E", "E"],
            """Extended BAQ calculation.
                    This option trades specificity for sensitivity,
                    though the effect is minor."""))
        params.append(_Switch(
            ["-e", "e"],
            """Convert the read base to = if it is
                    identical to the aligned reference base.

                    Indel caller does not support the = bases
                    at the moment."""))
        params.append(_Switch(["-u", "u"], "Output uncompressed BAM"))
        params.append(_Switch(["-b", "b"], "Output compressed BAM "))
        params.append(_Switch(["-S", "S"],
                              "The input is SAM with header lines "))
        params.append(_Switch(
            ["-r", "r"],
            """Compute the BQ tag (without -A)
                    or cap base quality by BAQ (with -A)."""))
        params.append(_Switch(
            ["-A", "A"],
            """When used jointly with -r this option overwrites
                    the original base quality"""))
        params.append(_Option(
            ["-C", "C"],
            """Coefficient to cap mapping quality
                    of poorly mapped reads.

                    See the pileup command for details.""",
            equate=False,
            checker_function=_is_int))
        params.append(_Argument(
            ["input", "input_file", "in_bam", "infile", "input_bam"],
            "Input BAM",
            filename=True,
            is_required=True))
        params.append(_Argument(
            ["reference", "reference_fasta", "ref"],
            "Reference FASTA to be indexed",
            filename=True,
            is_required=True))
        self.parameters = params

        AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 31
0
    def __init__(self, cmd="samtools", **kwargs):
        """Declare the ``view`` parameters and initialise the base class.

        ``cmd`` is recorded as ``program_name``; it and any ``kwargs`` are
        handed on unchanged to ``AbstractCommandline.__init__``.
        """
        def _is_int(value):
            return isinstance(value, int)

        def _is_str(value):
            return isinstance(value, str)

        self.program_name = cmd
        # Continuation lines inside the help strings keep their original
        # leading whitespace so the help text itself is unchanged.
        self.parameters = [
            _StaticArgument("view"),
            _Switch(["-b", "b"],
                    "Output in the BAM format"),
            _Switch(
                ["-c", "c"],
                """Instead of printing the alignments, only count them and
                    print the total number.

                    All filter options, such as '-f', '-F' and '-q',
                    are taken into account"""),
            _Switch(["-h", "h"],
                    "Include the header in the output"),
            _Switch(
                ["-u", "u"],
                """Output uncompressed BAM.

                    This option saves time spent on compression/decompression
                    and is thus preferred when the output is piped to
                    another samtools command"""),
            _Switch(["-H", "H"],
                    "Output the header only"),
            _Switch(
                ["-S", "S"],
                """Input is in SAM.
                    If @SQ header lines are absent,
                    the '-t' option is required."""),
            _Option(
                ["-t", "t"],
                """This file is TAB-delimited.
                    Each line must contain the reference name and the
                    length of the reference, one line for each
                    distinct reference; additional fields are ignored.

                    This file also defines the order of the reference
                    sequences in sorting.
                    If you run   'samtools faidx <ref.fa>',
                    the resultant index file <ref.fa>.fai can be used
                    as this <in.ref_list> file.""",
                filename=True,
                equate=False,
                checker_function=_is_str),
            _Option(
                ["-o", "o"],
                "Output file",
                filename=True,
                equate=False,
                checker_function=_is_str),
            _Option(
                ["-f", "f"],
                """Only output alignments with all bits in
                    INT present in the FLAG field""",
                equate=False,
                checker_function=_is_int),
            _Option(
                ["-F", "F"],
                "Skip alignments with bits present in INT",
                equate=False,
                checker_function=_is_int),
            _Option(
                ["-q", "q"],
                "Skip alignments with MAPQ smaller than INT",
                equate=False,
                checker_function=_is_int),
            _Option(
                ["-r", "r"],
                "Only output reads in read group STR",
                equate=False,
                checker_function=_is_str),
            _Option(
                ["-R", "R"],
                "Output reads in read groups listed in FILE",
                filename=True,
                equate=False,
                checker_function=_is_str),
            _Option(
                ["-l", "l"],
                "Only output reads in library STR",
                equate=False,
                checker_function=_is_str),
            _Switch(
                ["-1", "fast_bam"],
                "Use zlib compression level 1 to compress the output"),
            _Argument(
                ["input", "input_file"],
                "Input File Name",
                filename=True,
                is_required=True),
            _Argument(["region"], "Region", is_required=False),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 32
0
    def __init__(self, cmd="samtools", **kwargs):
        """Declare the ``mpileup`` parameters and initialise the base class.

        ``cmd`` is recorded as ``program_name``; it and any ``kwargs`` are
        handed on unchanged to ``AbstractCommandline.__init__``.
        """
        def _is_int(value):
            return isinstance(value, int)

        def _is_str(value):
            return isinstance(value, str)

        self.program_name = cmd
        # Continuation lines inside the help strings keep their original
        # leading whitespace so the help text itself is unchanged.
        self.parameters = [
            _StaticArgument("mpileup"),
            _Switch(
                ["-E", "E"],
                """Extended BAQ computation.
                    This option helps sensitivity especially
                    for MNPs, but may hurt specificity a little bit"""),
            _Switch(
                ["-B", "B"],
                """Disable probabilistic realignment for the
                    computation of base alignment quality (BAQ).

                    BAQ is the Phred-scaled probability of a read base being
                    misaligned.
                    Applying this option greatly helps to reduce false SNPs
                    caused by misalignments"""),
            _Switch(
                ["-g", "g"],
                """Compute genotype likelihoods and output them in the
                    binary call format (BCF)"""),
            _Switch(
                ["-u", "u"],
                """Similar to -g except that the output is
                    uncompressed BCF, which is preferred for piping"""),
            _Option(
                ["-C", "C"],
                """Coefficient for downgrading mapping quality for
                    reads containing excessive mismatches.

                    Given a read with a phred-scaled probability q of
                    being generated from the mapped position,
                    the new mapping quality is about sqrt((INT-q)/INT)*INT.
                    A zero value disables this functionality;
                    if enabled, the recommended value for BWA is 50""",
                equate=False,
                checker_function=_is_int),
            _Option(
                ["-r", "r"],
                "Only generate pileup in region STR",
                equate=False,
                checker_function=_is_str),
            _Option(
                ["-f", "f"],
                """The faidx-indexed reference file in the FASTA format.

                    The file can be optionally compressed by razip""",
                filename=True,
                equate=False,
                checker_function=_is_str),
            _Option(
                ["-l", "l"],
                """BED or position list file containing a list of regions
                    or sites where pileup or BCF should be generated""",
                filename=True,
                equate=False,
                checker_function=_is_str),
            _Option(
                ["-M", "M"],
                "Cap Mapping Quality at M",
                equate=False,
                checker_function=_is_int),
            _Option(
                ["-q", "q"],
                "Minimum mapping quality for an alignment to be used",
                equate=False,
                checker_function=_is_int),
            _Option(
                ["-Q", "Q"],
                "Minimum base quality for a base to be considered",
                equate=False,
                checker_function=_is_int),
            _Switch(
                ["-6", "illumina_13"],
                "Assume the quality is in the Illumina 1.3+ encoding"),
            _Switch(
                ["-A", "A"],
                "Do not skip anomalous read pairs in variant calling."),
            _Option(
                ["-b", "b"],
                "List of input BAM files, one file per line",
                filename=True,
                equate=False,
                checker_function=_is_str),
            _Option(
                ["-d", "d"],
                "At a position, read maximally INT reads per input BAM",
                equate=False,
                checker_function=_is_int),
            _Switch(["-D", "D"],
                    "Output per-sample read depth"),
            _Switch(
                ["-S", "S"],
                """Output per-sample Phred-scaled
                                strand bias P-value"""),
            _Option(
                ["-e", "e"],
                """Phred-scaled gap extension sequencing error probability.

                    Reducing INT leads to longer indels""",
                equate=False,
                checker_function=_is_int),
            _Option(
                ["-h", "h"],
                """Coefficient for modeling homopolymer errors.

                    Given an l-long homopolymer run, the sequencing error
                    of an indel of size s is modeled as INT*s/l""",
                equate=False,
                checker_function=_is_int),
            _Switch(["-I", "I"],
                    "Do not perform INDEL calling"),
            _Option(
                ["-L", "L"],
                """Skip INDEL calling if the average per-sample
                    depth is above INT""",
                equate=False,
                checker_function=_is_int),
            _Option(
                ["-o", "o"],
                """Phred-scaled gap open sequencing error probability.

                    Reducing INT leads to more indel calls.""",
                equate=False,
                checker_function=_is_int),
            _Option(
                ["-p", "p"],
                """Comma delimited list of platforms (determined by @RG-PL)
                    from which indel candidates are obtained.

                    It is recommended to collect indel candidates from
                    sequencing technologies that have low indel error rate
                    such as ILLUMINA""",
                equate=False,
                checker_function=_is_str),
            _ArgumentList(
                ["input_file"],
                "Input File for generating mpileup",
                filename=True,
                is_required=True),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 33
0
    def __init__(self, cmd="bwa", **kwargs):
        """Declare the ``aln`` parameters and initialise the base class.

        ``cmd`` is recorded as ``program_name``; it and any ``kwargs`` are
        handed on unchanged to ``AbstractCommandline.__init__``.
        """
        def _is_int(value):
            return isinstance(value, int)

        def _is_number(value):
            return isinstance(value, (int, float))

        self.program_name = cmd
        # Continuation lines inside the triple-quoted help strings keep their
        # original leading whitespace so the help text itself is unchanged.
        self.parameters = [
            _StaticArgument("aln"),
            _Argument(["reference"], "Reference file name",
                      filename=True, is_required=True),
            _Argument(["read_file"], "Read file name",
                      filename=True, is_required=True),
            _Option(["-n", "n"],
                    "Maximum edit distance if the value is INT, or the "
                    "fraction of missing alignments given 2% uniform base "
                    "error rate if FLOAT. In the latter case, the maximum "
                    "edit distance is automatically chosen for different "
                    "read lengths. [0.04]",
                    checker_function=_is_number,
                    equate=False),
            # -o limits the number of gap opens; it is not an edit distance
            # like -n.  The checker still accepts floats so that values
            # which were accepted before continue to be accepted.
            _Option(["-o", "o"],
                    "Maximum number of gap opens [1]",
                    checker_function=_is_number,
                    equate=False),
            _Option(["-e", "e"],
                    "Maximum number of gap extensions, -1 for k-difference "
                    "mode (disallowing long gaps) [-1]",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-d", "d"],
                    "Disallow a long deletion within INT bp towards the "
                    "3-end [16]",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-i", "i"],
                    "Disallow an indel within INT bp towards the ends [5]",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-l", "l"],
                    """Take the first INT subsequence as seed.

                            If INT is larger than the query sequence, seeding will be disabled.
                            For long reads, this option is typically ranged from 25 to 35 for
                            -k 2. [inf]""",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-k", "k"],
                    "Maximum edit distance in the seed [2]",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-t", "t"],
                    "Number of threads (multi-threading mode) [1]",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-M", "M"],
                    "Mismatch penalty. BWA will not search for suboptimal "
                    "hits with a score lower than (bestScore-misMsc). [3]",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-O", "O"],
                    "Gap open penalty [11]",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-E", "E"],
                    "Gap extension penalty [4]",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-R", "R"],
                    """Proceed with suboptimal alignments if there are no more than INT equally best hits.

                            This option only affects paired-end mapping. Increasing this threshold helps
                            to improve the pairing accuracy at the cost of speed, especially for short
                            reads (~32bp).""",
                    checker_function=_is_int,
                    equate=False),
            # The backslash before "sum" is doubled because "\s" is not a
            # valid escape sequence; the resulting text still reads "\sum".
            _Option(["-q", "q"],
                    """Parameter for read trimming [0].

                            BWA trims a read down to argmax_x{\\sum_{i=x+1}^l(INT-q_i)} if q_l<INT
                            where l is the original read length.""",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-B", "B"],
                    "Length of barcode starting from the 5-end. When INT is "
                    "positive, the barcode of each read will be trimmed "
                    "before mapping and will be written at the BC SAM tag. "
                    "For paired-end reads, the barcode from both ends are "
                    "concatenated. [0]",
                    checker_function=_is_int,
                    equate=False),
            _Switch(["-c", "c"],
                    "Reverse query but not complement it, which is required "
                    "for alignment in the color space."),
            _Switch(["-N", "N"],
                    "Disable iterative search. All hits with no more than "
                    "maxDiff differences will be found. This mode is much "
                    "slower than the default."),
            _Switch(["-I", "I"],
                    "The input is in the Illumina 1.3+ read format "
                    "(quality equals ASCII-64)."),
            _Switch(["-b", "b"],
                    "Specify the input read sequence file is the BAM format"),
            _Switch(["-b1", "b1"],
                    "When -b is specified, only use the first read in a "
                    "read pair in mapping (skip single-end reads and the "
                    "second reads)."),
            _Switch(["-b2", "b2"],
                    "When -b is specified, only use the second read in a "
                    "read pair in mapping."),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 34
0
    def __init__(self, cmd="samtools", **kwargs):
        """Set up the ``view`` sub-command's switches, options and arguments.

        ``cmd`` becomes ``program_name`` and is forwarded, along with
        ``kwargs``, to ``AbstractCommandline.__init__``.
        """
        def _is_int(value):
            return isinstance(value, int)

        def _is_str(value):
            return isinstance(value, str)

        self.program_name = cmd
        # Continuation lines inside the help strings keep their original
        # leading whitespace so the help text itself is unchanged.
        self.parameters = [
            _StaticArgument("view"),
            _Switch(["-b", "b"],
                    "Output in the BAM format"),
            _Switch(
                ["-c", "c"],
                """Instead of printing the alignments, only count them and
                    print the total number.

                    All filter options, such as '-f', '-F' and '-q',
                    are taken into account"""),
            _Switch(["-h", "h"],
                    "Include the header in the output"),
            _Switch(
                ["-u", "u"],
                """Output uncompressed BAM.

                    This option saves time spent on compression/decompression
                    and is thus preferred when the output is piped to
                    another samtools command"""),
            _Switch(["-H", "H"],
                    "Output the header only"),
            _Switch(
                ["-S", "S"],
                """Input is in SAM.
                    If @SQ header lines are absent,
                    the '-t' option is required."""),
            _Option(
                ["-t", "t"],
                """This file is TAB-delimited.
                    Each line must contain the reference name and the
                    length of the reference, one line for each
                    distinct reference; additional fields are ignored.

                    This file also defines the order of the reference
                    sequences in sorting.
                    If you run   'samtools faidx <ref.fa>',
                    the resultant index file <ref.fa>.fai can be used
                    as this <in.ref_list> file.""",
                filename=True, equate=False,
                checker_function=_is_str),
            _Option(
                ["-o", "o"], "Output file",
                filename=True, equate=False,
                checker_function=_is_str),
            _Option(
                ["-f", "f"],
                """Only output alignments with all bits in
                    INT present in the FLAG field""",
                equate=False, checker_function=_is_int),
            _Option(
                ["-F", "F"],
                "Skip alignments with bits present in INT",
                equate=False, checker_function=_is_int),
            _Option(
                ["-q", "q"],
                "Skip alignments with MAPQ smaller than INT",
                equate=False, checker_function=_is_int),
            _Option(
                ["-r", "r"],
                "Only output reads in read group STR",
                equate=False, checker_function=_is_str),
            _Option(
                ["-R", "R"],
                "Output reads in read groups listed in FILE",
                filename=True, equate=False,
                checker_function=_is_str),
            _Option(
                ["-l", "l"],
                "Only output reads in library STR",
                equate=False, checker_function=_is_str),
            _Switch(
                ["-1", "fast_bam"],
                "Use zlib compression level 1 to compress the output"),
            _Argument(
                ["input", "input_file"], "Input File Name",
                filename=True, is_required=True),
            _Argument(["region"], "Region", is_required=False),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 35
0
Archivo: _bwa.py Proyecto: cbirdlab/sap
    def __init__(self, cmd="bwa", **kwargs):
        """Declare the ``aln`` parameters and initialise the base class.

        ``cmd`` is recorded as ``program_name``; it and any ``kwargs`` are
        handed on unchanged to ``AbstractCommandline.__init__``.
        """
        def _is_int(value):
            return isinstance(value, int)

        def _is_number(value):
            return isinstance(value, (int, float))

        self.program_name = cmd
        # Continuation lines inside the triple-quoted help strings keep their
        # original leading whitespace so the help text itself is unchanged.
        self.parameters = [
            _StaticArgument("aln"),
            _Argument(["reference"], "Reference file name",
                      filename=True, is_required=True),
            _Argument(["read_file"], "Read file name",
                      filename=True, is_required=True),
            _Option(["-n", "n"],
                    "Maximum edit distance if the value is INT, or the "
                    "fraction of missing alignments given 2% uniform base "
                    "error rate if FLOAT. In the latter case, the maximum "
                    "edit distance is automatically chosen for different "
                    "read lengths. [0.04]",
                    checker_function=_is_number,
                    equate=False),
            # -o limits the number of gap opens; it is not an edit distance
            # like -n.  The checker still accepts floats so that values
            # which were accepted before continue to be accepted.
            _Option(["-o", "o"],
                    "Maximum number of gap opens [1]",
                    checker_function=_is_number,
                    equate=False),
            _Option(["-e", "e"],
                    "Maximum number of gap extensions, -1 for k-difference "
                    "mode (disallowing long gaps) [-1]",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-d", "d"],
                    "Disallow a long deletion within INT bp towards the "
                    "3-end [16]",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-i", "i"],
                    "Disallow an indel within INT bp towards the ends [5]",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-l", "l"],
                    """Take the first INT subsequence as seed.

                            If INT is larger than the query sequence, seeding will be disabled.
                            For long reads, this option is typically ranged from 25 to 35 for
                            -k 2. [inf]""",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-k", "k"],
                    "Maximum edit distance in the seed [2]",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-t", "t"],
                    "Number of threads (multi-threading mode) [1]",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-M", "M"],
                    "Mismatch penalty. BWA will not search for suboptimal "
                    "hits with a score lower than (bestScore-misMsc). [3]",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-O", "O"],
                    "Gap open penalty [11]",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-E", "E"],
                    "Gap extension penalty [4]",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-R", "R"],
                    """Proceed with suboptimal alignments if there are no more than INT equally best hits.

                            This option only affects paired-end mapping. Increasing this threshold helps
                            to improve the pairing accuracy at the cost of speed, especially for short
                            reads (~32bp).""",
                    checker_function=_is_int,
                    equate=False),
            # The backslash before "sum" is doubled because "\s" is not a
            # valid escape sequence; the resulting text still reads "\sum".
            _Option(["-q", "q"],
                    """Parameter for read trimming [0].

                            BWA trims a read down to argmax_x{\\sum_{i=x+1}^l(INT-q_i)} if q_l<INT
                            where l is the original read length.""",
                    checker_function=_is_int,
                    equate=False),
            _Option(["-B", "B"],
                    "Length of barcode starting from the 5-end. When INT is "
                    "positive, the barcode of each read will be trimmed "
                    "before mapping and will be written at the BC SAM tag. "
                    "For paired-end reads, the barcode from both ends are "
                    "concatenated. [0]",
                    checker_function=_is_int,
                    equate=False),
            _Switch(["-c", "c"],
                    "Reverse query but not complement it, which is required "
                    "for alignment in the color space."),
            _Switch(["-N", "N"],
                    "Disable iterative search. All hits with no more than "
                    "maxDiff differences will be found. This mode is much "
                    "slower than the default."),
            _Switch(["-I", "I"],
                    "The input is in the Illumina 1.3+ read format "
                    "(quality equals ASCII-64)."),
            _Switch(["-b", "b"],
                    "Specify the input read sequence file is the BAM format"),
            _Switch(["-b1", "b1"],
                    "When -b is specified, only use the first read in a "
                    "read pair in mapping (skip single-end reads and the "
                    "second reads)."),
            _Switch(["-b2", "b2"],
                    "When -b is specified, only use the second read in a "
                    "read pair in mapping."),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 36
0
    def __init__(self, cmd="samtools", **kwargs):
        """Declare the ``samtools mpileup`` parameters and initialise the wrapper.

        The parameter list starts with the static ``mpileup`` sub-command,
        followed by the supported switches and options, and ends with the
        required list of input files.

        Args:
            cmd: Name or path of the samtools executable. It is stored as
                ``program_name`` and passed on to the base initialiser.
            **kwargs: Forwarded unchanged to ``AbstractCommandline.__init__``
                (presumably initial values keyed by the parameter names
                declared below -- confirm against the base class).
        """
        self.program_name = cmd
        self.parameters = [
            _StaticArgument("mpileup"),
            _Switch(["-E", "E"],
                    """Extended BAQ computation.
                    This option helps sensitivity especially
                    for MNPs, but may hurt specificity a little bit"""),
            _Switch(["-B", "B"],
                    """Disable probabilistic realignment for the
                    computation of base alignment quality (BAQ).

                    BAQ is the Phred-scaled probability of a read base being
                    misaligned.
                    Applying this option greatly helps to reduce false SNPs
                    caused by misalignments"""),
            _Switch(["-g", "g"],
                    """Compute genotype likelihoods and output them in the
                    binary call format (BCF)"""),
            _Switch(["-u", "u"],
                    """Similar to -g except that the output is
                    uncompressed BCF, which is preferred for piping"""),
            _Option(["-C", "C"],
                    """Coefficient for downgrading mapping quality for
                    reads containing excessive mismatches.

                    Given a read with a phred-scaled probability q of
                    being generated from the mapped position,
                    the new mapping quality is about sqrt((INT-q)/INT)*INT.
                    A zero value disables this functionality;
                    if enabled, the recommended value for BWA is 50""",
                    equate=False,
                    checker_function=lambda x: isinstance(x, int)),
            _Option(["-r", "r"],
                    "Only generate pileup in region STR",
                    equate=False,
                    checker_function=lambda x: isinstance(x, str)),
            _Option(["-f", "f"],
                    """The faidx-indexed reference file in the FASTA format.

                    The file can be optionally compressed by razip""",
                    filename=True, equate=False,
                    checker_function=lambda x: isinstance(x, str)),
            _Option(["-l", "l"],
                    """BED or position list file containing a list of regions
                    or sites where pileup or BCF should be generated""",
                    filename=True, equate=False,
                    checker_function=lambda x: isinstance(x, str)),
            _Option(["-M", "M"],
                    "Cap Mapping Quality at M",
                    equate=False,
                    checker_function=lambda x: isinstance(x, int)),
            _Option(["-q", "q"],
                    "Minimum mapping quality for an alignment to be used",
                    equate=False,
                    checker_function=lambda x: isinstance(x, int)),
            _Option(["-Q", "Q"],
                    "Minimum base quality for a base to be considered",
                    equate=False,
                    checker_function=lambda x: isinstance(x, int)),
            _Switch(["-6", "illumina_13"],
                    "Assume the quality is in the Illumina 1.3+ encoding"),
            _Switch(["-A", "A"],
                    "Do not skip anomalous read pairs in variant calling."),
            _Option(["-b", "b"],
                    "List of input BAM files, one file per line",
                    filename=True, equate=False,
                    checker_function=lambda x: isinstance(x, str)),
            _Option(["-d", "d"],
                    "At a position, read maximally INT reads per input BAM",
                    equate=False,
                    checker_function=lambda x: isinstance(x, int)),
            _Switch(["-D", "D"], "Output per-sample read depth"),
            _Switch(["-S", "S"], """Output per-sample Phred-scaled
                                strand bias P-value"""),
            _Option(["-e", "e"],
                    """Phred-scaled gap extension sequencing error probability.

                    Reducing INT leads to longer indels""",
                    equate=False,
                    checker_function=lambda x: isinstance(x, int)),
            _Option(["-h", "h"],
                    """Coefficient for modeling homopolymer errors.

                    Given an l-long homopolymer run, the sequencing error
                    of an indel of size s is modeled as INT*s/l""",
                    equate=False,
                    checker_function=lambda x: isinstance(x, int)),
            _Switch(["-I", "I"], "Do not perform INDEL calling"),
            _Option(["-L", "L"],
                    """Skip INDEL calling if the average per-sample
                    depth is above INT""",
                    equate=False,
                    checker_function=lambda x: isinstance(x, int)),
            _Option(["-o", "o"],
                    """Phred-scaled gap open sequencing error probability.

                    Reducing INT leads to more indel calls.""",
                    equate=False,
                    checker_function=lambda x: isinstance(x, int)),
            # samtools mpileup takes the platform list through upper-case -P;
            # lower-case -p is an argument-less switch (apply -m/-F per
            # sample), so emitting "-p STR" would be misparsed.  "-p" is kept
            # as a secondary alias and "p" stays the Python-side name, so
            # existing callers keep working.
            _Option(["-P", "-p", "p"],
                    """Comma delimited list of platforms (determined by @RG-PL)
                    from which indel candidates are obtained.

                    It is recommended to collect indel candidates from
                    sequencing technologies that have low indel error rate
                    such as ILLUMINA""",
                    equate=False,
                    checker_function=lambda x: isinstance(x, str)),
            _ArgumentList(["input_file"],
                          "Input File for generating mpileup",
                          filename=True, is_required=True),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)