Ejemplo n.º 1
0
 def __init__(self, cmd="samtools", **kwargs):
     """Declare the command-line parameters of ``samtools merge``.

     The parameter list consists of the fixed ``merge`` sub-command, five
     switches (``-n``, ``-r``, ``-u``, ``-1``, ``-f``), two string-checked
     options (``-h``, ``-R``), one required output BAM argument and a
     required list of input BAM arguments, in that order.

     :param cmd: program name; stored as ``program_name`` and handed to
         ``AbstractCommandline.__init__``.
     :param kwargs: forwarded unchanged to ``AbstractCommandline.__init__``.
     """

     def _is_str(value):
         # Single validator shared by every string-valued option below.
         return isinstance(value, str)

     self.program_name = cmd

     subcommand = [_StaticArgument("merge")]

     # NOTE: the continuation lines of the triple-quoted help texts are part
     # of the string value, so their leading whitespace is kept as-is.
     flags = [
         _Switch(
             ["-n", "n"],
             """The input alignments are sorted by read names
                 rather than by chromosomal coordinates""",
         ),
         _Switch(
             ["-r", "r"],
             """Attach an RG tag to each alignment.
                 The tag value is inferred from file names""",
         ),
         _Switch(["-u", "u"], "Uncompressed BAM output"),
         _Switch(
             ["-1", "fast_bam"],
             """Use zlib compression level 1
                                        to compress the output""",
         ),
         _Switch(
             ["-f", "f"],
             """Force to overwrite the
                                 output file if present""",
         ),
     ]

     valued = [
         _Option(
             ["-h", "h"],
             """Use the lines of FILE as '@'
                                 headers to be copied to out.bam""",
             checker_function=_is_str,
             equate=False,
             filename=True,
         ),
         _Option(
             ["-R", "R"],
             "Merge files in the specified region indicated by STR",
             checker_function=_is_str,
             equate=False,
         ),
     ]

     positionals = [
         _Argument(
             ["output_bam", "out_bam", "out", "output"],
             "Output BAM file",
             is_required=True,
             filename=True,
         ),
         _ArgumentList(
             ["input_bam", "in_bam", "input", "bam"],
             "Input BAM",
             is_required=True,
             filename=True,
         ),
     ]

     # Concatenation preserves the declaration order listed in the docstring.
     self.parameters = subcommand + flags + valued + positionals
     AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 2
0
 def __init__(self, cmd="samtools", **kwargs):
     """Build the parameter specification for ``samtools merge``.

     Parameters are registered in this order: the static ``merge`` token,
     the ``-n``/``-r``/``-u``/``-1``/``-f`` switches, the ``-h`` and ``-R``
     options (both validated as ``str``), the required output BAM argument
     and the required input BAM argument list.

     :param cmd: program name; assigned to ``program_name`` and passed to
         ``AbstractCommandline.__init__``.
     :param kwargs: passed through to ``AbstractCommandline.__init__``.
     """

     def _accepts_str(candidate):
         # Validator used for both value-taking options.
         return isinstance(candidate, str)

     self.program_name = cmd

     spec = [_StaticArgument("merge")]

     # On/off switches. Whitespace inside the triple-quoted help texts is
     # part of the string and is therefore left untouched.
     spec += [
         _Switch(
             ["-n", "n"],
             """The input alignments are sorted by read names
                 rather than by chromosomal coordinates""",
         ),
         _Switch(
             ["-r", "r"],
             """Attach an RG tag to each alignment.
                 The tag value is inferred from file names""",
         ),
         _Switch(["-u", "u"], "Uncompressed BAM output"),
         _Switch(
             ["-1", "fast_bam"],
             """Use zlib compression level 1
                                        to compress the output""",
         ),
         _Switch(
             ["-f", "f"],
             """Force to overwrite the
                                 output file if present""",
         ),
     ]

     # Options that carry a value.
     spec += [
         _Option(
             ["-h", "h"],
             """Use the lines of FILE as '@'
                                 headers to be copied to out.bam""",
             filename=True,
             checker_function=_accepts_str,
             equate=False,
         ),
         _Option(
             ["-R", "R"],
             "Merge files in the specified region indicated by STR",
             checker_function=_accepts_str,
             equate=False,
         ),
     ]

     # Positional file arguments: output first, then the inputs.
     spec += [
         _Argument(
             ["output_bam", "out_bam", "out", "output"],
             "Output BAM file",
             filename=True,
             is_required=True,
         ),
         _ArgumentList(
             ["input_bam", "in_bam", "input", "bam"],
             "Input BAM",
             filename=True,
             is_required=True,
         ),
     ]

     self.parameters = spec
     AbstractCommandline.__init__(self, cmd, **kwargs)
 def __init__(self, cmd="samtools", **kwargs):
     """Declare the command-line parameters of ``samtools cat``.

     Registers the static ``cat`` token, the ``-h`` and ``-o`` file options
     (each validated as ``str``) and a required list of input BAM files.

     :param cmd: program name; stored as ``program_name`` and passed to
         ``AbstractCommandline.__init__``.
     :param kwargs: forwarded unchanged to ``AbstractCommandline.__init__``.
     """

     def _is_str(value):
         return isinstance(value, str)

     def _file_option(names, description):
         # -h and -o share exactly the same keyword settings.
         return _Option(
             names,
             description,
             checker_function=_is_str,
             equate=False,
             filename=True,
         )

     self.program_name = cmd
     self.parameters = [
         _StaticArgument("cat"),
         _file_option(["-h", "h"], "Header SAM file"),
         _file_option(["-o", "o"], "Output SAM file"),
         _ArgumentList(
             ["input", "input_bam", "bams"],
             "Input BAM files",
             is_required=True,
             filename=True,
         ),
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 4
0
    def __init__(self, cmd="samtools", **kwargs):
        """Build the parameter specification for ``samtools cat``.

        The list holds the static ``cat`` token, the ``-h`` (header) and
        ``-o`` (output) file options, and the required input BAM file list.

        :param cmd: program name; assigned to ``program_name`` and passed to
            ``AbstractCommandline.__init__``.
        :param kwargs: passed through to ``AbstractCommandline.__init__``.
        """
        self.program_name = cmd

        # (flag, property name, help text) for the two file-valued options.
        option_table = (
            ("-h", "h", "Header SAM file"),
            ("-o", "o", "Output SAM file"),
        )
        file_options = [
            _Option(
                [flag, prop],
                help_text,
                filename=True,
                equate=False,
                checker_function=lambda x: isinstance(x, str),
            )
            for flag, prop, help_text in option_table
        ]

        inputs = _ArgumentList(
            ["input", "input_bam", "bams"],
            "Input BAM files",
            filename=True,
            is_required=True,
        )

        self.parameters = [_StaticArgument("cat")] + file_options + [inputs]
        AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 5
0
    def __init__(self, cmd="gt", **kwargs):
        """Construct and evaluate genometools merge commands.

        Example
        >>> x = Merge(infiles=["test.gff3"], tidy=True)
        >>> print(x)
        gt merge -tidy test.gff3

        The parameter list holds the static ``merge`` sub-command, the
        ``-tidy``/``-retainids``/``-gzip``/``-bzip2``/``-force``/``-help``/
        ``-version`` switches, the ``-o`` output-file option and the
        required ``infiles`` argument list.

        :param cmd: name of the genometools executable; stored as
            ``program_name`` and passed to the parent initializer.
        :param kwargs: forwarded unchanged to the parent initializer.
        """
        # The sub-command is already contributed by _StaticArgument("merge")
        # below. Appending it to the program name as well would list "merge"
        # twice wherever program_name is rendered in front of the parameters,
        # contradicting the example above, so only the bare executable name
        # is stored here.
        self.program_name = cmd
        self.parameters = [
            _StaticArgument("merge"),
            _Switch(["-tidy", "tidy"],
                    ("Try to tidy the GFF3 files up during parsing.")),
            _Switch(["-retainids", "retainids"],
                    ("when available, use the original IDs provided in the "
                     "source file")),
            _Option(
                ["-o", "outfile"],
                "redirect output to specified file",
                checker_function=check_is_str,
                filename=True,
                equate=False,
            ),
            _Switch(["-gzip", "gzip"], "write gzip compressed output file."),
            _Switch(
                ["-bzip2", "bzip2"],
                "write bzip2 compressed output file.",
            ),
            _Switch(
                ["-force", "force"],
                "force writing to output file",
            ),
            _Switch(["-help", "help"], "Show help and exit"),
            _Switch(["-version", "version"],
                    "display version information and exit"),
            _ArgumentList(["infiles"],
                          "The GFF3 files to operate on.",
                          checker_function=check_is_list_of_str,
                          filename=True,
                          is_required=True)
        ]

        super().__init__(cmd, **kwargs)
Ejemplo n.º 6
0
    def __init__(self, cmd="samtools", **kwargs):
        """Declare the command-line parameters of ``samtools mpileup``.

        Registers the static ``mpileup`` token followed by the switches and
        options listed below, and finally the required ``input_file``
        argument list. Integer-valued options are validated with
        ``isinstance(x, int)``, string-valued ones with
        ``isinstance(x, str)``.

        :param cmd: program name; stored as ``program_name`` and passed to
            ``AbstractCommandline.__init__``.
        :param kwargs: forwarded unchanged to ``AbstractCommandline.__init__``.
        """

        def _is_int(value):
            return isinstance(value, int)

        def _is_str(value):
            return isinstance(value, str)

        def _option(names, description, checker, **extra):
            # Every option in this wrapper is created with equate=False;
            # ``extra`` carries filename=True for the file-valued ones.
            return _Option(names, description, equate=False,
                           checker_function=checker, **extra)

        self.program_name = cmd
        # NOTE: continuation lines inside the triple-quoted help texts belong
        # to the string values, so their leading whitespace is left as-is.
        self.parameters = [
            _StaticArgument("mpileup"),
            _Switch(
                ["-E", "E"],
                """Extended BAQ computation.
                    This option helps sensitivity especially
                    for MNPs, but may hurt specificity a little bit""",
            ),
            _Switch(
                ["-B", "B"],
                """Disable probabilistic realignment for the
                    computation of base alignment quality (BAQ).

                    BAQ is the Phred-scaled probability of a read base being
                    misaligned.
                    Applying this option greatly helps to reduce false SNPs
                    caused by misalignments""",
            ),
            _Switch(
                ["-g", "g"],
                """Compute genotype likelihoods and output them in the
                    binary call format (BCF)""",
            ),
            _Switch(
                ["-u", "u"],
                """Similar to -g except that the output is
                    uncompressed BCF, which is preferred for piping""",
            ),
            _option(
                ["-C", "C"],
                """Coefficient for downgrading mapping quality for
                    reads containing excessive mismatches.

                    Given a read with a phred-scaled probability q of
                    being generated from the mapped position,
                    the new mapping quality is about sqrt((INT-q)/INT)*INT.
                    A zero value disables this functionality;
                    if enabled, the recommended value for BWA is 50""",
                _is_int,
            ),
            _option(
                ["-r", "r"],
                "Only generate pileup in region STR",
                _is_str,
            ),
            _option(
                ["-f", "f"],
                """The faidx-indexed reference file in the FASTA format.

                    The file can be optionally compressed by razip""",
                _is_str,
                filename=True,
            ),
            _option(
                ["-l", "l"],
                """BED or position list file containing a list of regions
                    or sites where pileup or BCF should be generated""",
                _is_str,
                filename=True,
            ),
            _option(["-M", "M"], "Cap Mapping Quality at M", _is_int),
            _option(
                ["-q", "q"],
                "Minimum mapping quality for an alignment to be used",
                _is_int,
            ),
            _option(
                ["-Q", "Q"],
                "Minimum base quality for a base to be considered",
                _is_int,
            ),
            _Switch(
                ["-6", "illumina_13"],
                "Assume the quality is in the Illumina 1.3+ encoding",
            ),
            _Switch(
                ["-A", "A"],
                "Do not skip anomalous read pairs in variant calling.",
            ),
            _option(
                ["-b", "b"],
                "List of input BAM files, one file per line",
                _is_str,
                filename=True,
            ),
            _option(
                ["-d", "d"],
                "At a position, read maximally INT reads per input BAM",
                _is_int,
            ),
            _Switch(["-D", "D"], "Output per-sample read depth"),
            _Switch(
                ["-S", "S"],
                """Output per-sample Phred-scaled
                                strand bias P-value""",
            ),
            _option(
                ["-e", "e"],
                """Phred-scaled gap extension sequencing error probability.

                    Reducing INT leads to longer indels""",
                _is_int,
            ),
            _option(
                ["-h", "h"],
                """Coefficient for modeling homopolymer errors.

                    Given an l-long homopolymer run, the sequencing error
                    of an indel of size s is modeled as INT*s/l""",
                _is_int,
            ),
            _Switch(["-I", "I"], "Do not perform INDEL calling"),
            _option(
                ["-L", "L"],
                """Skip INDEL calling if the average per-sample
                    depth is above INT""",
                _is_int,
            ),
            _option(
                ["-o", "o"],
                """Phred-scaled gap open sequencing error probability.

                    Reducing INT leads to more indel calls.""",
                _is_int,
            ),
            _option(
                ["-p", "p"],
                """Comma delimited list of platforms (determined by @RG-PL)
                    from which indel candidates are obtained.

                    It is recommended to collect indel candidates from
                    sequencing technologies that have low indel error rate
                    such as ILLUMINA""",
                _is_str,
            ),
            _ArgumentList(
                ["input_file"],
                "Input File for generating mpileup",
                is_required=True,
                filename=True,
            ),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
Ejemplo n.º 7
0
 def __init__(self, *args, cmd='mugsy', **kwargs):
     """Declare the command-line parameters of the ``mugsy`` wrapper.

     Registers the ``--prefix``, ``--directory``, ``--minlength``,
     ``--duplications`` and ``--nucmeropts`` options, the
     ``-allownestedlcbs``, ``-plot`` and ``-fullsearch`` switches, the
     ``-refine`` option and the required ``input`` argument list.

     :param args: accepted but not used by this method.
         NOTE(review): positional arguments are silently dropped; confirm
         whether callers rely on this.
     :param cmd: program name (keyword-only, because it follows ``*args``);
         passed to ``AbstractCommandline.__init__``.
     :param kwargs: forwarded unchanged to ``AbstractCommandline.__init__``.
     """
     self.parameters = [
         _Option(
             ['--prefix', 'prefix'],
             'prefix for output files',
             filename=True,
             equate=False,
             ),
         _Option(
             ['--directory', 'directory'],
             # The two literals are concatenated, so the first one needs a
             # trailing space to avoid rendering "temporaryfiles".
             ('directory used to store output and temporary '
              'files. Must be a absolute path'),
             filename=True,
             equate=False,
             ),
         _Option(
             ['--minlength', 'minlength'],
             ('minimum span of an aligned region in a colinear\n'
              'block (bp). This is used by the segmentation step\n'
              'synchain-mugsy. Default is 30bp.'),
             checker_function=lambda x: isinstance(x, int),
             equate=False,
             ),
         _Option(
             ['--duplications', 'duplications'],
             '1 - Detect and report duplications. 0 - Skip. Default is 0.',
             checker_function=lambda x: x in (0, 1),
             equate=False,
             ),
         _Option(
             ['--nucmeropts', 'nucmeropts'],
             ('options passed through to the Nucmer\n'
              'package. Eg. -nucmeropts "-l 15" sets the minimum MUM length\n'
              'in NUCmer to 15. See the Nucmer documentation at\n'
              'http://mummer.sf.net for more information. Default is -l 15.'),
             # The value is accepted only if it contains a quote character.
             checker_function=lambda x: "'" in x or '"' in x,
             equate=False,
             ),
         _Switch(
             ['-allownestedlcbs', 'allownestedlcbs'],
             ('Default=false. Places each multi-genome anchor\n'
              'in exactly one LCB; the longest spanning LCB'),
             ),
         _Switch(
             ['-plot', 'plot'],
             ('output genome dot plots in GNUplot format. Overlays LCBS\n'
              'onto pairwise plots from mummerplot. Display of draft\n'
              'genomes in these plots is not supported.'),
             ),
         _Switch(
             ['-fullsearch', 'fullsearch'],
             ('Run a complete all pairs Nucmer search with each \n'
              'sequence as a reference and query (n^2-1 total searches).\n'
              'Default is one direction only (n^2-1/2 searches).'),
             ),
         _Option(
             ['-refine', 'refine'],
             ('run an second iteration of Mugsy on each LCB to refine the \n'
              'alignment using either Mugsy (--refine mugsy), FSA (--refine\n'
              'fsa), Pecan (--refine pecan), MLAGAN (--refine mlagan).\n'
              'Requires necessary tools are in your path:\n'
              'fsa: fsa\n'
              'pecan: muscle,exonerate, in the path. classpath set '
              'for bp.pecan.Pecan.\n'
              'mlagan: mlagan.sh'),
             checker_function=lambda x: x in ('mugsy', 'fsa',
                                              'pecan', 'mlagan'),
             equate=False,
             ),
         _ArgumentList(
             ['input'],
             ('Input is one or more (multi)FASTA files, one per genome.\n'
              'Each file should contain all the sequences for a single\n'
              'organism/species. The filename is used as the genome name.\n'
              '\n'
              'Limitations on FASTA input: input FASTA headers must\n'
              "not contain ':' or '-' ambiguity characters are converted \n"
              'to N in output'),
             filename=True,
             is_required=True,
             )
         ]
     AbstractCommandline.__init__(self, cmd, **kwargs)