コード例 #1
0
ファイル: _bwa.py プロジェクト: kaspermunch/sap
    def __init__(self, cmd="bwa", **kwargs):
        """Declare the command-line parameters of the ``bwasw`` sub-command.

        ``cmd`` is stored as ``program_name`` and passed on, together with
        ``kwargs``, to ``AbstractCommandline.__init__``. The parameter list
        holds the static ``bwasw`` token, then the reference, read and
        (optional) mate file names, then the tuning options, in that order.
        """
        def is_int(value):
            return isinstance(value, int)

        def is_float(value):
            return isinstance(value, float)

        # (name, help text, required?) for each positional file argument.
        file_rows = [
            ("reference", "Reference file name", True),
            ("read_file", "Read file", True),
            ("mate_file", "Mate file", False),
        ]

        # (names, help text, value checker) for each option. The indentation
        # inside the triple-quoted help texts is part of the string itself.
        option_rows = [
            (["-a", "a"], "Score of a match [1]", is_int),
            (["-b", "b"], "Mismatch penalty [3]", is_int),
            (["-q", "q"], "Gap open penalty [5]", is_int),
            (["-r", "r"],
             "Gap extension penalty. The penalty for a contiguous gap of size k is q+k*r. [2]",
             is_int),
            (["-t", "t"],
             "Number of threads in the multi-threading mode [1]",
             is_int),
            (["-w", "w"], "Band width in the banded alignment [33]", is_int),
            (["-T", "T"], "Minimum score threshold divided by a [37]", is_int),
            (["-c", "c"],
             """Coefficient for threshold adjustment according to query length [5.5].

                            Given an l-long query, the threshold for a hit to be retained is
                            a*max{T,c*log(l)}.""",
             is_float),
            (["-z", "z"],
             "Z-best heuristics. Higher -z increases accuracy at the cost of speed. [1]",
             is_int),
            (["-s", "s"],
             """Maximum SA interval size for initiating a seed [3].

                            Higher -s increases accuracy at the cost of speed.""",
             is_int),
            (["-N", "N"],
             "Minimum number of seeds supporting the resultant alignment to skip reverse alignment. [5]",
             is_int),
        ]

        self.program_name = cmd
        declared = [_StaticArgument("bwasw")]
        for arg_name, arg_help, needed in file_rows:
            declared.append(_Argument([arg_name], arg_help,
                                      filename=True, is_required=needed))
        for names, help_text, checker in option_rows:
            declared.append(_Option(names, help_text,
                                    checker_function=checker, equate=False))
        self.parameters = declared
        AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #2
0
ファイル: _bwa.py プロジェクト: cbirdlab/sap
    def __init__(self, cmd="bwa", **kwargs):
        """Build the parameter list for the ``bwasw`` sub-command.

        The list starts with the static ``bwasw`` token, continues with the
        three file-name arguments (``mate_file`` is the only one not marked
        required) and ends with the tuning options. ``cmd`` is kept as
        ``program_name`` and handed, with ``kwargs``, to
        ``AbstractCommandline.__init__``.
        """
        def file_argument(name, description, required):
            # Positional argument that is flagged as a file name.
            return _Argument([name], description,
                             filename=True, is_required=required)

        def int_option(names, description):
            # Option whose value must be an ``int``; flag and value are
            # declared with ``equate=False``.
            return _Option(names, description,
                           checker_function=lambda x: isinstance(x, int),
                           equate=False)

        self.program_name = cmd
        # Whitespace inside the triple-quoted help texts belongs to the
        # strings and is reproduced unchanged.
        self.parameters = [
            _StaticArgument("bwasw"),
            file_argument("reference", "Reference file name", True),
            file_argument("read_file", "Read file", True),
            file_argument("mate_file", "Mate file", False),
            int_option(["-a", "a"], "Score of a match [1]"),
            int_option(["-b", "b"], "Mismatch penalty [3]"),
            int_option(["-q", "q"], "Gap open penalty [5]"),
            int_option(["-r", "r"],
                       "Gap extension penalty. The penalty for a contiguous gap of size k is q+k*r. [2]"),
            int_option(["-t", "t"],
                       "Number of threads in the multi-threading mode [1]"),
            int_option(["-w", "w"], "Band width in the banded alignment [33]"),
            int_option(["-T", "T"],
                       "Minimum score threshold divided by a [37]"),
            _Option(["-c", "c"],
                    """Coefficient for threshold adjustment according to query length [5.5].

                            Given an l-long query, the threshold for a hit to be retained is
                            a*max{T,c*log(l)}.""",
                    checker_function=lambda x: isinstance(x, float),
                    equate=False),
            int_option(["-z", "z"],
                       "Z-best heuristics. Higher -z increases accuracy at the cost of speed. [1]"),
            int_option(["-s", "s"],
                       """Maximum SA interval size for initiating a seed [3].

                            Higher -s increases accuracy at the cost of speed."""),
            int_option(["-N", "N"],
                       "Minimum number of seeds supporting the resultant alignment to skip reverse alignment. [5]"),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #3
0
ファイル: _samtools.py プロジェクト: kaspermunch/sap
 def __init__(self, cmd="samtools", **kwargs):
     """Describe the ``reheader`` sub-command: a SAM header file and a BAM file.

     Both positional arguments are declared as required file names. ``cmd``
     becomes ``program_name`` and is forwarded, with ``kwargs``, to
     ``AbstractCommandline.__init__``.
     """
     self.program_name = cmd
     # Keyword flags shared by both positional arguments.
     required_file = {"filename": True, "is_required": True}
     subcommand = _StaticArgument("reheader")
     header_source = _Argument(["input_header", "header_sam", "sam_file"],
                               "Sam file with header",
                               **required_file)
     bam_target = _Argument(["input_bam", "input_file", "bam_file"],
                            "BAM file for writing header to",
                            **required_file)
     self.parameters = [subcommand, header_source, bam_target]
     AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #4
0
ファイル: _samtools.py プロジェクト: kaspermunch/sap
 def __init__(self, cmd="samtools", **kwargs):
     """Describe the ``fixmate`` sub-command: one input and one output file.

     Each positional argument is declared as a required file name and can be
     addressed through any of its listed aliases. ``cmd`` is stored as
     ``program_name`` and passed, with ``kwargs``, to
     ``AbstractCommandline.__init__``.
     """
     input_aliases = ["in_bam", "sorted_bam", "input_bam",
                      "input", "input_file"]
     output_aliases = ["out_bam", "output_bam", "output", "output_file"]

     self.program_name = cmd
     declared = [_StaticArgument("fixmate")]
     declared.append(_Argument(input_aliases,
                               "Name Sorted Alignment File ",
                               filename=True, is_required=True))
     declared.append(_Argument(output_aliases,
                               "Output file",
                               filename=True, is_required=True))
     self.parameters = declared
     AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #5
0
ファイル: _samtools.py プロジェクト: cbirdlab/sap
 def __init__(self, cmd="samtools", **kwargs):
     """Set up the ``reheader`` sub-command with its two file arguments.

     The static ``reheader`` token is followed by the SAM header file and
     the BAM file, both declared as required file names. ``cmd`` is kept as
     ``program_name`` and forwarded, with ``kwargs``, to
     ``AbstractCommandline.__init__``.
     """
     # (aliases, help text) for each positional file, in command-line order.
     file_rows = (
         (["input_header", "header_sam", "sam_file"], "Sam file with header"),
         (["input_bam", "input_file", "bam_file"],
          "BAM file for writing header to"),
     )

     self.program_name = cmd
     declared = [_StaticArgument("reheader")]
     for aliases, help_text in file_rows:
         declared.append(
             _Argument(aliases, help_text, filename=True, is_required=True))
     self.parameters = declared
     AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #6
0
ファイル: Controller.py プロジェクト: kaspermunch/sap
 def __init__(self, genepop_dir=None, cmd="Genepop", **kwargs):
     """Declare the Genepop arguments and preset ``mode`` to batch.

     ``genepop_dir`` is accepted but not used inside this constructor.
     ``command``, ``mode`` and ``input`` are declared as required; the
     remaining arguments are declared without an ``is_required`` flag.
     Once ``AbstractCommandline.__init__`` has run, ``mode`` is set to
     ``"Mode=Batch"``.
     """
     # (name, help text) pairs, in command-line order.
     mandatory = [
         ("command", "GenePop option to be called"),
         ("mode", "Should allways be batch"),
         ("input", "Input file"),
     ]
     tunables = [
         ("Dememorization", "Dememorization step"),
         ("BatchNumber", "Number of MCMC batches"),
         ("BatchLength", "Length of MCMC chains"),
         ("HWtests", "Enumeration or MCMC"),
         ("IsolBDstatistic", "IBD statistic (a or e)"),
         ("MinimalDistance", "Minimal IBD distance"),
         ("GeographicScale", "Log or Linear"),
     ]

     declared = [_Argument([name], text, is_required=True)
                 for name, text in mandatory]
     declared.extend(_Argument([name], text) for name, text in tunables)
     self.parameters = declared
     AbstractCommandline.__init__(self, cmd, **kwargs)
     self.set_parameter("mode", "Mode=Batch")
コード例 #7
0
ファイル: _AlignAce.py プロジェクト: cbirdlab/sap
    def __init__(self, cmd="CompareACE", **kwargs):
        """Declare the two motif-file arguments of the CompareACE wrapper.

        ``motif1`` and ``motif2`` are positional file-name arguments whose
        values must be ``str`` instances. ``cmd`` and ``kwargs`` are passed
        on to ``AbstractCommandline.__init__``.

        The function-scope ``import os.path`` that used to open this method
        was never referenced and has been dropped.
        """
        self.parameters = \
          [
            _Argument(["motif1"],
                        "name of file containing motif 1",
                        checker_function=lambda x: isinstance(x, str),
                        filename=True),
            _Argument(["motif2"],
                        "name of file containing motif 2",
                        checker_function=lambda x: isinstance(x, str),
                        filename=True),
          ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #8
0
ファイル: _samtools.py プロジェクト: cbirdlab/sap
 def __init__(self, cmd="samtools", **kwargs):
     """Set up the ``fixmate`` sub-command with its input and output files.

     Both positional arguments are declared as required file names. ``cmd``
     is stored as ``program_name`` and forwarded, with ``kwargs``, to
     ``AbstractCommandline.__init__``.
     """
     def required_file(aliases, description):
         # Positional argument flagged as a mandatory file name.
         return _Argument(aliases, description,
                          filename=True, is_required=True)

     self.program_name = cmd
     self.parameters = [
         _StaticArgument("fixmate"),
         required_file(
             ["in_bam", "sorted_bam", "input_bam", "input", "input_file"],
             "Name Sorted Alignment File "),
         required_file(
             ["out_bam", "output_bam", "output", "output_file"],
             "Output file"),
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #9
0
ファイル: _samtools.py プロジェクト: kaspermunch/sap
 def __init__(self, cmd="samtools", **kwargs):
     """Declare the parameters of the ``merge`` sub-command.

     Order of the parameter list: the static ``merge`` token, the five
     switches, the two string-valued options, the required output file and
     finally the required list of input files. ``cmd`` is stored as
     ``program_name`` and forwarded, with ``kwargs``, to
     ``AbstractCommandline.__init__``.
     """
     def is_str(value):
         return isinstance(value, str)

     self.program_name = cmd
     subcommand = [_StaticArgument("merge")]

     # Whitespace inside the triple-quoted help texts is part of the strings.
     switches = [
         _Switch(["-n", "n"],
                 """The input alignments are sorted by read names
                 rather than by chromosomal coordinates"""),
         _Switch(["-r", "r"],
                 """Attach an RG tag to each alignment.
                 The tag value is inferred from file names"""),
         _Switch(["-u", "u"], "Uncompressed BAM output"),
         _Switch(["-1", "fast_bam"],
                 """Use zlib compression level 1
                                        to compress the output"""),
         _Switch(["-f", "f"],
                 """Force to overwrite the
                                 output file if present"""),
     ]

     valued_options = [
         _Option(["-h", "h"],
                 """Use the lines of FILE as '@'
                                 headers to be copied to out.bam""",
                 filename=True, equate=False, checker_function=is_str),
         _Option(["-R", "R"],
                 "Merge files in the specified region indicated by STR",
                 equate=False, checker_function=is_str),
     ]

     files = [
         _Argument(["output_bam", "out_bam", "out", "output"],
                   "Output BAM file",
                   filename=True, is_required=True),
         _ArgumentList(["input_bam", "in_bam", "input", "bam"],
                       "Input BAM",
                       filename=True, is_required=True),
     ]

     self.parameters = subcommand + switches + valued_options + files
     AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #10
0
ファイル: _bwa.py プロジェクト: cbirdlab/sap
    def __init__(self, cmd="bwa", **kwargs):
        """Declare the command-line parameters of the ``index`` sub-command.

        The parameter list is: the static ``index`` token, the required
        ``-a`` algorithm option (restricted to ``"is"`` or ``"bwtsw"``), the
        optional ``-p`` prefix option, the required input file and the ``-c``
        switch. ``cmd`` is stored as ``program_name`` and forwarded, with
        ``kwargs``, to ``AbstractCommandline.__init__``.
        """
        def is_known_algorithm(name):
            return name in ("is", "bwtsw")

        # The layout of this help text (including its indentation) is part of
        # the string and is kept unchanged.
        algorithm_help = """Algorithm for constructing BWT index.

                            Available options are:
                             - is:    IS linear-time algorithm for constructing suffix array.
                                      It requires 5.37N memory where N is the size of the database.
                                      IS is moderately fast, but does not work with database larger
                                      than 2GB. IS is the default algorithm due to its simplicity.
                             - bwtsw: Algorithm implemented in BWT-SW. This method works with the
                                      whole human genome, but it does not work with database
                                      smaller than 10MB and it is usually slower than IS."""

        self.program_name = cmd
        subcommand = _StaticArgument("index")
        algorithm = _Option(["-a", "a", "algorithm"],
                            algorithm_help,
                            checker_function=is_known_algorithm,
                            equate=False, is_required=True)
        prefix = _Option(["-p", "p", "prefix"],
                         "Prefix of the output database [same as db filename]",
                         equate=False, is_required=False)
        infile = _Argument(["infile"], "Input file name",
                           filename=True, is_required=True)
        color_space = _Switch(
            ["-c", "c"],
            "Build color-space index. The input fasta should be in nucleotide space.")
        self.parameters = [subcommand, algorithm, prefix, infile, color_space]
        AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #11
0
ファイル: Controller.py プロジェクト: cbirdlab/sap
 def __init__(self, genepop_dir=None, cmd='Genepop', **kwargs):
     """Register the Genepop arguments, then force ``mode`` to batch.

     ``genepop_dir`` is part of the signature but is not read here. The
     first three arguments are declared with ``is_required=True``; the rest
     are declared with a name and help text only. After
     ``AbstractCommandline.__init__`` returns, ``mode`` is set to
     ``"Mode=Batch"``.
     """
     def required(name, description):
         return _Argument([name], description, is_required=True)

     def plain(name, description):
         return _Argument([name], description)

     self.parameters = [
         required("command", "GenePop option to be called"),
         required("mode", "Should allways be batch"),
         required("input", "Input file"),
         plain("Dememorization", "Dememorization step"),
         plain("BatchNumber", "Number of MCMC batches"),
         plain("BatchLength", "Length of MCMC chains"),
         plain("HWtests", "Enumeration or MCMC"),
         plain("IsolBDstatistic", "IBD statistic (a or e)"),
         plain("MinimalDistance", "Minimal IBD distance"),
         plain("GeographicScale", "Log or Linear"),
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
     self.set_parameter("mode", "Mode=Batch")
コード例 #12
0
ファイル: _samtools.py プロジェクト: kaspermunch/sap
    def __init__(self, cmd="samtools", **kwargs):
        """Declare the parameters of the ``targetcut`` sub-command.

        After the static ``targetcut`` token come two integer options
        (``-Q``, ``-i``), four string options (``-f``, ``-0``, ``-1``,
        ``-2``; only ``-f`` is flagged as a file name) and the required
        input file. ``cmd`` is stored as ``program_name`` and forwarded, with
        ``kwargs``, to ``AbstractCommandline.__init__``.
        """
        def int_option(names, description):
            return _Option(names, description,
                           equate=False,
                           checker_function=lambda x: isinstance(x, int))

        def str_option(names, description, **extra):
            # ``extra`` carries ``filename=True`` for the one file option.
            return _Option(names, description,
                           equate=False,
                           checker_function=lambda x: isinstance(x, str),
                           **extra)

        self.program_name = cmd
        self.parameters = [
            _StaticArgument("targetcut"),
            int_option(["-Q", "Q"], "Minimum Base Quality "),
            int_option(["-i", "i"], "Insertion Penalty"),
            str_option(["-f", "f"], "Reference Filename", filename=True),
            str_option(["-0", "em0"], "em0"),
            str_option(["-1", "em1"], "em1"),
            str_option(["-2", "em2"], "em2"),
            _Argument(["input", "input_bam", "in_bam"],
                      "Input file",
                      filename=True, is_required=True),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #13
0
 def __init__(self, cmd="msaprobs", **kwargs):
     """Declare the options, switches and input file of the msaprobs wrapper.

     Only ``-o`` is declared with ``equate=False``; the other options do not
     pass ``equate``. ``cmd`` and ``kwargs`` are passed on to
     ``AbstractCommandline.__init__``.
     """
     def is_int(value):
         return isinstance(value, int)

     def int_between(low, high):
         # Build a checker accepting ints in the closed range [low, high].
         def check(value):
             return isinstance(value, int) and low <= value <= high
         return check

     # order of parameters is the same as in msaprobs -help
     outfile = _Option(["-o", "--outfile", "outfile"],
                       "specify the output file name (STDOUT by default)",
                       filename=True,
                       equate=False)
     threads = _Option(
         ["-num_threads", "numthreads"],
         "specify the number of threads used, and otherwise detect automatically",
         checker_function=is_int)
     clustalw = _Switch(["-clustalw", "clustalw"],
                        "use CLUSTALW output format instead of FASTA format")
     consistency = _Option(
         ["-c", "consistency"],
         "use 0 <= REPS <= 5 (default: 2) passes of consistency transformation",
         checker_function=int_between(0, 5))
     refinement = _Option(
         ["-ir", "--iterative-refinement", "iterative_refinement"],
         "use 0 <= REPS <= 1000 (default: 10) passes of iterative-refinement",
         checker_function=int_between(0, 1000))
     verbose = _Switch(["-v", "verbose"],
                       "report progress while aligning (default: off)")
     annotation = _Option(
         ["-annot", "annot"],
         "write annotation for multiple alignment to FILENAME",
         filename=True)
     alignment_order = _Switch(
         ["-a", "--alignment-order", "alignment_order"],
         "print sequences in alignment order rather than input order (default: off)")
     version = _Option(["-version", "version"],
                       "print out version of MSAPROBS")
     infile = _Argument(["infile"],
                        "Multiple sequence input file",
                        filename=True)

     self.parameters = [
         outfile, threads, clustalw, consistency, refinement,
         verbose, annotation, alignment_order, version, infile,
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #14
0
ファイル: _samtools.py プロジェクト: cbirdlab/sap
 def __init__(self, cmd="samtools", **kwargs):
     """Set up the ``merge`` sub-command.

     The parameter list contains, in order: the static ``merge`` token, the
     switches ``-n``, ``-r``, ``-u``, ``-1`` and ``-f``, the string options
     ``-h`` (a file name) and ``-R``, the required output file and the
     required list of input files. ``cmd`` is stored as ``program_name`` and
     forwarded, with ``kwargs``, to ``AbstractCommandline.__init__``.
     """
     def str_option(names, description, **extra):
         # String-valued option; ``extra`` carries ``filename=True`` for -h.
         return _Option(names, description,
                        equate=False,
                        checker_function=lambda x: isinstance(x, str),
                        **extra)

     # Help texts are hoisted so the declarations below stay compact; their
     # embedded line breaks and indentation are part of the strings.
     sorted_by_name_help = """The input alignments are sorted by read names
                 rather than by chromosomal coordinates"""
     read_group_help = """Attach an RG tag to each alignment.
                 The tag value is inferred from file names"""
     fast_bam_help = """Use zlib compression level 1
                                        to compress the output"""
     force_help = """Force to overwrite the
                                 output file if present"""
     header_help = """Use the lines of FILE as '@'
                                 headers to be copied to out.bam"""

     self.program_name = cmd
     self.parameters = [
         _StaticArgument("merge"),
         _Switch(["-n", "n"], sorted_by_name_help),
         _Switch(["-r", "r"], read_group_help),
         _Switch(["-u", "u"], "Uncompressed BAM output"),
         _Switch(["-1", "fast_bam"], fast_bam_help),
         _Switch(["-f", "f"], force_help),
         str_option(["-h", "h"], header_help, filename=True),
         str_option(["-R", "R"],
                    "Merge files in the specified region indicated by STR"),
         _Argument(["output_bam", "out_bam", "out", "output"],
                   "Output BAM file",
                   filename=True, is_required=True),
         _ArgumentList(["input_bam", "in_bam", "input", "bam"],
                       "Input BAM",
                       filename=True, is_required=True),
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #15
0
ファイル: _samtools.py プロジェクト: cbirdlab/sap
 def __init__(self, cmd="samtools", **kwargs):
     """Set up the ``targetcut`` sub-command.

     ``-Q`` and ``-i`` take integers; ``-f``, ``-0``, ``-1`` and ``-2`` take
     strings, with ``-f`` additionally flagged as a file name. The input
     file is a required positional argument. ``cmd`` is stored as
     ``program_name`` and forwarded, with ``kwargs``, to
     ``AbstractCommandline.__init__``.
     """
     def is_int(value):
         return isinstance(value, int)

     def is_str(value):
         return isinstance(value, str)

     self.program_name = cmd
     declared = [_StaticArgument("targetcut")]

     for names, help_text in ((["-Q", "Q"], "Minimum Base Quality "),
                              (["-i", "i"], "Insertion Penalty")):
         declared.append(_Option(names, help_text,
                                 equate=False, checker_function=is_int))

     # Only the reference option is flagged as a file name.
     declared.append(_Option(["-f", "f"], "Reference Filename",
                             filename=True, equate=False,
                             checker_function=is_str))

     for names, help_text in ((["-0", "em0"], "em0"),
                              (["-1", "em1"], "em1"),
                              (["-2", "em2"], "em2")):
         declared.append(_Option(names, help_text,
                                 equate=False, checker_function=is_str))

     declared.append(_Argument(["input", "input_bam", "in_bam"],
                               "Input file",
                               filename=True, is_required=True))
     self.parameters = declared
     AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #16
0
ファイル: _samtools.py プロジェクト: cbirdlab/sap
 def __init__(self, cmd="samtools", **kwargs):
     """Declare the parameters of the ``phase`` sub-command.

     The required input file is declared directly after the static
     ``phase`` token and ahead of the switches (``-A``, ``-F``) and options
     (``-b``, ``-k``, ``-q``, ``-Q``). ``cmd`` is stored as ``program_name``
     and forwarded, with ``kwargs``, to ``AbstractCommandline.__init__``.
     """
     def int_option(names, description):
         return _Option(names, description,
                        equate=False,
                        checker_function=lambda x: isinstance(x, int))

     # Line breaks and indentation inside these help texts are part of the
     # strings.
     lod_help = """Minimum Phred-scaled LOD to
                 call a heterozygote"""
     base_quality_help = """Minimum base quality to be
                 used in het calling"""

     self.program_name = cmd
     self.parameters = [
         _StaticArgument("phase"),
         _Argument(["input", "input_bam", "in_bam"],
                   "Input file",
                   filename=True, is_required=True),
         _Switch(["-A", "A"], "Drop reads with ambiguous phase"),
         _Option(["-b", "b"],
                 "Prefix of BAM output",
                 filename=True, equate=False,
                 checker_function=lambda x: isinstance(x, str)),
         _Switch(["-F", "F"], "Do not attempt to fix chimeric reads"),
         int_option(["-k", "k"], "Maximum length for local phasing"),
         int_option(["-q", "q"], lod_help),
         int_option(["-Q", "Q"], base_quality_help),
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #17
0
ファイル: _bwa.py プロジェクト: kaspermunch/sap
    def __init__(self, cmd="bwa", **kwargs):
        """Build the parameter list for the ``index`` sub-command.

        ``-a`` is declared as required and only accepts ``"is"`` or
        ``"bwtsw"``; ``-p`` is optional; ``infile`` is a required file name;
        ``-c`` is a switch. ``cmd`` is stored as ``program_name`` and
        forwarded, with ``kwargs``, to ``AbstractCommandline.__init__``.
        """
        accepted_algorithms = ["is", "bwtsw"]

        def algorithm_ok(choice):
            return choice in accepted_algorithms

        self.program_name = cmd
        declared = [_StaticArgument("index")]
        # The indentation of the continuation lines below belongs to the help
        # string and is reproduced unchanged.
        declared.append(
            _Option(["-a", "a", "algorithm"],
                    """Algorithm for constructing BWT index.

                            Available options are:
                             - is:    IS linear-time algorithm for constructing suffix array.
                                      It requires 5.37N memory where N is the size of the database.
                                      IS is moderately fast, but does not work with database larger
                                      than 2GB. IS is the default algorithm due to its simplicity.
                             - bwtsw: Algorithm implemented in BWT-SW. This method works with the
                                      whole human genome, but it does not work with database
                                      smaller than 10MB and it is usually slower than IS.""",
                    checker_function=algorithm_ok,
                    equate=False, is_required=True))
        declared.append(
            _Option(["-p", "p", "prefix"],
                    "Prefix of the output database [same as db filename]",
                    equate=False, is_required=False))
        declared.append(
            _Argument(["infile"], "Input file name",
                      filename=True, is_required=True))
        declared.append(
            _Switch(["-c", "c"],
                    "Build color-space index. The input fasta should be in nucleotide space."))
        self.parameters = declared
        AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #18
0
ファイル: _samtools.py プロジェクト: kaspermunch/sap
 def __init__(self, cmd="samtools", **kwargs):
     """Declare the parameters of the ``sort`` sub-command.

     The list holds the static ``sort`` token, the ``-o`` and ``-n``
     switches, the integer ``-m`` option, and the required input BAM and
     output prefix arguments. ``cmd`` is stored as ``program_name`` and
     forwarded, with ``kwargs``, to ``AbstractCommandline.__init__``.
     """
     # Line breaks and indentation inside these help texts are part of the
     # strings.
     stdout_help = """Output the final alignment
                                 to the standard output"""
     by_name_help = """Sort by read names rather
                                 than by chromosomal coordinates"""

     self.program_name = cmd
     subcommand = _StaticArgument("sort")
     to_stdout = _Switch(["-o", "o"], stdout_help)
     by_name = _Switch(["-n", "n"], by_name_help)
     memory = _Option(["-m", "m"],
                      "Approximately the maximum required memory",
                      equate=False,
                      checker_function=lambda x: isinstance(x, int))
     source = _Argument(["input_bam"], "Input BAM file",
                        filename=True, is_required=True)
     prefix = _Argument(["out_prefix"], "Output prefix",
                        filename=True, is_required=True)
     self.parameters = [subcommand, to_stdout, by_name, memory, source, prefix]
     AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #19
0
ファイル: _samtools.py プロジェクト: kaspermunch/sap
 def __init__(self, cmd="samtools", **kwargs):
     """Declare the ``idxstats`` sub-command and its single BAM argument.

     The BAM argument is declared with its aliases and help text only (no
     ``filename`` or ``is_required`` flag is passed). ``cmd`` is stored as
     ``program_name`` and forwarded, with ``kwargs``, to
     ``AbstractCommandline.__init__``.
     """
     self.program_name = cmd
     subcommand = _StaticArgument("idxstats")
     bam_aliases = ["input", "in_bam", "input_bam"]
     bam_file = _Argument(bam_aliases, "BAM file to be indexed")
     self.parameters = [subcommand, bam_file]
     AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #20
0
ファイル: _samtools.py プロジェクト: cbirdlab/sap
 def __init__(self, cmd="samtools", **kwargs):
     """Set up the ``idxstats`` sub-command with one BAM argument.

     Only aliases and a help text are given for the argument. ``cmd`` is
     kept as ``program_name`` and handed, with ``kwargs``, to
     ``AbstractCommandline.__init__``.
     """
     self.program_name = cmd
     declared = []
     declared.append(_StaticArgument("idxstats"))
     declared.append(_Argument(["input", "in_bam", "input_bam"],
                               "BAM file to be indexed"))
     self.parameters = declared
     AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #21
0
ファイル: _samtools.py プロジェクト: cbirdlab/sap
 def __init__(self, cmd="samtools", **kwargs):
     """Set up the ``faidx`` sub-command with its reference FASTA argument.

     The reference is a required positional file name. ``cmd`` is kept as
     ``program_name`` and handed, with ``kwargs``, to
     ``AbstractCommandline.__init__``.
     """
     self.program_name = cmd
     subcommand = _StaticArgument("faidx")
     reference = _Argument(["reference", "reference_fasta", "ref"],
                           "Reference FASTA to be indexed",
                           filename=True, is_required=True)
     self.parameters = [subcommand, reference]
     AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #22
0
ファイル: _samtools.py プロジェクト: kaspermunch/sap
    def __init__(self, cmd="samtools", **kwargs):
        """Declare the ``faidx`` sub-command and its reference FASTA argument.

        The reference is declared as a required file name with three
        aliases. ``cmd`` is stored as ``program_name`` and forwarded, with
        ``kwargs``, to ``AbstractCommandline.__init__``.
        """
        reference_aliases = ["reference", "reference_fasta", "ref"]

        self.program_name = cmd
        declared = [_StaticArgument("faidx")]
        declared.append(_Argument(reference_aliases,
                                  "Reference FASTA to be indexed",
                                  filename=True, is_required=True))
        self.parameters = declared
        AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #23
0
ファイル: _alignace.py プロジェクト: kaspermunch/sap
 def __init__(self, cmd="CompareACE", **kwargs):
     """Warn that the wrapper is deprecated, then declare its two motif files.

     A ``BiopythonDeprecationWarning`` is issued on every construction.
     ``motif1`` and ``motif2`` are positional file-name arguments whose
     values must be ``str`` instances. ``cmd`` and ``kwargs`` are passed on
     to ``AbstractCommandline.__init__``.
     """
     # The line breaks and indentation are part of the warning message.
     deprecation_note = """The CompareACE application wrapper is deprecated and
                   is likely to be removed in a future release of Biopython,
                   since an up to date version of the AlignACE software
                   cannot be obtained anymore. If you have a copy of
                   AlignACE 4, please consider contacting the Biopython
                   developers."""
     warnings.warn(deprecation_note, BiopythonDeprecationWarning)

     def is_str(value):
         return isinstance(value, str)

     motif_files = []
     for name, help_text in (("motif1", "name of file containing motif 1"),
                             ("motif2", "name of file containing motif 2")):
         motif_files.append(_Argument([name], help_text,
                                      checker_function=is_str,
                                      filename=True))
     self.parameters = motif_files
     AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #24
0
ファイル: _bwa.py プロジェクト: cbirdlab/sap
    def __init__(self, cmd="bwa", **kwargs):
        """Declare the command-line parameters of the ``samse`` sub-command.

        The parameter list holds the static ``samse`` token, the required
        reference, sai and read file names, the integer ``-n`` option and the
        ``-r`` read-group option. ``cmd`` is stored as ``program_name`` and
        forwarded, with ``kwargs``, to ``AbstractCommandline.__init__``.

        ``-r`` takes a read-group *string* (its help text shows
        ``'@RG\\tID:foo\\tSM:bar'``), so it is validated as text. It was
        previously validated with ``isinstance(x, int)``, which rejected
        every valid read group.
        """
        # ``basestring`` exists only on Python 2; fall back to ``str`` so the
        # read-group check works on either major version.
        try:
            text_types = basestring
        except NameError:
            text_types = str

        self.program_name = cmd
        self.parameters = \
                [
                    _StaticArgument("samse"),
                    _Argument(["reference"], "Reference file name", filename=True, is_required=True),
                    _Argument(["sai_file"], "Sai file name", filename=True, is_required=True),
                    _Argument(["read_file"], "Read  file name", filename=True, is_required=True),
                    _Option(["-n", "n"],
                            """Maximum number of alignments to output in the XA tag for reads paired properly.

                            If a read has more than INT hits, the XA tag will not be written. [3]""",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-r", "r"],
                            "Specify the read group in a format like '@RG\tID:foo\tSM:bar'. [null]",
                            # Read groups are text, not integers.
                            checker_function=lambda x: isinstance(x, text_types),
                            equate=False),
                  ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #25
0
ファイル: _bwa.py プロジェクト: kaspermunch/sap
    def __init__(self, cmd="bwa", **kwargs):
        """Build the parameter list for the ``samse`` sub-command.

        Declared in order: the static ``samse`` token, three required file
        names (reference, sai file, read file), the integer ``-n`` option
        and the ``-r`` read-group option. ``cmd`` is stored as
        ``program_name`` and forwarded, with ``kwargs``, to
        ``AbstractCommandline.__init__``.

        The ``-r`` value is a read-group string such as
        ``'@RG\\tID:foo\\tSM:bar'``. Its checker therefore accepts text; the
        earlier ``isinstance(x, int)`` check could never accept a valid
        read group.
        """
        # Resolve the text type once: ``basestring`` on Python 2, ``str``
        # where that name does not exist.
        try:
            text_types = basestring
        except NameError:
            text_types = str

        def is_int(value):
            return isinstance(value, int)

        def is_text(value):
            return isinstance(value, text_types)

        self.program_name = cmd
        self.parameters = \
                [
                    _StaticArgument("samse"),
                    _Argument(["reference"], "Reference file name", filename=True, is_required=True),
                    _Argument(["sai_file"], "Sai file name", filename=True, is_required=True),
                    _Argument(["read_file"], "Read  file name", filename=True, is_required=True),
                    _Option(["-n", "n"],
                            """Maximum number of alignments to output in the XA tag for reads paired properly.

                            If a read has more than INT hits, the XA tag will not be written. [3]""",
                            checker_function=is_int,
                            equate=False),
                    _Option(["-r", "r"],
                            "Specify the read group in a format like '@RG\tID:foo\tSM:bar'. [null]",
                            checker_function=is_text,
                            equate=False),
                  ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #26
0
ファイル: _samtools.py プロジェクト: kaspermunch/sap
    def __init__(self, cmd="samtools", **kwargs):
        """Declare the parameters of the ``rmdup`` sub-command.

        The list holds the static ``rmdup`` token, the ``-s`` and ``-S``
        switches and the required input and output file names. ``cmd`` is
        stored as ``program_name`` and forwarded, with ``kwargs``, to
        ``AbstractCommandline.__init__``.
        """
        # Line breaks and indentation inside these help texts are part of
        # the strings.
        single_end_help = """Remove duplicates for single-end reads.

                    By default, the command works for paired-end
                    reads only"""
        treat_as_single_help = """Treat paired-end reads
                                    as single-end reads"""

        def required_file(aliases, description):
            return _Argument(aliases, description,
                             filename=True, is_required=True)

        self.program_name = cmd
        self.parameters = [
            _StaticArgument("rmdup"),
            _Switch(["-s", "s"], single_end_help),
            _Switch(["-S", "S"], treat_as_single_help),
            required_file(["in_bam", "sorted_bam", "input_bam",
                           "input", "input_file"],
                          "Name Sorted Alignment File "),
            required_file(["out_bam", "output_bam", "output", "output_file"],
                          "Output file"),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #27
0
ファイル: _bwa.py プロジェクト: cbirdlab/sap
    def __init__(self, cmd="bwa", **kwargs):
        """Declare the command-line parameters of the ``sampe`` sub-command.

        The parameter list holds the static ``sampe`` token, five required
        file names (reference, two sai files, two read files), the integer
        options ``-a``, ``-o``, ``-n`` and ``-N``, and the ``-r`` read-group
        option, which must be text. ``cmd`` is stored as ``program_name``
        and forwarded, with ``kwargs``, to ``AbstractCommandline.__init__``.
        """
        # ``basestring`` exists only on Python 2. Resolve the text type once,
        # falling back to ``str``, so that validating ``-r`` cannot raise
        # NameError on Python 3.
        try:
            text_types = basestring
        except NameError:
            text_types = str

        self.program_name = cmd
        self.parameters = \
                [
                    _StaticArgument("sampe"),
                    _Argument(["reference"], "Reference file name", filename=True, is_required=True),
                    _Argument(["sai_file1"], "Sai file 1", filename=True, is_required=True),
                    _Argument(["sai_file2"], "Sai file 2", filename=True, is_required=True),
                    _Argument(["read_file1"], "Read  file 1", filename=True, is_required=True),
                    _Argument(["read_file2"], "Read  file 2", filename=True, is_required=True),
                    _Option(["-a", "a"],
                            """Maximum insert size for a read pair to be considered being mapped properly [500].

                            Since 0.4.5, this option is only used when there are not enough
                            good alignments to infer the distribution of insert sizes.""",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-o", "o"],
                            """Maximum occurrences of a read for pairing [100000].

                            A read with more occurrences will be treated as a single-end read.
                            Reducing this parameter helps faster pairing.""",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-n", "n"],
                            """Maximum number of alignments to output in the XA tag for reads paired properly [3].

                            If a read has more than INT hits, the XA tag will not be written.""",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-N", "N"],
                            """Maximum number of alignments to output in the XA tag for disconcordant read pairs (excluding singletons) [10].

                         .  If a read has more than INT hits, the XA tag will not be written.""",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-r", "r"], "Specify the read group in a format like '@RG\tID:foo\tSM:bar'. [null]",
                            checker_function=lambda x: isinstance(x, text_types),
                            equate=False),
                  ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #28
0
ファイル: _bwa.py プロジェクト: kaspermunch/sap
    def __init__(self, cmd="bwa", **kwargs):
        """Set up the parameter table for the ``bwa sampe`` sub-command.

        The value of ``cmd`` becomes the program name. ``cmd`` together with
        every extra keyword argument is then passed to
        ``AbstractCommandline.__init__``.
        """
        self.program_name = cmd

        # On Python 3 the name ``basestring`` is undefined; use ``str`` there
        # so that validating the read group does not fail with NameError.
        try:
            string_types = basestring
        except NameError:
            string_types = str

        self.parameters = [
            _StaticArgument("sampe"),
            # Required files: reference, two .sai files, two read files.
            _Argument(["reference"], "Reference file name", filename=True, is_required=True),
            _Argument(["sai_file1"], "Sai file 1", filename=True, is_required=True),
            _Argument(["sai_file2"], "Sai file 2", filename=True, is_required=True),
            _Argument(["read_file1"], "Read  file 1", filename=True, is_required=True),
            _Argument(["read_file2"], "Read  file 2", filename=True, is_required=True),
            # Options whose value must be an int.
            _Option(["-a", "a"],
                    """Maximum insert size for a read pair to be considered being mapped properly [500].

                            Since 0.4.5, this option is only used when there are not enough
                            good alignments to infer the distribution of insert sizes.""",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Option(["-o", "o"],
                    """Maximum occurrences of a read for pairing [100000].

                            A read with more occurrences will be treated as a single-end read.
                            Reducing this parameter helps faster pairing.""",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Option(["-n", "n"],
                    """Maximum number of alignments to output in the XA tag for reads paired properly [3].

                            If a read has more than INT hits, the XA tag will not be written.""",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            # The second paragraph of this help text used to start with a
            # stray "."; it has been dropped.
            _Option(["-N", "N"],
                    """Maximum number of alignments to output in the XA tag for disconcordant read pairs (excluding singletons) [10].

                            If a read has more than INT hits, the XA tag will not be written.""",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            # A raw string keeps the literal "\t" sequences visible in the
            # help text instead of turning them into TAB characters.
            _Option(["-r", "r"],
                    r"Specify the read group in a format like '@RG\tID:foo\tSM:bar'. [null]",
                    checker_function=lambda x: isinstance(x, string_types),
                    equate=False),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #29
0
ファイル: _Probcons.py プロジェクト: cbirdlab/sap
 def __init__(self, cmd="probcons", **kwargs):
     """Declare the command line parameters accepted by PROBCONS.

     ``cmd`` and any extra keyword arguments are passed straight on to
     ``AbstractCommandline.__init__`` once the parameter list is in place.
     """
     def reps_below(stop):
         # Build a checker accepting values contained in range(0, stop).
         return lambda x: x in range(0, stop)

     # Some options cannot be set through properties under their documented
     # long name, because hyphens are not valid in Python identifiers
     # (cmdline.pre-training = 3 will not work). Use the shortened alias in
     # those cases, e.g. cmdline.pre = 3.
     clustalw_format = _Switch(
         ["-clustalw", "clustalw"],
         "Use CLUSTALW output format instead of MFA")
     consistency_reps = _Option(
         ["-c", "c", "--consistency", "consistency"],
         "Use 0 <= REPS <= 5 (default: 2) passes of consistency transformation",
         checker_function=reps_below(6),
         equate=False)
     refinement_reps = _Option(
         ["-ir", "--iterative-refinement", "iterative-refinement", "ir"],
         "Use 0 <= REPS <= 1000 (default: 100) passes of iterative-refinement",
         checker_function=reps_below(1001),
         equate=False)
     pretraining_reps = _Option(
         ["-pre", "--pre-training", "pre-training", "pre"],
         "Use 0 <= REPS <= 20 (default: 0) rounds of pretraining",
         checker_function=reps_below(21),
         equate=False)
     all_pairs = _Switch(
         ["-pairs", "pairs"],
         "Generate all-pairs pairwise alignments")
     viterbi = _Switch(
         ["-viterbi", "viterbi"],
         "Use Viterbi algorithm to generate all pairs (automatically enables -pairs)")
     verbose = _Switch(
         ["-verbose", "verbose"],
         "Report progress while aligning (default: off)")
     annotation_file = _Option(
         ["-annot", "annot"],
         "Write annotation for multiple alignment to FILENAME",
         equate=False)
     training_file = _Option(
         ["-t", "t", "--train", "train"],
         "Compute EM transition probabilities, store in FILENAME (default: no training)",
         equate=False)
     emissions = _Switch(
         ["-e", "e", "--emissions", "emissions"],
         "Also reestimate emission probabilities (default: off)")
     parameter_file = _Option(
         ["-p", "p", "--paramfile", "paramfile"],
         "Read parameters from FILENAME",
         equate=False)
     alignment_order = _Switch(
         ["-a", "--alignment-order", "alignment-order", "a"],
         "Print sequences in alignment order rather than input order (default: off)")
     # Input file name
     input_file = _Argument(
         ["input"],
         "Input file name. Must be multiple FASTA alignment (MFA) format",
         filename=True,
         is_required=True)

     self.parameters = [
         clustalw_format,
         consistency_reps,
         refinement_reps,
         pretraining_reps,
         all_pairs,
         viterbi,
         verbose,
         annotation_file,
         training_file,
         emissions,
         parameter_file,
         alignment_order,
         input_file,
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #30
0
ファイル: _samtools.py プロジェクト: cbirdlab/sap
    def __init__(self, cmd="samtools", **kwargs):
        """Set up the parameter table for the ``samtools rmdup`` sub-command.

        ``cmd`` is kept as the program name; it is then passed, together
        with all other keyword arguments, to ``AbstractCommandline.__init__``.
        """
        self.program_name = cmd

        # Collect the entries one by one, in command line order.
        entries = [_StaticArgument("rmdup")]
        entries.append(
            _Switch(["-s", "s"],
                    """Remove duplicates for single-end reads.

                    By default, the command works for paired-end
                    reads only"""))
        entries.append(
            _Switch(["-S", "S"],
                    """Treat paired-end reads
                                    as single-end reads"""))

        # Required input BAM followed by the required output BAM.
        input_names = ["in_bam", "sorted_bam", "input_bam", "input", "input_file"]
        entries.append(
            _Argument(input_names, "Name Sorted Alignment File ",
                      filename=True, is_required=True))
        output_names = ["out_bam", "output_bam", "output", "output_file"]
        entries.append(
            _Argument(output_names, "Output file",
                      filename=True, is_required=True))

        self.parameters = entries
        AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #31
0
ファイル: _Probcons.py プロジェクト: kaspermunch/sap
 def __init__(self, cmd="probcons", **kwargs):
     """Set up the parameter table for the PROBCONS wrapper.

     After the table is assigned, ``cmd`` and the remaining keyword
     arguments are handed to ``AbstractCommandline.__init__``.
     """
     # Value checkers for the three bounded "REPS" options.
     def consistency_ok(value):
         return value in range(0, 6)

     def refinement_ok(value):
         return value in range(0, 1001)

     def pretraining_ok(value):
         return value in range(0, 21)

     # Note: options whose documented name contains a hyphen cannot be
     # assigned through a property of that name (hyphens are not valid in
     # Python names), e.g. cmdline.pre-training = 3 will not work. Use the
     # shortened option name instead: cmdline.pre = 3
     table = []
     table.append(_Switch(["-clustalw", "clustalw"],
                          "Use CLUSTALW output format instead of MFA"))
     table.append(_Option(["-c", "c", "--consistency", "consistency"],
                          "Use 0 <= REPS <= 5 (default: 2) passes of consistency transformation",
                          equate=False,
                          checker_function=consistency_ok))
     table.append(_Option(["-ir", "--iterative-refinement", "iterative-refinement", "ir"],
                          "Use 0 <= REPS <= 1000 (default: 100) passes of "
                          "iterative-refinement",
                          equate=False,
                          checker_function=refinement_ok))
     table.append(_Option(["-pre", "--pre-training", "pre-training", "pre"],
                          "Use 0 <= REPS <= 20 (default: 0) rounds of pretraining",
                          equate=False,
                          checker_function=pretraining_ok))
     table.append(_Switch(["-pairs", "pairs"],
                          "Generate all-pairs pairwise alignments"))
     table.append(_Switch(["-viterbi", "viterbi"],
                          "Use Viterbi algorithm to generate all pairs "
                          "(automatically enables -pairs)"))
     table.append(_Switch(["-verbose", "verbose"],
                          "Report progress while aligning (default: off)"))
     table.append(_Option(["-annot", "annot"],
                          "Write annotation for multiple alignment to FILENAME",
                          equate=False))
     table.append(_Option(["-t", "t", "--train", "train"],
                          "Compute EM transition probabilities, store in FILENAME "
                          "(default: no training)",
                          equate=False))
     table.append(_Switch(["-e", "e", "--emissions", "emissions"],
                          "Also reestimate emission probabilities (default: off)"))
     table.append(_Option(["-p", "p", "--paramfile", "paramfile"],
                          "Read parameters from FILENAME",
                          equate=False))
     table.append(_Switch(["-a", "--alignment-order", "alignment-order", "a"],
                          "Print sequences in alignment order rather than input "
                          "order (default: off)"))
     # The input file comes last.
     table.append(_Argument(["input"],
                            "Input file name. Must be multiple FASTA alignment "
                            "(MFA) format",
                            filename=True,
                            is_required=True))

     self.parameters = table
     AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #32
0
ファイル: _samtools.py プロジェクト: cbirdlab/sap
 def __init__(self, cmd="samtools", **kwargs):
     """Declare the parameters of the ``samtools sort`` command line.

     ``cmd`` is recorded as the program name and, with the remaining
     keyword arguments, forwarded to ``AbstractCommandline.__init__``.
     """
     self.program_name = cmd

     def is_int(value):
         # Checker for the memory option: the value must be an int.
         return isinstance(value, int)

     subcommand = _StaticArgument("sort")
     to_stdout = _Switch(
         ["-o", "o"],
         """Output the final alignment
                                 to the standard output""")
     by_name = _Switch(
         ["-n", "n"],
         """Sort by read names rather
                                 than by chromosomal coordinates""")
     max_memory = _Option(
         ["-m", "m"],
         "Approximately the maximum required memory",
         checker_function=is_int,
         equate=False)
     # Both positional arguments are required.
     bam_in = _Argument(["input_bam"], "Input BAM file",
                        filename=True, is_required=True)
     prefix_out = _Argument(["out_prefix"], "Output prefix",
                            filename=True, is_required=True)

     self.parameters = [
         subcommand,
         to_stdout,
         by_name,
         max_memory,
         bam_in,
         prefix_out,
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #33
0
ファイル: _samtools.py プロジェクト: cbirdlab/sap
    def __init__(self, cmd="samtools", **kwargs):
        """Declare the parameters of the ``samtools calmd`` command line.

        ``cmd`` is recorded as the program name; it and the remaining keyword
        arguments are forwarded to ``AbstractCommandline.__init__``.
        """
        self.program_name = cmd

        def is_int(value):
            # Checker for -C: the value must be an int.
            return isinstance(value, int)

        subcommand = _StaticArgument("calmd")

        # Switches, in the order they are placed in the parameter list.
        extended_baq = _Switch(
            ["-E", "E"],
            """Extended BAQ calculation.
                    This option trades specificity for sensitivity,
                    though the effect is minor.""")
        equal_bases = _Switch(
            ["-e", "e"],
            """Convert the read base to = if it is
                    identical to the aligned reference base.

                    Indel caller does not support the = bases
                    at the moment.""")
        uncompressed = _Switch(["-u", "u"], "Output uncompressed BAM")
        compressed = _Switch(["-b", "b"], "Output compressed BAM ")
        sam_input = _Switch(["-S", "S"], "The input is SAM with header lines ")
        bq_tag = _Switch(
            ["-r", "r"],
            """Compute the BQ tag (without -A)
                    or cap base quality by BAQ (with -A).""")
        overwrite_quality = _Switch(
            ["-A", "A"],
            """When used jointly with -r this option overwrites
                    the original base quality""")

        # Single valued option.
        cap_coefficient = _Option(
            ["-C", "C"],
            """Coefficient to cap mapping quality
                    of poorly mapped reads.

                    See the pileup command for details.""",
            checker_function=is_int,
            equate=False)

        # Required files: the alignment first, then the reference.
        alignment_in = _Argument(
            ["input", "input_file", "in_bam", "infile", "input_bam"],
            "Input BAM",
            filename=True,
            is_required=True)
        reference_in = _Argument(
            ["reference", "reference_fasta", "ref"],
            "Reference FASTA to be indexed",
            filename=True,
            is_required=True)

        self.parameters = [
            subcommand,
            extended_baq,
            equal_bases,
            uncompressed,
            compressed,
            sam_input,
            bq_tag,
            overwrite_quality,
            cap_coefficient,
            alignment_in,
            reference_in,
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #34
0
ファイル: _samtools.py プロジェクト: kaspermunch/sap
    def __init__(self, cmd="samtools", **kwargs):
        """Set up the parameter table for the ``samtools calmd`` sub-command.

        The program name is taken from ``cmd``. ``cmd`` and every other
        keyword argument are then passed to ``AbstractCommandline.__init__``.
        """
        self.program_name = cmd

        # Alias lists for the two required file arguments.
        input_aliases = ["input", "input_file", "in_bam", "infile", "input_bam"]
        reference_aliases = ["reference", "reference_fasta", "ref"]

        table = [_StaticArgument("calmd")]
        table.append(_Switch(["-E", "E"], """Extended BAQ calculation.
                    This option trades specificity for sensitivity,
                    though the effect is minor."""))
        table.append(_Switch(["-e", "e"], """Convert the read base to = if it is
                    identical to the aligned reference base.

                    Indel caller does not support the = bases
                    at the moment."""))
        table.append(_Switch(["-u", "u"], "Output uncompressed BAM"))
        table.append(_Switch(["-b", "b"], "Output compressed BAM "))
        table.append(_Switch(["-S", "S"], "The input is SAM with header lines "))
        table.append(_Switch(["-r", "r"], """Compute the BQ tag (without -A)
                    or cap base quality by BAQ (with -A)."""))
        table.append(_Switch(["-A", "A"], """When used jointly with -r this option overwrites
                    the original base quality"""))
        # -C takes a value, which must be an int.
        table.append(_Option(["-C", "C"],
                             """Coefficient to cap mapping quality
                    of poorly mapped reads.

                    See the pileup command for details.""",
                             checker_function=lambda x: isinstance(x, int),
                             equate=False))
        table.append(_Argument(input_aliases,
                               "Input BAM",
                               is_required=True,
                               filename=True))
        table.append(_Argument(reference_aliases,
                               "Reference FASTA to be indexed",
                               is_required=True,
                               filename=True))

        self.parameters = table
        AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #35
0
ファイル: _samtools.py プロジェクト: kaspermunch/sap
 def __init__(self, cmd="samtools", **kwargs):
     """Declare the parameters of the ``samtools phase`` command line.

     ``cmd`` is stored as the program name; it and the remaining keyword
     arguments are forwarded to ``AbstractCommandline.__init__``.
     """
     self.program_name = cmd

     # Shared value checkers.
     def is_int(value):
         return isinstance(value, int)

     def is_str(value):
         return isinstance(value, str)

     subcommand = _StaticArgument("phase")
     # The input file is declared ahead of the options in this wrapper.
     bam_in = _Argument(
         ["input", "input_bam", "in_bam"],
         "Input file",
         filename=True,
         is_required=True)
     drop_ambiguous = _Switch(["-A", "A"], "Drop reads with ambiguous phase")
     output_prefix = _Option(
         ["-b", "b"],
         "Prefix of BAM output",
         filename=True,
         equate=False,
         checker_function=is_str)
     keep_chimeras = _Switch(["-F", "F"], "Do not attempt to fix chimeric reads")
     max_length = _Option(
         ["-k", "k"],
         "Maximum length for local phasing",
         equate=False,
         checker_function=is_int)
     min_lod = _Option(
         ["-q", "q"],
         """Minimum Phred-scaled LOD to
                 call a heterozygote""",
         equate=False,
         checker_function=is_int)
     min_base_quality = _Option(
         ["-Q", "Q"],
         """Minimum base quality to be
                 used in het calling""",
         equate=False,
         checker_function=is_int)

     self.parameters = [
         subcommand,
         bam_in,
         drop_ambiguous,
         output_prefix,
         keep_chimeras,
         max_length,
         min_lod,
         min_base_quality,
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #36
0
    def __init__(self, cmd="mafft", **kwargs):
        """Declare the command line parameters accepted by MAFFT.

        ``cmd`` is the executable name. It and any extra keyword arguments
        are forwarded to ``AbstractCommandline.__init__`` after the
        parameter list has been assigned.
        """
        # Values accepted by the --bl option (given as strings, not ints).
        BLOSUM_MATRICES = ["30", "45", "62", "80"]
        self.parameters = [
            # **** Algorithm ****
            # Automatically selects an appropriate strategy from L-INS-i,
            # FFT-NS-i and FFT-NS-2, according to data size. Default: off
            # (always FFT-NS-2)
            _Switch(["--auto", "auto"],
                    "Automatically select strategy. Default off."),
            # Distance is calculated based on the number of shared 6mers.
            # Default: on
            _Switch(["--6merpair", "6merpair", "sixmerpair"],
                    "Distance is calculated based on the number of shared "
                    "6mers. Default: on"),
            # All pairwise alignments are computed with the Needleman-Wunsch
            # algorithm. More accurate but slower than --6merpair. Suitable
            # for a set of globally alignable sequences. Applicable to up to
            # ~200 sequences. A combination with --maxiterate 1000 is
            # recommended (G-INS-i). Default: off (6mer distance is used)
            _Switch(["--globalpair", "globalpair"],
                    "All pairwise alignments are computed with the "
                    "Needleman-Wunsch algorithm. Default: off"),
            # All pairwise alignments are computed with the Smith-Waterman
            # algorithm. More accurate but slower than --6merpair. Suitable
            # for a set of locally alignable sequences. Applicable to up to
            # ~200 sequences. A combination with --maxiterate 1000 is
            # recommended (L-INS-i). Default: off (6mer distance is used)
            _Switch(["--localpair", "localpair"],
                    "All pairwise alignments are computed with the "
                    "Smith-Waterman algorithm. Default: off"),
            # All pairwise alignments are computed with a local algorithm
            # with the generalized affine gap cost (Altschul 1998). More
            # accurate but slower than --6merpair. Suitable when large
            # internal gaps are expected. Applicable to up to ~200 sequences.
            # A combination with --maxiterate 1000 is recommended (E-INS-i).
            # Default: off (6mer distance is used)
            _Switch(["--genafpair", "genafpair"],
                    "All pairwise alignments are computed with a local "
                    "algorithm with the generalized affine gap cost "
                    "(Altschul 1998). Default: off"),
            # All pairwise alignments are computed with FASTA (Pearson and
            # Lipman 1988). FASTA is required. Default: off (6mer distance
            # is used)
            _Switch(["--fastapair", "fastapair"],
                    "All pairwise alignments are computed with FASTA "
                    "(Pearson and Lipman 1988). Default: off"),
            # Weighting factor for the consistency term calculated from
            # pairwise alignments. Valid when either of --globalpair,
            # --localpair, --genafpair, --fastapair or --blastpair is
            # selected. Default: 2.7
            _Option(["--weighti", "weighti"],
                    "Weighting factor for the consistency term calculated "
                    "from pairwise alignments. Default: 2.7",
                    checker_function=lambda x: isinstance(x, float),
                    equate=False),
            # Guide tree is built number times in the progressive stage.
            # Valid with 6mer distance. Default: 2
            _Option(["--retree", "retree"],
                    "Guide tree is built number times in the progressive "
                    "stage. Valid with 6mer distance. Default: 2",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            # Number cycles of iterative refinement are performed. Default: 0
            _Option(["--maxiterate", "maxiterate"],
                    "Number cycles of iterative refinement are performed. "
                    "Default: 0",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            # Use FFT approximation in group-to-group alignment. Default: on
            _Switch(["--fft", "fft"],
                    "Use FFT approximation in group-to-group alignment. "
                    "Default: on"),
            # Do not use FFT approximation in group-to-group alignment.
            # Default: off
            _Switch(["--nofft", "nofft"],
                    "Do not use FFT approximation in group-to-group "
                    "alignment. Default: off"),
            # Alignment score is not checked in the iterative refinement
            # stage. Default: off (score is checked)
            _Switch(["--noscore", "noscore"],
                    "Alignment score is not checked in the iterative "
                    "refinement stage. Default: off (score is checked)"),
            # Use the Myers-Miller (1988) algorithm. Default: automatically
            # turned on when the alignment length exceeds 10,000 (aa/nt).
            _Switch(["--memsave", "memsave"],
                    "Use the Myers-Miller (1988) algorithm. Default: "
                    "automatically turned on when the alignment length "
                    "exceeds 10,000 (aa/nt)."),
            # Use a fast tree-building method (PartTree, Katoh and Toh 2007)
            # with the 6mer distance. Recommended for a large number
            # (> ~10,000) of sequences are input. Default: off
            _Switch(["--parttree", "parttree"],
                    "Use a fast tree-building method with the 6mer "
                    "distance. Default: off"),
            # The PartTree algorithm is used with distances based on DP.
            # Slightly more accurate and slower than --parttree. Recommended
            # for a large number (> ~10,000) of sequences are input.
            # Default: off
            _Switch(["--dpparttree", "dpparttree"],
                    "The PartTree algorithm is used with distances "
                    "based on DP. Default: off"),
            # The PartTree algorithm is used with distances based on FASTA.
            # Slightly more accurate and slower than --parttree. Recommended
            # for a large number (> ~10,000) of sequences are input. FASTA is
            # required. Default: off
            _Switch(["--fastaparttree", "fastaparttree"],
                    "The PartTree algorithm is used with distances based "
                    "on FASTA. Default: off"),
            # The number of partitions in the PartTree algorithm. Default: 50
            _Option(["--partsize", "partsize"],
                    "The number of partitions in the PartTree algorithm. "
                    "Default: 50",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            # Do not make alignment larger than number sequences. Valid only
            # with the --*parttree options. Default: the number of input
            # sequences
            # NOTE(review): the help text speaks of a number, yet this is
            # declared as a _Switch; confirm whether it should be an _Option
            # taking a value. Left unchanged to keep the interface stable.
            _Switch(["--groupsize", "groupsize"],
                    "Do not make alignment larger than number sequences. "
                    "Default: the number of input sequences"),
            # Adjust direction according to the first sequence
            # Mafft V6 beta function
            _Switch(["--adjustdirection", "adjustdirection"],
                    "Adjust direction according to the first sequence. "
                    "Default off."),
            # Adjust direction according to the first sequence
            # for highly diverged data; very slow
            # Mafft V6 beta function
            # (The adjacent literals below previously lacked separators, so
            # the help text ran the sentences together.)
            _Switch(["--adjustdirectionaccurately", "adjustdirectionaccurately"],
                    "Adjust direction according to the first sequence, "
                    "for highly diverged data; very slow. "
                    "Default off."),
            # **** Parameter ****
            # Gap opening penalty at group-to-group alignment. Default: 1.53
            _Option(["--op", "op"],
                    "Gap opening penalty at group-to-group alignment. "
                    "Default: 1.53",
                    checker_function=lambda x: isinstance(x, float),
                    equate=False),
            # Offset value, which works like gap extension penalty, for
            # group-to-group alignment. Default: 0.123
            _Option(["--ep", "ep"],
                    "Offset value, which works like gap extension penalty, "
                    "for group-to-group alignment. Default: 0.123",
                    checker_function=lambda x: isinstance(x, float),
                    equate=False),
            # Gap opening penalty at local pairwise alignment. Valid when the
            # --localpair or --genafpair option is selected. Default: -2.00
            _Option(["--lop", "lop"],
                    "Gap opening penalty at local pairwise alignment. "
                    "Default: -2.00",
                    checker_function=lambda x: isinstance(x, float),
                    equate=False),
            # Offset value at local pairwise alignment. Valid when the
            # --localpair or --genafpair option is selected. Default: 0.1
            _Option(["--lep", "lep"],
                    "Offset value at local pairwise alignment. "
                    "Default: 0.1",
                    checker_function=lambda x: isinstance(x, float),
                    equate=False),
            # Gap extension penalty at local pairwise alignment. Valid when
            # the --localpair or --genafpair option is selected.
            # Default: -0.1
            _Option(["--lexp", "lexp"],
                    "Gap extension penalty at local pairwise alignment. "
                    "Default: -0.1",
                    checker_function=lambda x: isinstance(x, float),
                    equate=False),
            # Gap opening penalty to skip the alignment. Valid when the
            # --genafpair option is selected. Default: -6.00
            _Option(["--LOP", "LOP"],
                    "Gap opening penalty to skip the alignment. "
                    "Default: -6.00",
                    checker_function=lambda x: isinstance(x, float),
                    equate=False),
            # Gap extension penalty to skip the alignment. Valid when the
            # --genafpair option is selected. Default: 0.00
            _Option(["--LEXP", "LEXP"],
                    "Gap extension penalty to skip the alignment. "
                    "Default: 0.00",
                    checker_function=lambda x: isinstance(x, float),
                    equate=False),

            # BLOSUM number matrix (Henikoff and Henikoff 1992) is used.
            # number=30, 45, 62 or 80. Default: 62
            _Option(["--bl", "bl"],
                    "BLOSUM number matrix is used. Default: 62",
                    checker_function=lambda x: x in BLOSUM_MATRICES,
                    equate=False),
            # JTT PAM number (Jones et al. 1992) matrix is used. number>0.
            # Default: BLOSUM62
            _Option(["--jtt", "jtt"],
                    "JTT PAM number (Jones et al. 1992) matrix is used. "
                    "number>0. Default: BLOSUM62",
                    equate=False),
            # Transmembrane PAM number (Jones et al. 1994) matrix is used.
            # number>0. Default: BLOSUM62
            # NOTE(review): described as a PAM number but flagged
            # filename=True; confirm whether that flag is intended.
            _Option(["--tm", "tm"],
                    "Transmembrane PAM number (Jones et al. 1994) "
                    "matrix is used. number>0. Default: BLOSUM62",
                    filename=True,
                    equate=False),
            # Use a user-defined AA scoring matrix. The format of matrixfile
            # is the same to that of BLAST. Ignored when nucleotide sequences
            # are input. Default: BLOSUM62
            _Option(["--aamatrix", "aamatrix"],
                    "Use a user-defined AA scoring matrix. "
                    "Default: BLOSUM62",
                    filename=True,
                    equate=False),
            # Incorporate the AA/nuc composition information into the scoring
            # matrix. Default: off
            _Switch(["--fmodel", "fmodel"],
                    "Incorporate the AA/nuc composition information into "
                    "the scoring matrix (True) or not (False, default)"),
            # **** Output ****
            # Name length for CLUSTAL and PHYLIP format output
            _Option(["--namelength", "namelength"],
                    """Name length in CLUSTAL and PHYLIP output.

                    MAFFT v6.847 (2011) added --namelength for use with
                    the --clustalout option for CLUSTAL output.

                    MAFFT v7.024 (2013) added support for this with the
                    --phylipout option for PHYLIP output (default 10).
                    """,
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            # Output format: clustal format. Default: off (fasta format)
            _Switch(["--clustalout", "clustalout"],
                    "Output format: clustal (True) or fasta (False, default)"),
            # Output format: phylip format.
            # Added in beta with v6.847, fixed in v6.850 (2011)
            _Switch(["--phylipout", "phylipout"],
                    "Output format: phylip (True), or fasta (False, default)"),
            # Output order: same as input. Default: on
            _Switch(["--inputorder", "inputorder"],
                    "Output order: same as input (True, default) or alignment "
                    "based (False)"),
            # Output order: aligned. Default: off (inputorder)
            _Switch(["--reorder", "reorder"],
                    "Output order: aligned (True) or in input order (False, "
                    "default)"),
            # Guide tree is output to the input.tree file. Default: off
            _Switch(["--treeout", "treeout"],
                    "Guide tree is output to the input.tree file (True) or "
                    "not (False, default)"),
            # Do not report progress. Default: off
            _Switch(["--quiet", "quiet"],
                    "Do not report progress (True) or not (False, default)."),
            # **** Input ****
            # Assume the sequences are nucleotide. Default: auto
            _Switch(["--nuc", "nuc"],
                    "Assume the sequences are nucleotide (True/False). "
                    "Default: auto"),
            # Assume the sequences are amino acid. Default: auto
            _Switch(["--amino", "amino"],
                    "Assume the sequences are amino acid (True/False). "
                    "Default: auto"),
            # ##################### SEEDS ###################################
            # MAFFT has multiple --seed commands where the unaligned input is
            # aligned to the seed alignment. There can be multiple seeds in
            # the form: "mafft --seed align1 --seed align2 [etc] input"
            # Effectively for n number of seed alignments. Here we're going
            # to assume 6 extra are enough
            _Option(["--seed", "seed"],
                    "Seed alignments given in alignment_n (fasta format) "
                    "are aligned with sequences in input.",
                    filename=True,
                    equate=False),
            # The old solution of also defining extra parameters with
            # ["--seed", "seed1"] etc worked, but clashes with the recent
            # code in the base class to look for duplicate parameters and
            # raise an error.  Perhaps that check should be ignored here, or
            # maybe we can handle this more elegantly...
            # TODO - Create an _OptionList parameter which allows a list to
            # be assigned to the value?
            # ##################### END SEEDS ###############################
            # The input (must be FASTA format)
            _Argument(["input"],
                      "Input file name",
                      filename=True,
                      is_required=True),
            # ###############################################################
            # mafft-profile takes a second alignment input as an argument:
            # mafft-profile align1 align2
            _Argument(["input1"],
                      "Second input file name for the mafft-profile command",
                      filename=True),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #37
0
ファイル: _Fasttree.py プロジェクト: cbirdlab/sap
    def __init__(self, cmd='fasttree', **kwargs):
        """Declare the FastTree command-line parameters.

        The parameter table is stored on ``self.parameters`` and then
        ``cmd`` plus any keyword arguments are handed to
        ``AbstractCommandline.__init__``.

        cmd -- name/path of the executable (defaults to 'fasttree').
        kwargs -- forwarded unchanged to ``AbstractCommandline.__init__``.
        """
        # The entries are listed in the same order as before; the input
        # alignment argument stays last.
        parameter_specs = [
            # ---- input handling -------------------------------------------
            _Switch(["-nt", "nt"],
                    """By default FastTree expects protein alignments, use -nt for nucleotides"""),
            _Option(["-n", "n"],
                    """-n -- read N multiple alignments in.

                    This only works with phylip interleaved format. For example, you can
                    use it with the output from phylip's seqboot. If you use -n, FastTree
                    will write 1 tree per line to standard output.""",
                    checker_function=_is_int, equate=False),
            _Switch(["-quote", "quote"],
                    """-quote -- add quotes to sequence names in output.

                    Quote sequence names in the output and allow spaces, commas,
                    parentheses, and colons in them but not ' characters (fasta files only)."""),
            _Option(["-pseudo", "pseudo"],
                    """-pseudo [weight] -- Pseudocounts are used with sequence distance estimation.

                    Use pseudocounts to estimate distances between sequences with little or no
                    overlap. (Off by default.) Recommended if analyzing the alignment has
                    sequences with little or no overlap.
                    If the weight is not specified, it is 1.0 """,
                    checker_function=_is_numeric, equate=False),
            # ---- support values -------------------------------------------
            _Option(["-boot", "boot"],
                    """Specify the number of resamples for support values.

                    Support value options:
                    By default, FastTree computes local support values by resampling the site
                    likelihoods 1,000 times and the Shimodaira Hasegawa test. If you specify -nome,
                    it will compute minimum-evolution bootstrap supports instead
                    In either case, the support values are proportions ranging from 0 to 1

                    Use -nosupport to turn off support values or -boot 100 to use just 100 resamples.""",
                    checker_function=_is_int, equate=False),
            _Switch(["-nosupport", "nosupport"],
                    """Turn off support values.

                    Support value options:
                    By default, FastTree computes local support values by resampling the site
                    likelihoods 1,000 times and the Shimodaira Hasegawa test. If you specify -nome,
                    it will compute minimum-evolution bootstrap supports instead
                    In either case, the support values are proportions ranging from 0 to 1

                    Use -nosupport to turn off support values or -boot 100 to use just 100 resamples."""),
            # ---- starting trees and progress reporting --------------------
            _Option(["-intree", "intree"],
                    """-intree newickfile -- read the starting tree in from newickfile.

                    Any branch lengths in the starting trees are ignored.
                    -intree with -n will read a separate starting tree for each alignment.""",
                    filename=True, equate=False),
            _Option(["-intree1", "intree1"],
                    """-intree1 newickfile -- read the same starting tree for each alignment.""",
                    filename=True, equate=False),
            _Switch(["-quiet", "quiet"],
                    """-quiet -- do not write to standard error during normal operation

                    (no progress indicator, no options summary, no likelihood values, etc.)"""),
            _Switch(["-nopr", "nopr"],
                    """-nopr -- do not write the progress indicator to stderr."""),
            # ---- topology refinement --------------------------------------
            _Option(["-nni", "nni"],
                    """Set the rounds of minimum-evolution nearest-neighbor interchanges

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs.""",
                    checker_function=_is_int, equate=False),
            _Option(["-spr", "spr"],
                    """Set the rounds of subtree-prune-regraft moves

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.""",
                    checker_function=_is_int, equate=False),
            _Switch(["-noml", "noml"],
                    """Deactivate min-evo NNIs and SPRs.

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.
                    Use -noml to turn off both min-evo NNIs and SPRs (useful if refining
                    an approximately maximum-likelihood tree with further NNIs) """),
            _Switch(["-mllen", "mllen"],
                    """Optimize branch lengths on a fixed topology.

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.
                    Use -mllen to optimize branch lengths without ML NNIs
                    Use -mllen -nome with -intree to optimize branch lengths on a fixed topology."""),
            _Switch(["-nome", "nome"],
                    """Changes support values calculation to a minimum-evolution bootstrap method.

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.
                    Use -mllen to optimize branch lengths without ML NNIs
                    Use -mllen -nome with -intree to optimize branch lengths on a fixed topology

                    Support value options:
                    By default, FastTree computes local support values by resampling the site
                    likelihoods 1,000 times and the Shimodaira Hasegawa test. If you specify -nome,
                    it will compute minimum-evolution bootstrap supports instead
                    In either case, the support values are proportions ranging from 0 to 1."""),
            _Option(["-mlnni", "mlnni"],
                    """Set the number of rounds of maximum-likelihood NNIs.

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.
                    Use -mlnni to set the number of rounds of maximum-likelihood NNIs.""",
                    checker_function=_is_int, equate=False),
            _Option(["-mlacc", "mlacc"],
                    """Option for optimization of branches at each NNI.

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.
                    Use -mlacc 2 or -mlacc 3 to always optimize all 5 branches at each NNI,
                    and to optimize all 5 branches in 2 or 3 rounds.""",
                    checker_function=_is_int, equate=False),
            _Switch(["-slownni", "slownni"],
                    """Turn off heuristics to avoid constant subtrees with NNIs.

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.
                    Use -slownni to turn off heuristics to avoid constant subtrees (affects both
                    ML and ME NNIs)."""),
            # ---- maximum likelihood model ---------------------------------
            _Switch(["-wag", "wag"],
                    """Maximum likelihood model options: Whelan-And-Goldman 2001 model instead of (default) Jones-Taylor-Thorton 1992 model (a.a. only)"""),
            _Switch(["-gtr", "gtr"],
                    """Maximum likelihood model options: Use generalized time-reversible instead of (default) Jukes-Cantor (nt only)"""),
            _Option(["-cat", "cat"],
                    """Maximum likelihood model options: Specify the number of rate categories of sites (default 20).""",
                    checker_function=_is_int, equate=False),
            _Switch(["-nocat", "nocat"],
                    """Maximum likelihood model options: No CAT model (just 1 category)"""),
            _Switch(["-gamma", "gamma"],
                    """Report the likelihood under the discrete gamma model.

                    Maximum likelihood model options:
                    -gamma -- after the final round of optimizing branch lengths with the CAT model,
                    report the likelihood under the discrete gamma model with the same
                    number of categories. FastTree uses the same branch lengths but
                    optimizes the gamma shape parameter and the scale of the lengths.
                    The final tree will have rescaled lengths. Used with -log, this
                    also generates per-site likelihoods for use with CONSEL, see
                    GammaLogToPaup.pl and documentation on the FastTree web site."""),
            # ---- join search and top-hit heuristics -----------------------
            _Switch(["-slow", "slow"],
                    """Use an exhaustive search.

                    Searching for the best join:
                    By default, FastTree combines the 'visible set' of fast neighbor-joining with
                    local hill-climbing as in relaxed neighbor-joining
                    -slow -- exhaustive search (like NJ or BIONJ, but different gap handling)
                    -slow takes half an hour instead of 8 seconds for 1,250 proteins"""),
            _Switch(["-fastest", "fastest"],
                    """Search the visible set (the top hit for each node) only.

                    Searching for the best join:
                    By default, FastTree combines the 'visible set' of fast neighbor-joining with
                    local hill-climbing as in relaxed neighbor-joining
                    -fastest -- search the visible set (the top hit for each node) only
                    Unlike the original fast neighbor-joining, -fastest updates visible(C)
                    after joining A and B if join(AB,C) is better than join(C,visible(C))
                    -fastest also updates out-distances in a very lazy way,
                    -fastest sets -2nd on as well, use -fastest -no2nd to avoid this"""),
            # "-2nd" is exposed under the name "second".
            _Switch(["-2nd", "second"],
                    """Turn 2nd-level top hits heuristic on.

                    Top-hit heuristics:
                    By default, FastTree uses a top-hit list to speed up search
                    Use -notop (or -slow) to turn this feature off
                    and compare all leaves to each other,
                    and all new joined nodes to each other

                    -2nd or -no2nd to turn 2nd-level top hits heuristic on or off
                    This reduces memory usage and running time but may lead to
                    marginal reductions in tree quality.
                    (By default, -fastest turns on -2nd.)"""),
            _Switch(["-no2nd", "no2nd"],
                    """Turn 2nd-level top hits heuristic off.

                    Top-hit heuristics:
                    By default, FastTree uses a top-hit list to speed up search
                    Use -notop (or -slow) to turn this feature off
                    and compare all leaves to each other,
                    and all new joined nodes to each other

                    -2nd or -no2nd to turn 2nd-level top hits heuristic on or off
                    This reduces memory usage and running time but may lead to
                    marginal reductions in tree quality.
                    (By default, -fastest turns on -2nd.)"""),
            _Option(["-seed", "seed"],
                    """Use -seed to initialize the random number generator.

                    Support value options:
                    By default, FastTree computes local support values by resampling the site
                    likelihoods 1,000 times and the Shimodaira Hasegawa test. If you specify -nome,
                    it will compute minimum-evolution bootstrap supports instead
                    In either case, the support values are proportions ranging from 0 to 1""",
                    checker_function=_is_int, equate=False),
            _Switch(["-top", "top"],
                    """Top-hit list to speed up search

                    Top-hit heuristics:
                    By default, FastTree uses a top-hit list to speed up search
                    Use -notop (or -slow) to turn this feature off
                    and compare all leaves to each other,
                    and all new joined nodes to each other"""),
            _Switch(["-notop", "notop"],
                    """Turn off top-hit list to speed up search

                    Top-hit heuristics:
                    By default, FastTree uses a top-hit list to speed up search
                    Use -notop (or -slow) to turn this feature off
                    and compare all leaves to each other,
                    and all new joined nodes to each other"""),
            _Option(["-topm", "topm"],
                    """Change the top hits calculation method

                    Top-hit heuristics:
                    By default, FastTree uses a top-hit list to speed up search
                    -topm 1.0 -- set the top-hit list size to parameter*sqrt(N)
                    FastTree estimates the top m hits of a leaf from the
                    top 2*m hits of a 'close' neighbor, where close is
                    defined as d(seed,close) < 0.75 * d(seed, hit of rank 2*m),
                    and updates the top-hits as joins proceed""",
                    checker_function=_is_numeric, equate=False),
            _Option(["-close", "close"],
                    """Modify the close heuristic for the top-hit list

                    Top-hit heuristics:
                    By default, FastTree uses a top-hit list to speed up search
                    -close 0.75 -- modify the close heuristic, lower is more conservative""",
                    checker_function=_is_numeric, equate=False),
            _Option(["-refresh", "refresh"],
                    """Parameter for conditions that joined nodes are compared to other nodes

                    Top-hit heuristics:
                    By default, FastTree uses a top-hit list to speed up search
                    -refresh 0.8 -- compare a joined node to all other nodes if its
                    top-hit list is less than 80% of the desired length,
                    or if the age of the top-hit list is log2(m) or greater""",
                    checker_function=_is_numeric, equate=False),
            # ---- distances and join weighting -----------------------------
            _Option(["-matrix", "matrix"],
                    """Specify a matrix for nucleotide or amino acid distances

                    Distances:
                    Default: For protein sequences, log-corrected distances and an
                    amino acid dissimilarity matrix derived from BLOSUM45
                    or for nucleotide sequences, Jukes-Cantor distances
                    To specify a different matrix, use -matrix FilePrefix or -nomatrix""",
                    filename=True, equate=False),
            _Switch(["-nomatrix", "nomatrix"],
                    """Specify that no matrix should be used for nucleotide or amino acid distances

                    Distances:
                    Default: For protein sequences, log-corrected distances and an
                    amino acid dissimilarity matrix derived from BLOSUM45
                    or for nucleotide sequences, Jukes-Cantor distances
                    To specify a different matrix, use -matrix FilePrefix or -nomatrix"""),
            _Switch(["-nj", "nj"],
                    """Join options: regular (unweighted) neighbor-joining (default)"""),
            _Switch(["-bionj", "bionj"],
                    """Join options: weighted joins as in BIONJ.  FastTree will also weight joins during NNIs"""),
            _Option(["-gtrrates", "gtrrates"],
                    """-gtrrates ac ag at cg ct gt""",
                    equate=False),
            _Option(["-gtrfreq", "gtrfreq"],
                    """-gtrfreq A C G T""",
                    equate=False),
            # ---- constrained topology search ------------------------------
            _Option(["-constraints", "constraints"],
                    """Specifies an alignment file for use with constrained topology searching

                    Constrained topology search options:
                    -constraints alignmentfile -- an alignment with values of 0, 1, and -
                    Not all sequences need be present. A column of 0s and 1s defines a
                    constrained split. Some constraints may be violated
                    (see 'violating constraints:' in standard error).""",
                    filename=True, equate=False),
            _Option(["-constraintWeight", "constraintWeight"],
                    """Weight strength of contraints in topology searching

                    Constrained topology search options:
                    -constraintWeight -- how strongly to weight the constraints. A value of 1
                    means a penalty of 1 in tree length for violating a constraint
                    Default: 100.0""",
                    checker_function=_is_numeric, equate=False),
            # ---- logging and miscellaneous --------------------------------
            _Option(["-log", "log"],
                    """Create log files of data such as intermediate trees and per-site rates

                    -log logfile -- save intermediate trees so you can extract
                    the trees and restart long-running jobs if they crash
                    -log also reports the per-site rates (1 means slowest category).""",
                    filename=True, equate=False),
            _Option(["-makematrix", "makematrix"],
                    """-makematrix [alignment]""",
                    filename=True, equate=False),
            _Switch(["-rawdist", "rawdist"],
                    """Use -rawdist to turn the log-correction off or to use %different instead of Jukes-Cantor in AA or NT distances

                    Distances:
                    Default: For protein sequences, log-corrected distances and an
                    amino acid dissimilarity matrix derived from BLOSUM45
                    or for nucleotide sequences, Jukes-Cantor distances
                    To specify a different matrix, use -matrix FilePrefix or -nomatrix"""),
            _Option(["-sprlength", "sprlength"],
                    """Use -sprlength set the maximum length of a SPR move (default 10) in topology refinement

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.""",
                    checker_function=_is_int, equate=False),
            _Switch(["-help", "help"], """Show the help"""),
            _Switch(["-expert", "expert"], """Show the expert level help"""),
            # ---- output file and the required input alignment -------------
            _Option(["-out", "out"],
                    """Enter <output file>

                    The path to a Newick Tree output file needs to be specified.""",
                    filename=True, equate=False),
            _Argument(["input"],
                      """Enter <input file>

                    An input file of sequence alignments in fasta or phylip format is needed.  By default FastTree expects protein
                    alignments, use -nt for nucleotides""",
                      filename=True, is_required=True),
        ]
        self.parameters = parameter_specs

        AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #38
0
ファイル: _Dialign.py プロジェクト: kaspermunch/sap
 def __init__(self, cmd="dialign2-2", **kwargs):
     """Declare the DIALIGN command-line parameters.

     Stores ``cmd`` as ``self.program_name``, builds the parameter table
     on ``self.parameters`` and then hands ``cmd`` and the keyword
     arguments to ``AbstractCommandline.__init__``.

     cmd -- name/path of the executable (defaults to "dialign2-2").
     kwargs -- forwarded unchanged to ``AbstractCommandline.__init__``.
     """
     self.program_name = cmd

     def accepts_int(value):
         # Shared checker for the options whose value must be an int.
         return isinstance(value, int)

     dialign_parameters = [
         _Switch(["-afc", "afc"],
                 "Creates additional output file '*.afc' containing data "
                 "of all fragments considered for alignment "
                 "WARNING: this file can be HUGE !"),
         _Switch(["-afc_v", "afc_v"],
                 "Like '-afc' but verbose: fragments are explicitly printed. "
                 "WARNING: this file can be EVEN BIGGER !"),
         _Switch(["-anc", "anc"],
                 "Anchored alignment. Requires a file <seq_file>.anc "
                 "containing anchor points."),
         _Switch(["-cs", "cs"],
                 "If segments are translated, not only the `Watson strand' "
                 "but also the `Crick strand' is looked at."),
         _Switch(["-cw", "cw"],
                 "Additional output file in CLUSTAL W format."),
         _Switch(["-ds", "ds"],
                 "`dna alignment speed up' - non-translated nucleic acid fragments "
                 "are taken into account only if they start with at least two "
                 "matches. Speeds up DNA alignment at the expense of sensitivity."),
         _Switch(["-fa", "fa"],
                 "Additional output file in FASTA format."),
         _Switch(["-ff", "ff"],
                 "Creates file *.frg containing information about all fragments "
                 "that are part of the respective optimal pairwise alignmnets "
                 "plus information about consistency in the multiple alignment"),
         _Option(["-fn", "fn"],
                 "Output files are named <out_file>.<extension>.",
                 equate=False),
         _Switch(["-fop", "fop"],
                 "Creates file *.fop containing coordinates of all fragments "
                 "that are part of the respective pairwise alignments."),
         _Switch(["-fsm", "fsm"],
                 "Creates file *.fsm containing coordinates of all fragments "
                 "that are part of the final alignment"),
         _Switch(["-iw", "iw"],
                 "Overlap weights switched off (by default, overlap weights are "
                 "used if up to 35 sequences are aligned). This option speeds up "
                 "the alignment but may lead to reduced alignment quality."),
         _Switch(["-lgs", "lgs"],
                 "`long genomic sequences' - combines the following options: "
                 "-ma, -thr 2, -lmax 30, -smin 8, -nta, -ff, -fop, -ff, -cs, "
                 "-ds, -pst "),
         _Switch(["-lgs_t", "lgs_t"],
                 "Like '-lgs' but with all segment pairs assessed at the peptide "
                 "level (rather than 'mixed alignments' as with the '-lgs' option). "
                 "Therefore faster than -lgs but not very sensitive for "
                 "non-coding regions."),
         _Option(["-lmax", "lmax"],
                 "Maximum fragment length = x  (default: x = 40 or x = 120 "
                 "for `translated' fragments). Shorter x speeds up the program "
                 "but may affect alignment quality.",
                 checker_function=accepts_int,
                 equate=False),
         _Switch(["-lo", "lo"],
                 "(Long Output) Additional file *.log with information about "
                 "fragments selected for pairwise alignment and about "
                 "consistency in multi-alignment proceedure."),
         _Switch(["-ma", "ma"],
                 "`mixed alignments' consisting of P-fragments and N-fragments "
                 "if nucleic acid sequences are aligned."),
         _Switch(["-mask", "mask"],
                 "Residues not belonging to selected fragments are replaced by "
                 "`*' characters in output alignment (rather than being printed "
                 "in lower-case characters)"),
         _Switch(["-mat", "mat"],
                 "Creates file *mat with substitution counts derived from the "
                 "fragments that have been selected for alignment."),
         # NOTE(review): the help text mentions a threshold "t", yet this is
         # declared as a value-less switch -- confirm against the DIALIGN docs.
         _Switch(["-mat_thr", "mat_thr"],
                 "Like '-mat' but only fragments with weight score > t "
                 "are considered"),
         _Switch(["-max_link", "max_link"],
                 "'maximum linkage' clustering used to construct sequence tree "
                 "(instead of UPGMA)."),
         _Switch(["-min_link", "min_link"],
                 "'minimum linkage' clustering used."),
         _Option(["-mot", "mot"],
                 "'motif' option.",
                 equate=False),
         _Switch(["-msf", "msf"],
                 "Separate output file in MSF format."),
         _Switch(["-n", "n"],
                 "Input sequences are nucleic acid sequences. "
                 "No translation of fragments."),
         _Switch(["-nt", "nt"],
                 "Input sequences are nucleic acid sequences and `nucleic acid "
                 "segments' are translated to `peptide segments'."),
         _Switch(["-nta", "nta"],
                 "`no textual alignment' - textual alignment suppressed. This "
                 "option makes sense if other output files are of intrest -- "
                 "e.g. the fragment files created with -ff, -fop, -fsm or -lo."),
         _Switch(["-o", "o"],
                 "Fast version, resulting alignments "
                 "may be slightly different."),
         _Switch(["-ow", "ow"],
                 "Overlap weights enforced (By default, overlap weights are used "
                 "only if up to 35 sequences are aligned since calculating "
                 "overlap weights is time consuming)."),
         _Switch(["-pst", "pst"],
                 "'print status'. Creates and updates a file *.sta with information "
                 "about the current status of the program run.  This option is "
                 "recommended if large data sets are aligned since it allows the "
                 "user to estimate the remaining running time."),
         # NOTE(review): described as a "minimum similarity value" but declared
         # as a value-less switch -- confirm against the DIALIGN docs.
         _Switch(["-smin", "smin"],
                 "Minimum similarity value for first residue pair (or codon pair) "
                 "in fragments. Speeds up protein alignment or alignment of "
                 "translated DNA fragments at the expense of sensitivity."),
         _Option(["-stars", "stars"],
                 "Maximum number of `*' characters indicating degree of local "
                 "similarity among sequences. By default, no stars are used "
                 "but numbers between 0 and 9, instead.",
                 # Accepts only values contained in 0..9.
                 checker_function=lambda count: count in range(10),
                 equate=False),
         _Switch(["-stdo", "stdo"],
                 "Results written to standard output."),
         _Switch(["-ta", "ta"],
                 "Standard textual alignment printed (overrides suppression of "
                 "textual alignments in special options, e.g. -lgs)"),
         _Option(["-thr", "thr"],
                 "Threshold T = x.",
                 checker_function=accepts_int,
                 equate=False),
         _Switch(["-xfr", "xfr"],
                 "'exclude fragments' - list of fragments can be specified that "
                 "are NOT considered for pairwise alignment"),
         # The required input file is declared last.
         _Argument(["input"],
                   "Input file name. Must be FASTA format",
                   filename=True,
                   is_required=True),
     ]
     self.parameters = dialign_parameters
     AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #39
0
ファイル: _bwa.py プロジェクト: cbirdlab/sap
    def __init__(self, cmd="bwa", **kwargs):
        """Declare the parameters of ``bwa aln`` and initialise the wrapper.

        The list starts with the static ``aln`` sub-command and the two
        required file arguments (reference, reads); the value-taking options
        and the boolean switches follow.

        :param cmd: stored as ``program_name`` and passed on to
            ``AbstractCommandline.__init__``; defaults to ``"bwa"``.
        :param kwargs: passed on unchanged to ``AbstractCommandline.__init__``.
        """
        self.program_name = cmd
        self.parameters = \
                [
                    _StaticArgument("aln"),
                    _Argument(["reference"], "Reference file name",
                              filename=True, is_required=True),
                    _Argument(["read_file"], "Read file name",
                              filename=True, is_required=True),
                    _Option(["-n", "n"],
                            "Maximum edit distance if the value is INT, or the fraction of missing alignments given 2% uniform base error rate if FLOAT. In the latter case, the maximum edit distance is automatically chosen for different read lengths. [0.04]",
                            checker_function=lambda x: isinstance(x, (int, float)),
                            equate=False),
                    # bwa documents -o as the maximum number of gap opens (INT).
                    # NOTE(review): floats are still accepted by the checker so
                    # that existing callers keep working; tighten to int if no
                    # caller relies on that.
                    _Option(["-o", "o"],
                            "Maximum number of gap opens [1]",
                            checker_function=lambda x: isinstance(x, (int, float)),
                            equate=False),
                    _Option(["-e", "e"],
                            "Maximum number of gap extensions, -1 for k-difference mode (disallowing long gaps) [-1]",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-d", "d"],
                            "Disallow a long deletion within INT bp towards the 3-end [16]",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-i", "i"],
                            "Disallow an indel within INT bp towards the ends [5]",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-l", "l"],
                            """Take the first INT subsequence as seed.

                            If INT is larger than the query sequence, seeding will be disabled.
                            For long reads, this option is typically ranged from 25 to 35 for
                            -k 2. [inf]""",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-k", "k"], "Maximum edit distance in the seed [2]",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-t", "t"], "Number of threads (multi-threading mode) [1]",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-M", "M"],
                            "Mismatch penalty. BWA will not search for suboptimal hits with a score lower than (bestScore-misMsc). [3]",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-O", "O"], "Gap open penalty [11]",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-E", "E"], "Gap extension penalty [4]",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-R", "R"],
                            """Proceed with suboptimal alignments if there are no more than INT equally best hits.

                            This option only affects paired-end mapping. Increasing this threshold helps
                            to improve the pairing accuracy at the cost of speed, especially for short
                            reads (~32bp).""",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-q", "q"],
                            # Raw string: the "\s" of "\sum" is not a valid escape
                            # sequence in a normal string literal.
                            r"""Parameter for read trimming [0].

                            BWA trims a read down to argmax_x{\sum_{i=x+1}^l(INT-q_i)} if q_l<INT
                            where l is the original read length.""",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-B", "B"],
                            "Length of barcode starting from the 5-end. When INT is positive, the barcode of each read will be trimmed before mapping and will be written at the BC SAM tag. For paired-end reads, the barcode from both ends are concatenated. [0]",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Switch(["-c", "c"],
                            "Reverse query but not complement it, which is required for alignment in the color space."),
                    _Switch(["-N", "N"],
                            "Disable iterative search. All hits with no more than maxDiff differences will be found. This mode is much slower than the default."),
                    _Switch(["-I", "I"],
                            "The input is in the Illumina 1.3+ read format (quality equals ASCII-64)."),
                    _Switch(["-b", "b"],
                            "Specify the input read sequence file is the BAM format"),
                    _Switch(["-b1", "b1"],
                            "When -b is specified, only use the first read in a read pair in mapping (skip single-end reads and the second reads)."),
                    _Switch(["-b2", "b2"],
                            "When -b is specified, only use the second read in a read pair in mapping.")
                  ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #40
0
ファイル: _samtools.py プロジェクト: kaspermunch/sap
    def __init__(self, cmd="samtools", **kwargs):
        """Set up the ``samtools view`` wrapper.

        Records ``cmd`` as ``program_name``, declares the parameter list
        (static ``view`` sub-command, boolean switches, value-taking options,
        then the required input file and the optional region) and hands
        ``cmd`` and ``kwargs`` to ``AbstractCommandline.__init__``.
        """
        def _is_text(value):
            return isinstance(value, str)

        def _is_integer(value):
            return isinstance(value, int)

        self.program_name = cmd

        subcommand = [_StaticArgument("view")]

        # Boolean flags that come before the value-taking options.
        flags = [
            _Switch(["-b", "b"], "Output in the BAM format"),
            _Switch(["-c", "c"],
                    """Instead of printing the alignments, only count them and
                    print the total number.

                    All filter options, such as '-f', '-F' and '-q',
                    are taken into account"""),
            _Switch(["-h", "h"], "Include the header in the output"),
            _Switch(["-u", "u"],
                    """Output uncompressed BAM.

                    This option saves time spent on compression/decompression
                    and is thus preferred when the output is piped to
                    another samtools command"""),
            _Switch(["-H", "H"], "Output the header only"),
            _Switch(["-S", "S"],
                    """Input is in SAM.
                    If @SQ header lines are absent,
                    the '-t' option is required."""),
        ]

        # Options carrying a value; file-valued ones are marked filename=True.
        valued = [
            _Option(["-t", "t"],
                    """This file is TAB-delimited.
                    Each line must contain the reference name and the
                    length of the reference, one line for each
                    distinct reference; additional fields are ignored.

                    This file also defines the order of the reference
                    sequences in sorting.
                    If you run   'samtools faidx <ref.fa>',
                    the resultant index file <ref.fa>.fai can be used
                    as this <in.ref_list> file.""",
                    checker_function=_is_text, equate=False, filename=True),
            _Option(["-o", "o"], "Output file",
                    checker_function=_is_text, equate=False, filename=True),
            _Option(["-f", "f"],
                    """Only output alignments with all bits in
                    INT present in the FLAG field""",
                    checker_function=_is_integer, equate=False),
            _Option(["-F", "F"], "Skip alignments with bits present in INT",
                    checker_function=_is_integer, equate=False),
            _Option(["-q", "q"], "Skip alignments with MAPQ smaller than INT",
                    checker_function=_is_integer, equate=False),
            _Option(["-r", "r"], "Only output reads in read group STR",
                    checker_function=_is_text, equate=False),
            _Option(["-R", "R"], "Output reads in read groups listed in FILE",
                    checker_function=_is_text, equate=False, filename=True),
            _Option(["-l", "l"], "Only output reads in library STR",
                    checker_function=_is_text, equate=False),
        ]

        # Last switch followed by the positional arguments.
        trailing = [
            _Switch(["-1", "fast_bam"],
                    "Use zlib compression level 1 to compress the output"),
            _Argument(["input", "input_file"], "Input File Name",
                      is_required=True, filename=True),
            _Argument(["region"], "Region", is_required=False),
        ]

        self.parameters = subcommand + flags + valued + trailing
        AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #41
0
ファイル: _samtools.py プロジェクト: cbirdlab/sap
    def __init__(self, cmd="samtools", **kwargs):
        """Initialise the wrapper for ``samtools view``.

        The long help texts are bound to local names first so that the
        parameter list itself stays compact. ``cmd`` is stored as
        ``program_name`` and, together with ``kwargs``, forwarded to
        ``AbstractCommandline.__init__``.
        """
        def _checks_str(candidate):
            return isinstance(candidate, str)

        def _checks_int(candidate):
            return isinstance(candidate, int)

        count_help = """Instead of printing the alignments, only count them and
                    print the total number.

                    All filter options, such as '-f', '-F' and '-q',
                    are taken into account"""
        uncompressed_help = """Output uncompressed BAM.

                    This option saves time spent on compression/decompression
                    and is thus preferred when the output is piped to
                    another samtools command"""
        sam_input_help = """Input is in SAM.
                    If @SQ header lines are absent,
                    the '-t' option is required."""
        ref_list_help = """This file is TAB-delimited.
                    Each line must contain the reference name and the
                    length of the reference, one line for each
                    distinct reference; additional fields are ignored.

                    This file also defines the order of the reference
                    sequences in sorting.
                    If you run   'samtools faidx <ref.fa>',
                    the resultant index file <ref.fa>.fai can be used
                    as this <in.ref_list> file."""
        flag_filter_help = """Only output alignments with all bits in
                    INT present in the FLAG field"""

        self.program_name = cmd
        self.parameters = [
            _StaticArgument("view"),
            # Boolean switches.
            _Switch(["-b", "b"], "Output in the BAM format"),
            _Switch(["-c", "c"], count_help),
            _Switch(["-h", "h"], "Include the header in the output"),
            _Switch(["-u", "u"], uncompressed_help),
            _Switch(["-H", "H"], "Output the header only"),
            _Switch(["-S", "S"], sam_input_help),
            # Value-taking options.
            _Option(["-t", "t"], ref_list_help,
                    checker_function=_checks_str, filename=True, equate=False),
            _Option(["-o", "o"], "Output file",
                    checker_function=_checks_str, filename=True, equate=False),
            _Option(["-f", "f"], flag_filter_help,
                    checker_function=_checks_int, equate=False),
            _Option(["-F", "F"], "Skip alignments with bits present in INT",
                    checker_function=_checks_int, equate=False),
            _Option(["-q", "q"], "Skip alignments with MAPQ smaller than INT",
                    checker_function=_checks_int, equate=False),
            _Option(["-r", "r"], "Only output reads in read group STR",
                    checker_function=_checks_str, equate=False),
            _Option(["-R", "R"], "Output reads in read groups listed in FILE",
                    checker_function=_checks_str, filename=True, equate=False),
            _Option(["-l", "l"], "Only output reads in library STR",
                    checker_function=_checks_str, equate=False),
            _Switch(["-1", "fast_bam"],
                    "Use zlib compression level 1 to compress the output"),
            # Positional arguments.
            _Argument(["input", "input_file"], "Input File Name",
                      is_required=True, filename=True),
            _Argument(["region"], "Region", is_required=False),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #42
0
    def __init__(self, cmd="XXmotif", **kwargs):
        """Declare the XXmotif parameters and initialise the wrapper.

        Two required positional arguments (output directory, sequence file)
        are followed by the options and switches, grouped by topic.

        :param cmd: passed on to ``AbstractCommandline.__init__``; defaults
            to ``"XXmotif"``.
        :param kwargs: passed on unchanged to ``AbstractCommandline.__init__``.
        """
        # order of parameters is the same as in XXmotif --help
        _valid_alphabet = set("ACGTNX")

        # NOTE(review): both motif checkers accept a value as soon as ONE of
        # its characters is in _valid_alphabet; `all` may have been intended.
        # Left unchanged because tightening it would reject inputs that pass
        # today - confirm against XXmotif's accepted motif syntax.
        def _has_valid_base(motif):
            return any((c in _valid_alphabet) for c in motif)

        self.parameters = [
            _Argument(["outdir", "OUTDIR"],
                      "output directory for all results",
                      filename=True,
                      is_required=True,
                      # XXmotif currently does not accept spaces in the outdir name
                      checker_function=lambda x: " " not in x),
            _Argument(["seqfile", "SEQFILE"],
                      "file name with sequences from positive set in FASTA format",
                      filename=True,
                      is_required=True,
                      # XXmotif currently only accepts a pure filename
                      checker_function=lambda x: os.path.split(x)[0] == ""),

            # Options
            _Option(["--negSet", "negSet", "negset", "NEGSET"],
                    "sequence set which has to be used as a reference set",
                    filename=True,
                    equate=False),
            _Switch(["--zoops", "zoops", "ZOOPS"],
                    "use zero-or-one occurrence per sequence model (DEFAULT)"),
            _Switch(["--mops", "mops", "MOPS"],
                    "use multiple occurrence per sequence model"),
            _Switch(["--oops", "oops", "OOPS"],
                    "use one occurrence per sequence model"),
            _Switch(["--revcomp", "revcomp", "REVCOMP"],
                    "search in reverse complement of sequences as well (DEFAULT: NO)"),
            _Option(["--background-model-order", "background-model-order", "BACKGROUND-MODEL-ORDER"],
                    "order of background distribution (DEFAULT: 2, 8(--negset) )",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Option(["--pseudo", "pseudo", "PSEUDO"],
                    "percentage of pseudocounts used (DEFAULT: 10)",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Option(["-g", "--gaps", "gaps", "GAPS"],
                    "maximum number of gaps used for start seeds [0-3] (DEFAULT: 0)",
                    # `[0-3]` is the one-element list [-3]; spell out the
                    # documented range 0..3 instead.
                    checker_function=lambda x: x in (0, 1, 2, 3),
                    equate=False),
            _Option(["--type", "type", "TYPE"],
                    "defines what kind of start seeds are used (DEFAULT: ALL) "
                    "possible types: ALL, FIVEMERS, PALINDROME, TANDEM, NOPALINDROME, NOTANDEM",
                    checker_function=lambda x: x in ["ALL", "all",
                                                     "FIVEMERS", "fivemers",
                                                     "PALINDROME", "palindrome",
                                                     "TANDEM", "tandem",
                                                     "NOPALINDROME", "nopalindrome",
                                                     "NOTANDEM", "notandem"],
                    equate=False),
            _Option(["--merge-motif-threshold", "merge-motif-threshold", "MERGE-MOTIF-THRESHOLD"],
                    "defines the similarity threshold for merging motifs (DEFAULT: HIGH) "
                    "possible modes: LOW, MEDIUM, HIGH",
                    checker_function=lambda x: x in ["LOW", "low",
                                                     "MEDIUM", "medium",
                                                     "HIGH", "high"],
                    equate=False),
            _Switch(["--no-pwm-length-optimization", "no-pwm-length-optimization", "NO-PWM-LENGTH-OPTIMIZATION"],
                    "do not optimize length during iterations (runtime advantages)"),
            _Option(["--max-match-positions", "max-match-positions", "MAX-MATCH-POSITIONS"],
                    "max number of positions per motif (DEFAULT: 17, higher values will lead to very long runtimes)",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Switch(["--batch", "batch", "BATCH"],
                    "suppress progress bars (reduce output size for batch jobs)"),
            _Option(["--maxPosSetSize", "maxPosSetSize", "maxpossetsize", "MAXPOSSETSIZE"],
                    "maximum number of sequences from the positive set used [DEFAULT: all]",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            # The --help switch is deliberately not wrapped: it does not make
            # sense in biopython.
            _Option(["--trackedMotif", "trackedMotif", "trackedmotif", "TRACKEDMOTIF"],
                    "inspect extensions and refinement of a given seed (DEFAULT: not used)",
                    checker_function=_has_valid_base,
                    equate=False),

            # Using conservation information
            _Option(["--format", "format", "FORMAT"],
                    "defines what kind of format the input sequences have (DEFAULT: FASTA)",
                    checker_function=lambda x: x in ["FASTA", "fasta",
                                                     "MFASTA", "mfasta"],
                    equate=False),
            _Option(["--maxMultipleSequences", "maxMultipleSequences", "maxmultiplesequences", "MAXMULTIPLESEQUENCES"],
                    "maximum number of sequences used in an alignment [DEFAULT: all]",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),

            # Using localization information
            _Switch(["--localization", "localization", "LOCALIZATION"],
                    "use localization information to calculate combined P-values "
                    "(sequences should have all the same length)"),
            _Option(["--downstream", "downstream", "DOWNSTREAM"],
                    "number of residues in positive set downstream of anchor point (DEFAULT: 0)",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),

            # Start with self defined motif
            _Option(["-m", "--startMotif", "startMotif", "startmotif", "STARTMOTIF"],
                    "Start motif (IUPAC characters)",
                    checker_function=_has_valid_base,
                    equate=False),
            _Option(["-p", "--profileFile", "profileFile", "profilefile", "PROFILEFILE"],
                    "profile file",
                    filename=True,
                    equate=False),
            _Option(["--startRegion", "startRegion", "startregion", "STARTREGION"],
                    "expected start position for motif occurrences relative to anchor point (--localization)",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Option(["--endRegion", "endRegion", "endregion", "ENDREGION"],
                    "expected end position for motif occurrences relative to anchor point (--localization)",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #43
0
ファイル: _bwa.py プロジェクト: kaspermunch/sap
    def __init__(self, cmd="bwa", **kwargs):
        """Declare the parameters of ``bwa aln`` and initialise the wrapper.

        The list starts with the static ``aln`` sub-command and the two
        required file arguments (reference, reads); the value-taking options
        and the boolean switches follow.

        :param cmd: stored as ``program_name`` and passed on to
            ``AbstractCommandline.__init__``; defaults to ``"bwa"``.
        :param kwargs: passed on unchanged to ``AbstractCommandline.__init__``.
        """
        self.program_name = cmd
        self.parameters = \
                [
                    _StaticArgument("aln"),
                    _Argument(["reference"], "Reference file name",
                              filename=True, is_required=True),
                    _Argument(["read_file"], "Read file name",
                              filename=True, is_required=True),
                    _Option(["-n", "n"],
                            "Maximum edit distance if the value is INT, or the fraction of missing alignments given 2% uniform base error rate if FLOAT. In the latter case, the maximum edit distance is automatically chosen for different read lengths. [0.04]",
                            checker_function=lambda x: isinstance(x, (int, float)),
                            equate=False),
                    # bwa documents -o as the maximum number of gap opens (INT).
                    # NOTE(review): floats are still accepted by the checker so
                    # that existing callers keep working; tighten to int if no
                    # caller relies on that.
                    _Option(["-o", "o"],
                            "Maximum number of gap opens [1]",
                            checker_function=lambda x: isinstance(x, (int, float)),
                            equate=False),
                    _Option(["-e", "e"],
                            "Maximum number of gap extensions, -1 for k-difference mode (disallowing long gaps) [-1]",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-d", "d"],
                            "Disallow a long deletion within INT bp towards the 3-end [16]",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-i", "i"],
                            "Disallow an indel within INT bp towards the ends [5]",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-l", "l"],
                            """Take the first INT subsequence as seed.

                            If INT is larger than the query sequence, seeding will be disabled.
                            For long reads, this option is typically ranged from 25 to 35 for
                            -k 2. [inf]""",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-k", "k"], "Maximum edit distance in the seed [2]",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-t", "t"], "Number of threads (multi-threading mode) [1]",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-M", "M"],
                            "Mismatch penalty. BWA will not search for suboptimal hits with a score lower than (bestScore-misMsc). [3]",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-O", "O"], "Gap open penalty [11]",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-E", "E"], "Gap extension penalty [4]",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-R", "R"],
                            """Proceed with suboptimal alignments if there are no more than INT equally best hits.

                            This option only affects paired-end mapping. Increasing this threshold helps
                            to improve the pairing accuracy at the cost of speed, especially for short
                            reads (~32bp).""",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-q", "q"],
                            # Raw string: the "\s" of "\sum" is not a valid escape
                            # sequence in a normal string literal.
                            r"""Parameter for read trimming [0].

                            BWA trims a read down to argmax_x{\sum_{i=x+1}^l(INT-q_i)} if q_l<INT
                            where l is the original read length.""",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Option(["-B", "B"],
                            "Length of barcode starting from the 5-end. When INT is positive, the barcode of each read will be trimmed before mapping and will be written at the BC SAM tag. For paired-end reads, the barcode from both ends are concatenated. [0]",
                            checker_function=lambda x: isinstance(x, int),
                            equate=False),
                    _Switch(["-c", "c"],
                            "Reverse query but not complement it, which is required for alignment in the color space."),
                    _Switch(["-N", "N"],
                            "Disable iterative search. All hits with no more than maxDiff differences will be found. This mode is much slower than the default."),
                    _Switch(["-I", "I"],
                            "The input is in the Illumina 1.3+ read format (quality equals ASCII-64)."),
                    _Switch(["-b", "b"],
                            "Specify the input read sequence file is the BAM format"),
                    _Switch(["-b1", "b1"],
                            "When -b is specified, only use the first read in a read pair in mapping (skip single-end reads and the second reads)."),
                    _Switch(["-b2", "b2"],
                            "When -b is specified, only use the second read in a read pair in mapping.")
                  ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #44
0
ファイル: _xxmotif.py プロジェクト: kaspermunch/sap
    def __init__(self, cmd="XXmotif", **kwargs):
        # order of parameters is the same as in XXmotif --help
        _valid_alphabet = set("ACGTNX")

        self.parameters = \
          [
          _Argument(["outdir", "OUTDIR"],
                   "output directory for all results",
                   filename = True,
                   is_required = True,
                   # XXmotif currently does not accept spaces in the outdir name
                   checker_function = lambda x: " " not in x),
          _Argument(["seqfile", "SEQFILE"],
                   "file name with sequences from positive set in FASTA format",
                   filename = True,
                   is_required = True,
                   # XXmotif currently only accepts a pure filename
                   checker_function = lambda x: os.path.split(x)[0] == ""),

          # Options
          _Option(["--negSet", "negSet", "NEGSET", "negset"],
                   "sequence set which has to be used as a reference set",
                   filename = True,
                   equate = False),
          _Switch(["--zoops", "ZOOPS", "zoops"],
                   "use zero-or-one occurrence per sequence model (DEFAULT)"),
          _Switch(["--mops", "MOPS", "mops"],
                   "use multiple occurrence per sequence model"),
          _Switch(["--oops", "OOPS", "oops"],
                   "use one occurrence per sequence model"),
          _Switch(["--revcomp", "REVCOMP", "revcomp"],
                   "search in reverse complement of sequences as well (DEFAULT: NO)"),
          _Option(["--background-model-order", "background-model-order", "BACKGROUND-MODEL-ORDER",
                   "background_model_order"],
                   "order of background distribution (DEFAULT: 2, 8(--negset) )",
                   checker_function = lambda x: isinstance(x, int),
                   equate = False),
          _Option(["--pseudo", "PSEUDO", "pseudo"],
                   "percentage of pseudocounts used (DEFAULT: 10)",
                   checker_function = lambda x: isinstance(x, int),
                   equate = False),
          _Option(["-g", "--gaps", "GAPS", "gaps"],
                   "maximum number of gaps used for start seeds [0-3] (DEFAULT: 0)",
                   checker_function = lambda x: x in [0-3],
                   equate = False),
          _Option(["--type", "TYPE", "type"],
                   "defines what kind of start seeds are used (DEFAULT: ALL)"
                   "possible types: ALL, FIVEMERS, PALINDROME, TANDEM, NOPALINDROME, NOTANDEM",
                   checker_function = lambda x: x in ["ALL", "all",
                                                      "FIVEMERS", "fivemers",
                                                      "PALINDROME", "palindrome",
                                                      "TANDEM", "tandem",
                                                      "NOPALINDROME", "nopalindrome",
                                                      "NOTANDEM", "notandem"],
                   equate = False),
          _Option(["--merge-motif-threshold", "merge-motif-threshold", "MERGE-MOTIF-THRESHOLD",
                   "merge_motif_threshold"],
                   "defines the similarity threshold for merging motifs (DEFAULT: HIGH)"
                   "possible modes: LOW, MEDIUM, HIGH",
                   checker_function = lambda x: x in ["LOW", "low",
                                                      "MEDIUM", "medium",
                                                      "HIGH", "high"],
                   equate = False),
          _Switch(["--no-pwm-length-optimization", "no-pwm-length-optimization", "NO-PWM-LENGTH-OPTIMIZATION",
                   "no_pwm_length_optimization"],
                   "do not optimize length during iterations (runtime advantages)"),
          _Option(["--max-match-positions", "max-match-positions", "MAX-MATCH-POSITIONS",
                   "max_match_positions"],
                   "max number of positions per motif (DEFAULT: 17, higher values will lead to very long runtimes)",
                   checker_function = lambda x: isinstance(x, int),
                   equate = False),
          _Switch(["--batch", "BATCH", "batch"],
                   "suppress progress bars (reduce output size for batch jobs)"),
          _Option(["--maxPosSetSize", "maxPosSetSize", "MAXPOSSETSIZE", "maxpossetsize"],
                   "maximum number of sequences from the positive set used [DEFAULT: all]",
                   checker_function = lambda x: isinstance(x, int),
                   equate = False),
          # does not make sense in biopython
          #_Switch(["--help", "help", "HELP"],
          #         "print this help page"),
          _Option(["--trackedMotif", "trackedMotif", "TRACKEDMOTIF", "trackedmotif"],
                   "inspect extensions and refinement of a given seed (DEFAULT: not used)",
                   checker_function = lambda x: any((c in _valid_alphabet) for c in x),
                   equate = False),

          # Using conservation information
          _Option(["--format", "FORMAT", "format"],
                   "defines what kind of format the input sequences have (DEFAULT: FASTA)",
                   checker_function = lambda x: x in ["FASTA", "fasta",
                                                      "MFASTA", "mfasta"],
                   equate = False),
          _Option(["--maxMultipleSequences", "maxMultipleSequences", "MAXMULTIPLESEQUENCES",
                   "maxmultiplesequences"],
                   "maximum number of sequences used in an alignment [DEFAULT: all]",
                   checker_function = lambda x: isinstance(x, int),
                   equate = False),

          # Using localization information
          _Switch(["--localization", "LOCALIZATION", "localization"],
                   "use localization information to calculate combined P-values"
                   "(sequences should have all the same length)"),
          _Option(["--downstream", "DOWNSTREAM", "downstream"],
                   "number of residues in positive set downstream of anchor point (DEFAULT: 0)",
                   checker_function = lambda x: isinstance(x, int),
                   equate = False),

          # Start with self defined motif
          _Option(["-m", "--startMotif", "startMotif", "STARTMOTIF", "startmotif"],
                   "Start motif (IUPAC characters)",
                   checker_function = lambda x: any((c in _valid_alphabet) for c in x),
                   equate = False),
          _Option(["-p", "--profileFile", "profileFile", "PROFILEFILE", "profilefile"],
                   "profile file",
                   filename = True,
                   equate = False),
          _Option(["--startRegion", "startRegion", "STARTREGION", "startregion"],
                   "expected start position for motif occurrences relative to anchor point (--localization)",
                   checker_function = lambda x: isinstance(x, int),
                   equate = False),
          _Option(["--endRegion", "endRegion", "ENDREGION", "endregion"],
                   "expected end position for motif occurrences relative to anchor point (--localization)",
                   checker_function = lambda x: isinstance(x, int),
                   equate = False),

          # XXmotif wrapper options
          _Switch(["--XXmasker", "masker"],
                   "mask the input sequences for homology, repeats and low complexity regions"),
          _Switch(["--XXmasker-pos", "maskerpos"],
                   "mask only the positive set for homology, repeats and low complexity regions"),
          _Switch(["--no-graphics", "nographics"],
                   "run XXmotif without graphical output"),
          ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #45
0
ファイル: _Fasttree.py プロジェクト: kaspermunch/sap
    def __init__(self, cmd='fasttree', **kwargs):
        """Declare the FastTree command-line parameters and initialise the wrapper.

        Fills ``self.parameters`` with one ``_Switch``, ``_Option`` or
        ``_Argument`` entry per FastTree feature and then delegates to
        ``AbstractCommandline.__init__`` with ``cmd`` and ``kwargs``.

        Every entry starts with a list of names followed by a help text. For
        switches and options the first name is the FastTree flag (e.g.
        ``-nt``) and the second is a Python-friendly spelling (note
        ``'-2nd'`` is exposed as ``'second'``).
        NOTE(review): the second name is presumably the keyword/attribute
        name accepted by ``AbstractCommandline`` -- confirm against that class.

        Args:
            cmd: Passed as-is to ``AbstractCommandline.__init__``; defaults to
                ``'fasttree'``. Presumably the executable name or path --
                confirm against the base class.
            **kwargs: Forwarded unchanged to ``AbstractCommandline.__init__``.
                Presumably initial values for the parameters declared here --
                confirm against the base class.
        """
        # NOTE(review): `_is_int` / `_is_numeric` are validators defined outside
        # this method (not visible here). `equate=False` presumably renders an
        # option as "-flag value" rather than "-flag=value", and `filename=True`
        # presumably marks the value as a path -- confirm against `_Option`.
        self.parameters = [
            # --- Input interpretation and sequence-name quoting ---
            _Switch(['-nt', 'nt'],
                """By default FastTree expects protein alignments, use -nt for nucleotides""",
                ),
            _Option(['-n', 'n'],
                """-n -- read N multiple alignments in.

                    This only works with phylip interleaved format. For example, you can
                    use it with the output from phylip's seqboot. If you use -n, FastTree
                    will write 1 tree per line to standard output.""",
                checker_function=_is_int,
                equate=False,
                ),
            _Switch(['-quote', 'quote'],
                """-quote -- add quotes to sequence names in output.

                    Quote sequence names in the output and allow spaces, commas,
                    parentheses, and colons in them but not ' characters (fasta files only).""",
                ),
            # --- Pseudocounts for distance estimation ---
            _Option(['-pseudo', 'pseudo'],
                """-pseudo [weight] -- Pseudocounts are used with sequence distance estimation.

                    Use pseudocounts to estimate distances between sequences with little or no
                    overlap. (Off by default.) Recommended if analyzing the alignment has
                    sequences with little or no overlap.
                    If the weight is not specified, it is 1.0 """,
                checker_function=_is_numeric,
                equate=False,
                ),
            # --- Support values ---
            _Option(['-boot', 'boot'],
                """Specify the number of resamples for support values.

                    Support value options:
                    By default, FastTree computes local support values by resampling the site
                    likelihoods 1,000 times and the Shimodaira Hasegawa test. If you specify -nome,
                    it will compute minimum-evolution bootstrap supports instead
                    In either case, the support values are proportions ranging from 0 to 1

                    Use -nosupport to turn off support values or -boot 100 to use just 100 resamples.""",
                checker_function=_is_int,
                equate=False,
                ),
            _Switch(['-nosupport', 'nosupport'],
                """Turn off support values.

                    Support value options:
                    By default, FastTree computes local support values by resampling the site
                    likelihoods 1,000 times and the Shimodaira Hasegawa test. If you specify -nome,
                    it will compute minimum-evolution bootstrap supports instead
                    In either case, the support values are proportions ranging from 0 to 1

                    Use -nosupport to turn off support values or -boot 100 to use just 100 resamples.""",
                ),
            # --- Starting trees ---
            _Option(['-intree', 'intree'],
                """-intree newickfile -- read the starting tree in from newickfile.

                    Any branch lengths in the starting trees are ignored.
                    -intree with -n will read a separate starting tree for each alignment.""",
                filename=True,
                equate=False,
                ),
            _Option(['-intree1', 'intree1'],
                """-intree1 newickfile -- read the same starting tree for each alignment.""",
                filename=True,
                equate=False,
                ),
            # --- Verbosity on standard error ---
            _Switch(['-quiet', 'quiet'],
                """-quiet -- do not write to standard error during normal operation

                    (no progress indicator, no options summary, no likelihood values, etc.)""",
                ),
            _Switch(['-nopr', 'nopr'],
                """-nopr -- do not write the progress indicator to stderr.""",
                ),
            # --- Topology refinement (NNI / SPR / ML rounds) ---
            # The help texts in this group deliberately repeat the shared
            # "Topology refinement" paragraph before their own sentence.
            _Option(['-nni', 'nni'],
                """Set the rounds of minimum-evolution nearest-neighbor interchanges

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs.""",
                checker_function=_is_int,
                equate=False,
                ),
            _Option(['-spr', 'spr'],
                """Set the rounds of subtree-prune-regraft moves

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.""",
                checker_function=_is_int,
                equate=False,
                ),
            _Switch(['-noml', 'noml'],
                """Deactivate min-evo NNIs and SPRs.

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.
                    Use -noml to turn off both min-evo NNIs and SPRs (useful if refining
                    an approximately maximum-likelihood tree with further NNIs) """,
                ),
            _Switch(['-mllen', 'mllen'],
                """Optimize branch lengths on a fixed topology.

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.
                    Use -mllen to optimize branch lengths without ML NNIs
                    Use -mllen -nome with -intree to optimize branch lengths on a fixed topology.""",
                ),
            _Switch(['-nome', 'nome'],
                """Changes support values calculation to a minimum-evolution bootstrap method.

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.
                    Use -mllen to optimize branch lengths without ML NNIs
                    Use -mllen -nome with -intree to optimize branch lengths on a fixed topology

                    Support value options:
                    By default, FastTree computes local support values by resampling the site
                    likelihoods 1,000 times and the Shimodaira Hasegawa test. If you specify -nome,
                    it will compute minimum-evolution bootstrap supports instead
                    In either case, the support values are proportions ranging from 0 to 1.""",
                ),
            _Option(['-mlnni', 'mlnni'],
                """Set the number of rounds of maximum-likelihood NNIs.

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.
                    Use -mlnni to set the number of rounds of maximum-likelihood NNIs.""",
                checker_function=_is_int,
                equate=False,
                ),
            _Option(['-mlacc', 'mlacc'],
                """Option for optimization of branches at each NNI.

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.
                    Use -mlacc 2 or -mlacc 3 to always optimize all 5 branches at each NNI,
                    and to optimize all 5 branches in 2 or 3 rounds.""",
                checker_function=_is_int,
                equate=False,
                ),
            _Switch(['-slownni', 'slownni'],
                """Turn off heuristics to avoid constant subtrees with NNIs.

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.
                    Use -slownni to turn off heuristics to avoid constant subtrees (affects both
                    ML and ME NNIs).""",
                ),
            # --- Maximum likelihood model options ---
            _Switch(['-wag', 'wag'],
                """Maximum likelihood model options: Whelan-And-Goldman 2001 model instead of (default) Jones-Taylor-Thorton 1992 model (a.a. only)""",
                ),
            _Switch(['-gtr', 'gtr'],
                """Maximum likelihood model options: Use generalized time-reversible instead of (default) Jukes-Cantor (nt only)""",
                ),
            _Option(['-cat', 'cat'],
                """Maximum likelihood model options: Specify the number of rate categories of sites (default 20).""",
                checker_function=_is_int,
                equate=False,
                ),
            _Switch(['-nocat', 'nocat'],
                """Maximum likelihood model options: No CAT model (just 1 category)""",
                ),
            _Switch(['-gamma', 'gamma'],
                """Report the likelihood under the discrete gamma model.

                    Maximum likelihood model options:
                    -gamma -- after the final round of optimizing branch lengths with the CAT model,
                    report the likelihood under the discrete gamma model with the same
                    number of categories. FastTree uses the same branch lengths but
                    optimizes the gamma shape parameter and the scale of the lengths.
                    The final tree will have rescaled lengths. Used with -log, this
                    also generates per-site likelihoods for use with CONSEL, see
                    GammaLogToPaup.pl and documentation on the FastTree web site.""",
                ),
            # --- Searching for the best join ---
            _Switch(['-slow', 'slow'],
                """Use an exhaustive search.

                    Searching for the best join:
                    By default, FastTree combines the 'visible set' of fast neighbor-joining with
                    local hill-climbing as in relaxed neighbor-joining
                    -slow -- exhaustive search (like NJ or BIONJ, but different gap handling)
                    -slow takes half an hour instead of 8 seconds for 1,250 proteins""",
                ),
            _Switch(['-fastest', 'fastest'],
                """Search the visible set (the top hit for each node) only.

                    Searching for the best join:
                    By default, FastTree combines the 'visible set' of fast neighbor-joining with
                    local hill-climbing as in relaxed neighbor-joining
                    -fastest -- search the visible set (the top hit for each node) only
                    Unlike the original fast neighbor-joining, -fastest updates visible(C)
                    after joining A and B if join(AB,C) is better than join(C,visible(C))
                    -fastest also updates out-distances in a very lazy way,
                    -fastest sets -2nd on as well, use -fastest -no2nd to avoid this""",
                ),
            # --- Top-hit heuristics: 2nd-level top hits ---
            # The Python-side name is 'second' because '2nd' is not a valid
            # Python identifier.
            _Switch(['-2nd', 'second'],
                """Turn 2nd-level top hits heuristic on.

                    Top-hit heuristics:
                    By default, FastTree uses a top-hit list to speed up search
                    Use -notop (or -slow) to turn this feature off
                    and compare all leaves to each other,
                    and all new joined nodes to each other

                    -2nd or -no2nd to turn 2nd-level top hits heuristic on or off
                    This reduces memory usage and running time but may lead to
                    marginal reductions in tree quality.
                    (By default, -fastest turns on -2nd.)""",
                ),
            _Switch(['-no2nd', 'no2nd'],
                """Turn 2nd-level top hits heuristic off.

                    Top-hit heuristics:
                    By default, FastTree uses a top-hit list to speed up search
                    Use -notop (or -slow) to turn this feature off
                    and compare all leaves to each other,
                    and all new joined nodes to each other

                    -2nd or -no2nd to turn 2nd-level top hits heuristic on or off
                    This reduces memory usage and running time but may lead to
                    marginal reductions in tree quality.
                    (By default, -fastest turns on -2nd.)""",
                ),
            # --- Random number generator seed ---
            # The help text reuses the "Support value options" paragraph.
            _Option(['-seed', 'seed'],
                """Use -seed to initialize the random number generator.

                    Support value options:
                    By default, FastTree computes local support values by resampling the site
                    likelihoods 1,000 times and the Shimodaira Hasegawa test. If you specify -nome,
                    it will compute minimum-evolution bootstrap supports instead
                    In either case, the support values are proportions ranging from 0 to 1""",
                checker_function=_is_int,
                equate=False,
                ),
            # --- Top-hit heuristics: top-hit list and its tuning ---
            _Switch(['-top', 'top'],
                """Top-hit list to speed up search

                    Top-hit heuristics:
                    By default, FastTree uses a top-hit list to speed up search
                    Use -notop (or -slow) to turn this feature off
                    and compare all leaves to each other,
                    and all new joined nodes to each other""",
                ),
            _Switch(['-notop', 'notop'],
                """Turn off top-hit list to speed up search

                    Top-hit heuristics:
                    By default, FastTree uses a top-hit list to speed up search
                    Use -notop (or -slow) to turn this feature off
                    and compare all leaves to each other,
                    and all new joined nodes to each other""",
                ),
            _Option(['-topm', 'topm'],
                """Change the top hits calculation method

                    Top-hit heuristics:
                    By default, FastTree uses a top-hit list to speed up search
                    -topm 1.0 -- set the top-hit list size to parameter*sqrt(N)
                    FastTree estimates the top m hits of a leaf from the
                    top 2*m hits of a 'close' neighbor, where close is
                    defined as d(seed,close) < 0.75 * d(seed, hit of rank 2*m),
                    and updates the top-hits as joins proceed""",
                checker_function=_is_numeric,
                equate=False,
                ),
            _Option(['-close', 'close'],
                """Modify the close heuristic for the top-hit list

                    Top-hit heuristics:
                    By default, FastTree uses a top-hit list to speed up search
                    -close 0.75 -- modify the close heuristic, lower is more conservative""",
                checker_function=_is_numeric,
                equate=False,
                ),
            _Option(['-refresh', 'refresh'],
                """Parameter for conditions that joined nodes are compared to other nodes

                    Top-hit heuristics:
                    By default, FastTree uses a top-hit list to speed up search
                    -refresh 0.8 -- compare a joined node to all other nodes if its
                    top-hit list is less than 80% of the desired length,
                    or if the age of the top-hit list is log2(m) or greater""",
                checker_function=_is_numeric,
                equate=False,
                ),
            # --- Distance matrices ---
            _Option(['-matrix', 'matrix'],
                """Specify a matrix for nucleotide or amino acid distances

                    Distances:
                    Default: For protein sequences, log-corrected distances and an
                    amino acid dissimilarity matrix derived from BLOSUM45
                    or for nucleotide sequences, Jukes-Cantor distances
                    To specify a different matrix, use -matrix FilePrefix or -nomatrix""",
                filename=True,
                equate=False,
                ),
            _Switch(['-nomatrix', 'nomatrix'],
                """Specify that no matrix should be used for nucleotide or amino acid distances

                    Distances:
                    Default: For protein sequences, log-corrected distances and an
                    amino acid dissimilarity matrix derived from BLOSUM45
                    or for nucleotide sequences, Jukes-Cantor distances
                    To specify a different matrix, use -matrix FilePrefix or -nomatrix""",
                ),
            # --- Join options ---
            _Switch(['-nj', 'nj'],
                """Join options: regular (unweighted) neighbor-joining (default)""",
                ),
            _Switch(['-bionj', 'bionj'],
                """Join options: weighted joins as in BIONJ.  FastTree will also weight joins during NNIs""",
                ),
            # --- GTR rates / frequencies ---
            # No checker_function is supplied for these two options, unlike the
            # numeric options elsewhere in this list.
            _Option(['-gtrrates', 'gtrrates'],
                """-gtrrates ac ag at cg ct gt""",
                equate=False,
                ),
            _Option(['-gtrfreq', 'gtrfreq'],
                """-gtrfreq A C G T""",
                equate=False,
                ),
            # --- Constrained topology search ---
            _Option(['-constraints', 'constraints'],
                """Specifies an alignment file for use with constrained topology searching

                    Constrained topology search options:
                    -constraints alignmentfile -- an alignment with values of 0, 1, and -
                    Not all sequences need be present. A column of 0s and 1s defines a
                    constrained split. Some constraints may be violated
                    (see 'violating constraints:' in standard error).""",
                filename=True,
                equate=False,
                ),
            _Option(['-constraintWeight', 'constraintWeight'],
                """Weight strength of contraints in topology searching

                    Constrained topology search options:
                    -constraintWeight -- how strongly to weight the constraints. A value of 1
                    means a penalty of 1 in tree length for violating a constraint
                    Default: 100.0""",
                checker_function=_is_numeric,
                equate=False,
                ),
            # --- Logging, distance-matrix output and remaining refinement knobs ---
            _Option(['-log', 'log'],
                """Create log files of data such as intermediate trees and per-site rates

                    -log logfile -- save intermediate trees so you can extract
                    the trees and restart long-running jobs if they crash
                    -log also reports the per-site rates (1 means slowest category).""",
                filename=True,
                equate=False,
                ),
            _Option(['-makematrix', 'makematrix'],
                """-makematrix [alignment]""",
                filename=True,
                equate=False,
                ),
            _Switch(['-rawdist', 'rawdist'],
                """Use -rawdist to turn the log-correction off or to use %different instead of Jukes-Cantor in AA or NT distances

                    Distances:
                    Default: For protein sequences, log-corrected distances and an
                    amino acid dissimilarity matrix derived from BLOSUM45
                    or for nucleotide sequences, Jukes-Cantor distances
                    To specify a different matrix, use -matrix FilePrefix or -nomatrix""",
                ),
            _Option(['-sprlength', 'sprlength'],
                """Use -sprlength set the maximum length of a SPR move (default 10) in topology refinement

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.""",
                checker_function=_is_int,
                equate=False,
                ),
             # --- Help output ---
             _Switch(['-help', 'help'],
                """Show the help"""
                ),
             _Switch(['-expert', 'expert'],
                """Show the expert level help"""
                ),
             # --- Output file and the required positional input ---
             # These are declared last; the input alignment is the only entry
             # marked is_required=True.
             # NOTE(review): list order presumably determines the order on the
             # generated command line -- confirm against AbstractCommandline.
             _Option(['-out', 'out'],
                """Enter <output file>

                    The path to a Newick Tree output file needs to be specified.""",
                filename=True,
                equate=False,
                ),
             _Argument(['input'],
                """Enter <input file>

                    An input file of sequence alignments in fasta or phylip format is needed.  By default FastTree expects protein
                    alignments, use -nt for nucleotides""",
                filename=True,
                is_required=True,
                ),
                ]

        # Hand the declared parameter table (already on self) plus the caller's
        # arguments to the shared base-class initialiser.
        AbstractCommandline.__init__(self, cmd, **kwargs)
コード例 #46
0
ファイル: _Dialign.py プロジェクト: cbirdlab/sap
 def __init__(self, cmd="dialign2-2", **kwargs):
     self.program_name = cmd
     self.parameters = \
         [
         _Switch(["-afc", "afc"],
                 "Creates additional output file '*.afc' "
                 "containing data of all fragments considered "
                 "for alignment WARNING: this file can be HUGE !"),
         _Switch(["-afc_v", "afc_v"],
                 "Like '-afc' but verbose: fragments are explicitly "
                 "printed. WARNING: this file can be EVEN BIGGER !"),
         _Switch(["-anc", "anc"],
                 "Anchored alignment. Requires a file <seq_file>.anc "
                 "containing anchor points."),
         _Switch(["-cs", "cs"],
                 "If segments are translated, not only the `Watson "
                 "strand' but also the `Crick strand' is looked at."),
         _Switch(["-cw", "cw"],
                 "Additional output file in CLUSTAL W format."),
         _Switch(["-ds", "ds"],
                 "`dna alignment speed up' - non-translated nucleic acid "
                 "fragments are taken into account only if they start "
                 "with at least two matches. Speeds up DNA alignment at "
                 "the expense of sensitivity."),
         _Switch(["-fa", "fa"],
                 "Additional output file in FASTA format."),
         _Switch(["-ff", "ff"],
                 "Creates file *.frg containing information about all "
                 "fragments that are part of the respective optimal "
                 "pairwise alignmnets plus information about "
                 "consistency in the multiple alignment"),
         _Option(["-fn", "fn"],
                 "Output files are named <out_file>.<extension>.",
                 equate=False),
         _Switch(["-fop", "fop"],
                 "Creates file *.fop containing coordinates of all "
                 "fragments that are part of the respective pairwise alignments."),
         _Switch(["-fsm", "fsm"],
                 "Creates file *.fsm containing coordinates of all "
                 "fragments that are part of the final alignment"),
         _Switch(["-iw", "iw"],
                 "Overlap weights switched off (by default, overlap "
                 "weights are used if up to 35 sequences are aligned). "
                 "This option speeds up the alignment but may lead "
                 "to reduced alignment quality."),
         _Switch(["-lgs", "lgs"],
                 "`long genomic sequences' - combines the following "
                 "options: -ma, -thr 2, -lmax 30, -smin 8, -nta, -ff, "
                 "-fop, -ff, -cs, -ds, -pst "),
         _Switch(["-lgs_t", "lgs_t"],
                 "Like '-lgs' but with all segment pairs assessed "
                 "at the peptide level (rather than 'mixed alignments' "
                 "as with the '-lgs' option). Therefore faster than "
                 "-lgs but not very sensitive for non-coding regions."),
         _Option(["-lmax", "lmax"],
                 "Maximum fragment length = x  (default: x = 40 or "
                 "x = 120 for `translated' fragments). Shorter x "
                 "speeds up the program but may affect alignment quality.",
                 checker_function=lambda x: isinstance(x, int),
                 equate=False),
         _Switch(["-lo", "lo"],
                 "(Long Output) Additional file *.log with information "
                 "about fragments selected for pairwise alignment and "
                 "about consistency in multi-alignment proceedure."),
         _Switch(["-ma", "ma"],
                 "`mixed alignments' consisting of P-fragments and "
                 "N-fragments if nucleic acid sequences are aligned."),
         _Switch(["-mask", "mask"],
                 "Residues not belonging to selected fragments are "
                 "replaced by `*' characters in output alignment "
                 "(rather than being printed in lower-case characters)"),
         _Switch(["-mat", "mat"],
                 "Creates file *mat with substitution counts derived "
                 "from the fragments that have been selected for alignment."),
         _Switch(["-mat_thr", "mat_thr"],
                 "Like '-mat' but only fragments with weight score "
                 "> t are considered"),
         _Switch(["-max_link", "max_link"],
                 "'maximum linkage' clustering used to construct "
                 "sequence tree (instead of UPGMA)."),
         _Switch(["-min_link", "min_link"],
                 "'minimum linkage' clustering used."),
         _Option(["-mot", "mot"],
                 "'motif' option.",
                 equate=False),
         _Switch(["-msf", "msf"],
                 "Separate output file in MSF format."),
         _Switch(["-n", "n"],
                 "Input sequences are nucleic acid sequences. "
                 "No translation of fragments."),
         _Switch(["-nt", "nt"],
                 "Input sequences are nucleic acid sequences and "
                 "`nucleic acid segments' are translated to `peptide "
                 "segments'."),
         _Switch(["-nta", "nta"],
                 "`no textual alignment' - textual alignment suppressed. "
                 "This option makes sense if other output files are of "
                 "intrest -- e.g. the fragment files created with -ff, "
                 "-fop, -fsm or -lo."),
         _Switch(["-o", "o"],
                 "Fast version, resulting alignments may be slightly "
                 "different."),
         _Switch(["-ow", "ow"],
                 "Overlap weights enforced (By default, overlap weights "
                 "are used only if up to 35 sequences are aligned since "
                 "calculating overlap weights is time consuming)."),
         _Switch(["-pst", "pst"],
                 "'print status'. Creates and updates a file *.sta with "
                 "information about the current status of the program "
                 "run.  This option is recommended if large data sets "
                 "are aligned since it allows the user to estimate the "
                 "remaining running time."),
         _Switch(["-smin", "smin"],
                 "Minimum similarity value for first residue pair "
                 "(or codon pair) in fragments. Speeds up protein "
                 "alignment or alignment of translated DNA fragments "
                 "at the expense of sensitivity."),
         _Option(["-stars", "stars"],
                 "Maximum number of `*' characters indicating degree "
                 "of local similarity among sequences. By default, no "
                 "stars are used but numbers between 0 and 9, instead.",
                 checker_function = lambda x: x in range(0, 10),
                 equate=False),
         _Switch(["-stdo", "stdo"],
                 "Results written to standard output."),
         _Switch(["-ta", "ta"],
                 "Standard textual alignment printed (overrides "
                 "suppression of textual alignments in special "
                 "options, e.g. -lgs)"),
         _Option(["-thr", "thr"],
                 "Threshold T = x.",
                 checker_function = lambda x: isinstance(x, int),
                 equate=False),
         _Switch(["-xfr", "xfr"],
                 "'exclude fragments' - list of fragments can be "
                 "specified that are NOT considered for pairwise alignment"),
         _Argument(["input"],
                   "Input file name. Must be FASTA format",
                   filename=True,
                   is_required=True),
         ]
     AbstractCommandline.__init__(self, cmd, **kwargs)