Example #1
0
 def __init__(self, cmd="samtools", **kwargs):
     """Declare the ``samtools phase`` parameters and initialise the wrapper.

     ``cmd`` is stored as ``program_name``; ``cmd`` and ``kwargs`` are then
     passed on to ``AbstractCommandline.__init__``.
     """
     def _is_str(value):
         return isinstance(value, str)

     def _is_int(value):
         return isinstance(value, int)

     self.program_name = cmd
     # Listed as: sub-command, required input file, then the flags.
     self.parameters = [
         _StaticArgument("phase"),
         _Argument(
             ["input", "input_bam", "in_bam"],
             "Input file",
             filename=True,
             is_required=True,
         ),
         _Switch(["-A", "A"], "Drop reads with ambiguous phase"),
         _Option(
             ["-b", "b"],
             "Prefix of BAM output",
             filename=True,
             equate=False,
             checker_function=_is_str,
         ),
         _Switch(["-F", "F"], "Do not attempt to fix chimeric reads"),
         _Option(
             ["-k", "k"],
             "Maximum length for local phasing",
             equate=False,
             checker_function=_is_int,
         ),
         _Option(
             ["-q", "q"],
             """Minimum Phred-scaled LOD to
                 call a heterozygote""",
             equate=False,
             checker_function=_is_int,
         ),
         _Option(
             ["-Q", "Q"],
             """Minimum base quality to be
                 used in het calling""",
             equate=False,
             checker_function=_is_int,
         ),
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
Example #2
0
 def __init__(self, cmd="samtools", **kwargs):
     """Declare the ``samtools merge`` parameters and initialise the wrapper.

     ``cmd`` is stored as ``program_name``; ``cmd`` and ``kwargs`` are then
     passed on to ``AbstractCommandline.__init__``.
     """
     def _is_str(value):
         return isinstance(value, str)

     self.program_name = cmd
     # Listed as: sub-command, switches, valued options, then the output
     # file followed by the list of input files.
     self.parameters = [
         _StaticArgument("merge"),
         _Switch(
             ["-n", "n"],
             """The input alignments are sorted by read names
                 rather than by chromosomal coordinates""",
         ),
         _Switch(
             ["-r", "r"],
             """Attach an RG tag to each alignment.
                 The tag value is inferred from file names""",
         ),
         _Switch(["-u", "u"], "Uncompressed BAM output"),
         _Switch(
             ["-1", "fast_bam"],
             """Use zlib compression level 1
                                        to compress the output""",
         ),
         _Switch(
             ["-f", "f"],
             """Force to overwrite the
                                 output file if present""",
         ),
         _Option(
             ["-h", "h"],
             """Use the lines of FILE as '@'
                                 headers to be copied to out.bam""",
             filename=True,
             equate=False,
             checker_function=_is_str,
         ),
         _Option(
             ["-R", "R"],
             "Merge files in the specified region indicated by STR",
             equate=False,
             checker_function=_is_str,
         ),
         _Argument(
             ["output_bam", "out_bam", "out", "output"],
             "Output BAM file",
             filename=True,
             is_required=True,
         ),
         _ArgumentList(
             ["input_bam", "in_bam", "input", "bam"],
             "Input BAM",
             filename=True,
             is_required=True,
         ),
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
Example #3
0
 def __init__(self, cmd='makeblastdb', **kwargs):
     """Declare the makeblastdb parameters and initialise the wrapper.

     The core switches (``-h``, ``-help``, ``-version``) are placed ahead of
     the output-configuration options, just in case there are any arguments
     which must come last. ``cmd`` must not be ``None``; ``cmd`` and
     ``kwargs`` are forwarded to ``AbstractCommandline.__init__``.
     """
     assert cmd is not None
     # The list is written out directly in its final order.  Previously the
     # output options were assigned first and the core switches prepended
     # inside a try/except AttributeError whose handler could never run,
     # because ``self.parameters`` had just been set.
     self.parameters = [
         # Core:
         _Switch(["-h", "h"],
                 "Print USAGE and DESCRIPTION;  ignore other arguments."),
         _Switch(["-help", "help"],
                 "Print USAGE, DESCRIPTION and ARGUMENTS description; "
                 "ignore other arguments."),
         _Switch(["-version", "version"],
                 "Print version number;  ignore other arguments."),
         # Output configuration options
         _Option(["-out", "out"],
                 "Output file prefix for db.",
                 filename=True,
                 equate=False),
         _Option(["-in", "db"],
                 "The sequence create db with.",
                 filename=True,
                 equate=False),  # Should this be required?
         _Option(["-dbtype", "dbtype"],
                 "Molecule type of target db (string, 'nucl' or 'prot').",
                 equate=False),
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
Example #4
0
    def __init__(self, cmd="supermatcher", **kwargs):
        """Declare the supermatcher parameters and initialise the wrapper.

        ``cmd`` and ``kwargs`` are forwarded to
        ``_EmbossCommandLine.__init__`` once ``self.parameters`` is set.
        """
        # Inputs and scoring settings; the two sequences and both gap
        # penalties are flagged as required.
        alignment_inputs = [
            _Option(
                ["-asequence", "asequence"],
                "First sequence to align",
                filename=True,
                is_required=True,
            ),
            _Option(
                ["-bsequence", "bsequence"],
                "Second sequence to align",
                filename=True,
                is_required=True,
            ),
            _Option(["-gapopen", "gapopen"], "Gap open penalty", is_required=True),
            _Option(
                ["-gapextend", "gapextend"],
                "Gap extension penalty",
                is_required=True,
            ),
            _Option(["-datafile", "datafile"], "Matrix file", filename=True),
        ]
        # Settings that describe the sequences and the reported output.
        reporting = [
            _Switch(
                ["-nobrief", "nobrief"],
                "Display extended identity and similarity",
            ),
            _Switch(["-brief", "brief"], "Display brief identity and similarity"),
            _Option(
                ["-similarity", "similarity"],
                "Display percent identity and similarity",
            ),
            _Option(
                ["-snucleotide", "snucleotide"],
                "Sequences are nucleotide (boolean)",
            ),
            _Option(["-sprotein", "sprotein"], "Sequences are protein (boolean)"),
            _Option(
                ["-aformat", "aformat"],
                "Display output in a different specified output format",
            ),
        ]
        self.parameters = alignment_inputs + reporting
        _EmbossCommandLine.__init__(self, cmd, **kwargs)
Example #5
0
 def __init__(self, cmd="samtools", **kwargs):
     """Set up the parameter list for ``samtools phase``.

     Stores ``cmd`` as ``program_name``, fills ``self.parameters`` and then
     calls ``AbstractCommandline.__init__`` with ``cmd`` and ``kwargs``.
     """
     def _accepts_str(candidate):
         return isinstance(candidate, str)

     def _accepts_int(candidate):
         return isinstance(candidate, int)

     self.program_name = cmd

     phase_parameters = [_StaticArgument("phase")]
     phase_parameters.append(
         _Argument(
             ["input", "input_bam", "in_bam"],
             "Input file",
             filename=True,
             is_required=True,
         )
     )
     phase_parameters.append(
         _Switch(["-A", "A"], "Drop reads with ambiguous phase")
     )
     phase_parameters.append(
         _Option(
             ["-b", "b"],
             "Prefix of BAM output",
             filename=True,
             equate=False,
             checker_function=_accepts_str,
         )
     )
     phase_parameters.append(
         _Switch(["-F", "F"], "Do not attempt to fix chimeric reads")
     )
     phase_parameters.append(
         _Option(
             ["-k", "k"],
             "Maximum length for local phasing",
             equate=False,
             checker_function=_accepts_int,
         )
     )
     phase_parameters.append(
         _Option(
             ["-q", "q"],
             """Minimum Phred-scaled LOD to
                 call a heterozygote""",
             equate=False,
             checker_function=_accepts_int,
         )
     )
     phase_parameters.append(
         _Option(
             ["-Q", "Q"],
             """Minimum base quality to be
                 used in het calling""",
             equate=False,
             checker_function=_accepts_int,
         )
     )
     self.parameters = phase_parameters
     AbstractCommandline.__init__(self, cmd, **kwargs)
Example #6
0
	def __init__(self, cmd="supermatcher", **kwargs):
		"""Set up the supermatcher parameter list.

		Fills ``self.parameters`` and then calls
		``_EmbossCommandLine.__init__`` with ``cmd`` and ``kwargs``.
		"""
		supermatcher_parameters = []
		add = supermatcher_parameters.append

		# Required sequence inputs and gap penalties, plus the matrix file.
		add(_Option(["-asequence", "asequence"], "First sequence to align",
			filename=True, is_required=True))
		add(_Option(["-bsequence", "bsequence"], "Second sequence to align",
			filename=True, is_required=True))
		add(_Option(["-gapopen", "gapopen"], "Gap open penalty",
			is_required=True))
		add(_Option(["-gapextend", "gapextend"], "Gap extension penalty",
			is_required=True))
		add(_Option(["-datafile", "datafile"], "Matrix file", filename=True))

		# Output and sequence-type settings.
		add(_Switch(["-nobrief", "nobrief"],
			"Display extended identity and similarity"))
		add(_Switch(["-brief", "brief"],
			"Display brief identity and similarity"))
		add(_Option(["-similarity", "similarity"],
			"Display percent identity and similarity"))
		add(_Option(["-snucleotide", "snucleotide"],
			"Sequences are nucleotide (boolean)"))
		add(_Option(["-sprotein", "sprotein"],
			"Sequences are protein (boolean)"))
		add(_Option(["-aformat", "aformat"],
			"Display output in a different specified output format"))

		self.parameters = supermatcher_parameters
		_EmbossCommandLine.__init__(self, cmd, **kwargs)
Example #7
0
    def __init__(self, cmd="cmbuild", **kwargs):
        """Declare the cmbuild parameters and initialise the wrapper.

        The two positional arguments (``modelname`` then ``stkfile``) are
        listed last and are both required. ``cmd`` and ``kwargs`` are
        forwarded to ``AbstractCommandline.__init__``.
        """
        # Help strings split across adjacent literals need an explicit
        # trailing space on each fragment, otherwise the words run together.
        self.parameters = [
            _Option(['--gapthresh', 'gapthresh'],
                    'Threshold for percent of gaps '
                    'in column to be considered as insertion',
                    equate=False),
            _Option(['--cmaxid', 'cmaxid'],
                    'Maximum identity between sequences '
                    'in different clusters',
                    equate=False),
            _Option(['--ctarget', 'ctarget'], 'Number of clusters',
                    equate=False),
            _Option(['--cdump', 'cdump'], 'Dump the clusters into a file',
                    equate=False),
            _Switch(['--corig', 'corig'],
                    'Create a model from all the sequences '
                    'in addition to the clustered models'),
            _Switch(['--call', 'call'],
                    'Create a model from each sequence in the sto file'),
            _Option(['--refine', 'refine'],
                    'Refine the model by iteratively '
                    'generating a model and aligning the sequences to it, the '
                    'alignment is saved to the given filename',
                    equate=False),
            _Switch(['--gibbs', 'gibbs'],
                    'Gibbs sample from the refined models '
                    '(only with --refine)'),
            _Switch(['--rf', 'rf'],
                    'Uses the RF line to build a model '
                    '(useful when expanding seed alignment that has a GF line '
                    'from cmalign)'),
            _Option(['--rsearch', 'rsearch'],
                    'build RSEARCH model using the '
                    'given RIBOSUM matrix (use only with --call or a single '
                    'input sequence)',
                    equate=False),
            _Argument(['modelname'], 'name of model',
                      filename=True, is_required=True),
            _Argument(['stkfile'],
                      'multiple sequence alignment in Stockholm format',
                      filename=True, is_required=True),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
    def __init__(self, cmd="samtools", **kwargs):
        """Declare the ``samtools rmdup`` parameters and initialise the wrapper.

        ``cmd`` is stored as ``program_name``; ``cmd`` and ``kwargs`` are
        then passed on to ``AbstractCommandline.__init__``.
        """
        self.program_name = cmd

        subcommand = _StaticArgument("rmdup")
        single_end_switch = _Switch(
            ["-s", "s"],
            """Remove duplicates for single-end reads.

                    By default, the command works for paired-end
                    reads only""",
        )
        treat_as_single_switch = _Switch(
            ["-S", "S"],
            """Treat paired-end reads
                                    as single-end reads""",
        )
        # Both file arguments are required; input precedes output.
        source_bam = _Argument(
            ["in_bam", "sorted_bam", "input_bam", "input", "input_file"],
            "Name Sorted Alignment File ",
            filename=True,
            is_required=True,
        )
        target_bam = _Argument(
            ["out_bam", "output_bam", "output", "output_file"],
            "Output file",
            filename=True,
            is_required=True,
        )

        self.parameters = [
            subcommand,
            single_end_switch,
            treat_as_single_switch,
            source_bam,
            target_bam,
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
    def __init__(self, cmd="samtools", **kwargs):
        """Declare the ``samtools sort`` parameters and initialise the wrapper.

        ``cmd`` is stored as ``program_name``; ``cmd`` and ``kwargs`` are
        then passed on to ``AbstractCommandline.__init__``.
        """
        def _is_int(value):
            return isinstance(value, int)

        self.program_name = cmd

        # options for version samtools 0.0.19
        sort_parameters = [_StaticArgument("sort")]
        sort_parameters.extend(
            [
                _Switch(
                    ["-o", "o"],
                    """Output the final alignment
                                    to the standard output""",
                ),
                _Switch(
                    ["-n", "n"],
                    """Sort by read names rather
                                    than by chromosomal coordinates""",
                ),
                _Option(
                    ["-m", "m"],
                    "Approximately the maximum required memory",
                    equate=False,
                    checker_function=_is_int,
                ),
            ]
        )
        # Required positional arguments: input file, then output prefix.
        sort_parameters.extend(
            [
                _Argument(
                    ["input"],
                    "Input BAM file",
                    filename=True,
                    is_required=True,
                ),
                _Argument(
                    ["out_prefix"],
                    "Output prefix",
                    filename=True,
                    is_required=True,
                ),
            ]
        )
        self.parameters = sort_parameters
        AbstractCommandline.__init__(self, cmd, **kwargs)
Example #10
0
 def __init__(self, cmd="psicov", **kwargs):
     """Declare the psicov parameters and initialise the wrapper.

     Switches come first, then the valued options, and finally the
     required ``alnfile`` argument. ``cmd`` and ``kwargs`` are forwarded to
     ``AbstractCommandline.__init__``.
     """
     self.parameters = [
         _Switch(["-a", "lasso"], "use approximate Lasso algorithm"),
         _Switch(
             ["-n", "noshrink"],
             "don't pre-shrink the sample covariance matrix",
         ),
         # Help-text typos corrected: "filer", "paramter", "sparation".
         _Switch(["-f", "filter"], "filter low-scoring contacts"),
         _Switch(
             ["-p", "ppv_output"],
             "output PPV estimates rather than raw scores",
         ),
         _Switch(["-l", "noapc"], "don't apply APC to Lasso output"),
         _Option(["-r", "rho"], "set initial rho parameter", equate=False),
         _Option(
             ["-d", "sparsity"],
             "set target precision matrix sparsity "
             "[default: 0; not specified]",
             equate=False,
         ),
         _Option(
             ["-t", "convergence_threshold"],
             "set Lasso convergence threshold [default: 1e-4]",
             equate=False,
         ),
         _Option(
             ["-i", "blosum_weighting"],
             "select BLOSUM-like weighting with given identity threshold "
             "[default selects threshold automatically]",
             equate=False,
         ),
         _Option(
             ["-c", "pseudocount"],
             "set pseudocount value [default: 1]",
             equate=False,
         ),
         _Option(
             ["-j", "sequence_separation"],
             "set minimum sequence separation [default: 5]",
             equate=False,
         ),
         _Option(
             ["-g", "gap_fraction"],
             "set maximum fraction of gaps [default: 0.9]",
             equate=False,
         ),
         _Option(
             ["-z", "nr_threads"],
             "set maximum number of threads",
             equate=False,
         ),
         _Option(
             ["-b", "rho_parameter_file"],
             "read rho parameter file",
             filename=True,
             equate=False,
         ),
         _Argument(
             ["alnfile"],
             "Input alignment file [JONES format]",
             filename=True,
             is_required=True,
         ),
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
Example #11
0
 def __init__(self, cmd="hmmscan", **kwargs):
     """Declare the hmmscan parameters and initialise the wrapper.

     ``cmd`` must not be ``None``. The two required positional arguments
     (``hmm`` then ``input``) are checked with ``os.path.exists``. ``cmd``
     and ``kwargs`` are forwarded to ``AbstractCommandline.__init__``.
     """
     assert cmd is not None

     cutoff_switches = [
         _Switch(["--cut_ga", "cut_ga"], "Gathering Cutoff"),
         _Switch(["--cut_nc", "cut_nc"], "Noise Cutoff"),
         _Switch(["--cut_tc", "cut_tc"], "Trusted Cutoff"),
     ]
     general_settings = [
         _Switch(
             ["-h", "help"],
             "Print USAGE, DESCRIPTION and ARGUMENTS description;  "
             "ignore other arguments.",
         ),
         _Switch(["--acc", "accession"], "prefer accessions over names in output"),
         _Option(
             ["--cpu", "cpu"],
             "number of parallel CPU workers to use for multithreads",
         ),
         _Option(["-o", "out"], "Output File", filename=True, equate=False),
     ]
     positional_files = [
         _Argument(
             ["hmm"],
             "HMM Library",
             checker_function=os.path.exists,
             filename=True,
             is_required=True,
         ),
         _Argument(
             ["input"],
             "FASTA Query file",
             checker_function=os.path.exists,
             filename=True,
             is_required=True,
         ),
     ]
     self.parameters = cutoff_switches + general_settings + positional_files
     AbstractCommandline.__init__(self, cmd, **kwargs)
Example #12
0
 def __init__(self, cmd="ccmpred", **kwargs):
     """Declare the ccmpred parameters and initialise the wrapper.

     Valued options come first, then the two switches, then the required
     ``alnfile`` and ``matfile`` arguments. ``cmd`` and ``kwargs`` are
     forwarded to ``AbstractCommandline.__init__``.
     """
     valued_options = [
         _Option(
             ["-n", "numiter"],
             "Compute a maximum of NUMITER operations [default: 50]",
             equate=False,
         ),
         _Option(
             ["-e", "epsilon"],
             "Set convergence criterion for minimum decrease in the last K "
             "iterations to EPSILON [default: 0.01]",
             equate=False,
         ),
         _Option(
             ["-k", "lastk"],
             "Set K parameter for convergence criterion to LASTK [default: 5]",
             equate=False,
         ),
         _Option(
             ["-i", "inifile"],
             "Read initial weights from INIFILE",
             filename=True,
             equate=False,
         ),
         _Option(
             ["-r", "rawfile"],
             "Store raw prediction matrix in RAWFILE",
             filename=True,
             equate=False,
         ),
         _Option(
             ["-t", "threads"],
             "Calculate using THREADS threads on the CPU "
             "(automatically disables CUDA if available) [default: 1]",
             equate=False,
         ),
         _Option(
             ["-w", "idthres"],
             "Set sequence reweighting identity threshold to IDTHRES "
             "[default: 0.8]",
             equate=False,
         ),
         _Option(
             ["-l", "lfactor"],
             "Set pairwise regularization coefficients to LFACTOR * (L-1) "
             "[default: 0.2]",
             equate=False,
         ),
     ]
     switches = [
         _Switch(["-A", "apc"], "Disable average product correction (APC)"),
         _Switch(["-R", "renormalize"], "Re-normalize output matrix to [0,1]"),
     ]
     files = [
         _Argument(
             ["alnfile"],
             "Input alignment file [JONES format]",
             filename=True,
             is_required=True,
         ),
         _Argument(
             ["matfile"],
             "Output matrix file",
             filename=True,
             is_required=True,
         ),
     ]
     self.parameters = valued_options + switches + files
     AbstractCommandline.__init__(self, cmd, **kwargs)
Example #13
0
    def __init__(self, cmd="novoindex", **kwargs):
        """Declare the novoindex parameters and initialise the wrapper.

        The type checkers use the built-in ``int`` and ``str`` instead of
        ``types.IntType`` / ``types.StringType``. On Python 2 those names
        are aliases of the built-ins, so the checks are unchanged there; on
        Python 3 the aliases no longer exist and the old checkers would
        raise ``AttributeError`` when called.

        ``cmd`` and ``kwargs`` are forwarded to
        ``AbstractCommandline.__init__``.
        """
        # NOTE(review): the arguments after the name list are positional
        # (tag list, checker, 0, description[, 0]).  Their meaning depends on
        # the _Option/_Argument/_Switch signatures, which are not visible
        # here -- confirm before reordering or converting to keywords.
        def _is_int(value):
            return isinstance(value, int)

        def _is_str(value):
            return isinstance(value, str)

        self.parameters = [
            _Option(["-k", "kmer"], ["input", "option"],
                    _is_int,
                    0, "k-mer length used for the index, typically 14 [default: set by the program]",
                    0),
            _Option(["-s", "step"], ["input", "option"],
                    _is_int,
                    0, "Step size used for the index, typically from 1 to 3 [default: set by the program]",
                    0),
            _Option(["-n", "name"], ["input", "option"],
                    _is_str,
                    0, "Internal name for the reference sequence [default: indexfile name]",
                    0),
            _Argument(["-i", "indexfile"], ["input", "option"],
                      _is_str,
                      0, "Indexed reference sequence generated by novoindex"),
            _Argument(["-f", "sequencefiles"], ["input", "option"],
                      _is_str,
                      0, "List of sequence files to include in the index"),
            _Switch(["-m", "masking"], ["input"],
                    "Lower case masking, if included lower case sequences are not indexed"),
            _Switch(["-b", "bisulphite"], ["input"],
                    "Turns on bisulphite mode, creating index based on C->T and G->A conversion"),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
 def __init__(self, cmd="msaprobs", **kwargs):
     """Declare the msaprobs parameters and initialise the wrapper.

     Integer-valued options are validated by type, and the two pass-count
     options additionally by range (0-5 and 0-1000). ``cmd`` and ``kwargs``
     are forwarded to ``AbstractCommandline.__init__``.
     """
     def _is_int(value):
         return isinstance(value, int)

     def _is_consistency_count(value):
         return isinstance(value, int) and 0 <= value <= 5

     def _is_refinement_count(value):
         return isinstance(value, int) and 0 <= value <= 1000

     # order of parameters is the same as in msaprobs -help
     self.parameters = [
         _Option(
             ["-o", "--outfile", "outfile"],
             "specify the output file name (STDOUT by default)",
             filename=True,
             equate=False,
         ),
         _Option(
             ["-num_threads", "numthreads"],
             "specify the number of threads used, and otherwise detect "
             "automatically",
             checker_function=_is_int,
         ),
         _Switch(
             ["-clustalw", "clustalw"],
             "use CLUSTALW output format instead of FASTA format",
         ),
         _Option(
             ["-c", "consistency"],
             "use 0 <= REPS <= 5 (default: 2) passes of consistency "
             "transformation",
             checker_function=_is_consistency_count,
         ),
         _Option(
             ["-ir", "--iterative-refinement", "iterative_refinement"],
             "use 0 <= REPS <= 1000 (default: 10) passes of "
             "iterative-refinement",
             checker_function=_is_refinement_count,
         ),
         _Switch(
             ["-v", "verbose"],
             "report progress while aligning (default: off)",
         ),
         _Option(
             ["-annot", "annot"],
             "write annotation for multiple alignment to FILENAME",
             filename=True,
         ),
         _Switch(
             ["-a", "--alignment-order", "alignment_order"],
             "print sequences in alignment order rather than input order "
             "(default: off)",
         ),
         _Option(["-version", "version"], "print out version of MSAPROBS"),
         _Argument(["infile"], "Multiple sequence input file", filename=True),
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
 def __init__(self, cmd='makeblastdb', **kwargs):
     """Declare the makeblastdb parameters and initialise the wrapper.

     If the instance already exposes a ``parameters`` attribute, the
     entries defined here are placed in front of it; if reading it raises
     ``AttributeError``, they become the whole list. ``cmd`` must not be
     ``None``; ``cmd`` and ``kwargs`` are forwarded to
     ``AbstractCommandline.__init__``.
     """
     assert cmd is not None

     # Core:
     core_switches = [
         _Switch(
             ["-h", "h"],
             "Print USAGE and DESCRIPTION;  ignore other arguments.",
         ),
         _Switch(
             ["-help", "help"],
             "Print USAGE, DESCRIPTION and ARGUMENTS description; "
             "ignore other arguments.",
         ),
         _Switch(
             ["-version", "version"],
             "Print version number;  ignore other arguments.",
         ),
     ]
     # Output configuration options
     output_options = [
         _Option(
             ["-out", "out"],
             "Output file prefix for db.",
             filename=True,
             equate=False,
         ),
         # Should "-in" be required?
         _Option(
             ["-in", "db"],
             "The sequence create db with.",
             filename=True,
             equate=False,
         ),
         _Option(
             ["-dbtype", "dbtype"],
             "Molecule type of target db (string, 'nucl' or 'prot').",
             equate=False,
         ),
     ]
     extra_parameters = core_switches + output_options

     try:
         # Insert extra parameters - at the start just in case there
         # are any arguments which must come last:
         self.parameters = extra_parameters + self.parameters
     except AttributeError:
         # Should we raise an error?  The subclass should have set this up!
         self.parameters = extra_parameters
     AbstractCommandline.__init__(self, cmd, **kwargs)
Example #16
0
    def __init__(self, cmd="samtools", **kwargs):
        """Set up the parameter list for ``samtools rmdup``.

        Stores ``cmd`` as ``program_name``, fills ``self.parameters`` and
        then calls ``AbstractCommandline.__init__`` with ``cmd`` and
        ``kwargs``.
        """
        self.program_name = cmd

        rmdup_parameters = [_StaticArgument("rmdup")]
        rmdup_parameters.append(
            _Switch(
                ["-s", "s"],
                """Remove duplicates for single-end reads.

                    By default, the command works for paired-end
                    reads only""",
            )
        )
        rmdup_parameters.append(
            _Switch(
                ["-S", "S"],
                """Treat paired-end reads
                                    as single-end reads""",
            )
        )
        # Required file arguments: input first, output second.
        rmdup_parameters.append(
            _Argument(
                ["in_bam", "sorted_bam", "input_bam", "input", "input_file"],
                "Name Sorted Alignment File ",
                filename=True,
                is_required=True,
            )
        )
        rmdup_parameters.append(
            _Argument(
                ["out_bam", "output_bam", "output", "output_file"],
                "Output file",
                filename=True,
                is_required=True,
            )
        )
        self.parameters = rmdup_parameters
        AbstractCommandline.__init__(self, cmd, **kwargs)
Example #17
0
 def __init__(self, cmd="msaprobs", **kwargs):
     """Set up the msaprobs parameter list.

     Fills ``self.parameters`` and then calls
     ``AbstractCommandline.__init__`` with ``cmd`` and ``kwargs``. The
     ``-c`` and ``-ir`` checkers accept only ``int`` values in 0-5 and
     0-1000 respectively.
     """
     def _int_only(candidate):
         return isinstance(candidate, int)

     def _int_in_range(upper):
         # Build a checker that accepts an int between 0 and ``upper``.
         def _check(candidate):
             return isinstance(candidate, int) and 0 <= candidate <= upper
         return _check

     # order of parameters is the same as in msaprobs -help
     msaprobs_parameters = []
     msaprobs_parameters.append(
         _Option(["-o", "--outfile", "outfile"],
                 "specify the output file name (STDOUT by default)",
                 filename=True, equate=False))
     msaprobs_parameters.append(
         _Option(["-num_threads", "numthreads"],
                 "specify the number of threads used, "
                 "and otherwise detect automatically",
                 checker_function=_int_only))
     msaprobs_parameters.append(
         _Switch(["-clustalw", "clustalw"],
                 "use CLUSTALW output format instead of FASTA format"))
     msaprobs_parameters.append(
         _Option(["-c", "consistency"],
                 "use 0 <= REPS <= 5 (default: 2) "
                 "passes of consistency transformation",
                 checker_function=_int_in_range(5)))
     msaprobs_parameters.append(
         _Option(["-ir", "--iterative-refinement", "iterative_refinement"],
                 "use 0 <= REPS <= 1000 (default: 10) "
                 "passes of iterative-refinement",
                 checker_function=_int_in_range(1000)))
     msaprobs_parameters.append(
         _Switch(["-v", "verbose"],
                 "report progress while aligning (default: off)"))
     msaprobs_parameters.append(
         _Option(["-annot", "annot"],
                 "write annotation for multiple alignment to FILENAME",
                 filename=True))
     msaprobs_parameters.append(
         _Switch(["-a", "--alignment-order", "alignment_order"],
                 "print sequences in alignment order "
                 "rather than input order (default: off)"))
     msaprobs_parameters.append(
         _Option(["-version", "version"],
                 "print out version of MSAPROBS"))
     msaprobs_parameters.append(
         _Argument(["infile"],
                   "Multiple sequence input file",
                   filename=True))
     self.parameters = msaprobs_parameters
     AbstractCommandline.__init__(self, cmd, **kwargs)
Example #18
0
    def __init__(self, cmd="t_coffee", **kwargs):
        """Declare the t_coffee parameters and initialise the wrapper.

        The ``-type`` checker accepts only values found in
        ``self.SEQ_TYPES``; both gap penalties must be ``int``. ``-infile``
        is the only required option. ``cmd`` and ``kwargs`` are forwarded to
        ``AbstractCommandline.__init__``.
        """
        self.parameters = [
            _Option(
                ["-output", "output"],
                """Specify the output type.

                    One (or more separated by a comma) of:
                    'clustalw_aln', 'clustalw', 'gcg', 'msf_aln',
                    'pir_aln', 'fasta_aln', 'phylip', 'pir_seq', 'fasta_seq'

                    Note that of these Biopython's AlignIO module will only
                    read clustalw, pir, and fasta.
                    """,  # TODO - Can we read the PHYLIP output?
                equate=False),
            _Option(["-infile", "infile"],
                    "Specify the input file.",
                    filename=True,
                    is_required=True,
                    equate=False),
            # Indicates the name of the alignment output by t_coffee. If the
            # default is used, the alignment is named <your sequences>.aln
            _Option(["-outfile", "outfile"],
                    "Specify the output file. Default: <your sequences>.aln",
                    filename=True,
                    equate=False),
            _Switch(["-convert", "convert"],
                    "Specify you want to perform a file conversion"),
            _Option(["-type", "type"],
                    "Specify the type of sequence being aligned",
                    checker_function=lambda x: x in self.SEQ_TYPES,
                    equate=False),
            # Adjacent literals are joined with no separator, so each
            # fragment below ends with the punctuation and space it needs.
            _Option(["-outorder", "outorder"],
                    "Specify the order of sequence to output. "
                    "Either 'input', 'aligned' or <filename> of "
                    "Fasta file with sequence order",
                    equate=False),
            _Option(["-matrix", "matrix"],
                    "Specify the filename of the substitution matrix to use. "
                    "Default: blosum62mt",
                    equate=False),
            _Option(["-gapopen", "gapopen"],
                    "Indicates the penalty applied for opening a gap "
                    "(negative integer)",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Option(["-gapext", "gapext"],
                    "Indicates the penalty applied for extending a gap "
                    "(negative integer)",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Switch(["-quiet", "quiet"], "Turn off log output"),
            _Option(
                ["-mode", "mode"],
                "Specifies a special mode: genome, quickaln, dali, 3dcoffee",
                equate=False),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
Example #19
0
    def __init__(self, cmd="t_coffee", **kwargs):
        """Declare the t_coffee parameters and initialise the wrapper.

        The ``-type`` checker accepts only values found in
        ``self.SEQ_TYPES``; both gap penalties must be ``int``. ``-infile``
        is the only required option. ``cmd`` and ``kwargs`` are forwarded to
        ``AbstractCommandline.__init__``.
        """
        self.parameters = [
            _Option(
                ["-output", "output"],
                """Specify the output type.
                   One (or more separated by a comma) of:
                   'clustalw_aln', 'clustalw', 'gcg', 'msf_aln',
                   'pir_aln', 'fasta_aln', 'phylip', 'pir_seq', 'fasta_seq'

                   Note that of these Biopython's AlignIO module will only
                   read clustalw, pir, and fasta.
                   """,  # TODO - Can we read the PHYLIP output?
                equate=False,
            ),
            _Option(
                ["-infile", "infile"],
                "Specify the input file.",
                filename=True,
                is_required=True,
                equate=False,
            ),
            # Indicates the name of the alignment output by t_coffee. If the
            # default is used, the alignment is named <your sequences>.aln
            _Option(
                ["-outfile", "outfile"],
                "Specify the output file. Default: <your sequences>.aln",
                filename=True,
                equate=False,
            ),
            _Switch(
                ["-convert", "convert"],
                "Specify you want to perform a file conversion",
            ),
            _Option(
                ["-type", "type"],
                "Specify the type of sequence being aligned",
                checker_function=lambda x: x in self.SEQ_TYPES,
                equate=False,
            ),
            # Adjacent literals are joined with no separator, so each
            # fragment below ends with the punctuation and space it needs.
            _Option(
                ["-outorder", "outorder"],
                "Specify the order of sequence to output. "
                "Either 'input', 'aligned' or <filename> of "
                "Fasta file with sequence order",
                equate=False,
            ),
            _Option(
                ["-matrix", "matrix"],
                "Specify the filename of the substitution matrix to use. "
                "Default: blosum62mt",
                equate=False,
            ),
            _Option(
                ["-gapopen", "gapopen"],
                "Indicates the penalty applied for opening a gap "
                "(negative integer)",
                checker_function=lambda x: isinstance(x, int),
                equate=False,
            ),
            _Option(
                ["-gapext", "gapext"],
                "Indicates the penalty applied for extending a "
                "gap. (negative integer)",
                checker_function=lambda x: isinstance(x, int),
                equate=False,
            ),
            _Switch(["-quiet", "quiet"], "Turn off log output"),
            _Option(
                ["-mode", "mode"],
                "Specifies a special mode: genome, quickaln, dali, 3dcoffee",
                equate=False,
            ),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
Example #20
0
 def __init__(self, cmd="needleall", **kwargs):
     """Declare the needleall parameters and initialise the wrapper.

     The two sequence inputs and both gap penalties are flagged as
     required. ``cmd`` and ``kwargs`` are forwarded to
     ``_EmbossCommandLine.__init__``.
     """
     self.parameters = [
         _Option(
             ["-asequence", "asequence"],
             "First sequence to align",
             filename=True,
             is_required=True,
         ),
         _Option(
             ["-bsequence", "bsequence"],
             "Second sequence to align",
             filename=True,
             is_required=True,
         ),
         _Option(["-gapopen", "gapopen"],
                 "Gap open penalty",
                 is_required=True),
         _Option(["-gapextend", "gapextend"],
                 "Gap extension penalty",
                 is_required=True),
         _Option(["-datafile", "datafile"], "Matrix file", filename=True),
         _Option(
             ["-minscore", "minscore"],
             "Exclude alignments with scores below this threshold score.",
         ),
         _Option(["-errorfile", "errorfile"],
                 "Error file to be written to."),
         # Help text previously read "Apply And gap penalties"; the option
         # name and the neighbouring -endopen/-endextend texts all refer to
         # end gaps.
         _Option(["-endweight", "endweight"], "Apply end gap penalties"),
         _Option(
             ["-endopen", "endopen"],
             "The score taken away when an end gap is created.",
         ),
         _Option(
             ["-endextend", "endextend"],
             "The score added to the end gap penalty for each base or "
             "residue in the end gap.",
         ),
         _Switch(["-nobrief", "nobrief"],
                 "Display extended identity and similarity"),
         _Switch(["-brief", "brief"],
                 "Display brief identity and similarity"),
         _Option(["-similarity", "similarity"],
                 "Display percent identity and similarity"),
         _Option(["-snucleotide", "snucleotide"],
                 "Sequences are nucleotide (boolean)"),
         _Option(["-sprotein", "sprotein"],
                 "Sequences are protein (boolean)"),
         _Option(
             ["-aformat", "aformat"],
             "Display output in a different specified output format",
         ),
     ]
     _EmbossCommandLine.__init__(self, cmd, **kwargs)
Example #21
0
    def __init__(self, cmd="cmsearch", **kwargs):
        """Declare the cmsearch parameters and initialise the wrapper.

        The required positional arguments (``modelname`` then ``database``)
        come after the options. ``cmd`` and ``kwargs`` are forwarded to
        ``AbstractCommandline.__init__``.
        """
        search_settings = [
            _Option(
                ['--forecast', 'forecast'],
                'Forecast the time of execution, not searching',
                equate=False,
                is_required=False,
            ),
            _Switch(['--ga', 'usega'], 'Search above the defined cutoff'),
            _Option(['-Z', 'size'], 'Database size in MB', equate=False),
            _Switch(['-g', 'glocal'], 'Performs a glocal alignment search'),
            _Switch(
                ['--noalign', 'noalign'],
                'Prints just the start, stop, score',
            ),
        ]
        positional_files = [
            _Argument(
                ['modelname'],
                'name of model',
                filename=True,
                is_required=True,
            ),
            _Argument(
                ['database'],
                'Database file',
                filename=True,
                is_required=True,
            ),
        ]
        self.parameters = search_settings + positional_files
        AbstractCommandline.__init__(self, cmd, **kwargs)
Example #22
0
 def __init__(self, cmd="hmmpress", **kwargs):
     """Declare the hmmpress parameters, then run the base initialiser.

     cmd -- executable name, defaults to "hmmpress"; asserted not None.
     kwargs -- forwarded unchanged to AbstractCommandline.__init__.
     """
     assert cmd is not None
     show_help = _Switch(
         ["-h", "help"],
         "Print USAGE, DESCRIPTION and ARGUMENTS description;  ignore other arguments.",
     )
     overwrite = _Switch(
         ["-f", "force"],
         "force: overwrite any previous pressed files",
     )
     # The library path is checked with os.path.exists.
     hmm_library = _Argument(
         ["hmm"],
         "HMM Library",
         checker_function=os.path.exists,
         filename=True,
         is_required=True,
     )
     self.parameters = [show_help, overwrite, hmm_library]
     AbstractCommandline.__init__(self, cmd, **kwargs)
Example #23
0
 def __init__(self, cmd='pal2nal', **kwargs):
     """Declare the PAL2NAL parameters, then run the base initialiser.

     cmd -- executable name, defaults to 'pal2nal'.
     kwargs -- forwarded unchanged to AbstractCommandline.__init__.
     """
     # Parameters are listed in the order used on the pal2nal command line:
     # the two required input files, the optional flags, and finally the
     # output redirection.
     self.parameters = \
         [
             # Required Parameters
             _Argument(['pepaln'],
                       'protein alignment either in CLUSTAL or FASTA format',
                       filename=True, is_required=True,
                       checker_function=lambda x: Path(x).is_file()),
             _Argument(['nucfasta'],
                       'DNA sequences (single multi-fasta or separated files)',
                       filename=True, is_required=True,
                       checker_function=lambda x: Path(x).is_file()),
             _Switch(['-h', 'help'],
                     'Show help'),
             _Option(['-output', 'output'],
                     "Output format (clustal|paml|fasta|codon); default = clustal",
                     equate=False,
                     checker_function=lambda x: x in ['clustal', 'paml', 'fasta', 'codon']),
             _Switch(['-blockonly', 'blockonly'],
                     "Show only user specified blocks '#' under CLUSTAL alignment (see example)"),
             _Switch(['-nogap', 'nogap'],
                     "Remove columns with gaps and inframe stop codons"),
             _Switch(['-nomismatch', 'nomismatch'],
                     "Remove mismatched codons (mismatch between pep and cDNA) from the output"),
             # The backslash continuations keep this help text as one literal;
             # the leading whitespace of each continued line is part of it.
             _Option(['-codontable', 'codontable'],
                     "   1  Universal code (default)\
                         2  Vertebrate mitochondrial code\
                         3  Yeast mitochondrial code\
                         4  Mold, Protozoan, and Coelenterate Mitochondrial code\
                            and Mycoplasma/Spiroplasma code\
                         5  Invertebrate mitochondrial\
                         6  Ciliate, Dasycladacean and Hexamita nuclear code\
                         9  Echinoderm and Flatworm mitochondrial code\
                         10  Euplotid nuclear code\
                         11  Bacterial, archaeal and plant plastid code\
                         12  Alternative yeast nuclear code\
                         13  Ascidian mitochondrial code\
                         14  Alternative flatworm mitochondrial code\
                         15  Blepharisma nuclear code\
                         16  Chlorophycean mitochondrial code",
                     equate=False,
                     checker_function=lambda x: isinstance(x, int)),
             # NOTE(review): '>' presumably only redirects when the command
             # string is executed through a shell - confirm against callers.
             # Adjacent literals are joined without a separator, so the first
             # fragment must end with a space.
             _Option(['>', 'output_file'],
                     "This issues the bash command that redirects the PAL2NAL "
                     "alignment to a particular file",
                     filename=True, equate=False, is_required=True)
         ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
Example #24
0
    def __init__(self, cmd="RNAalifold", **kwargs):
        """Declare the RNAalifold parameters, then run the base initialiser.

        cmd -- executable name, defaults to "RNAalifold".
        kwargs -- forwarded unchanged to _ViennaMinimalCommandLine.__init__.
        """
        # The alignment file is declared as an option with an empty flag name.
        msa_input = _Option(
            ['', 'filename'],
            'The MSA file',
            equate=False,
            filename=True,
        )
        # NOTE(review): 'non-compatible' is not a valid Python identifier, so
        # it cannot be passed as a plain keyword argument - confirm that the
        # base class accepts such a name.
        energy_options = [
            _Option(
                ['-cv', 'covariance'],
                'Set  the  weight  of  the  covariance  term  in the energy function to factor',
                equate=False,
            ),
            _Option(
                ['-nc', 'non-compatible'],
                'Set the penalty for non-compatible sequences in the  covariance  term  of  the energy function to factor',
                equate=False,
            ),
        ]
        output_switches = [
            _Switch(
                ['-mis', 'mis'],
                'Output  "most informative sequence" instead of simple consensus',
            ),
            _Switch(
                ['-E', 'endgaps'],
                'Score pairs with endgaps same as gap-gap pairs',
            ),
            _Switch(
                ['-p', 'partition'],
                'Calculate  the partition function and base pairing probability matrix',
            ),
            _Switch(
                ['-color', 'color'],
                'Produce a colored version of the consensus strcture plot "alirna.ps"',
            ),
            _Switch(
                ['-aln', 'alignment'],
                'Produce  a  colored  and structure annotated alignment in PostScript format in the file "aln.ps" in the current directory',
            ),
        ]
        self.parameters = [msa_input] + energy_options + output_switches

        _ViennaMinimalCommandLine.__init__(self, cmd, **kwargs)
Example #25
0
    def __init__(self, cmd="bwa", **kwargs):
        """Declare the ``bwa index`` parameters, then run the base initialiser.

        cmd -- executable name, defaults to "bwa"; also stored as
               ``program_name``.
        kwargs -- forwarded unchanged to AbstractCommandline.__init__.
        """
        self.program_name = cmd

        subcommand = _StaticArgument("index")
        # Only the two algorithm names listed in the help text are accepted.
        algorithm = _Option(
            ["-a", "a", "algorithm"],
            """Algorithm for constructing BWT index.

                            Available options are:
                             - is:    IS linear-time algorithm for constructing suffix array.
                                      It requires 5.37N memory where N is the size of the database.
                                      IS is moderately fast, but does not work with database larger
                                      than 2GB. IS is the default algorithm due to its simplicity.
                             - bwtsw: Algorithm implemented in BWT-SW. This method works with the
                                      whole human genome, but it does not work with database
                                      smaller than 10MB and it is usually slower than IS.""",
            checker_function=lambda name: name in ["is", "bwtsw"],
            equate=False,
            is_required=True,
        )
        output_prefix = _Option(
            ["-p", "p", "prefix"],
            "Prefix of the output database [same as db filename]",
            equate=False,
            is_required=False,
        )
        fasta_input = _Argument(
            ["infile"],
            "Input file name",
            filename=True,
            is_required=True,
        )
        color_space = _Switch(
            ["-c", "c"],
            "Build color-space index. The input fasta should be in nucleotide space.",
        )

        self.parameters = [
            subcommand,
            algorithm,
            output_prefix,
            fasta_input,
            color_space,
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
Example #26
0
    def __init__(self, cmd="bwa", **kwargs):
        """Declare the ``bwa index`` parameters, then run the base initialiser.

        cmd -- executable name, defaults to "bwa"; also stored as
               ``program_name``.
        kwargs -- forwarded unchanged to AbstractCommandline.__init__.
        """
        self.program_name = cmd

        subcommand = _StaticArgument("index")
        # Only the two algorithm names listed in the help text are accepted.
        algorithm = _Option(
            ["-a", "a", "algorithm"],
            """Algorithm for constructing BWT index.

                            Available options are:
                             - is:    IS linear-time algorithm for constructing suffix array.
                                      It requires 5.37N memory where N is the size of the database.
                                      IS is moderately fast, but does not work with database larger
                                      than 2GB. IS is the default algorithm due to its simplicity.
                             - bwtsw: Algorithm implemented in BWT-SW. This method works with the
                                      whole human genome, but it does not work with database
                                      smaller than 10MB and it is usually slower than IS.""",
            checker_function=lambda name: name in ["is", "bwtsw"],
            equate=False,
            is_required=True,
        )
        output_prefix = _Option(
            ["-p", "p", "prefix"],
            "Prefix of the output database [same as db filename]",
            equate=False,
            is_required=False,
        )
        fasta_input = _Argument(
            ["infile"],
            "Input file name",
            filename=True,
            is_required=True,
        )
        color_space = _Switch(
            ["-c", "c"],
            "Build color-space index. The input fasta should be in nucleotide space.",
        )

        self.parameters = [
            subcommand,
            algorithm,
            output_prefix,
            fasta_input,
            color_space,
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
    def __init__(self, cmd="samtools", **kwargs):
        """Declare the ``samtools sort`` parameters, then run the base initialiser.

        cmd -- executable name, defaults to "samtools"; also stored as
               ``program_name``.
        kwargs -- forwarded unchanged to AbstractCommandline.__init__.
        """
        self.program_name = cmd

        # options for version samtools 1.3.1
        self.parameters = [
            _StaticArgument("sort"),
            _Switch(["-n", "n"], """Sort by read names rather
                                    than by chromosomal coordinates"""),
            _Option(["-o", "o"], """(file) Write the final sorted output to FILE,
                    rather than to standard output""",
                    equate=False, checker_function=lambda x: isinstance(x, str)),
            _Option(["-O", "O"], """(FORMAT) Write the final output as sam, bam, or cram""",
                    equate=False, checker_function=lambda x: isinstance(x, str)),
            _Option(["-T", "T"], """(PREFIX) Write temporary files to PREFIX.nnnn.bam, or if the specified PREFIX
                    is an existing directory, to PREFIX/samtools.mmm.mmm.tmp.nnnn.bam,
                    where mmm is unique to this invocation of the sort command""",
                    equate=False, checker_function=lambda x: isinstance(x, str)),
            # samtools sort takes the compression level as lower-case "-l";
            # upper-case "-I" is not one of its options.  Per Bio.Application
            # the first name is the flag written to the command line and the
            # last one is the Python-facing name, so "I" stays last to keep
            # existing callers working and "l" is added as an alias.  The
            # value is an INT, so ints are now accepted as well as strings.
            _Option(["-l", "l", "I"], """(INT) Set the desired compression level for the final output file,
                    ranging from 0 (uncompressed) or 1 (fastest but minimal compression)
                    to 9 (best compression but slowest to write), similarly to gzip(1)'s compression level setting.""",
                    equate=False,
                    checker_function=lambda x: isinstance(x, (int, str))),
            _Option(["-m", "m"], "Approximately the maximum required memory",
                    equate=False,
                    checker_function=lambda x: isinstance(x, int)),
            _Argument(["input"], "Input SAM/BAM/CRAM file",
                      filename=True, is_required=True),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
Example #28
0
    def __init__(self, cmd="samtools", **kwargs):
        """Declare the ``samtools sort`` parameters, then run the base initialiser.

        cmd -- executable name, defaults to "samtools"; also stored as
               ``program_name``.
        kwargs -- forwarded unchanged to AbstractCommandline.__init__.
        """
        self.program_name = cmd

        # options for version samtools 1.3.1
        self.parameters = [
            _StaticArgument("sort"),
            _Switch(["-n", "n"], """Sort by read names rather
                                    than by chromosomal coordinates"""),
            _Option(["-o", "o"], """(file) Write the final sorted output to FILE,
                    rather than to standard output""",
                    equate=False, checker_function=lambda x: isinstance(x, str)),
            _Option(["-O", "O"], """(FORMAT) Write the final output as sam, bam, or cram""",
                    equate=False, checker_function=lambda x: isinstance(x, str)),
            _Option(["-T", "T"], """(PREFIX) Write temporary files to PREFIX.nnnn.bam, or if the specified PREFIX
                    is an existing directory, to PREFIX/samtools.mmm.mmm.tmp.nnnn.bam,
                    where mmm is unique to this invocation of the sort command""",
                    equate=False, checker_function=lambda x: isinstance(x, str)),
            # samtools sort takes the compression level as lower-case "-l";
            # upper-case "-I" is not one of its options.  Per Bio.Application
            # the first name is the flag written to the command line and the
            # last one is the Python-facing name, so "I" stays last to keep
            # existing callers working and "l" is added as an alias.  The
            # value is an INT, so ints are now accepted as well as strings.
            _Option(["-l", "l", "I"], """(INT) Set the desired compression level for the final output file,
                    ranging from 0 (uncompressed) or 1 (fastest but minimal compression)
                    to 9 (best compression but slowest to write), similarly to gzip(1)'s compression level setting.""",
                    equate=False,
                    checker_function=lambda x: isinstance(x, (int, str))),
            _Option(["-m", "m"], "Approximately the maximum required memory",
                    equate=False,
                    checker_function=lambda x: isinstance(x, int)),
            _Argument(["input"], "Input SAM/BAM/CRAM file",
                      filename=True, is_required=True),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
Example #29
0
    def __init__(self, cmd=None, **kwargs):
        """Prepend the shared EMBOSS parameters, then run the base initialiser.

        cmd -- executable name; must be supplied (asserted not None).
        kwargs -- forwarded unchanged to AbstractCommandline.__init__.
        """
        assert cmd is not None
        shared = [
            _Option(
                ["-outfile", "outfile"],
                ["output", "file"],
                None,
                0,
                "Output filename",
            ),
            _Switch(
                ["-auto", "auto"],
                [],
                """Turn off prompts.
           
                   Automatic mode disables prompting, so we recommend you set
                   this argument all the time when calling an EMBOSS tool from
                   Biopython.
                   """,
            ),
            _Switch(["-stdout", "stdout"], [], "Write standard output."),
            _Switch(
                ["-filter", "filter"],
                [],
                "Read standard input, write standard output.",
            ),
            _Switch(
                ["-options", "options"],
                [],
                """Prompt for standard and additional values.

                   If you are calling an EMBOSS tool from within Biopython,
                   we DO NOT recommend using this option.
                   """,
            ),
            _Switch(
                ["-debug", "debug"],
                [],
                "Write debug output to program.dbg.",
            ),
            _Switch(
                ["-verbose", "verbose"],
                [],
                "Report some/full command line options",
            ),
            _Switch(
                ["-help", "help"],
                [],
                """Report command line options.

                   More information on associated and general qualifiers can
                   be found with -help -verbose
                   """,
            ),
            _Switch(["-warning", "warning"], [], "Report warnings."),
            _Switch(["-error", "error"], [], "Report errors."),
            _Switch(["-die", "die"], [], "Report dying program messages."),
        ]
        # The subclass should already have set self.parameters; when it has
        # not, only the shared parameters are used.
        try:
            subclass_parameters = self.parameters
        except AttributeError:
            subclass_parameters = []
        # Shared parameters go first in case any subclass arguments must
        # come last.
        self.parameters = shared + subclass_parameters
        AbstractCommandline.__init__(self, cmd, **kwargs)
Example #30
0
    def __init__(self, cmd="bbcontacts", **kwargs):
        """Check the secondary-structure input choice and declare parameters.

        Exactly one of the ``dssp_file`` / ``psipred_file`` keyword arguments
        must be present in ``kwargs``.

        cmd -- executable name, defaults to "bbcontacts".
        kwargs -- forwarded unchanged to AbstractCommandline.__init__.

        Raises RuntimeError when both or neither of the two keywords are given.
        """
        # TODO: figure a way to group CL arguments as in `mutually_exclusive_group`
        has_dssp = 'dssp_file' in kwargs
        has_psipred = 'psipred_file' in kwargs
        if has_dssp and has_psipred:
            raise RuntimeError('Provide only one of [dssp_file|psipred_file]!')
        if not has_dssp and not has_psipred:
            raise RuntimeError('Provide one of [dssp_file|psipred_file]!')

        decoding_options = [
            _Option(
                ['-c', 'config_file'],
                'bbcontacts configuration file',
                filename=True,
                equate=False,
            ),
            _Option(
                ['-s', 'smoothing_size'],
                'Perform local background correction of the coupling matrix '
                'before decoding: from each coupling, subtract the average '
                'coupling (smoothed background) over an area extending by '
                'SMOOTHINGSIZE in each direction [default=10, use 0 for no '
                'local background correction]',
                equate=False,
            ),
            _Switch(
                ['-l', 'long_predictions'],
                'Turn off (slow) prediction-shortening mode (this mode is on '
                'by default but will only get triggered when long predictions occur)',
            ),
            _Option(
                ['-n', 'pdb_name'],
                'Provide a PDB identifier (when also using -e, this will be the '
                'PDB name to look for in EVALUATIONFILE)',
                equate=False,
            ),
            _Option(
                ['-e', 'evaluation_file'],
                'Provide a file containing the true contacts (BetaSheet916.dat, '
                'BetaSheet1452.dat or same format) for evaluation',
                filename=True,
                equate=False,
            ),
        ]
        positionals = [
            _Argument(
                ['matfile'],
                'CCMpred-like coupling matrix',
                filename=True,
                is_required=True,
            ),
            _Argument(
                ['diversity_score'],
                'sequence-dependent diversity score',
                is_required=True,
            ),
            _Argument(['prefix'], 'output prefix', is_required=True),
        ]
        # The two secondary-structure options stay after the positionals.
        structure_inputs = [
            _Option(
                ['-d', 'dssp_file'],
                'DSSP secondary structure prediction file',
                filename=True,
                equate=False,
            ),
            _Option(
                ['-p', 'psipred_file'],
                'PSIPRED secondary structure prediction file',
                filename=True,
                equate=False,
            ),
        ]
        self.parameters = decoding_options + positionals + structure_inputs
        AbstractCommandline.__init__(self, cmd, **kwargs)
Example #31
0
 def __init__(self, cmd="samtools", **kwargs):
     """Declare the ``samtools sort`` parameters, then run the base initialiser.

     cmd -- executable name, defaults to "samtools"; also stored as
            ``program_name``.
     kwargs -- forwarded unchanged to AbstractCommandline.__init__.
     """
     self.program_name = cmd

     subcommand = _StaticArgument("sort")
     to_stdout = _Switch(["-o", "o"], """Output the final alignment
                                 to the standard output""")
     by_name = _Switch(["-n", "n"], """Sort by read names rather
                                 than by chromosomal coordinates""")
     memory_limit = _Option(
         ["-m", "m"],
         "Approximately the maximum required memory",
         equate=False,
         checker_function=lambda value: isinstance(value, int),
     )
     bam_input = _Argument(
         ["input_bam"],
         "Input BAM file",
         filename=True,
         is_required=True,
     )
     output_prefix = _Argument(
         ["out_prefix"],
         "Output prefix",
         filename=True,
         is_required=True,
     )

     self.parameters = [
         subcommand,
         to_stdout,
         by_name,
         memory_limit,
         bam_input,
         output_prefix,
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
Example #32
0
    def __init__(self, cmd=None, **kwargs):
        """Prepend the shared EMBOSS switches, then run the base initialiser.

        cmd -- executable name; must be supplied (asserted not None).
        kwargs -- forwarded unchanged to AbstractCommandline.__init__.
        """
        assert cmd is not None
        shared = [
            _Switch(
                ["-auto", "auto"],
                [],
                """Turn off prompts.
           
                   Automatic mode disables prompting, so we recommend you set
                   this argument all the time when calling an EMBOSS tool from
                   Biopython.
                   """,
            ),
            _Switch(["-stdout", "stdout"], [], "Write standard output."),
            _Switch(
                ["-filter", "filter"],
                [],
                "Read standard input, write standard output.",
            ),
            _Switch(
                ["-options", "options"],
                [],
                """Prompt for standard and additional values.

                   If you are calling an EMBOSS tool from within Biopython,
                   we DO NOT recommend using this option.
                   """,
            ),
            _Switch(
                ["-debug", "debug"],
                [],
                "Write debug output to program.dbg.",
            ),
            _Switch(
                ["-verbose", "verbose"],
                [],
                "Report some/full command line options",
            ),
            _Switch(
                ["-help", "help"],
                [],
                """Report command line options.

                   More information on associated and general qualifiers can
                   be found with -help -verbose
                   """,
            ),
            _Switch(["-warning", "warning"], [], "Report warnings."),
            _Switch(["-error", "error"], [], "Report errors."),
            _Switch(["-die", "die"], [], "Report dying program messages."),
        ]
        # The subclass should already have set self.parameters; when it has
        # not, only the shared switches are used.
        try:
            subclass_parameters = self.parameters
        except AttributeError:
            subclass_parameters = []
        # Shared switches go first in case any subclass arguments must
        # come last.
        self.parameters = shared + subclass_parameters
        AbstractCommandline.__init__(self, cmd, **kwargs)
Example #33
0
 def __init__(self, cmd='psicov', **kwargs):
     """Declare the psicov parameters, then run the base initialiser.

     cmd -- executable name, defaults to 'psicov'.
     kwargs -- forwarded unchanged to AbstractCommandline.__init__.
     """
     def spaced_option(names, description, **extra):
         # Every valued psicov option is declared with equate=False.
         return _Option(names, description, equate=False, **extra)

     switches = [
         _Switch(['-a', 'lasso'], "use approximate Lasso algorithm"),
         _Switch(
             ['-n', 'noshrink'],
             "don't pre-shrink the sample covariance matrix",
         ),
         _Switch(['-f', 'filter'], "filer low-scoring contacts"),
         _Switch(
             ['-p', 'ppv_output'],
             "output PPV estimates rather than raw scores",
         ),
         _Switch(['-l', 'noapc'], "don't apply APC to Lasso output"),
     ]
     valued_options = [
         spaced_option(['-r', 'rho'], "set initial rho paramter"),
         spaced_option(
             ['-d', 'sparsity'],
             "set target precision matrix sparsity [default: 0; not specified]",
         ),
         spaced_option(
             ['-t', 'convergence_threshold'],
             "set Lasso convergence threshold [default: 1e-4]",
         ),
         spaced_option(
             ['-i', 'blosum_weighting'],
             "select BLOSUM-like weighting with given identity threshold "
             "[default selects threshold automatically]",
         ),
         spaced_option(
             ['-c', 'pseudocount'],
             "set pseudocount value [default: 1]",
         ),
         spaced_option(
             ['-j', 'sequence_separation'],
             "set minimum sequence sparation [default: 5]",
         ),
         spaced_option(
             ['-g', 'gap_fraction'],
             "set maximum fraction of gaps [default: 0.9]",
         ),
         spaced_option(
             ['-z', 'nr_threads'],
             "set maximum number of threads",
         ),
         spaced_option(
             ['-b', 'rho_parameter_file'],
             "read rho parameter file",
             filename=True,
         ),
     ]
     alignment_input = _Argument(
         ['alnfile'],
         "Input alignment file [JONES format]",
         filename=True,
         is_required=True,
     )
     # Same ordering as before: switches, valued options, alignment file.
     self.parameters = switches + valued_options + [alignment_input]
     AbstractCommandline.__init__(self, cmd, **kwargs)
Example #34
0
    def __init__(self, cmd="cmalign", **kwargs):
        """Declare the cmalign parameters, then run the base initialiser.

        cmd -- executable name, defaults to "cmalign".
        kwargs -- forwarded unchanged to AbstractCommandline.__init__.
        """
        self.parameters = [
            _Option(['-o', 'outfile'], 'Print the MSA to a file', equate=False),
            # Adjacent string literals are joined with no separator, so each
            # fragment that continues a sentence ends with a space.
            _Option(['--withali', 'withali'],
                    'Include the alignment used to '
                    'generate the model (if --rf or --gapthresh were used to '
                    'generate the model they should be used here as well)',
                    equate=False),
            _Option(['--gapthresh', 'gapthresh'],
                    'Threshold for percent of gaps '
                    'in column to be considered as insertion',
                    equate=False),
            _Switch(['--rf', 'rf'],
                    'Uses the RF line to build a model '
                    '(useful when expanding seed alignment that has a GF line from cmalign)'),
            _Switch(['-l', 'local'], 'Performs a local alignment'),
            _Switch(['--sub', 'sub'], 'Some sequences are not complete'),
            _Switch(['-p', 'posterior'],
                    'Print the posterior probability of the alignment'),
            # Positional file arguments: model first, then the FASTA file.
            _Argument(['modelname'], 'name of model',
                      filename=True, is_required=True),
            _Argument(['fastafile'], 'fasta file',
                      filename=True, is_required=True),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
Example #35
0
 def __init__(self, cmd="FastTreeMP", **kwargs):
     """Declare the FastTreeMP parameters, then run the base initialiser.

     cmd -- executable name, defaults to "FastTreeMP".
     kwargs -- forwarded unchanged to AbstractCommandline.__init__.
     """
     # The parameter list holds objects derived from the base class
     # _AbstractParameter.
     self.parameters = [
         _Switch(
             ['-nosupport', 'nosupport'],
             "don't include support values in the output tree",
         ),
         _Argument(['', 'input'], 'input file'),
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
Example #36
0
 def __init__(self, cmd="needle", **kwargs):
     """Declare the needle parameters, then run the EMBOSS base initialiser.

     cmd -- executable name, defaults to "needle".
     kwargs -- forwarded unchanged to _EmbossCommandLine.__init__.
     """
     required_inputs = [
         _Option(
             ["-asequence", "asequence"],
             "First sequence to align",
             filename=True,
             is_required=True,
         ),
         _Option(
             ["-bsequence", "bsequence"],
             "Second sequence to align",
             filename=True,
             is_required=True,
         ),
         _Option(
             ["-gapopen", "gapopen"],
             "Gap open penalty",
             is_required=True,
         ),
         _Option(
             ["-gapextend", "gapextend"],
             "Gap extension penalty",
             is_required=True,
         ),
     ]
     scoring = [
         _Option(["-datafile", "datafile"], "Matrix file", filename=True),
         _Option(["-endweight", "endweight"], "Apply And gap penalties"),
         _Option(
             ["-endopen", "endopen"],
             "The score taken away when an end gap is created.",
         ),
         _Option(
             ["-endextend", "endextend"],
             "The score added to the end gap penality for each base or "
             "residue in the end gap.",
         ),
     ]
     reporting = [
         _Switch(
             ["-nobrief", "nobrief"],
             "Display extended identity and similarity",
         ),
         _Switch(
             ["-brief", "brief"],
             "Display brief identity and similarity",
         ),
         _Option(
             ["-similarity", "similarity"],
             "Display percent identity and similarity",
         ),
         _Option(
             ["-snucleotide", "snucleotide"],
             "Sequences are nucleotide (boolean)",
         ),
         _Option(
             ["-sprotein", "sprotein"],
             "Sequences are protein (boolean)",
         ),
         _Option(
             ["-aformat", "aformat"],
             "Display output in a different specified output format",
         ),
     ]
     # Concatenation keeps the original parameter order.
     self.parameters = required_inputs + scoring + reporting
     _EmbossCommandLine.__init__(self, cmd, **kwargs)
Example #37
0
 def __init__(self, cmd=None, **kwargs):
     """Prepend the shared help/version switches, then run the base initialiser.

     cmd -- executable name; must be supplied (asserted not None).
     kwargs -- forwarded unchanged to AbstractCommandline.__init__.
     """
     assert cmd is not None
     shared = [
         _Switch(
             ["-h", "help"],
             """help
                 Print help and exit""",
         ),
         _Switch(
             ["--detailed-help", "detailed_help"],
             """detailed_help
                 Print help, including all details and hidden options, and exit""",
         ),
         _Switch(
             ["--full-help", "full_help"],
             """full_help
                 Print help, including hidden options, and exit""",
         ),
         _Switch(
             ["-V", "--version", "version"],
             """version
                 Print version and exit""",
         ),
     ]
     # The subclass should already have set self.parameters; when it has
     # not, only the shared switches are used.
     try:
         subclass_parameters = self.parameters
     except AttributeError:
         subclass_parameters = []
     # Shared switches go first in case any subclass arguments must come last.
     self.parameters = shared + subclass_parameters
     AbstractCommandline.__init__(self, cmd, **kwargs)
Example #38
0
 def __init__(self, cmd="bwa aln", **kwargs):
     """Declare the ``bwa aln`` parameters, then run the base initialiser.

     cmd -- command string, defaults to "bwa aln"; also stored as
            ``program_name``.
     kwargs -- forwarded unchanged to AbstractCommandline.__init__.
     """
     self.program_name = cmd

     def is_int(value):
         # Checker for the INT-valued options.
         return isinstance(value, int)

     def is_number(value):
         # Checker for options that accept either an int or a float.
         return isinstance(value, (int, float))

     self.parameters = [
         _Argument(["reference"], "Reference file name",
                   filename=True, is_required=True),
         _Argument(["read_file"], "Read  file name",
                   filename=True, is_required=True),
         _Option(["-n", "n"],
                 "Maximum edit distance if the value is INT, or the fraction of "
                 "missing alignments given 2% uniform base error rate if FLOAT. "
                 "In the latter case, the maximum edit distance is automatically "
                 "chosen for different read lengths. [0.04]",
                 filename=False, equate=False, checker_function=is_number),
         # The help text used to be a copy of the -n description; in the bwa
         # manual -o is the maximum number of gap opens.  The checker is left
         # permissive (int or float) so existing callers are not rejected.
         _Option(["-o", "o"],
                 "Maximum number of gap opens [1]",
                 filename=False, equate=False, checker_function=is_number),
         _Option(["-e", "e"],
                 "Maximum number of gap extensions, -1 for k-difference mode "
                 "(disallowing long gaps) [-1]",
                 filename=False, equate=False, checker_function=is_int),
         _Option(["-d", "d"],
                 "Disallow a long deletion within INT bp towards the 3-end [16]",
                 filename=False, equate=False, checker_function=is_int),
         _Option(["-i", "i"],
                 "Disallow an indel within INT bp towards the ends [5]",
                 filename=False, equate=False, checker_function=is_int),
         _Option(["-l", "l"],
                 "Take the first INT subsequence as seed. If INT is larger than the "
                 "query sequence, seeding will be disabled. For long reads, this "
                 "option is typically ranged from 25 to 35 for -k 2. [inf]",
                 filename=False, equate=False, checker_function=is_int),
         _Option(["-k", "k"],
                 "Maximum edit distance in the seed [2]",
                 filename=False, equate=False, checker_function=is_int),
         _Option(["-t", "t"],
                 "Number of threads (multi-threading mode) [1]",
                 filename=False, equate=False, checker_function=is_int),
         _Option(["-M", "M"],
                 "Mismatch penalty. BWA will not search for suboptimal hits with a "
                 "score lower than (bestScore-misMsc). [3]",
                 filename=False, equate=False, checker_function=is_int),
         _Option(["-O", "O"],
                 "Gap open penalty [11]",
                 filename=False, equate=False, checker_function=is_int),
         _Option(["-E", "E"],
                 "Gap extension penalty [4]",
                 filename=False, equate=False, checker_function=is_int),
         _Option(["-R", "R"],
                 "Proceed with suboptimal alignments if there are no more than INT "
                 "equally best hits. This option only affects paired-end mapping. "
                 "Increasing this threshold helps to improve the pairing accuracy at "
                 "the cost of speed, especially for short reads (~32bp).",
                 filename=False, equate=False, checker_function=is_int),
         # The backslash is escaped explicitly: "\s" is an invalid escape
         # sequence.  The resulting text is unchanged.
         _Option(["-q", "q"],
                 "Parameter for read trimming. BWA trims a read down to "
                 "argmax_x{\\sum_{i=x+1}^l(INT-q_i)} if q_l<INT where l is the "
                 "original read length. [0]",
                 filename=False, equate=False, checker_function=is_int),
         _Option(["-B", "B"],
                 "Length of barcode starting from the 5-end. When INT is positive, "
                 "the barcode of each read will be trimmed before mapping and will "
                 "be written at the BC SAM tag. For paired-end reads, the barcode "
                 "from both ends are concatenated. [0]",
                 filename=False, equate=False, checker_function=is_int),
         _Switch(["-c", "c"],
                 "Reverse query but not complement it, which is required for "
                 "alignment in the color space."),
         _Switch(["-N", "N"],
                 "Disable iterative search. All hits with no more than maxDiff "
                 "differences will be found. This mode is much slower than the "
                 "default."),
         _Switch(["-I", "I"],
                 "The input is in the Illumina 1.3+ read format (quality equals "
                 "ASCII-64)."),
         _Switch(["-b", "b"],
                 "Specify the input read sequence file is the BAM format"),
         _Switch(["-b1", "b1"],
                 "When -b is specified, only use the first read in a read pair in "
                 "mapping (skip single-end reads and the second reads)."),
         _Switch(["-b2", "b2"],
                 "When -b is specified, only use the second read in a read pair in "
                 "mapping."),
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
Example #39
0
 def __init__(self, cmd="bwa aln", **kwargs):
     """Declare the parameters accepted by ``bwa aln``.

     The two required positional arguments (``reference`` and ``read_file``)
     are listed first, followed by the value-taking options and finally the
     boolean switches.  The finished list is stored on ``self.parameters``
     before control is handed to ``AbstractCommandline.__init__``.

     Args:
         cmd: Program invocation string; defaults to ``"bwa aln"``.
         **kwargs: Initial parameter values, forwarded unchanged to
             ``AbstractCommandline.__init__``.
     """
     def _is_int(x):
         return isinstance(x, int)

     def _is_number(x):
         return isinstance(x, (int, float))

     self.program_name = cmd
     self.parameters = [
         _Argument(["reference"], "Reference file name", filename=True, is_required=True),
         _Argument(["read_file"], "Read  file name", filename=True, is_required=True),
         _Option(["-n", "n"],
                 "Maximum edit distance if the value is INT, or the fraction of missing alignments given 2% uniform base error rate if FLOAT. In the latter case, the maximum edit distance is automatically chosen for different read lengths. [0.04]",
                 filename=False, equate=False, checker_function=_is_number),
         # The help text here used to be a verbatim copy of the -n text.
         # NOTE(review): -o is documented as INT; floats are still accepted
         # so that existing callers keep working.
         _Option(["-o", "o"],
                 "Maximum number of gap opens [1]",
                 filename=False, equate=False, checker_function=_is_number),
         _Option(["-e", "e"],
                 "Maximum number of gap extensions, -1 for k-difference mode (disallowing long gaps) [-1]",
                 filename=False, equate=False, checker_function=_is_int),
         _Option(["-d", "d"],
                 "Disallow a long deletion within INT bp towards the 3-end [16]",
                 filename=False, equate=False, checker_function=_is_int),
         _Option(["-i", "i"],
                 "Disallow an indel within INT bp towards the ends [5]",
                 filename=False, equate=False, checker_function=_is_int),
         _Option(["-l", "l"],
                 "Take the first INT subsequence as seed. If INT is larger than the query sequence, seeding will be disabled. For long reads, this option is typically ranged from 25 to 35 for -k 2. [inf]",
                 filename=False, equate=False, checker_function=_is_int),
         _Option(["-k", "k"],
                 "Maximum edit distance in the seed [2]",
                 filename=False, equate=False, checker_function=_is_int),
         _Option(["-t", "t"],
                 "Number of threads (multi-threading mode) [1]",
                 filename=False, equate=False, checker_function=_is_int),
         _Option(["-M", "M"],
                 "Mismatch penalty. BWA will not search for suboptimal hits with a score lower than (bestScore-misMsc). [3]",
                 filename=False, equate=False, checker_function=_is_int),
         _Option(["-O", "O"],
                 "Gap open penalty [11]",
                 filename=False, equate=False, checker_function=_is_int),
         _Option(["-E", "E"],
                 "Gap extension penalty [4]",
                 filename=False, equate=False, checker_function=_is_int),
         _Option(["-R", "R"],
                 "Proceed with suboptimal alignments if there are no more than INT equally best hits. This option only affects paired-end mapping. Increasing this threshold helps to improve the pairing accuracy at the cost of speed, especially for short reads (~32bp).",
                 filename=False, equate=False, checker_function=_is_int),
         # The backslash is doubled so the literal no longer relies on the
         # invalid escape sequence "\s"; the resulting text is unchanged.
         _Option(["-q", "q"],
                 "Parameter for read trimming. BWA trims a read down to argmax_x{\\sum_{i=x+1}^l(INT-q_i)} if q_l<INT where l is the original read length. [0]",
                 filename=False, equate=False, checker_function=_is_int),
         _Option(["-B", "B"],
                 "Length of barcode starting from the 5-end. When INT is positive, the barcode of each read will be trimmed before mapping and will be written at the BC SAM tag. For paired-end reads, the barcode from both ends are concatenated. [0]",
                 filename=False, equate=False, checker_function=_is_int),
         _Switch(["-c", "c"], "Reverse query but not complement it, which is required for alignment in the color space."),
         _Switch(["-N", "N"], "Disable iterative search. All hits with no more than maxDiff differences will be found. This mode is much slower than the default."),
         _Switch(["-I", "I"], "The input is in the Illumina 1.3+ read format (quality equals ASCII-64)."),
         _Switch(["-b", "b"], "Specify the input read sequence file is the BAM format"),
         _Switch(["-b1", "b1"], "When -b is specified, only use the first read in a read pair in mapping (skip single-end reads and the second reads)."),
         _Switch(["-b2", "b2"], "When -b is specified, only use the second read in a read pair in mapping."),
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
    def __init__(self, cmd="samtools", **kwargs):
        """Assemble the ``samtools calmd`` parameter table, then initialise the base class."""

        def _is_int(value):
            return isinstance(value, int)

        self.program_name = cmd

        # Each entry is built under a descriptive name and the list is
        # assembled at the end in the same order as before.
        subcommand = _StaticArgument("calmd")
        extended_baq = _Switch(
            ["-E", "E"],
            """Extended BAQ calculation.
                    This option trades specificity for sensitivity,
                    though the effect is minor.""",
        )
        equals_for_matches = _Switch(
            ["-e", "e"],
            """Convert the read base to = if it is
                    identical to the aligned reference base.

                    Indel caller does not support the = bases
                    at the moment.""",
        )
        uncompressed_out = _Switch(["-u", "u"], "Output uncompressed BAM")
        compressed_out = _Switch(["-b", "b"], "Output compressed BAM ")
        sam_input = _Switch(["-S", "S"], "The input is SAM with header lines ")
        bq_tag = _Switch(
            ["-r", "r"],
            """Compute the BQ tag (without -A)
                    or cap base quality by BAQ (with -A).""",
        )
        overwrite_quality = _Switch(
            ["-A", "A"],
            """When used jointly with -r this option overwrites
                    the original base quality""",
        )
        mapq_cap = _Option(
            ["-C", "C"],
            """Coefficient to cap mapping quality
                    of poorly mapped reads.

                    See the pileup command for details.""",
            equate=False,
            checker_function=_is_int,
        )
        bam_in = _Argument(
            ["input", "input_file", "in_bam", "infile", "input_bam"],
            "Input BAM",
            filename=True,
            is_required=True,
        )
        fasta_ref = _Argument(
            ["reference", "reference_fasta", "ref"],
            "Reference FASTA to be indexed",
            filename=True,
            is_required=True,
        )

        self.parameters = [
            subcommand,
            extended_baq,
            equals_for_matches,
            uncompressed_out,
            compressed_out,
            sam_input,
            bq_tag,
            overwrite_quality,
            mapq_cap,
            bam_in,
            fasta_ref,
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
Example #41
0
 def __init__(self, cmd="hmmscan", **kwargs):
     """Register the hmmscan-specific parameters, then defer to the parent initialiser."""
     # Other expert options:
     daemon_switch = _Switch(["--daemon", "daemon"], "run program as a daemon")
     # Arguments: the profile database, then the sequence file to search
     profile_db = _Argument(
         ["hmmdb"], "A path to a HMM profile database", filename=True, is_required=True
     )
     query_file = _Argument(
         ["seqfile"], "A path to a sequence to search in", filename=True, is_required=True
     )
     self.parameters = [daemon_switch, profile_db, query_file]
     super(HMMScanCommandline, self).__init__(cmd, **kwargs)
Example #42
0
 def __init__(self, cmd="phastCons", **kwargs):
     """Build the phastCons parameter list and pass it on to AbstractCommandline."""

     def positional(name):
         # Required argument with an empty description.
         return _Argument([name], ["input"], None, True, "")

     def valued(flag):
         # Non-required option with an empty description.
         return _Option([flag], ["input"], None, False, "", False)

     self.parameters = [
         positional("alignment"),
         positional("models"),
         valued("--target-coverage"),
         valued("--expected-length"),
         valued("--rho"),
         valued("--msa-format"),
         valued("--estimate-trees"),
         _Switch(["--no-post-probs"], ["input"]),
         valued("--most-conserved"),
         valued("--estimate-rho"),
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
Example #43
0
 def __init__(self, cmd=None, **kwargs):
     """Prepend the shared ``-h`` switch to any parameters a subclass has already set."""
     assert cmd is not None
     # Core:
     help_switch = _Switch(["-h", "h"], "show brief help on version and usage")
     shared = [help_switch]
     try:
         # Shared parameters go first, in case the subclass list ends with
         # arguments which must come last.
         combined = shared + self.parameters
     except AttributeError:
         # Should we raise an error?  The subclass should have set this up!
         combined = shared
     self.parameters = combined
     super(_HMMCommandlineBase, self).__init__(cmd, **kwargs)
Example #44
0
 def __init__(self, cmd=None, **kwargs):
     """Put the common ``-h`` switch in front of the subclass parameters and initialise the parent."""
     assert cmd is not None
     # Core:
     common = [_Switch(["-h", "h"], "show brief help on version and usage")]
     try:
         # Common switches lead the list, just in case the subclass has
         # arguments which must come last.
         merged = common + self.parameters
     except AttributeError:
         # Should we raise an error?  The subclass should have set this up!
         merged = common
     self.parameters = merged
     super(_HMMCommandlineBase, self).__init__(cmd, **kwargs)
Example #45
0
 def __init__(self, cmd="hmmscan", **kwargs):
     """Define the hmmscan switch and its two required file arguments, then call the parent initialiser."""
     # Both positional arguments are required file paths.
     required_path = dict(filename=True, is_required=True)
     entries = []
     # Other expert options:
     entries.append(_Switch(["--daemon", "daemon"], "run program as a daemon"))
     # Arguments
     entries.append(
         _Argument(["hmmdb"], "A path to a HMM profile database", **required_path)
     )
     entries.append(
         _Argument(["seqfile"], "A path to a sequence to search in", **required_path)
     )
     self.parameters = entries
     super(HMMScanCommandline, self).__init__(cmd, **kwargs)
Example #46
0
    def __init__(self, cmd="samtools", **kwargs):
        """Populate the ``samtools calmd`` parameters and initialise AbstractCommandline."""
        self.program_name = cmd

        # The sub-command name leads the table.
        table = [_StaticArgument("calmd")]

        # Boolean switches.
        table.extend([
            _Switch(
                ["-E", "E"],
                """Extended BAQ calculation.
                    This option trades specificity for sensitivity,
                    though the effect is minor.""",
            ),
            _Switch(
                ["-e", "e"],
                """Convert the read base to = if it is
                    identical to the aligned reference base.

                    Indel caller does not support the = bases
                    at the moment.""",
            ),
            _Switch(["-u", "u"], "Output uncompressed BAM"),
            _Switch(["-b", "b"], "Output compressed BAM "),
            _Switch(["-S", "S"], "The input is SAM with header lines "),
            _Switch(
                ["-r", "r"],
                """Compute the BQ tag (without -A)
                    or cap base quality by BAQ (with -A).""",
            ),
            _Switch(
                ["-A", "A"],
                """When used jointly with -r this option overwrites
                    the original base quality""",
            ),
        ])

        # The single value-taking option; only integers pass its checker.
        def _only_int(candidate):
            return isinstance(candidate, int)

        table.append(
            _Option(
                ["-C", "C"],
                """Coefficient to cap mapping quality
                    of poorly mapped reads.

                    See the pileup command for details.""",
                equate=False,
                checker_function=_only_int,
            )
        )

        # Required positional files: the BAM input, then the reference FASTA.
        table.append(
            _Argument(
                ["input", "input_file", "in_bam", "infile", "input_bam"],
                "Input BAM",
                filename=True,
                is_required=True,
            )
        )
        table.append(
            _Argument(
                ["reference", "reference_fasta", "ref"],
                "Reference FASTA to be indexed",
                filename=True,
                is_required=True,
            )
        )

        self.parameters = table
        AbstractCommandline.__init__(self, cmd, **kwargs)
Example #47
0
 def __init__(self, cmd="probcons", **kwargs):
     """Declare the PROBCONS switches, options and input argument, then initialise the base class."""

     def flag(names, text):
         # Boolean switch tagged as an "input" parameter.
         return _Switch(names, ["input"], text)

     def valued(names, checker, text):
         # Non-required option tagged as an "input" parameter.
         return _Option(names, ["input"], checker, 0, text, 0)

     def reps_up_to(limit):
         # Accept only values contained in range(0, limit + 1).
         return lambda x: x in range(0, limit + 1)

     # Note that some options cannot be assigned via properties using the
     # original documented option (because hyphens are not valid for names in
     # python), e.g cmdline.pre-training = 3 will not work
     # In these cases the shortened option name should be used
     # cmdline.pre = 3
     self.parameters = [
         flag(["-clustalw", "clustalw"],
              "Use CLUSTALW output format instead of MFA"),
         valued(["-c", "c", "--consistency", "consistency"],
                reps_up_to(5),
                "Use 0 <= REPS <= 5 (default: 2) passes of consistency transformation"),
         valued(["-ir", "--iterative-refinement", "iterative-refinement", "ir"],
                reps_up_to(1000),
                "Use 0 <= REPS <= 1000 (default: 100) passes of iterative-refinement"),
         valued(["-pre", "--pre-training", "pre-training", "pre"],
                reps_up_to(20),
                "Use 0 <= REPS <= 20 (default: 0) rounds of pretraining"),
         flag(["-pairs", "pairs"],
              "Generate all-pairs pairwise alignments"),
         flag(["-viterbi", "viterbi"],
              "Use Viterbi algorithm to generate all pairs (automatically enables -pairs)"),
         flag(["-verbose", "verbose"],
              "Report progress while aligning (default: off)"),
         valued(["-annot", "annot"],
                None,
                "Write annotation for multiple alignment to FILENAME"),
         valued(["-t", "t", "--train", "train"],
                None,
                "Compute EM transition probabilities, store in FILENAME (default: no training)"),
         flag(["-e", "e", "--emissions", "emissions"],
              "Also reestimate emission probabilities (default: off)"),
         valued(["-p", "p", "--paramfile", "paramfile"],
                None,
                "Read parameters from FILENAME"),
         flag(["-a", "--alignment-order", "alignment-order", "a"],
              "Print sequences in alignment order rather than input order (default: off)"),
         # Input file name
         _Argument(["input"], ["input", "file"], None, 1,
                   "Input file name. Must be multiple FASTA alignment "
                   "(MFA) format"),
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
Example #48
0
 def __init__(self, cmd="probcons", **kwargs):
     """Describe the PROBCONS command line as a table and expand it into parameter objects."""
     # Note that some options cannot be assigned via properties using the
     # original documented option (because hyphens are not valid for names in
     # python), e.g cmdline.pre-training = 3 will not work
     # In these cases the shortened option name should be used
     # cmdline.pre = 3
     #
     # Each row is (kind, names, checker, description); the checker slot is
     # ignored for switches.
     table = [
         ("switch", ["-clustalw", "clustalw"], None,
          "Use CLUSTALW output format instead of MFA"),
         ("option", ["-c", "c", "--consistency", "consistency"],
          lambda x: x in range(0, 6),
          "Use 0 <= REPS <= 5 (default: 2) passes of consistency transformation"),
         ("option", ["-ir", "--iterative-refinement", "iterative-refinement", "ir"],
          lambda x: x in range(0, 1001),
          "Use 0 <= REPS <= 1000 (default: 100) passes of iterative-refinement"),
         ("option", ["-pre", "--pre-training", "pre-training", "pre"],
          lambda x: x in range(0, 21),
          "Use 0 <= REPS <= 20 (default: 0) rounds of pretraining"),
         ("switch", ["-pairs", "pairs"], None,
          "Generate all-pairs pairwise alignments"),
         ("switch", ["-viterbi", "viterbi"], None,
          "Use Viterbi algorithm to generate all pairs (automatically enables -pairs)"),
         ("switch", ["-verbose", "verbose"], None,
          "Report progress while aligning (default: off)"),
         ("option", ["-annot", "annot"], None,
          "Write annotation for multiple alignment to FILENAME"),
         ("option", ["-t", "t", "--train", "train"], None,
          "Compute EM transition probabilities, store in FILENAME (default: no training)"),
         ("switch", ["-e", "e", "--emissions", "emissions"], None,
          "Also reestimate emission probabilities (default: off)"),
         ("option", ["-p", "p", "--paramfile", "paramfile"], None,
          "Read parameters from FILENAME"),
         ("switch", ["-a", "--alignment-order", "alignment-order", "a"], None,
          "Print sequences in alignment order rather than input order (default: off)"),
     ]

     expanded = []
     for kind, names, checker, text in table:
         if kind == "switch":
             expanded.append(_Switch(names, ["input"], text))
         else:
             expanded.append(_Option(names, ["input"], checker, 0, text, 0))

     # Input file name
     expanded.append(
         _Argument(["input"], ["input", "file"], None, 1,
                   "Input file name. Must be multiple FASTA alignment (MFA) format")
     )

     self.parameters = expanded
     AbstractCommandline.__init__(self, cmd, **kwargs)
Example #49
0
    def __init__(self, cmd="bbcontacts", **kwargs):
        """Validate the secondary-structure keywords and declare the bbcontacts parameters.

        Exactly one of the ``dssp_file`` / ``psipred_file`` keywords must be
        supplied.  The check looks only at which keywords are present, not at
        their values, and runs before any parameter object is created.

        Args:
            cmd: Program name; defaults to ``"bbcontacts"``.
            **kwargs: Initial parameter values, forwarded unchanged to
                ``AbstractCommandline.__init__``.

        Raises:
            RuntimeError: If both ``dssp_file`` and ``psipred_file`` are
                given, or if neither is given.
        """
        # TODO: figure a way to group CL arguments as in `mutually_exclusive_group`
        # Test membership on the dict itself, once per keyword, instead of
        # copying the keys into a fresh list for every test.
        has_dssp = "dssp_file" in kwargs
        has_psipred = "psipred_file" in kwargs
        if has_dssp and has_psipred:
            msg = "Provide only one of [dssp_file|psipred_file]!"
            raise RuntimeError(msg)
        if not (has_dssp or has_psipred):
            msg = "Provide one of [dssp_file|psipred_file]!"
            raise RuntimeError(msg)

        self.parameters = [
            _Option(["-c", "config_file"], "bbcontacts configuration file", filename=True, equate=False),
            _Option(
                ["-s", "smoothing_size"],
                "Perform local background correction of the coupling matrix "
                "before decoding: from each coupling, subtract the average "
                "coupling (smoothed background) over an area extending by "
                "SMOOTHINGSIZE in each direction [default=10, use 0 for no "
                "local background correction]",
                equate=False,
            ),
            _Switch(
                ["-l", "long_predictions"],
                "Turn off (slow) prediction-shortening mode (this mode is on "
                "by default but will only get triggered when long predictions occur)",
            ),
            _Option(
                ["-n", "pdb_name"],
                "Provide a PDB identifier (when also using -e, this will be the "
                "PDB name to look for in EVALUATIONFILE)",
                equate=False,
            ),
            _Option(
                ["-e", "evaluation_file"],
                "Provide a file containing the true contacts (BetaSheet916.dat, "
                "BetaSheet1452.dat or same format) for evaluation",
                filename=True,
                equate=False,
            ),
            # Required positional arguments.
            _Argument(["matfile"], "CCMpred-like coupling matrix", filename=True, is_required=True),
            _Argument(["diversity_score"], "sequence-dependent diversity score", is_required=True),
            _Argument(["prefix"], "output prefix", is_required=True),
            # The two mutually exclusive secondary-structure inputs checked above.
            _Option(["-d", "dssp_file"], "DSSP secondary structure prediction file", filename=True, equate=False),
            _Option(["-p", "psipred_file"], "PSIPRED secondary structure prediction file", filename=True, equate=False),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
Example #50
0
 def __init__(self, cmd=None, **kwargs):
     """Prepend the folding options shared by this command-line family.

     The shared options and switches are built first and placed in front of
     whatever ``self.parameters`` the subclass has already defined.  If the
     subclass defined none, the shared list is used on its own.

     Args:
         cmd: Program name, forwarded to ``AbstractCommandline.__init__``.
         **kwargs: Initial parameter values, forwarded unchanged.
     """
     # NOTE(review): several final names below contain a hyphen (for example
     # 'dang-ignore'); confirm the base class accepts them as property names.
     extra_parameters = [
         _Option(['-C','constrains'],'''Calculate structures subject to constraints.   The  program  reads  first  the
           sequence,  then  a string containing constraints on the structure encoded with
           the symbols: | (the corresponding base  has  to  be  paired  x  (the  base  is
           unpaired) < (base i is paired with a base j>i) > (base i is paired with a base
           j<i) and matching brackets ( ) (base i pairs base j)  With  the  exception  of
           "|", constraints will disallow all pairs conflicting with the constraint. This
           is usually sufficient to enforce the constraint, but occasionally a  base  may
           stay  unpaired in spite of constraints. PF folding ignores constraints of type
           "|".
     ''',
                 filename=True,equate=False),
         _Option(['-T','temperature'],'Rescale energy parameters to a temperature of temp C. Default is 37C.',
                 equate=False),
         _Option(['-P','paramfile'],'Read energy parameters from paramfile, instead of using the default  parameter set.',
                 equate=False,filename=True),
         _Option(['-S','scale'],'In the calculation of the pf use scale*mfe as an  estimate  for  the  ensemble free  energy (used to avoid overflows)',
                 equate=False),
         _Option(['-nsp','pairs'],'Allow other pairs',equate=False),
         _Switch(['-d0','dang-ignore'],'Ignore dangling ends'),
         _Switch(['-d1','dang-unpaired'],'only unpaired bases  can  participate  in  at most one dangling end, this is the default for mfe folding but unsupported for the partition function folding'),
         _Switch(['-d2','dang-helix'],'dangling energies  will  be  added  for  the bases adjacent to a helix on both sides in any case'),
         _Switch(['-d3','dang-coaxial'],'mfe  folding  will  allow  coaxial  stacking of adjacent helices in multi-loops'),
         # The flag was written without its leading dash, unlike every other
         # switch in this list, so the bare word "noLP" would have been emitted.
         _Switch(['-noLP','noLonelyPairs'],'Produce structures without lonely pairs (helices of length 1)'),
         _Switch(['-noGU','noGU'],'Do not allow GU pairs'),
         _Switch(['-noCloseGU','noCloseGU'],'Do not allow GU pairs at the end of helices'),
         _Switch(['-circ','circular'],'Assume  a  circular (instead of linear) RNA molecule'),
         _Switch(['-noPS','noPostScript'],'Do not produce postscript drawing of the mfe structure')
         ]
     try:
         #Insert extra parameters - at the start just in case there
         #are any arguments which must come last:
         self.parameters = extra_parameters + self.parameters
     except AttributeError:
         #Should we raise an error?  The subclass should have set this up!
         self.parameters = extra_parameters
     AbstractCommandline.__init__(self, cmd, **kwargs)
    def __init__(self, cmd="gt", **kwargs):
        """Set up the parameters of the genometools ``merge`` sub-command.

        Example
        >>> x = Merge(infiles=["test.gff3"], tidy=True)
        >>> print(x)
        gt merge -tidy test.gff3
        """
        self.program_name = "{} merge".format(cmd)

        # Sub-command name, emitted as a fixed argument.
        subcommand = _StaticArgument("merge")

        # Switches that come before the output option.
        tidy = _Switch(
            ["-tidy", "tidy"],
            "Try to tidy the GFF3 files up during parsing.",
        )
        retain_ids = _Switch(
            ["-retainids", "retainids"],
            "when available, use the original IDs provided in the source file",
        )

        # Output redirection.
        out_path = _Option(
            ["-o", "outfile"],
            "redirect output to specified file",
            checker_function=check_is_str,
            filename=True,
            equate=False,
        )

        # Output handling and informational switches.
        gzip_out = _Switch(["-gzip", "gzip"], "write gzip compressed output file.")
        bzip2_out = _Switch(["-bzip2", "bzip2"], "write bzip2 compressed output file.")
        force_write = _Switch(["-force", "force"], "force writing to output file")
        show_help = _Switch(["-help", "help"], "Show help and exit")
        show_version = _Switch(
            ["-version", "version"],
            "display version information and exit",
        )

        # The required list of input files closes the parameter list.
        gff3_inputs = _ArgumentList(
            ["infiles"],
            "The GFF3 files to operate on.",
            checker_function=check_is_list_of_str,
            filename=True,
            is_required=True,
        )

        self.parameters = [
            subcommand,
            tidy,
            retain_ids,
            out_path,
            gzip_out,
            bzip2_out,
            force_write,
            show_help,
            show_version,
            gff3_inputs,
        ]

        super().__init__(cmd, **kwargs)
Example #52
0
 def __init__(self, cmd="bwa", **kwargs):
     """Declare the ``bwa index`` parameters and initialise AbstractCommandline."""
     entries = [_StaticArgument("index")]

     # Options that take a value.
     entries.append(
         _Option(["-a", "algorithm"],
                 "BWT construction algorithm: bwtsw or is [auto]",
                 equate=False))
     entries.append(
         _Option(["-p", "prefix"],
                 "prefix of the index [same as fasta name]",
                 equate=False))
     # The description below is a single literal continued with a backslash;
     # it is kept verbatim so the help text stays byte-identical.
     entries.append(
         _Option(["-b", "block_size"],
             "block size for the bwtsw algorithm \
             (effective with -a bwtsw) [10000000]",
             equate=False))

     # Switch for the alternative index file naming.
     entries.append(
         _Switch(["-6", "index_64"],
                 "index files named as <in.fasta>.64.* instead of <in.fasta>.*"))

     # Required positional input.
     entries.append(
         _Argument(["in_fasta"],
                   "Input FASTA file",
                   filename=True,
                   is_required=True))

     self.parameters = entries
     AbstractCommandline.__init__(self, cmd, **kwargs)
Example #53
0
 def __init__(self, cmd="phastCons", **kwargs):
     """Generate the phastCons parameters from name tuples and initialise the base class."""
     required_names = ("alignment", "models")
     flags_before_switch = (
         "--target-coverage",
         "--expected-length",
         "--rho",
         "--msa-format",
         "--estimate-trees",
     )
     flags_after_switch = ("--most-conserved", "--estimate-rho")

     # Required positional arguments come first.
     entries = [
         _Argument([name], ["input"], None, True, "") for name in required_names
     ]
     # Options are split around the single switch to keep the original order.
     entries.extend(
         [_Option([flag], ["input"], None, False, "", False)
          for flag in flags_before_switch]
     )
     entries.append(_Switch(["--no-post-probs"], ["input"]))
     entries.extend(
         [_Option([flag], ["input"], None, False, "", False)
          for flag in flags_after_switch]
     )

     self.parameters = entries
     AbstractCommandline.__init__(self, cmd, **kwargs)
    def __init__(self, cmd="raxmlHPC", **kwargs):
        """Initialize the class."""
        self.parameters = [
            _Option(
                ["-a", "weight_filename"],
                "Name of a column weight file to assign individual weights "
                "to each column of the alignment. Those weights must be "
                "integers separated by any type and number of whitespaces "
                "within a separate file.",
                filename=True,
                equate=False,
            ),
            _Option(["-b", "bootstrap_seed"],
                    "Random seed for bootstrapping.",
                    equate=False),
            _Option(
                ["-c", "num_categories"],
                "Number of distinct rate categories for RAxML when "
                "evolution model is set to GTRCAT or GTRMIX."
                "Individual per-site rates are categorized into this "
                "many rate categories to accelerate computations. "
                "Default: 25.",
                equate=False,
            ),
            _Switch(
                ["-d", "random_starting_tree"],
                "Start ML optimization from random starting tree.",
            ),
            _Option(
                ["-e", "epsilon"],
                "Set model optimization precision in log likelihood units "
                "for final optimization of tree topology under MIX/MIXI "
                "or GAMMA/GAMMAI."
                "Default: 0.1 for models not using proportion of "
                "invariant sites estimate; 0.001 for models using "
                "proportion of invariant sites estimate.",
                equate=False,
            ),
            _Option(
                ["-E", "exclude_filename"],
                "An exclude file name, containing a specification of "
                "alignment positions you wish to exclude.  Format is "
                "similar to Nexus, the file shall contain entries like "
                "'100-200 300-400'; to exclude a single column write, "
                "e.g., '100-100'. If you use a mixed model, an "
                "appropriately adapted model file will be written.",
                filename=True,
                equate=False,
            ),
            _Option(
                ["-f", "algorithm"],
                r"""
                        Select algorithm:

                        a: Rapid Bootstrap analysis and search for best-scoring ML
                        tree in one program run.

                        b: Draw bipartition information on a tree provided with '-t'
                        based on multiple trees (e.g. form a bootstrap) in a file
                        specifed by '-z'.

                        c: Check if the alignment can be properly read by RAxML.

                        d: New rapid hill-climbing (DEFAULT).

                        e: Optimize model+branch lengths for given input tree under
                        GAMMA/GAMMAI only.

                        g: Compute per site log Likelihoods for one ore more trees
                        passed via '-z' and write them to a file that can be read
                        by CONSEL.

                        h: Compute log likelihood test (SH-test) between best tree
                        passed via '-t' and a bunch of other trees passed via '-z'.

                        i: Perform a really thorough bootstrap, refinement of final
                        bootstrap tree under GAMMA and a more exhaustive algorithm.

                        j: Generate a bunch of bootstrapped alignment files from an
                        original alignemnt file.

                        m: Compare bipartitions between two bunches of trees passed
                        via '-t' and '-z' respectively. This will return the
                        Pearson correlation between all bipartitions found in the
                        two tree files. A file called
                        RAxML_bipartitionFrequencies.outputFileName will be
                        printed that contains the pair-wise bipartition
                        frequencies of the two sets.

                        n: Compute the log likelihood score of all trees contained
                        in a tree file provided by '-z' under GAMMA or
                        GAMMA+P-Invar.

                        o: Old and slower rapid hill-climbing.

                        p: Perform pure stepwise MP addition of new sequences to an
                        incomplete starting tree.

                        s: Split up a multi-gene partitioned alignment into the
                        respective subalignments.

                        t: Do randomized tree searches on one fixed starting tree.

                        w: Compute ELW test on a bunch of trees passed via '-z'.

                        x: Compute pair-wise ML distances, ML model parameters will
                        be estimated on an MP starting tree or a user-defined
                        tree passed via '-t', only allowed for GAMMA-based models
                        of rate heterogeneity.
                        """,
                checker_function=(
                    lambda x: isinstance(x, basestring) and len(x) == 1),
                equate=False,
            ),
            _Option(
                ["-g", "grouping_constraint"],
                "File name of a multifurcating constraint tree. "
                "this tree does not need to be comprehensive, i.e. "
                "contain all taxa.",
                filename=True,
                equate=False,
            ),
            _Option(
                ["-i", "rearrangements"],
                "Initial rearrangement setting for the subsequent "
                "application of topological changes phase.",
                equate=False,
            ),
            _Switch(
                ["-j", "checkpoints"],
                "Write checkpoints (intermediate tree topologies).",
            ),
            _Switch(
                ["-k", "bootstrap_branch_lengths"],
                "Print bootstrapped trees with branch lengths. "
                "The bootstraps will run a bit longer, because model "
                "parameters will be optimized at the end of each run. "
                "Use with CATMIX/PROTMIX or GAMMA/GAMMAI.",
            ),
            _Option(
                ["-l", "cluster_threshold"],
                "Threshold for sequence similarity clustering. "
                "RAxML will then print out an alignment to a file "
                "called sequenceFileName.reducedBy.threshold that "
                "only contains sequences <= the specified threshold "
                "that must be between 0.0 and 1.0. RAxML uses the "
                "QT-clustering algorithm to perform this task. "
                "In addition, a file called "
                "RAxML_reducedList.outputFileName will be written "
                "that contains clustering information.",
                equate=False,
            ),
            _Option(
                ["-L", "cluster_threshold_fast"],
                "Same functionality as '-l', but uses a less "
                "exhaustive and thus faster clustering algorithm. "
                "This is intended for very large datasets with more "
                "than 20,000-30,000 sequences.",
                equate=False,
            ),
            _Option(
                ["-m", "model"],
                r"""Model of Nucleotide or Amino Acid Substitution:

                        NUCLEOTIDES:

                        GTRCAT         : GTR + Optimization of substitution rates + Optimization of site-specific
                        evolutionary rates which are categorized into numberOfCategories distinct
                        rate categories for greater computational efficiency
                        if you do a multiple analysis with  '-#' or '-N' but without bootstrapping the program
                        will use GTRMIX instead

                        GTRGAMMA       : GTR + Optimization of substitution rates + GAMMA model of rate
                        heterogeneity (alpha parameter will be estimated)

                        GTRMIX         : Inference of the tree under GTRCAT
                        and thereafter evaluation of the final tree topology under GTRGAMMA

                        GTRCAT_GAMMA   : Inference of the tree with site-specific evolutionary rates.
                        However, here rates are categorized using the 4 discrete GAMMA rates.
                        Evaluation of the final tree topology under GTRGAMMA

                        GTRGAMMAI      : Same as GTRGAMMA, but with estimate of proportion of invariable sites

                        GTRMIXI        : Same as GTRMIX, but with estimate of proportion of invariable sites

                        GTRCAT_GAMMAI  : Same as GTRCAT_GAMMA, but with estimate of proportion of invariable sites

                        AMINO ACIDS:

                        PROTCATmatrixName[F]    : specified AA matrix + Optimization of substitution rates + Optimization of site-specific
                        evolutionary rates which are categorized into numberOfCategories distinct
                        rate categories for greater computational efficiency
                        if you do a multiple analysis with  '-#' or '-N' but without bootstrapping the program
                        will use PROTMIX... instead

                        PROTGAMMAmatrixName[F]  : specified AA matrix + Optimization of substitution rates + GAMMA model of rate
                        heterogeneity (alpha parameter will be estimated)

                        PROTMIXmatrixName[F]    : Inference of the tree under specified AA matrix + CAT
                        and thereafter evaluation of the final tree topology under specified AA matrix + GAMMA

                        PROTCAT_GAMMAmatrixName[F] : Inference of the tree under specified AA matrix and site-specific evolutionary rates.
                        However, here rates are categorized using the 4 discrete GAMMA rates.
                        Evaluation of the final tree topology under specified AA matrix + GAMMA

                        PROTGAMMAImatrixName[F] : Same as PROTGAMMAmatrixName[F], but with estimate of proportion of invariable sites

                        PROTMIXImatrixName[F]   : Same as PROTMIXmatrixName[F], but with estimate of proportion of invariable sites

                        PROTCAT_GAMMAImatrixName[F] : Same as PROTCAT_GAMMAmatrixName[F], but with estimate of proportion of invariable sites

                        Available AA substitution models: DAYHOFF, DCMUT, JTT, MTREV, WAG, RTREV, CPREV, VT, BLOSUM62, MTMAM, GTR
                        With the optional 'F' appendix you can specify if you want to use empirical base frequencies
                        Please not that for mixed models you can in addition specify the per-gene AA model in
                        the mixed model file (see manual for details)
                        """,
                equate=False,
            ),
            _Switch(
                ["-M", "partition_branch_lengths"],
                "Switch on estimation of individual per-partition "
                "branch lengths. Only has effect when used in "
                "combination with 'partition_filename' ('-q'). "
                "Branch lengths for individual partitions will be "
                "printed to separate files.  A weighted average of the "
                "branch lengths is computed by using the respective "
                "partition lengths. ",
            ),
            _Option(
                ["-n", "name"],
                "Name used in the output files.",
                filename=True,
                equate=False,
            ),
            _Option(
                ["-o", "outgroup"],
                "Name of a single outgroup or a comma-separated list "
                "of outgroups, eg '-o Rat' or '-o Rat,Mouse'. In case "
                "that multiple outgroups are not monophyletic the "
                "first name in the list will be selected as outgroup. "
                "Don't leave spaces between taxon names!",
                checker_function=lambda x: len(x.split()) == 1,
                equate=False,
            ),
            _Option(
                ["-q", "partition_filename"],
                "File name containing the assignment of models to "
                "alignment partitions for multiple models of "
                "substitution. For the syntax of this file please "
                "consult the RAxML manual.",
                filename=True,
                equate=False,
            ),
            _Option(
                ["-p", "parsimony_seed"],
                "Random number seed for the parsimony inferences. "
                "This allows you to reproduce your results and will "
                "help developers debug the program. This option HAS "
                "NO EFFECT in the parallel MPI version.",
                equate=False,
            ),
            _Option(
                ["-P", "protein_model"],
                "File name of a user-defined AA (Protein) substitution "
                "model. This file must contain 420 entries, the first "
                "400 being the AA substitution rates (this must be a "
                "symmetric matrix) and the last 20 are the empirical "
                "base frequencies.",
                filename=True,
                equate=False,
            ),
            _Option(
                ["-r", "binary_constraint"],
                "File name of a binary constraint tree. "
                "This tree does not need to be comprehensive, i.e. "
                "contain all taxa.",
                filename=True,
                equate=False,
            ),
            _Option(
                ["-s", "sequences"],
                "Name of the alignment data file, in PHYLIP format.",
                filename=True,
                equate=False,
            ),
            _Option(
                ["-t", "starting_tree"],
                "File name of a user starting tree, in Newick format.",
                filename=True,
                equate=False,
            ),
            _Option(
                ["-T", "threads"],
                "Number of threads to run. "
                "PTHREADS VERSION ONLY! "
                "Make sure to set this at most the number of CPUs "
                "you have on your machine, otherwise, there will be "
                "a huge performance decrease!",
                equate=False,
            ),
            _Option(
                ["-u", "num_bootstrap_searches"],
                "Number of multiple bootstrap searches per replicate. "
                "Use this to obtain better ML trees for each "
                "replicate. Default: 1 ML search per bootstrap "
                "replicate.",
                equate=False,
            ),
            _Switch(["-v", "version"], "Display version information."),
            _Option(
                ["-w", "working_dir"],
                "Name of the working directory where RAxML will "
                "write its output files. Default: current directory.",
                filename=True,
                equate=False,
            ),
            _Option(
                ["-x", "rapid_bootstrap_seed"],
                "Random seed for rapid bootstrapping.",
                equate=False,
            ),
            _Switch(
                ["-y", "parsimony"],
                "Only compute a parsimony starting tree, then exit.",
            ),
            _Option(
                ["-z", "bipartition_filename"],
                "Name of a file containing multiple trees, e.g. from "
                "a bootstrap run, that shall be used to draw "
                "bipartition values onto a tree provided with '-t'. "
                "It can also be used to compute per-site log "
                "likelihoods in combination with '-f g', and to read "
                "a bunch of trees for a couple of other options "
                "('-f h', '-f m', '-f n').",
                filename=True,
                equate=False,
            ),
            _Option(
                ["-N", "-#", "num_replicates"],
                "Number of alternative runs on distinct starting trees. "
                "In combination with the '-b' option, this will invoke a "
                "multiple bootstrap analysis. "
                "DEFAULT: 1 single analysis."
                "Note that '-N' has been added as an alternative since "
                "'-#' sometimes caused problems with certain MPI job "
                "submission systems, since '-#' is often used to start "
                "comments. ",
                equate=False,
            ),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
        # ENH: enforce -s, -n and -m
        if not self.parsimony_seed:
            self.parsimony_seed = 10000
Example #55
0
 def __init__(self, cmd="muscle", **kwargs):
     CLUSTERING_ALGORITHMS   = ["upgma", "upgmb", "neighborjoining"]
     DISTANCE_MEASURES_ITER1 = ["kmer6_6", "kmer20_3", "kmer20_4", "kbit20_3",
                                "kmer4_6"]
     DISTANCE_MEASURES_ITER2 = DISTANCE_MEASURES_ITER1 + \
                               ["pctid_kimura", "pctid_log"]
     OBJECTIVE_SCORES        = ["sp", "ps", "dp", "xp", "spf", "spm"]
     TREE_ROOT_METHODS       = ["pseudo", "midlongestspan", "minavgleafdist"]
     SEQUENCE_TYPES          = ["protein", "nucleo", "auto"]
     WEIGHTING_SCHEMES       = ["none", "clustalw", "henikoff", "henikoffpb",
                                "gsc", "threeway"]
     self.parameters = \
        [
         #Can't use "in" as the final alias as this is a reserved word in python:
         _Option(["-in", "in", "input"],
                 "Input filename",
                 filename=True,
                 equate=False),
         _Option(["-out", "out"],
                 "Output filename",
                 filename=True,
                 equate=False),
         _Switch(["-diags", "diags"],
                 "Find diagonals (faster for similar sequences)"),
         _Switch(["-profile", "profile"],
                 "Perform a profile alignment"),
         _Option(["-in1", "in1"],
                 "First input filename for profile alignment",
                 filename=True,
                 equate=False),
         _Option(["-in2", "in2"],
                 "Second input filename for a profile alignment",
                 filename=True,
                 equate=False),
         #anchorspacing   Integer              32                 Minimum spacing between
         _Option(["-anchorspacing", "anchorspacing"],
                 "Minimum spacing between anchor columns",
                 checker_function=lambda x: isinstance(x, int),
                 equate=False),
         #center          Floating point       [1]                Center parameter.
         #                                                        Should be negative.
         _Option(["-center", "center"],
                 "Center parameter - should be negative",
                 checker_function=lambda x: isinstance(x, float),
                 equate=False),
         #cluster1        upgma                upgmb              Clustering method.
         _Option(["-cluster1", "cluster1"],
                 "Clustering method used in iteration 1",
                 checker_function=lambda x: x in CLUSTERING_ALGORITHMS,
                 equate=False),
         #cluster2        upgmb                                   cluster1 is used in
         #                neighborjoining                         iteration 1 and 2,
         #                                                        cluster2 in later
         #                                                        iterations.
         _Option(["-cluster2", "cluster2"],
                 "Clustering method used in iteration 2",
                 checker_function=lambda x: x in CLUSTERING_ALGORITHMS,
                 equate=False),
         #diaglength      Integer              24                 Minimum length of
         #                                                        diagonal.
         _Option(["-diaglength", "diaglength"],
                 "Minimum length of diagonal",
                 checker_function=lambda x: isinstance(x, int),
                 equate=True),
         #diagmargin      Integer              5                  Discard this many
         #                                                        positions at ends of
         #                                                        diagonal.
         _Option(["-diagmargin", "diagmargin"],
                 "Discard this many positions at ends of diagonal",
                 checker_function=lambda x: isinstance(x, int),
                 equate=False),
         #distance1       kmer6_6              Kmer6_6 (amino) or Distance measure for
         #                kmer20_3             Kmer4_6 (nucleo)   iteration 1.
         #                kmer20_4
         #                kbit20_3
         #                kmer4_6
         _Option(["-distance1", "distance1"],
                 "Distance measure for iteration 1",
                 checker_function=lambda x: x in DISTANCE_MEASURES_ITER1,
                 equate=False),
         #distance2       kmer6_6              pctid_kimura       Distance measure for
         #                kmer20_3                                iterations 2, 3 ...
         #                kmer20_4
         #                kbit20_3
         #                pctid_kimura
         #                pctid_log
         _Option(["-distance2", "distance2"],
                 "Distance measure for iteration 2",
                 checker_function=lambda x: x in DISTANCE_MEASURES_ITER2,
                 equate=False),
         #gapopen         Floating point       [1]                The gap open score.
         #                                                        Must be negative.
         _Option(["-gapopen", "gapopen"],
                 "Gap open score - negative number",
                 checker_function=lambda x: isinstance(x, float),
                 equate=False),
         #hydro           Integer              5                  Window size for
         #                                                        determining whether a
         #                                                        region is hydrophobic.
         _Option(["-hydro", "hydro"],
                 "Window size for hydrophobic region",
                 checker_function=lambda x: isinstance(x, int),
                 equate=False),
         #hydrofactor     Floating point       1.2                Multiplier for gap
         #                                                        open/close penalties in
         #                                                        hydrophobic regions.
         _Option(["-hydrofactor", "hydrofactor"],
                 "Multiplier for gap penalties in hydrophobic regions",
                 checker_function=lambda x: isinstance(x, float),
                 equate=False),
         #log             File name            None.              Log file name (delete
         #                                                        existing file).
         _Option(["-log", "log"],
                 "Log file name",
                 filename=True,
                 equate=False),
         #loga            File name            None.              Log file name (append
         #                                                        to existing file).
         _Option(["-loga", "loga"],
                 "Log file name (append to existing file)",
                 filename=True,
                 equate=False),
         #maxdiagbreak    Integer              1                  Maximum distance
         #                                                        between two diagonals
         #                                                        that allows them to
         #                                                        merge into one
         #                                                        diagonal.
         _Option(["-maxdiagbreak", "maxdiagbreak"],
                 "Maximum distance between two diagonals that allows "
                 "them to merge into one diagonal",
                 checker_function=lambda x: isinstance(x, int),
                 equate=False),
         #maxhours        Floating point       None.              Maximum time to run in
         #                                                        hours. The actual time
         #                                                        may exceed the
         #                                                        requested limit by a
         #                                                        few minutes. Decimals
         #                                                        are allowed, so 1.5
         #                                                        means one hour and 30
         #                                                        minutes.
         _Option(["-maxhours", "maxhours"],
                 "Maximum time to run in hours",
                 checker_function=lambda x: isinstance(x, float),
                 equate=False),
         #maxiters        Integer 1, 2 ...     16                 Maximum number of
         #                                                        iterations.
         _Option(["-maxiters", "maxiters"],
                 "Maximum number of iterations",
                 checker_function=lambda x: isinstance(x, int),
                 equate=False),
         #maxtrees        Integer              1                  Maximum number of new
         #                                                        trees to build in
         #                                                        iteration 2.
         _Option(["-maxtrees", "maxtrees"],
                 "Maximum number of trees to build in iteration 2",
                 checker_function=lambda x: isinstance(x, int),
                 equate=False),
         #minbestcolscore Floating point       [1]                Minimum score a column
         #                                                        must have to be an
         #                                                        anchor.
         _Option(["-minbestcolscore", "minbestcolscore"],
                 "Minimum score a column must have to be an anchor",
                 checker_function=lambda x: isinstance(x, float),
                 equate=False),
         #minsmoothscore  Floating point       [1]                Minimum smoothed score
         #                                                        a column must have to
         #                                                        be an anchor.
         _Option(["-minsmoothscore", "minsmoothscore"],
                 "Minimum smoothed score a column must have to "
                 "be an anchor",
                 checker_function=lambda x: isinstance(x, float),
                 equate=False),
         #objscore        sp                   spm                Objective score used by
         #                ps                                      tree dependent
         #                dp                                      refinement.
         #                xp                                      sp=sum-of-pairs score.
         #                spf                                     spf=sum-of-pairs score
         #                spm                                     (dimer approximation)
         #                                                        spm=sp for < 100 seqs,
         #                                                        otherwise spf
         #                                                        dp=dynamic programming
         #                                                        score.
         #                                                        ps=average profile-
         #                                                        sequence score.
         #                                                        xp=cross profile score.
         _Option(["-objscore", "objscore"],
                 "Objective score used by tree dependent refinement",
                 checker_function=lambda x: x in OBJECTIVE_SCORES,
                 equate=False),
         #root1           pseudo               pseudo             Method used to root
         _Option(["-root1", "root1"],
                 "Method used to root tree in iteration 1",
                 checker_function=lambda x: x in TREE_ROOT_METHODS,
                 equate=False),
         #root2           midlongestspan                          tree; root1 is used in
         #                minavgleafdist                          iteration 1 and 2,
         #                                                        root2 in later
         #                                                        iterations.
         _Option(["-root2", "root2"],
                 "Method used to root tree in iteration 2",
                 checker_function=lambda x: x in TREE_ROOT_METHODS,
                 equate=False),
         #seqtype         protein              auto               Sequence type.
         #                nucleo
         #                auto
         _Option(["-seqtype", "seqtype"],
                 "Sequence type",
                 checker_function=lambda x: x in SEQUENCE_TYPES,
                 equate=False),
         #smoothscoreceil Floating point       [1]                Maximum value of column
         #                                                        score for smoothing
         #                                                        purposes.
         _Option(["-smoothscoreceil", "smoothscoreceil"],
                 "Maximum value of column score for smoothing",
                 checker_function=lambda x: isinstance(x, float),
                 equate=False),
         #smoothwindow    Integer              7                  Window used for anchor
         #                                                        column smoothing.
         _Option(["-smoothwindow", "smoothwindow"],
                 "Window used for anchor column smoothing",
                 checker_function=lambda x: isinstance(x, int),
                 equate=False),
         #SUEFF           Floating point value 0.1                Constant used in UPGMB
         #                between 0 and 1.                        clustering. Determines
         #                                                        the relative fraction
         #                                                        of average linkage
         #                                                        (SUEFF) vs. nearest-
         #                                                        neighbor linkage (1 -
         #                                                        SUEFF).
         _Option(["-sueff", "sueff"],
                 "Constant used in UPGMB clustering",
                 checker_function=lambda x: isinstance(x, float),
                 equate=False),
         #tree1           File name            None               Save tree produced in
         _Option(["-tree1", "tree1"],
                 "Save Newick tree from iteration 1",
                 equate=False),
         #tree2                                                   first or second
         #                                                        iteration to given file
         #                                                        in Newick (Phylip-
         #                                                        compatible) format.
         _Option(["-tree2", "tree2"],
                 "Save Newick tree from iteration 2",
                 equate=False),
         #weight1         none                 clustalw           Sequence weighting
         _Option(["-weight1", "weight1"],
                 "Weighting scheme used in iteration 1",
                 checker_function=lambda x: x in WEIGHTING_SCHEMES,
                 equate=False),
         #weight2         henikoff                                scheme.
         #                henikoffpb                              weight1 is used in
         #                gsc                                     iterations 1 and 2.
         #                clustalw                                weight2 is used for
         #                threeway                                tree-dependent
         #                                                        refinement.
         #                                                        none=all sequences have
         #                                                        equal weight.
         #                                                        henikoff=Henikoff &
         #                                                        Henikoff weighting
         #                                                        scheme.
         #                                                        henikoffpb=Modified
         #                                                        Henikoff scheme as used
         #                                                        in PSI-BLAST.
         #                                                        clustalw=CLUSTALW
         #                                                        method.
         #                                                        threeway=Gotoh three-
         #                                                        way method.
         _Option(["-weight2", "weight2"],
                 "Weighting scheme used in iteration 2",
                 checker_function=lambda x: x in WEIGHTING_SCHEMES,
                 equate=False),
         #################### FORMATS #######################################
         # Multiple formats can be specified on the command line
         # If -msf appears it will be used regardless of other formats
         # specified. If -clw appears (and not -msf), clustalw format will be
         # used regardless of other formats specified. If both -clw and
         # -clwstrict are specified -clwstrict will be used regardless of
         # other formats specified. If -fasta is specified and not -msf,
         # -clw, or clwstrict, fasta will be used. If -fasta and -html are
         # specified -fasta will be used. Only if -html is specified alone
         # will html be used. I kid ye not.
         #clw                no              Write output in CLUSTALW format (default is
         #                                   FASTA).
         _Switch(["-clw", "clw"],
                 "Write output in CLUSTALW format (with a MUSCLE header)"),
         #clwstrict          no              Write output in CLUSTALW format with the
         #                                   "CLUSTAL W (1.81)" header rather than the
         #                                   MUSCLE version. This is useful when a post-
         #                                   processing step is picky about the file
         #                                   header.
         _Switch(["-clwstrict", "clwstrict"],
                 "Write output in CLUSTALW format with version 1.81 header"),
         #fasta              yes             Write output in FASTA format. Alternatives
         #                                   include clw,
         #                                   clwstrict, msf and html.
         _Switch(["-fasta", "fasta"],
                 "Write output in FASTA format"),
         #html               no              Write output in HTML format (default is
         #                                   FASTA).
         _Switch(["-html", "html"],
                 "Write output in HTML format"),
         #msf                no              Write output in MSF format (default is
         #                                   FASTA).
         _Switch(["-msf", "msf"],
                 "Write output in MSF format"),
         #Phylip interleaved - undocumented as of 3.7
         _Switch(["-phyi", "phyi"],
                 "Write output in PHYLIP interleaved format"),
         #Phylip sequential - undocumented as of 3.7
         _Switch(["-phys", "phys"],
                 "Write output in PHYLIP sequential format"),
         ################## Additional specified output files #########
         _Option(["-phyiout", "phyiout"],
                 "Write PHYLIP interleaved output to specified filename",
                 filename=True,
                 equate=False),
         _Option(["-physout", "physout"],"Write PHYLIP sequential format to specified filename",
                 filename=True,
                 equate=False),
         _Option(["-htmlout", "htmlout"],"Write HTML output to specified filename",
                 filename=True,
                 equate=False),
         _Option(["-clwout", "clwout"],
                 "Write CLUSTALW output (with MUSCLE header) to specified "
                 "filename",
                 filename=True,
                 equate=False),
         _Option(["-clwstrictout", "clwstrictout"],
                 "Write CLUSTALW output (with version 1.81 header) to "
                 "specified filename",
                 filename=True,
                 equate=False),
         _Option(["-msfout", "msfout"],
                 "Write MSF format output to specified filename",
                 filename=True,
                 equate=False),
         _Option(["-fastaout", "fastaout"],
                 "Write FASTA format output to specified filename",
                 filename=True,
                 equate=False),
         ############## END FORMATS ###################################
         #anchors            yes             Use anchor optimization in tree dependent
         #                                   refinement iterations.
         _Switch(["-anchors", "anchors"],
                 "Use anchor optimisation in tree dependent "
                 "refinement iterations"),
         #noanchors          no              Disable anchor optimization. Default is
         #                                   anchors.
         _Switch(["-noanchors", "noanchors"],
                 "Do not use anchor optimisation in tree dependent "
                 "refinement iterations"),
         #group              yes             Group similar sequences together in the
         #                                   output. This is the default. See also
         #                                   stable.
         _Switch(["-group", "group"],
                 "Group similar sequences in output"),
         #stable             no              Preserve input order of sequences in output
         #                                   file. Default is to group sequences by
         #                                   similarity (group).
         _Switch(["-stable", "stable"],
                 "Do not group similar sequences in output (not supported in v3.8)"),
         ############## log-expectation profile score ######################
         # One of either -le, -sp, or -sv
         #
         # According to the doc, spn is default and the only option for
         # nucleotides: this doesnt appear to be true. -le, -sp, and -sv can
         # be used and produce numerically different logs (what is going on?)
         #
         #spn fails on proteins
         #le                 maybe           Use log-expectation profile score (VTML240).
         #                                    Alternatives are to use sp or sv. This is
         #                                    the default for amino acid sequences.
         _Switch(["-le", "le"],
                 "Use log-expectation profile score (VTML240)"),
         #sv                 no              Use sum-of-pairs profile score (VTML240).
         #                                   Default is le.
         _Switch(["-sv", "sv"],
                 "Use sum-of-pairs profile score (VTML240)"),
         #sp                 no              Use sum-of-pairs protein profile score
         #                                   (PAM200). Default is le.
         _Switch(["-sp", "sp"],
                 "Use sum-of-pairs protein profile score (PAM200)"),
         #spn                maybe           Use sum-of-pairs nucleotide profile score
         #                                   (BLASTZ parameters). This is the only option
         #                                   for nucleotides, and is therefore the
         #                                   default.
         _Switch(["-spn", "spn"],
                 "Use sum-of-pairs protein nucleotide profile score"),
         ############## END log-expectation profile score ######################
         #quiet              no              Do not display progress messages.
         _Switch(["-quiet", "quiet"],
                 "Use sum-of-pairs protein nucleotide profile score"),
         #refine             no              Input file is already aligned, skip first
         #                                   two iterations and begin tree dependent
         #                                   refinement.
         _Switch(["-refine", "refine"],
                 "Only do tree dependent refinement"),
         #core               yes in muscle,  Do not catch exceptions.
         #                   no in muscled.
         _Switch(["-core", "core"],
                 "Catch exceptions"),
         #nocore             no in muscle,   Catch exceptions and give an error message
         #                   yes in muscled. if possible.
         _Switch(["-nocore", "nocore"],
                 "Do not catch exceptions"),
         #termgapsfull       no              Terminal gaps penalized with full penalty.
         #                                   [1] Not fully supported in this version.
         #
         #termgapshalf       yes             Terminal gaps penalized with half penalty.
         #                                   [1] Not fully supported in this version.
         #
         #termgapshalflonger no              Terminal gaps penalized with half penalty if
         #                                   gap relative to
         #                                   longer sequence, otherwise with full
         #                                   penalty.
         #                                   [1] Not fully supported in this version.
         #verbose            no              Write parameter settings and progress
         #                                   messages to log file.
         _Switch(["-verbose", "verbose"],
                 "Write parameter settings and progress"),
         #version            no              Write version string to stdout and exit.
         _Switch(["-version", "version"],
                 "Write version string to stdout and exit"),
        ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
Example #56
0
    def __init__(self, cmd="mafft", **kwargs):
        """Declare the MAFFT command line parameters and initialize the wrapper.

        Arguments:
         - cmd - name or path of the MAFFT executable (default "mafft").
         - kwargs - initial parameter values, forwarded unchanged to
           AbstractCommandline.__init__.

        Every parameter below is declared with its command line flag first,
        followed by the keyword name(s) it is exposed under.
        """
        BLOSUM_MATRICES = ["30", "45", "62", "80"]
        self.parameters = [
            # **** Algorithm ****
            # Automatically selects an appropriate strategy from L-INS-i,
            # FFT-NS-i and FFT-NS-2, according to data size.
            # Default: off (always FFT-NS-2)
            _Switch(["--auto", "auto"],
                    "Automatically select strategy. Default off."),
            # Distance is calculated based on the number of shared 6mers.
            # Default: on
            _Switch(["--6merpair", "6merpair", "sixmerpair"],
                    "Distance is calculated based on the number of shared "
                    "6mers. Default: on"),
            # All pairwise alignments are computed with the Needleman-Wunsch
            # algorithm. More accurate but slower than --6merpair. Suitable
            # for a set of globally alignable sequences. Applicable to up to
            # ~200 sequences. A combination with --maxiterate 1000 is
            # recommended (G-INS-i). Default: off (6mer distance is used)
            _Switch(["--globalpair", "globalpair"],
                    "All pairwise alignments are computed with the "
                    "Needleman-Wunsch algorithm. Default: off"),
            # All pairwise alignments are computed with the Smith-Waterman
            # algorithm. More accurate but slower than --6merpair. Suitable
            # for a set of locally alignable sequences. Applicable to up to
            # ~200 sequences. A combination with --maxiterate 1000 is
            # recommended (L-INS-i). Default: off (6mer distance is used)
            _Switch(["--localpair", "localpair"],
                    "All pairwise alignments are computed with the "
                    "Smith-Waterman algorithm. Default: off"),
            # All pairwise alignments are computed with a local algorithm
            # with the generalized affine gap cost (Altschul 1998). More
            # accurate but slower than --6merpair. Suitable when large
            # internal gaps are expected. Applicable to up to ~200 sequences.
            # A combination with --maxiterate 1000 is recommended (E-INS-i).
            # Default: off (6mer distance is used)
            _Switch(["--genafpair", "genafpair"],
                    "All pairwise alignments are computed with a local "
                    "algorithm with the generalized affine gap cost "
                    "(Altschul 1998). Default: off"),
            # All pairwise alignments are computed with FASTA (Pearson and
            # Lipman 1988). FASTA is required.
            # Default: off (6mer distance is used)
            _Switch(["--fastapair", "fastapair"],
                    "All pairwise alignments are computed with FASTA "
                    "(Pearson and Lipman 1988). Default: off"),
            # Weighting factor for the consistency term calculated from
            # pairwise alignments. Valid when either of --globalpair,
            # --localpair, --genafpair, --fastapair or --blastpair is
            # selected. Default: 2.7
            _Option(["--weighti", "weighti"],
                    "Weighting factor for the consistency term calculated "
                    "from pairwise alignments. Default: 2.7",
                    checker_function=lambda x: isinstance(x, float),
                    equate=False),
            # Guide tree is built number times in the progressive stage.
            # Valid with 6mer distance. Default: 2
            _Option(["--retree", "retree"],
                    "Guide tree is built number times in the progressive "
                    "stage. Valid with 6mer distance. Default: 2",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            # Number cycles of iterative refinement are performed. Default: 0
            _Option(["--maxiterate", "maxiterate"],
                    "Number cycles of iterative refinement are performed. "
                    "Default: 0",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            # Use FFT approximation in group-to-group alignment. Default: on
            _Switch(["--fft", "fft"],
                    "Use FFT approximation in group-to-group alignment. "
                    "Default: on"),
            # Do not use FFT approximation in group-to-group alignment.
            # Default: off
            _Switch(["--nofft", "nofft"],
                    "Do not use FFT approximation in group-to-group "
                    "alignment. Default: off"),
            # Alignment score is not checked in the iterative refinement
            # stage. Default: off (score is checked)
            _Switch(["--noscore", "noscore"],
                    "Alignment score is not checked in the iterative "
                    "refinement stage. Default: off (score is checked)"),
            # Use the Myers-Miller (1988) algorithm. Default: automatically
            # turned on when the alignment length exceeds 10,000 (aa/nt).
            _Switch(["--memsave", "memsave"],
                    "Use the Myers-Miller (1988) algorithm. Default: "
                    "automatically turned on when the alignment length "
                    "exceeds 10,000 (aa/nt)."),
            # Use a fast tree-building method (PartTree, Katoh and Toh 2007)
            # with the 6mer distance. Recommended for a large number
            # (> ~10,000) of sequences are input. Default: off
            _Switch(["--parttree", "parttree"],
                    "Use a fast tree-building method with the 6mer "
                    "distance. Default: off"),
            # The PartTree algorithm is used with distances based on DP.
            # Slightly more accurate and slower than --parttree. Recommended
            # for a large number (> ~10,000) of sequences are input.
            # Default: off
            _Switch(["--dpparttree", "dpparttree"],
                    "The PartTree algorithm is used with distances "
                    "based on DP. Default: off"),
            # The PartTree algorithm is used with distances based on FASTA.
            # Slightly more accurate and slower than --parttree. Recommended
            # for a large number (> ~10,000) of sequences are input. FASTA is
            # required. Default: off
            _Switch(["--fastaparttree", "fastaparttree"],
                    "The PartTree algorithm is used with distances based "
                    "on FASTA. Default: off"),
            # The number of partitions in the PartTree algorithm. Default: 50
            _Option(["--partsize", "partsize"],
                    "The number of partitions in the PartTree algorithm. "
                    "Default: 50",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            # Do not make alignment larger than number sequences. Valid only
            # with the --*parttree options.
            # Default: the number of input sequences
            # NOTE(review): the description implies a numeric value, yet this
            # is declared as a value-less switch - confirm against the MAFFT
            # manual before changing, since callers may pass groupsize=True.
            _Switch(["--groupsize", "groupsize"],
                    "Do not make alignment larger than number sequences. "
                    "Default: the number of input sequences"),
            # Adjust direction according to the first sequence
            # Mafft V6 beta function
            _Switch(["--adjustdirection", "adjustdirection"],
                    "Adjust direction according to the first sequence. "
                    "Default off."),
            # Adjust direction according to the first sequence
            # for highly diverged data; very slow
            # Mafft V6 beta function
            _Switch(["--adjustdirectionaccurately",
                     "adjustdirectionaccurately"],
                    # Fragments are joined by implicit concatenation, so each
                    # one must carry its own separating space.
                    "Adjust direction according to the first sequence, "
                    "for highly diverged data; very slow. "
                    "Default off."),
            # **** Parameter ****
            # Gap opening penalty at group-to-group alignment. Default: 1.53
            _Option(["--op", "op"],
                    "Gap opening penalty at group-to-group alignment. "
                    "Default: 1.53",
                    checker_function=lambda x: isinstance(x, float),
                    equate=False),
            # Offset value, which works like gap extension penalty, for
            # group-to-group alignment. Default: 0.123
            _Option(["--ep", "ep"],
                    "Offset value, which works like gap extension penalty, "
                    "for group-to-group alignment. Default: 0.123",
                    checker_function=lambda x: isinstance(x, float),
                    equate=False),
            # Gap opening penalty at local pairwise alignment. Valid when the
            # --localpair or --genafpair option is selected. Default: -2.00
            _Option(["--lop", "lop"],
                    "Gap opening penalty at local pairwise alignment. "
                    "Default: -2.00",
                    checker_function=lambda x: isinstance(x, float),
                    equate=False),
            # Offset value at local pairwise alignment. Valid when the
            # --localpair or --genafpair option is selected. Default: 0.1
            _Option(["--lep", "lep"],
                    "Offset value at local pairwise alignment. "
                    "Default: 0.1",
                    checker_function=lambda x: isinstance(x, float),
                    equate=False),
            # Gap extension penalty at local pairwise alignment. Valid when
            # the --localpair or --genafpair option is selected.
            # Default: -0.1
            _Option(["--lexp", "lexp"],
                    "Gap extension penalty at local pairwise alignment. "
                    "Default: -0.1",
                    checker_function=lambda x: isinstance(x, float),
                    equate=False),
            # Gap opening penalty to skip the alignment. Valid when the
            # --genafpair option is selected. Default: -6.00
            _Option(["--LOP", "LOP"],
                    "Gap opening penalty to skip the alignment. "
                    "Default: -6.00",
                    checker_function=lambda x: isinstance(x, float),
                    equate=False),
            # Gap extension penalty to skip the alignment. Valid when the
            # --genafpair option is selected. Default: 0.00
            _Option(["--LEXP", "LEXP"],
                    "Gap extension penalty to skip the alignment. "
                    "Default: 0.00",
                    checker_function=lambda x: isinstance(x, float),
                    equate=False),

            # BLOSUM number matrix (Henikoff and Henikoff 1992) is used.
            # number=30, 45, 62 or 80. Default: 62
            _Option(["--bl", "bl"],
                    "BLOSUM number matrix is used. Default: 62",
                    # Accept the matrix number as a string ("62") or as an
                    # int (62); both spell the same command line value.
                    checker_function=lambda x: str(x) in BLOSUM_MATRICES,
                    equate=False),
            # JTT PAM number (Jones et al. 1992) matrix is used. number>0.
            # Default: BLOSUM62
            _Option(["--jtt", "jtt"],
                    "JTT PAM number (Jones et al. 1992) matrix is used. "
                    "number>0. Default: BLOSUM62",
                    equate=False),
            # Transmembrane PAM number (Jones et al. 1994) matrix is used.
            # number>0. Default: BLOSUM62
            # The value is a PAM number rather than a path, so (like --jtt)
            # it is not flagged as a filename.
            _Option(["--tm", "tm"],
                    "Transmembrane PAM number (Jones et al. 1994) "
                    "matrix is used. number>0. Default: BLOSUM62",
                    equate=False),
            # Use a user-defined AA scoring matrix. The format of matrixfile
            # is the same to that of BLAST. Ignored when nucleotide sequences
            # are input. Default: BLOSUM62
            _Option(["--aamatrix", "aamatrix"],
                    "Use a user-defined AA scoring matrix. "
                    "Default: BLOSUM62",
                    filename=True,
                    equate=False),
            # Incorporate the AA/nuc composition information into the scoring
            # matrix. Default: off
            _Switch(["--fmodel", "fmodel"],
                    "Incorporate the AA/nuc composition information into "
                    "the scoring matrix (True) or not (False, default)"),
            # **** Output ****
            # Output format: clustal format. Default: off (fasta format)
            _Switch(["--clustalout", "clustalout"],
                    "Output format: clustal (True) or fasta (False, default)"),
            # Output order: same as input. Default: on
            _Switch(["--inputorder", "inputorder"],
                    "Output order: same as input (True, default) or alignment "
                    "based (False)"),
            # Output order: aligned. Default: off (inputorder)
            _Switch(["--reorder", "reorder"],
                    "Output order: aligned (True) or in input order (False, "
                    "default)"),
            # Guide tree is output to the input.tree file. Default: off
            _Switch(["--treeout", "treeout"],
                    "Guide tree is output to the input.tree file (True) or "
                    "not (False, default)"),
            # Do not report progress. Default: off
            _Switch(["--quiet", "quiet"],
                    "Do not report progress (True) or not (False, default)."),
            # **** Input ****
            # Assume the sequences are nucleotide. Default: auto
            _Switch(["--nuc", "nuc"],
                    "Assume the sequences are nucleotide (True/False). "
                    "Default: auto"),
            # Assume the sequences are amino acid. Default: auto
            _Switch(["--amino", "amino"],
                    "Assume the sequences are amino acid (True/False). "
                    "Default: auto"),
            # #################### SEEDS #####################################
            # MAFFT has multiple --seed commands where the unaligned input is
            # aligned to the seed alignment. There can be multiple seeds in
            # the form: "mafft --seed align1 --seed align2 [etc] input"
            # Effectively for n number of seed alignments. Here we're going to
            # assume 6 extra are enough
            _Option(["--seed", "seed"],
                    "Seed alignments given in alignment_n (fasta format) "
                    "are aligned with sequences in input.",
                    filename=True,
                    equate=False),
            # The old solution of also defining extra parameters with
            # ["--seed", "seed1"] etc worked, but clashes with the recent
            # code in the base class to look for duplicate parameters and
            # raise an error.  Perhaps that check should be ignored here, or
            # maybe we can handle this more elegantly...
            # TODO - Create an _OptionList parameter which allows a list to
            # be assigned to the value?
            # ##################### END SEEDS  ###############################
            # The input (must be FASTA format)
            _Argument(["input"],
                      "Input file name",
                      filename=True,
                      is_required=True),
            # ################################################################
            # mafft-profile takes a second alignment input as an argument:
            # mafft-profile align1 align2
            _Argument(["input1"],
                      "Second input file name for the mafft-profile command",
                      filename=True),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
Example #57
0
    def __init__(self, cmd="XXmotif", **kwargs):
        """Declare the XXmotif command line parameters and initialize the wrapper.

        Arguments:
         - cmd - name or path of the XXmotif executable (default "XXmotif").
         - kwargs - initial parameter values, forwarded unchanged to
           AbstractCommandline.__init__.

        Every parameter below is declared with its command line flag first,
        followed by the keyword name(s) it is exposed under.
        """
        # order of parameters is the same as in XXmotif --help
        _valid_alphabet = set("ACGTNX")

        self.parameters = [
            _Argument(["outdir", "OUTDIR"],
                      "output directory for all results",
                      filename=True,
                      is_required=True,
                      # XXmotif currently does not accept spaces in the
                      # outdir name
                      checker_function=lambda x: " " not in x),
            _Argument(["seqfile", "SEQFILE"],
                      "file name with sequences from positive set in FASTA format",
                      filename=True,
                      is_required=True,
                      # XXmotif currently only accepts a pure filename
                      checker_function=lambda x: os.path.split(x)[0] == ""),

            # Options
            _Option(["--negSet", "negSet", "NEGSET", "negset"],
                    "sequence set which has to be used as a reference set",
                    filename=True,
                    equate=False),
            _Switch(["--zoops", "ZOOPS", "zoops"],
                    "use zero-or-one occurrence per sequence model (DEFAULT)"),
            _Switch(["--mops", "MOPS", "mops"],
                    "use multiple occurrence per sequence model"),
            _Switch(["--oops", "OOPS", "oops"],
                    "use one occurrence per sequence model"),
            _Switch(["--revcomp", "REVCOMP", "revcomp"],
                    "search in reverse complement of sequences as well (DEFAULT: NO)"),
            _Option(["--background-model-order", "background-model-order",
                     "BACKGROUND-MODEL-ORDER", "background_model_order"],
                    "order of background distribution (DEFAULT: 2, 8(--negset) )",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Option(["--pseudo", "PSEUDO", "pseudo"],
                    "percentage of pseudocounts used (DEFAULT: 10)",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Option(["-g", "--gaps", "GAPS", "gaps"],
                    "maximum number of gaps used for start seeds [0-3] (DEFAULT: 0)",
                    # The documented range is 0..3. The earlier `x in [0-3]`
                    # evaluated to `x in [-3]` and rejected every valid value.
                    checker_function=lambda x: isinstance(x, int) and 0 <= x <= 3,
                    equate=False),
            _Option(["--type", "TYPE", "type"],
                    # Fragments are joined by implicit concatenation, so the
                    # first one carries the separating space.
                    "defines what kind of start seeds are used (DEFAULT: ALL) "
                    "possible types: ALL, FIVEMERS, PALINDROME, TANDEM, NOPALINDROME, NOTANDEM",
                    checker_function=lambda x: x in ["ALL", "all",
                                                     "FIVEMERS", "fivemers",
                                                     "PALINDROME", "palindrome",
                                                     "TANDEM", "tandem",
                                                     "NOPALINDROME", "nopalindrome",
                                                     "NOTANDEM", "notandem"],
                    equate=False),
            _Option(["--merge-motif-threshold", "merge-motif-threshold",
                     "MERGE-MOTIF-THRESHOLD", "merge_motif_threshold"],
                    "defines the similarity threshold for merging motifs (DEFAULT: HIGH) "
                    "possible modes: LOW, MEDIUM, HIGH",
                    checker_function=lambda x: x in ["LOW", "low",
                                                     "MEDIUM", "medium",
                                                     "HIGH", "high"],
                    equate=False),
            _Switch(["--no-pwm-length-optimization", "no-pwm-length-optimization",
                     "NO-PWM-LENGTH-OPTIMIZATION", "no_pwm_length_optimization"],
                    "do not optimize length during iterations (runtime advantages)"),
            _Option(["--max-match-positions", "max-match-positions",
                     "MAX-MATCH-POSITIONS", "max_match_positions"],
                    "max number of positions per motif (DEFAULT: 17, higher values will lead to very long runtimes)",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Switch(["--batch", "BATCH", "batch"],
                    "suppress progress bars (reduce output size for batch jobs)"),
            _Option(["--maxPosSetSize", "maxPosSetSize", "MAXPOSSETSIZE", "maxpossetsize"],
                    "maximum number of sequences from the positive set used [DEFAULT: all]",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            # does not make sense in biopython
            # _Switch(["--help", "help", "HELP"],
            #         "print this help page"),
            _Option(["--trackedMotif", "trackedMotif", "TRACKEDMOTIF", "trackedmotif"],
                    "inspect extensions and refinement of a given seed (DEFAULT: not used)",
                    # NOTE(review): any() only requires ONE character from
                    # _valid_alphabet; all() may have been intended, but that
                    # would reject IUPAC codes accepted today - confirm first.
                    checker_function=lambda x: any((c in _valid_alphabet) for c in x),
                    equate=False),

            # Using conservation information
            _Option(["--format", "FORMAT", "format"],
                    "defines what kind of format the input sequences have (DEFAULT: FASTA)",
                    checker_function=lambda x: x in ["FASTA", "fasta",
                                                     "MFASTA", "mfasta"],
                    equate=False),
            _Option(["--maxMultipleSequences", "maxMultipleSequences",
                     "MAXMULTIPLESEQUENCES", "maxmultiplesequences"],
                    "maximum number of sequences used in an alignment [DEFAULT: all]",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),

            # Using localization information
            _Switch(["--localization", "LOCALIZATION", "localization"],
                    "use localization information to calculate combined P-values "
                    "(sequences should have all the same length)"),
            _Option(["--downstream", "DOWNSTREAM", "downstream"],
                    "number of residues in positive set downstream of anchor point (DEFAULT: 0)",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),

            # Start with self defined motif
            _Option(["-m", "--startMotif", "startMotif", "STARTMOTIF", "startmotif"],
                    "Start motif (IUPAC characters)",
                    # NOTE(review): same any()-vs-all() question as for
                    # --trackedMotif; left unchanged to keep accepted inputs.
                    checker_function=lambda x: any((c in _valid_alphabet) for c in x),
                    equate=False),
            _Option(["-p", "--profileFile", "profileFile", "PROFILEFILE", "profilefile"],
                    "profile file",
                    filename=True,
                    equate=False),
            _Option(["--startRegion", "startRegion", "STARTREGION", "startregion"],
                    "expected start position for motif occurrences relative to anchor point (--localization)",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),
            _Option(["--endRegion", "endRegion", "ENDREGION", "endregion"],
                    "expected end position for motif occurrences relative to anchor point (--localization)",
                    checker_function=lambda x: isinstance(x, int),
                    equate=False),

            # XXmotif wrapper options
            _Switch(["--XXmasker", "masker"],
                    "mask the input sequences for homology, repeats and low complexity regions"),
            _Switch(["--XXmasker-pos", "maskerpos"],
                    "mask only the positive set for homology, repeats and low complexity regions"),
            _Switch(["--no-graphics", "nographics"],
                    "run XXmotif without graphical output"),
        ]
        AbstractCommandline.__init__(self, cmd, **kwargs)
Example #58
0
 def __init__(self, cmd="prank", **kwargs):
     """Initialize the class."""
     OUTPUT_FORMAT_VALUES = list(range(1, 18))
     self.parameters = [
         # ################# input/output parameters: ##################
         # -d=sequence_file
         _Option(["-d", "d"],
                 "Input filename",
                 filename=True,
                 is_required=True),
         # -t=tree_file [default: no tree, generate approximate NJ tree]
         _Option(["-t", "t"], "Input guide tree filename", filename=True),
         # -tree="tree_string" [tree in newick format; in double quotes]
         _Option(["-tree", "tree"], "Input guide tree as Newick string"),
         # -m=model_file [default: HKY2/WAG]
         _Option(["-m", "m"],
                 "User-defined alignment model filename. Default: "
                 "HKY2/WAG"),
         # -o=output_file [default: 'output']
         _Option(["-o", "o"],
                 "Output filenames prefix. Default: 'output'\n "
                 "Will write: output.?.fas (depending on requested "
                 "format), output.?.xml and output.?.dnd",
                 filename=True),
         # -f=output_format [default: 8]
         _Option(["-f", "f"], "Output alignment format. Default: 8 FASTA\n"
                 "Option are:\n"
                 "1. IG/Stanford	8. Pearson/Fasta\n"
                 "2. GenBank/GB 	11. Phylip3.2\n"
                 "3. NBRF       	12. Phylip\n"
                 "4. EMBL       	14. PIR/CODATA\n"
                 "6. DNAStrider 	15. MSF\n"
                 "7. Fitch      	17. PAUP/NEXUS",
                 checker_function=lambda x: x in OUTPUT_FORMAT_VALUES),
         _Switch(["-noxml", "noxml"], "Do not output XML files "
                 "(PRANK versions earlier than v.120626)"),
         _Switch(["-notree", "notree"], "Do not output dnd tree files "
                 "(PRANK versions earlier than v.120626)"),
         _Switch(["-showxml", "showxml"],
                 "Output XML files (PRANK v.120626 and later)"),
         _Switch(["-showtree", "showtree"],
                 "Output dnd tree files (PRANK v.120626 and later)"),
         _Switch(["-shortnames", "shortnames"],
                 "Truncate names at first space"),
         _Switch(["-quiet", "quiet"], "Reduce verbosity"),
         # ###################### model parameters: ######################
         # +F [force insertions to be always skipped]
         # -F [equivalent]
         _Switch(["-F", "+F", "F"],
                 "Force insertions to be always skipped: same as +F"),
         # -dots [show insertion gaps as dots]
         _Switch(["-dots", "dots"], "Show insertion gaps as dots"),
         # -gaprate=# [gap opening rate; default: dna 0.025 / prot 0.0025]
         _Option(["-gaprate", "gaprate"],
                 "Gap opening rate. Default: dna 0.025 prot 0.0025",
                 checker_function=lambda x: isinstance(x, float)),
         # -gapext=# [gap extension probability; default: dna 0.5 / prot 0.5]
         _Option(["-gapext", "gapext"],
                 "Gap extension probability. Default: dna 0.5 "
                 "/ prot 0.5",
                 checker_function=lambda x: isinstance(x, float)),
         # -dnafreqs=#,#,#,# [ACGT; default: empirical]
         _Option(["-dnafreqs", "dnafreqs"],
                 "DNA frequencies - 'A,C,G,T'. eg '25,25,25,25' as a quote "
                 "surrounded string value. Default: empirical",
                 checker_function=lambda x: isinstance(x, bytes)),
         # -kappa=# [ts/tv rate ratio; default:2]
         _Option(["-kappa", "kappa"],
                 "Transition/transversion ratio. Default: 2",
                 checker_function=lambda x: isinstance(x, int)),
         # -rho=# [pur/pyr rate ratio; default:1]
         _Option(["-rho", "rho"],
                 "Purine/pyrimidine ratio. Default: 1",
                 checker_function=lambda x: isinstance(x, int)),
         # -codon [for DNA: use empirical codon model]
         _Switch(["-codon", "codon"], "Codon aware alignment or not"),
         # -termgap [penalise terminal gaps normally]
         _Switch(["-termgap", "termgap"],
                 "Penalise terminal gaps normally"),
         # ############### other parameters: ################################
         # -nopost [do not compute posterior support; default: compute]
         _Switch(["-nopost", "nopost"],
                 "Do not compute posterior support. Default: compute"),
         # -pwdist=# [expected pairwise distance for computing guidetree;
         # default: dna 0.25 / prot 0.5]
         _Option(["-pwdist", "pwdist"],
                 "Expected pairwise distance for computing guidetree. "
                 "Default: dna 0.25 / prot 0.5",
                 checker_function=lambda x: isinstance(x, float)),
         _Switch(["-once", "once"],
                 "Run only once. Default: twice if no guidetree given"),
         _Switch(["-twice", "twice"], "Always run twice"),
         _Switch(["-skipins", "skipins"],
                 "Skip insertions in posterior support"),
         _Switch(
             ["-uselogs", "uselogs"],
             "Slower but should work for a greater number of sequences"),
         _Switch(["-writeanc", "writeanc"], "Output ancestral sequences"),
         _Switch(["-printnodes", "printnodes"],
                 "Output each node; mostly for debugging"),
         # -matresize=# [matrix resizing multiplier]
         # Doesn't specify type but Float and Int work
         _Option(["-matresize", "matresize"],
                 "Matrix resizing multiplier",
                 checker_function=lambda x:
                 (isinstance(x, float) or isinstance(x, int))),
         # -matinitsize=# [matrix initial size multiplier]
         # Doesn't specify type but Float and Int work
         _Option(["-matinitsize", "matinitsize"],
                 "Matrix initial size multiplier",
                 checker_function=lambda x:
                 (isinstance(x, float) or isinstance(x, int))),
         _Switch(["-longseq", "longseq"],
                 "Save space in pairwise alignments"),
         _Switch(["-pwgenomic", "pwgenomic"],
                 "Do pairwise alignment, no guidetree"),
         # -pwgenomicdist=# [distance for pairwise alignment; default: 0.3]
         _Option(["-pwgenomicdist", "pwgenomicdist"],
                 "Distance for pairwise alignment. Default: 0.3",
                 checker_function=lambda x: isinstance(x, float)),
         # -scalebranches=# [scale branch lengths; default: dna 1 / prot 2]
         _Option(["-scalebranches", "scalebranches"],
                 "Scale branch lengths. Default: dna 1 / prot 2",
                 checker_function=lambda x: isinstance(x, int)),
         # -fixedbranches=# [use fixed branch lengths]
         # Assume looking for a float
         _Option(["-fixedbranches", "fixedbranches"],
                 "Use fixed branch lengths of input value",
                 checker_function=lambda x: isinstance(x, float)),
         # -maxbranches=# [set maximum branch length]
         # Assume looking for a float
         _Option(["-maxbranches", "maxbranches"],
                 "Use maximum branch lengths of input value",
                 checker_function=lambda x: isinstance(x, float)),
         # -realbranches [disable branch length truncation]
         _Switch(["-realbranches", "realbranches"],
                 "Disable branch length truncation"),
         _Switch(["-translate", "translate"], "Translate to protein"),
         _Switch(["-mttranslate", "mttranslate"],
                 "Translate to protein using mt table"),
         # ##################### other: ####################
         _Switch(["-convert", "convert"],
                 "Convert input alignment to new format. Do "
                 "not perform alignment")
     ]
     AbstractCommandline.__init__(self, cmd, **kwargs)
Example #59
0
    def __init__(self, cmd="fasttree", **kwargs):
        """Assemble the FastTree option table and initialise the base class.

        ``cmd`` is the executable name (default ``"fasttree"``). It is handed,
        together with any extra keyword arguments, to
        ``AbstractCommandline.__init__`` once ``self.parameters`` has been set.
        """

        # Local factories for the keyword combinations that recur below.
        def int_option(names, description):
            # _Option with checker_function=_is_int and equate=False.
            return _Option(
                names, description, checker_function=_is_int, equate=False
            )

        def numeric_option(names, description):
            # _Option with checker_function=_is_numeric and equate=False.
            return _Option(
                names, description, checker_function=_is_numeric, equate=False
            )

        def file_option(names, description):
            # _Option with filename=True and equate=False.
            return _Option(names, description, filename=True, equate=False)

        option_table = [
            # ---- input format, support values, starting trees, verbosity ----
            _Switch(
                ["-nt", "nt"],
                "By default FastTree expects protein alignments, use -nt for nucleotides",
            ),
            int_option(
                ["-n", "n"],
                """-n -- read N multiple alignments in.

                    This only works with phylip interleaved format. For example, you can
                    use it with the output from phylip's seqboot. If you use -n, FastTree
                    will write 1 tree per line to standard output.
                    """,
            ),
            _Switch(
                ["-quote", "quote"],
                """-quote -- add quotes to sequence names in output.

                    Quote sequence names in the output and allow spaces, commas,
                    parentheses, and colons in them but not ' characters (fasta files only).
                    """,
            ),
            numeric_option(
                ["-pseudo", "pseudo"],
                """-pseudo [weight] -- Pseudocounts are used with sequence distance estimation.

                    Use pseudocounts to estimate distances between sequences with little or no
                    overlap. (Off by default.) Recommended if analyzing the alignment has
                    sequences with little or no overlap.
                    If the weight is not specified, it is 1.0
                    """,
            ),
            int_option(
                ["-boot", "boot"],
                """Specify the number of resamples for support values.

                    Support value options:
                    By default, FastTree computes local support values by resampling the site
                    likelihoods 1,000 times and the Shimodaira Hasegawa test. If you specify -nome,
                    it will compute minimum-evolution bootstrap supports instead
                    In either case, the support values are proportions ranging from 0 to 1

                    Use -nosupport to turn off support values or -boot 100 to use just 100 resamples.
                    """,
            ),
            _Switch(
                ["-nosupport", "nosupport"],
                """Turn off support values.

                    Support value options:
                    By default, FastTree computes local support values by resampling the site
                    likelihoods 1,000 times and the Shimodaira Hasegawa test. If you specify -nome,
                    it will compute minimum-evolution bootstrap supports instead
                    In either case, the support values are proportions ranging from 0 to 1

                    Use -nosupport to turn off support values or -boot 100 to use just 100 resamples.
                    """,
            ),
            file_option(
                ["-intree", "intree"],
                """-intree newickfile -- read the starting tree in from newickfile.

                    Any branch lengths in the starting trees are ignored.
                    -intree with -n will read a separate starting tree for each alignment.
                    """,
            ),
            file_option(
                ["-intree1", "intree1"],
                "intree1 newickfile -- read the same starting tree for each alignment.",
            ),
            _Switch(
                ["-quiet", "quiet"],
                """-quiet -- do not write to standard error during normal operation

                    (no progress indicator, no options summary, no likelihood values, etc.)
                    """,
            ),
            _Switch(
                ["-nopr", "nopr"],
                "-nopr -- do not write the progress indicator to stderr.",
            ),
            # ---- topology refinement ----
            int_option(
                ["-nni", "nni"],
                """Set the rounds of minimum-evolution nearest-neighbor interchanges

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs.
                    """,
            ),
            int_option(
                ["-spr", "spr"],
                """Set the rounds of subtree-prune-regraft moves

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.
                    """,
            ),
            _Switch(
                ["-noml", "noml"],
                """Deactivate min-evo NNIs and SPRs.

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.
                    Use -noml to turn off both min-evo NNIs and SPRs (useful if refining
                    an approximately maximum-likelihood tree with further NNIs).
                    """,
            ),
            _Switch(
                ["-mllen", "mllen"],
                """Optimize branch lengths on a fixed topology.

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.
                    Use -mllen to optimize branch lengths without ML NNIs
                    Use -mllen -nome with -intree to optimize branch lengths on a fixed topology.
                    """,
            ),
            _Switch(
                ["-nome", "nome"],
                """Changes support values calculation to a minimum-evolution bootstrap method.

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.
                    Use -mllen to optimize branch lengths without ML NNIs
                    Use -mllen -nome with -intree to optimize branch lengths on a fixed topology

                    Support value options:
                    By default, FastTree computes local support values by resampling the site
                    likelihoods 1,000 times and the Shimodaira Hasegawa test. If you specify -nome,
                    it will compute minimum-evolution bootstrap supports instead
                    In either case, the support values are proportions ranging from 0 to 1.
                    """,
            ),
            int_option(
                ["-mlnni", "mlnni"],
                """Set the number of rounds of maximum-likelihood NNIs.

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.
                    Use -mlnni to set the number of rounds of maximum-likelihood NNIs.
                    """,
            ),
            int_option(
                ["-mlacc", "mlacc"],
                """Option for optimization of branches at each NNI.

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.
                    Use -mlacc 2 or -mlacc 3 to always optimize all 5 branches at each NNI,
                    and to optimize all 5 branches in 2 or 3 rounds.
                    """,
            ),
            _Switch(
                ["-slownni", "slownni"],
                """Turn off heuristics to avoid constant subtrees with NNIs.

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.
                    Use -slownni to turn off heuristics to avoid constant subtrees
                    (affects both ML and ME NNIs).
                    """,
            ),
            # ---- maximum likelihood model options ----
            _Switch(
                ["-wag", "wag"],
                """Maximum likelihood model options.

                    Whelan-And-Goldman 2001 model instead of (default)
                    Jones-Taylor-Thorton 1992 model (a.a. only)
                    """,
            ),
            _Switch(
                ["-gtr", "gtr"],
                """Maximum likelihood model options.

                    Use generalized time-reversible instead of (default)
                    Jukes-Cantor (nt only)
                    """,
            ),
            int_option(
                ["-cat", "cat"],
                """Maximum likelihood model options.

                    Specify the number of rate categories of sites (default 20).""",
            ),
            _Switch(
                ["-nocat", "nocat"],
                "Maximum likelihood model options: No CAT model (just 1 category)",
            ),
            _Switch(
                ["-gamma", "gamma"],
                """Report the likelihood under the discrete gamma model.

                    Maximum likelihood model options:
                    -gamma -- after the final round of optimizing branch lengths with the CAT model,
                    report the likelihood under the discrete gamma model with the same
                    number of categories. FastTree uses the same branch lengths but
                    optimizes the gamma shape parameter and the scale of the lengths.
                    The final tree will have rescaled lengths. Used with -log, this
                    also generates per-site likelihoods for use with CONSEL, see
                    GammaLogToPaup.pl and documentation on the FastTree web site.
                    """,
            ),
            # ---- join search, random seed and top-hit heuristics ----
            _Switch(
                ["-slow", "slow"],
                """Use an exhaustive search.

                    Searching for the best join:
                    By default, FastTree combines the 'visible set' of fast neighbor-joining with
                    local hill-climbing as in relaxed neighbor-joining
                    -slow -- exhaustive search (like NJ or BIONJ, but different gap handling)
                    -slow takes half an hour instead of 8 seconds for 1,250 proteins
                    """,
            ),
            _Switch(
                ["-fastest", "fastest"],
                """Search the visible set (the top hit for each node) only.

                    Searching for the best join:
                    By default, FastTree combines the 'visible set' of fast neighbor-joining with
                    local hill-climbing as in relaxed neighbor-joining
                    -fastest -- search the visible set (the top hit for each node) only
                    Unlike the original fast neighbor-joining, -fastest updates visible(C)
                    after joining A and B if join(AB,C) is better than join(C,visible(C))
                    -fastest also updates out-distances in a very lazy way,
                    -fastest sets -2nd on as well, use -fastest -no2nd to avoid this
                    """,
            ),
            _Switch(
                ["-2nd", "second"],
                """Turn 2nd-level top hits heuristic on.

                    Top-hit heuristics:
                    By default, FastTree uses a top-hit list to speed up search
                    Use -notop (or -slow) to turn this feature off
                    and compare all leaves to each other,
                    and all new joined nodes to each other

                    -2nd or -no2nd to turn 2nd-level top hits heuristic on or off
                    This reduces memory usage and running time but may lead to
                    marginal reductions in tree quality.
                    (By default, -fastest turns on -2nd.)
                    """,
            ),
            _Switch(
                ["-no2nd", "no2nd"],
                """Turn 2nd-level top hits heuristic off.

                    Top-hit heuristics:
                    By default, FastTree uses a top-hit list to speed up search
                    Use -notop (or -slow) to turn this feature off
                    and compare all leaves to each other,
                    and all new joined nodes to each other

                    -2nd or -no2nd to turn 2nd-level top hits heuristic on or off
                    This reduces memory usage and running time but may lead to
                    marginal reductions in tree quality.
                    (By default, -fastest turns on -2nd.)
                    """,
            ),
            int_option(
                ["-seed", "seed"],
                """Use -seed to initialize the random number generator.

                    Support value options:
                    By default, FastTree computes local support values by resampling the site
                    likelihoods 1,000 times and the Shimodaira Hasegawa test. If you specify -nome,
                    it will compute minimum-evolution bootstrap supports instead
                    In either case, the support values are proportions ranging from 0 to 1.
                    """,
            ),
            _Switch(
                ["-top", "top"],
                """Top-hit list to speed up search

                    Top-hit heuristics:
                    By default, FastTree uses a top-hit list to speed up search
                    Use -notop (or -slow) to turn this feature off
                    and compare all leaves to each other,
                    and all new joined nodes to each other.
                    """,
            ),
            _Switch(
                ["-notop", "notop"],
                """Turn off top-hit list to speed up search

                    Top-hit heuristics:
                    By default, FastTree uses a top-hit list to speed up search
                    Use -notop (or -slow) to turn this feature off
                    and compare all leaves to each other,
                    and all new joined nodes to each other.
                    """,
            ),
            numeric_option(
                ["-topm", "topm"],
                """Change the top hits calculation method

                    Top-hit heuristics:
                    By default, FastTree uses a top-hit list to speed up search
                    -topm 1.0 -- set the top-hit list size to parameter*sqrt(N)
                    FastTree estimates the top m hits of a leaf from the
                    top 2*m hits of a 'close' neighbor, where close is
                    defined as d(seed,close) < 0.75 * d(seed, hit of rank 2*m),
                    and updates the top-hits as joins proceed.
                    """,
            ),
            numeric_option(
                ["-close", "close"],
                """Modify the close heuristic for the top-hit list

                    Top-hit heuristics:
                    By default, FastTree uses a top-hit list to speed up search
                    -close 0.75 -- modify the close heuristic, lower is more conservative.
                    """,
            ),
            numeric_option(
                ["-refresh", "refresh"],
                """Parameter for conditions that joined nodes are compared to other nodes

                    Top-hit heuristics:
                    By default, FastTree uses a top-hit list to speed up search
                    -refresh 0.8 -- compare a joined node to all other nodes if its
                    top-hit list is less than 80% of the desired length,
                    or if the age of the top-hit list is log2(m) or greater.
                    """,
            ),
            # ---- distances, join weighting, GTR settings, constraints ----
            file_option(
                ["-matrix", "matrix"],
                """Specify a matrix for nucleotide or amino acid distances

                    Distances:
                    Default: For protein sequences, log-corrected distances and an
                    amino acid dissimilarity matrix derived from BLOSUM45
                    or for nucleotide sequences, Jukes-Cantor distances
                    To specify a different matrix, use -matrix FilePrefix or -nomatrix
                    """,
            ),
            _Switch(
                ["-nomatrix", "nomatrix"],
                """Specify that no matrix should be used for nucleotide or amino acid distances

                    Distances:
                    Default: For protein sequences, log-corrected distances and an
                    amino acid dissimilarity matrix derived from BLOSUM45
                    or for nucleotide sequences, Jukes-Cantor distances
                    To specify a different matrix, use -matrix FilePrefix or -nomatrix
                    """,
            ),
            _Switch(
                ["-nj", "nj"],
                "Join options: regular (unweighted) neighbor-joining (default)",
            ),
            _Switch(
                ["-bionj", "bionj"],
                """Join options: weighted joins as in BIONJ.

                    FastTree will also weight joins during NNIs.
                    """,
            ),
            # These two take no checker and are not marked as filenames.
            _Option(["-gtrrates", "gtrrates"], "-gtrrates ac ag at cg ct gt", equate=False),
            _Option(["-gtrfreq", "gtrfreq"], "-gtrfreq A C G T", equate=False),
            file_option(
                ["-constraints", "constraints"],
                """Specifies an alignment file for use with constrained topology searching

                    Constrained topology search options:
                    -constraints alignmentfile -- an alignment with values of 0, 1, and -
                    Not all sequences need be present. A column of 0s and 1s defines a
                    constrained split. Some constraints may be violated
                    (see 'violating constraints:' in standard error).
                    """,
            ),
            numeric_option(
                ["-constraintWeight", "constraintWeight"],
                """Weight strength of contraints in topology searching.

                    Constrained topology search options:
                    -constraintWeight -- how strongly to weight the constraints. A value of 1
                    means a penalty of 1 in tree length for violating a constraint
                    Default: 100.0
                    """,
            ),
            # ---- logging and miscellaneous ----
            file_option(
                ["-log", "log"],
                """Create log files of data such as intermediate trees and per-site rates

                    -log logfile -- save intermediate trees so you can extract
                    the trees and restart long-running jobs if they crash
                    -log also reports the per-site rates (1 means slowest category).
                    """,
            ),
            file_option(["-makematrix", "makematrix"], "-makematrix [alignment]"),
            _Switch(
                ["-rawdist", "rawdist"],
                """Turn off or adjust log-correction in AA or NT distances.

                    Use -rawdist to turn the log-correction off or to use
                    %different instead of Jukes-Cantor in AA or NT distances

                    Distances:
                    Default: For protein sequences, log-corrected distances and an
                    amino acid dissimilarity matrix derived from BLOSUM45
                    or for nucleotide sequences, Jukes-Cantor distances
                    To specify a different matrix, use -matrix FilePrefix or -nomatrix
                    """,
            ),
            int_option(
                ["-sprlength", "sprlength"],
                """Set maximum SPR move length in topology refinement (default 10).

                    Topology refinement:
                    By default, FastTree tries to improve the tree with up to 4*log2(N)
                    rounds of minimum-evolution nearest-neighbor interchanges (NNI),
                    where N is the number of unique sequences, 2 rounds of
                    subtree-prune-regraft (SPR) moves (also min. evo.), and
                    up to 2*log(N) rounds of maximum-likelihood NNIs.
                    Use -nni to set the number of rounds of min. evo. NNIs,
                    and -spr to set the rounds of SPRs.
                    """,
            ),
            # ---- help, output file and the required input file ----
            _Switch(["-help", "help"], "Show the help."),
            _Switch(["-expert", "expert"], "Show the expert level help."),
            file_option(
                ["-out", "out"],
                """Enter <output file>

                    The path to a Newick Tree output file needs to be specified.
                    """,
            ),
            _Argument(
                ["input"],
                """Enter <input file>

                      An input file of sequence alignments in fasta or phylip format
                      is needed.  By default FastTree expects protein
                      alignments, use -nt for nucleotides.
                      """,
                filename=True,
                is_required=True,
            ),
        ]
        self.parameters = option_table

        AbstractCommandline.__init__(self, cmd, **kwargs)