Ejemplo n.º 1
0
def parse_args(argv=None):
    """Build the EPA reference-trainer command-line parser and parse arguments.

    Args:
        argv: optional list of argument strings, without the program name.
            When None (the default) ``sys.argv[1:]`` is used, so a bare
            ``parse_args()`` call behaves exactly as it did before this
            parameter was introduced.

    Returns:
        The ``argparse.Namespace`` holding one attribute per ``dest``
        declared below.

    Raises:
        SystemExit: after printing the help text when fewer than three
            argument tokens are supplied, and from argparse itself on
            invalid input or ``-h``.
    """
    if argv is None:
        argv = sys.argv[1:]

    rank_policies = ["ignore", "abort", "skip", "autofix"]

    parser = ArgumentParser(
        description="Build a reference tree for EPA taxonomic placement.",
        epilog="Example: ./epa_trainer.py -t example/training_tax.txt -s example/training_seq.fa -r example/ref.json",
        formatter_class=RawTextHelpFormatter)
    parser.add_argument("-t", dest="taxonomy_fname",
                        help="Reference taxonomy file.")
    parser.add_argument("-s", dest="align_fname",
                        help="Reference alignment file. Sequences must be aligned, their IDs must correspond to those\n"
                             "in taxonomy file.")
    parser.add_argument("-r", dest="ref_fname",
                        help="Reference output file. It will contain reference alignment, phylogenetic tree and other\n"
                             "information needed for taxonomic placement of query sequences.")
    # NOTE(review): the help text advertises a default of 2 while the parser
    # default is None; presumably a caller substitutes the value - confirm.
    parser.add_argument("-T", dest="num_threads", type=int, default=None,
                        help="Specify the number of CPUs.  Default: 2")
    parser.add_argument("-c", dest="config_fname", default=None,
                        help="Config file name.")
    parser.add_argument("-C", dest="compress_patterns", default=False, action="store_true",
                        help="Enable pattern compression during model optimization under GTRCAT. Default: FALSE")
    parser.add_argument("-n", dest="output_name", default=None,
                        help="Run name.")
    # The multi-line help texts are spelled with explicit "\n" so that the
    # significant trailing/leading whitespace cannot be lost by an editor.
    parser.add_argument("-m", dest="mfresolv_method", choices=["thorough", "fast", "ultrafast"],
                        default="thorough",
                        help="Method of multifurcation resolution: \n"
                             "            thorough    use stardard constrainted RAxML tree search (default)\n"
                             "            fast        use RF distance as search convergence criterion (RAxML -D option)\n"
                             "            ultrafast   optimize model+branch lengths only (RAxML -f e option)")
    parser.add_argument("-v", dest="verbose", action="store_true",
                        help="Print additional info messages to the console.")
    parser.add_argument("-debug", dest="debug", action="store_true",
                        help="Debug mode, intermediate files will not be cleaned up.")
    parser.add_argument("-no-hmmer", dest="no_hmmer", action="store_true",
                        help="Do not build HMMER profile.")
    parser.add_argument("-dup-rank-names", dest="dup_rank_names", choices=rank_policies,
                        default="ignore",
                        help="Action to be performed if different ranks with same name are found: \n"
                             "            ignore      do nothing\n"
                             "            abort       report duplicates and exit\n"
                             "            skip        skip the corresponding sequences (exlude from reference)\n"
                             "            autofix     make name unique by concatenating it with the parent rank's name")
    parser.add_argument("-wrong-rank-count", dest="wrong_rank_count", choices=rank_policies,
                        default="ignore",
                        help="Action to be performed if lineage has less (more) than 7 ranks\n"
                             "            ignore      do nothing\n"
                             "            abort       report duplicates and exit\n"
                             "            skip        skip the corresponding sequences (exlude from reference)\n"
                             "            autofix     try to guess wich ranks should be added or removed (use with caution!)")
    parser.add_argument("-tmpdir", dest="temp_dir", default=None,
                        help="Directory for temporary files.")

    # Same threshold as the former ``len(sys.argv) < 4`` test (which also
    # counted the program name): too few tokens means "show help and quit".
    if len(argv) < 3:
        parser.print_help()
        sys.exit()

    return parser.parse_args(argv)
Ejemplo n.º 2
0
def parse_args(argv=None):
    """Build the EPA reference-trainer command-line parser and parse arguments.

    Args:
        argv: optional list of argument strings, without the program name.
            When None (the default) ``sys.argv[1:]`` is used, which keeps
            the behaviour of a bare ``parse_args()`` call unchanged.

    Returns:
        The ``argparse.Namespace`` holding one attribute per ``dest``
        declared below.

    Raises:
        SystemExit: after printing the help text when fewer than three
            argument tokens are supplied, and from argparse itself when a
            required option (``-t``, ``-s``) is missing, a value is invalid,
            or ``-h`` is given.
    """
    if argv is None:
        argv = sys.argv[1:]

    # Queried once; only used to render the -T help line.
    cpu_total = multiprocessing.cpu_count()
    rank_policies = ["ignore", "abort", "skip", "autofix"]

    parser = ArgumentParser(
        description="Build a reference tree for EPA taxonomic placement.",
        epilog="Example: ./epa_trainer.py -t example/training_tax.txt -s example/training_seq.fa -n myref",
        formatter_class=RawTextHelpFormatter)
    parser.add_argument("-t", dest="taxonomy_fname", required=True,
                        help="Reference taxonomy file.")
    parser.add_argument("-s", dest="align_fname", required=True,
                        help="Reference alignment file. Sequences must be aligned, their IDs must correspond to those\n"
                             "in taxonomy file.")
    parser.add_argument("-r", dest="ref_fname",
                        help="Reference output file. It will contain reference alignment, phylogenetic tree and other\n"
                             "information needed for taxonomic placement of query sequences.")
    # NOTE(review): the help text shows the CPU count as the default, but the
    # parser default is None; presumably resolved by the caller - confirm.
    parser.add_argument("-T", dest="num_threads", type=int, default=None,
                        help="Specify the number of CPUs.  Default: %d" % cpu_total)
    parser.add_argument("-c", dest="config_fname", default=None,
                        help="Config file name.")
    parser.add_argument("-o", dest="output_dir", default=".",
                        help="Output directory")
    parser.add_argument("-n", dest="output_name", default=None,
                        help="Run name.")
    parser.add_argument("-p", dest="rand_seed", type=int, default=None,
                        help="Random seed to be used with RAxML. Default: current system time.")
    # Multi-line help texts use explicit "\n" so their significant
    # whitespace stays visible and intact.
    parser.add_argument("-m", dest="mfresolv_method", choices=["thorough", "fast", "ultrafast"],
                        default="thorough",
                        help="Method of multifurcation resolution: \n"
                             "            thorough    use stardard constrainted RAxML tree search (default)\n"
                             "            fast        use RF distance as search convergence criterion (RAxML -D option)\n"
                             "            ultrafast   optimize model+branch lengths only (RAxML -f e option)")
    parser.add_argument("-N", dest="rep_num", type=int, default=1,
                        help="Number of RAxML tree searches (with distinct random seeds). Default: 1")
    # type=str.lower runs before the choices check, so "BAC" is accepted
    # and stored as "bac".
    parser.add_argument("-x", dest="taxcode_name", choices=["bac", "bot", "zoo", "vir"], type=str.lower,
                        help="Taxonomic code: BAC(teriological), BOT(anical), ZOO(logical), VIR(ological)")
    parser.add_argument("-R", dest="restart", action="store_true",
                        help="Resume execution after a premature termination (e.g., due to expired job time limit).\n"
                             "Run name of the previous (terminated) job must be specified via -n option.")
    parser.add_argument("-v", dest="verbose", action="store_true",
                        help="Print additional info messages to the console.")
    parser.add_argument("-debug", dest="debug", action="store_true",
                        help="Debug mode, intermediate files will not be cleaned up.")
    parser.add_argument("-no-hmmer", dest="no_hmmer", action="store_true",
                        help="Do not build HMMER profile.")
    parser.add_argument("-dup-rank-names", dest="dup_rank_names", choices=rank_policies,
                        default="ignore",
                        help="Action to be performed if different ranks with same name are found: \n"
                             "            ignore      do nothing\n"
                             "            abort       report duplicates and exit\n"
                             "            skip        skip the corresponding sequences (exlude from reference)\n"
                             "            autofix     make name unique by concatenating it with the parent rank's name")
    parser.add_argument("-wrong-rank-count", dest="wrong_rank_count", choices=rank_policies,
                        default="ignore",
                        help="Action to be performed if lineage has less (more) than 7 ranks\n"
                             "            ignore      do nothing\n"
                             "            abort       report duplicates and exit\n"
                             "            skip        skip the corresponding sequences (exlude from reference)\n"
                             "            autofix     try to guess wich ranks should be added or removed (use with caution!)")
    parser.add_argument("-tmpdir", dest="temp_dir", default=None,
                        help="Directory for temporary files.")

    # Same threshold as the former ``len(sys.argv) < 4`` test, which also
    # counted the program name.
    if len(argv) < 3:
        parser.print_help()
        sys.exit()

    return parser.parse_args(argv)
Ejemplo n.º 3
0
def parse_args(argv=None):
    """Build the EPA reference-trainer command-line parser and parse arguments.

    The options are declared in a single table and registered in a loop, in
    the same order in which they appear in the help output.

    Args:
        argv: optional list of argument strings, without the program name.
            When None (the default) ``sys.argv[1:]`` is used, which keeps
            the behaviour of a bare ``parse_args()`` call unchanged.

    Returns:
        The ``argparse.Namespace`` holding one attribute per ``dest`` in the
        option table.

    Raises:
        SystemExit: after printing the help text when fewer than three
            argument tokens are supplied, and from argparse itself when a
            required option (``-t``, ``-s``) is missing, a value is invalid,
            or ``-h`` is given.
    """
    if argv is None:
        argv = sys.argv[1:]

    # Queried once; only used to render the -T help line.
    cpu_total = multiprocessing.cpu_count()
    rank_policies = ["ignore", "abort", "skip", "autofix"]

    # (flag, keyword settings for add_argument). Multi-line help texts use
    # explicit "\n" so their significant whitespace stays visible.
    option_table = [
        ("-t", dict(dest="taxonomy_fname", required=True,
                    help="Reference taxonomy file.")),
        ("-s", dict(dest="align_fname", required=True,
                    help="Reference alignment file. Sequences must be aligned, their IDs must correspond to those\n"
                         "in taxonomy file.")),
        ("-r", dict(dest="ref_fname",
                    help="Reference output file. It will contain reference alignment, phylogenetic tree and other\n"
                         "information needed for taxonomic placement of query sequences.")),
        # NOTE(review): help shows the CPU count as default, yet the parser
        # default is None; presumably resolved by the caller - confirm.
        ("-T", dict(dest="num_threads", type=int, default=None,
                    help="Specify the number of CPUs.  Default: %d" % cpu_total)),
        ("-c", dict(dest="config_fname", default=None,
                    help="Config file name.")),
        ("-o", dict(dest="output_dir", default=".",
                    help="Output directory")),
        ("-n", dict(dest="output_name", default=None,
                    help="Run name.")),
        ("-p", dict(dest="rand_seed", type=int, default=None,
                    help="Random seed to be used with RAxML. Default: current system time.")),
        ("-m", dict(dest="mfresolv_method",
                    choices=["thorough", "fast", "ultrafast"],
                    default="thorough",
                    help="Method of multifurcation resolution: \n"
                         "            thorough    use stardard constrainted RAxML tree search (default)\n"
                         "            fast        use RF distance as search convergence criterion (RAxML -D option)\n"
                         "            ultrafast   optimize model+branch lengths only (RAxML -f e option)")),
        ("-N", dict(dest="rep_num", type=int, default=1,
                    help="Number of RAxML tree searches (with distinct random seeds). Default: 1")),
        # type=str.lower runs before the choices check, so "BAC" -> "bac".
        ("-x", dict(dest="taxcode_name",
                    choices=["bac", "bot", "zoo", "vir"],
                    type=str.lower,
                    help="Taxonomic code: BAC(teriological), BOT(anical), ZOO(logical), VIR(ological)")),
        ("-R", dict(dest="restart", action="store_true",
                    help="Resume execution after a premature termination (e.g., due to expired job time limit).\n"
                         "Run name of the previous (terminated) job must be specified via -n option.")),
        ("-v", dict(dest="verbose", action="store_true",
                    help="Print additional info messages to the console.")),
        ("-debug", dict(dest="debug", action="store_true",
                        help="Debug mode, intermediate files will not be cleaned up.")),
        ("-no-hmmer", dict(dest="no_hmmer", action="store_true",
                           help="Do not build HMMER profile.")),
        ("-dup-rank-names", dict(
            dest="dup_rank_names", choices=rank_policies, default="ignore",
            help="Action to be performed if different ranks with same name are found: \n"
                 "            ignore      do nothing\n"
                 "            abort       report duplicates and exit\n"
                 "            skip        skip the corresponding sequences (exlude from reference)\n"
                 "            autofix     make name unique by concatenating it with the parent rank's name")),
        ("-wrong-rank-count", dict(
            dest="wrong_rank_count", choices=rank_policies, default="ignore",
            help="Action to be performed if lineage has less (more) than 7 ranks\n"
                 "            ignore      do nothing\n"
                 "            abort       report duplicates and exit\n"
                 "            skip        skip the corresponding sequences (exlude from reference)\n"
                 "            autofix     try to guess wich ranks should be added or removed (use with caution!)")),
        ("-tmpdir", dict(dest="temp_dir", default=None,
                         help="Directory for temporary files.")),
    ]

    parser = ArgumentParser(
        description="Build a reference tree for EPA taxonomic placement.",
        epilog="Example: ./epa_trainer.py -t example/training_tax.txt -s example/training_seq.fa -n myref",
        formatter_class=RawTextHelpFormatter)
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)

    # Same threshold as the former ``len(sys.argv) < 4`` test, which also
    # counted the program name.
    if len(argv) < 3:
        parser.print_help()
        sys.exit()

    return parser.parse_args(argv)
Ejemplo n.º 4
0
def parse_args(argv=None):
    """Build the SATIVA command-line parser, parse and validate arguments.

    Args:
        argv: optional list of argument strings, without the program name.
            When None (the default) ``sys.argv[1:]`` is used, which keeps
            the behaviour of a bare ``parse_args()`` call unchanged.

    Returns:
        The ``argparse.Namespace`` holding one attribute per ``dest``
        declared below, after it has been passed to ``check_args``.

    Raises:
        SystemExit: after printing the help text when no arguments are
            supplied, and from argparse itself on invalid input or ``-h``.
            ``check_args`` is defined outside this block; whatever it raises
            or exits with propagates unchanged.
    """
    if argv is None:
        argv = sys.argv[1:]

    # Queried once and reused for both the -T default and its help line.
    cpu_total = multiprocessing.cpu_count()

    parser = ArgumentParser(
        usage="%(prog)s -s ALIGNMENT -t TAXONOMY -x {BAC,BOT,ZOO,VIR} [options]",
        description=EpacConfig.SATIVA_INFO % "SATIVA",
        epilog="Example: sativa.py -s example/test.phy -t example/test.tax -x BAC",
        formatter_class=RawDescriptionHelpFormatter)
    # Multi-line help texts use explicit "\n" so their significant
    # whitespace stays visible and intact.
    parser.add_argument("-s", dest="align_fname",
                        help="Reference alignment file (PHYLIP or FASTA). Sequences must be aligned, \n"
                             "            their IDs must correspond to those in taxonomy file.")
    parser.add_argument("-t", dest="taxonomy_fname",
                        help="Reference taxonomy file.")
    # type=str.lower runs before the choices check, so "BAC" is accepted
    # and stored as "bac".
    parser.add_argument("-x", dest="taxcode_name", choices=["bac", "bot", "zoo", "vir"], type=str.lower,
                        help="Taxonomic code: BAC(teriological), BOT(anical), ZOO(logical), VIR(ological)")
    parser.add_argument("-n", dest="output_name", default=None,
                        help="Job name, will be used as a prefix for output file names (default: taxonomy file name without extension)")
    parser.add_argument("-o", dest="output_dir", default=".",
                        help="Output directory (default: current).")
    parser.add_argument("-T", dest="num_threads", type=int, default=cpu_total,
                        help="Specify the number of CPUs (default: %d)" % cpu_total)
    parser.add_argument("-N", dest="rep_num", type=int, default=1,
                        help="Number of RAxML tree searches (with distinct random seeds) to resolve multifurcation. Default: 1")
    parser.add_argument("-v", dest="verbose", action="store_true",
                        help="Print additional info messages to the console.")
    parser.add_argument("-R", dest="restart", action="store_true",
                        help="Resume execution after a premature termination (e.g., due to expired job time limit).\n"
                             "Run name of the previous (terminated) job must be specified via -n option.")
    parser.add_argument("-c", dest="config_fname", default=None,
                        help="Config file name.")
    parser.add_argument("-r", dest="ref_fname",
                        help="Specify the reference alignment and taxonomy in refjson format.")
    parser.add_argument("-j", dest="jplace_fname", default=None,
                        help="Do not call RAxML EPA, use existing .jplace file as input instead. \n"
                             "            This could be also a directory with *.jplace files.")
    parser.add_argument("-p", dest="rand_seed", type=int, default=12345,
                        help="Random seed to be used with RAxML. Default: 12345")
    parser.add_argument("-C", dest="conf_cutoff", type=float, default=0.,
                        help="Confidence cut-off between 0 and 1. Default: 0\n")
    parser.add_argument("-P", dest="brlen_pv", type=float, default=0.,
                        help="P-value for branch length Erlang test. Default: 0=off\n")
    parser.add_argument("-l", dest="min_lhw", type=float, default=0.,
                        help="A value between 0 and 1, the minimal sum of likelihood weight of\n"
                             "                    an assignment to a specific rank. This value represents a confidence \n"
                             "                    measure of the assignment, assignments below this value will be discarded. \n"
                             "                    Default: 0 to output all possbile assignments.")
    parser.add_argument("-m", dest="mfresolv_method", choices=["thorough", "fast", "ultrafast"],
                        default="thorough",
                        help="Method of multifurcation resolution: \n"
                             "            thorough    use stardard constrainted RAxML tree search (default)\n"
                             "            fast        use RF distance as search convergence criterion (RAxML -D option)\n"
                             "            ultrafast   optimize model+branch lengths only (RAxML -f e option)")
    parser.add_argument("-debug", dest="debug", action="store_true",
                        help="Debug mode, intermediate files will not be cleaned up.")
    parser.add_argument("-ranktest", dest="ranktest", action="store_true",
                        help="Test for misplaced higher ranks.")
    parser.add_argument("-tmpdir", dest="temp_dir", default=None,
                        help="Directory for temporary files.")

    # No arguments at all: show the help and stop. Done before parsing, which
    # is equivalent to the former post-parse check because no option above is
    # required, so parsing an empty list can never fail first.
    if not argv:
        parser.print_help()
        sys.exit()

    args = parser.parse_args(argv)
    # check_args lives elsewhere in the file; presumably it validates option
    # combinations and reports problems via the parser - confirm there.
    check_args(args, parser)
    return args
Ejemplo n.º 5
0
def parse_args(argv=None):
    """Build the SATIVA command-line parser, parse and validate arguments.

    The options are declared in a single table and registered in a loop, in
    the same order in which they appear in the help output.

    Args:
        argv: optional list of argument strings, without the program name.
            When None (the default) ``sys.argv[1:]`` is used, which keeps
            the behaviour of a bare ``parse_args()`` call unchanged.

    Returns:
        The ``argparse.Namespace`` holding one attribute per ``dest`` in the
        option table, after it has been passed to ``check_args``.

    Raises:
        SystemExit: after printing the help text when no arguments are
            supplied, and from argparse itself on invalid input or ``-h``.
            ``check_args`` is defined outside this block; whatever it raises
            or exits with propagates unchanged.
    """
    if argv is None:
        argv = sys.argv[1:]

    # Queried once and reused for both the -T default and its help line.
    cpu_total = multiprocessing.cpu_count()

    # (flag, keyword settings for add_argument). Multi-line help texts use
    # explicit "\n" so their significant whitespace stays visible.
    option_table = [
        ("-s", dict(dest="align_fname",
                    help="Reference alignment file (PHYLIP or FASTA). Sequences must be aligned, \n"
                         "            their IDs must correspond to those in taxonomy file.")),
        ("-t", dict(dest="taxonomy_fname",
                    help="Reference taxonomy file.")),
        # type=str.lower runs before the choices check, so "BAC" -> "bac".
        ("-x", dict(dest="taxcode_name",
                    choices=["bac", "bot", "zoo", "vir"],
                    type=str.lower,
                    help="Taxonomic code: BAC(teriological), BOT(anical), ZOO(logical), VIR(ological)")),
        ("-n", dict(dest="output_name", default=None,
                    help="Job name, will be used as a prefix for output file names (default: taxonomy file name without extension)")),
        ("-o", dict(dest="output_dir", default=".",
                    help="Output directory (default: current).")),
        ("-T", dict(dest="num_threads", type=int, default=cpu_total,
                    help="Specify the number of CPUs (default: %d)" % cpu_total)),
        ("-N", dict(dest="rep_num", type=int, default=1,
                    help="Number of RAxML tree searches (with distinct random seeds) to resolve multifurcation. Default: 1")),
        ("-v", dict(dest="verbose", action="store_true",
                    help="Print additional info messages to the console.")),
        ("-R", dict(dest="restart", action="store_true",
                    help="Resume execution after a premature termination (e.g., due to expired job time limit).\n"
                         "Run name of the previous (terminated) job must be specified via -n option.")),
        ("-c", dict(dest="config_fname", default=None,
                    help="Config file name.")),
        ("-r", dict(dest="ref_fname",
                    help="Specify the reference alignment and taxonomy in refjson format.")),
        ("-j", dict(dest="jplace_fname", default=None,
                    help="Do not call RAxML to perform EPA leave-one-out test, use existing .jplace file as input instead. \n"
                         "            This could be also a directory with *.jplace files.")),
        ("-J", dict(dest="final_jplace_fname", default=None,
                    help="Do not call RAxML to perform final EPA classification, use existing .jplace file as input instead. \n"
                         "            This could be also a directory with *.jplace files.")),
        ("-p", dict(dest="rand_seed", type=int, default=12345,
                    help="Random seed to be used with RAxML. Default: 12345")),
        ("-C", dict(dest="conf_cutoff", type=float, default=0.,
                    help="Confidence cut-off between 0 and 1. Default: 0\n")),
        ("-P", dict(dest="brlen_pv", type=float, default=0.,
                    help="P-value for branch length Erlang test. Default: 0=off\n")),
        ("-l", dict(dest="min_lhw", type=float, default=0.,
                    help="A value between 0 and 1, the minimal sum of likelihood weight of\n"
                         "                    an assignment to a specific rank. This value represents a confidence \n"
                         "                    measure of the assignment, assignments below this value will be discarded. \n"
                         "                    Default: 0 to output all possbile assignments.")),
        ("-m", dict(dest="mfresolv_method",
                    choices=["thorough", "fast", "ultrafast"],
                    default="thorough",
                    help="Method of multifurcation resolution: \n"
                         "            thorough    use stardard constrainted RAxML tree search (default)\n"
                         "            fast        use RF distance as search convergence criterion (RAxML -D option)\n"
                         "            ultrafast   optimize model+branch lengths only (RAxML -f e option)")),
        ("-S", dict(dest="save_memory", action="store_true",
                    help="Enable RAxML memory saving (useful for large and gappy alignments).")),
        ("-Y", dict(dest="synonym_fname", default=None,
                    help="File listing synonymous rank names, which will be considered equivalent.\n"
                         "            Please enter one name per line; separate groups with an empty line.")),
        ("-debug", dict(dest="debug", action="store_true",
                        help="Debug mode, intermediate files will not be cleaned up.")),
        ("-ranktest", dict(dest="ranktest", action="store_true",
                           help="Test for misplaced higher ranks.")),
        ("-tmpdir", dict(dest="temp_dir", default=None,
                         help="Directory for temporary files.")),
    ]

    parser = ArgumentParser(
        usage="%(prog)s -s ALIGNMENT -t TAXONOMY -x {BAC,BOT,ZOO,VIR} [options]",
        description=EpacConfig.SATIVA_INFO % "SATIVA",
        epilog="Example: sativa.py -s example/test.phy -t example/test.tax -x BAC",
        formatter_class=RawDescriptionHelpFormatter)
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)

    # No arguments at all: show the help and stop. Done before parsing, which
    # is equivalent to the former post-parse check because no option in the
    # table is required, so parsing an empty list can never fail first.
    if not argv:
        parser.print_help()
        sys.exit()

    args = parser.parse_args(argv)
    # check_args lives elsewhere in the file; presumably it validates option
    # combinations and reports problems via the parser - confirm there.
    check_args(args, parser)
    return args