Esempio n. 1
0
File: config.py Progetto: uym2/sepp
def _init_parser():
    """Build the module-level SEPP command-line parser and return it.

    The parser is stored in the module global ``_parser``.  Options are
    registered in four argument groups, which are also exposed by name
    through the ``_parser.groups`` dict (keys ``decompGroup``,
    ``outputGroup``, ``inputGroup`` and ``otherGroup``).

    The defaults for ``--tempdir``, ``--cpu`` and ``--checkpoint`` are
    computed when this function runs, by calling ``get_default_temp_dir()``,
    ``set_cpu(cpu_count())`` and ``set_checkpoint(None)`` respectively.

    Returns:
        The newly created parser (the same object as ``_parser``).
    """
    global _parser
    # 'resolve' makes a later add_argument() that reuses an option string
    # replace the earlier definition instead of raising an error.
    _parser = ArgumentParser(
        description=("This script runs the SEPP algorithm on an input "
                     "tree, alignment, fragment file, and RAxML info file."),
        conflict_handler='resolve')

    _parser.add_argument("-v",
                         "--version",
                         action='version',
                         version="%(prog)s " + version)

    # ---- Decomposition options ------------------------------------------
    decompGroup = _parser.add_argument_group(
        "Decomposition Options".upper(), ' '.join([
            "These options determine the alignment decomposition size and",
            "taxon insertion size.  If None is given, then the default",
            "is to align/place at 10% of total taxa.  "
            "The alignment decomposition size must be",
            "less than the taxon insertion size."
        ]))
    # Registry of groups by name, attached to the parser object itself.
    _parser.groups = dict()
    _parser.groups['decompGroup'] = decompGroup

    # Help strings are %-formatted by argparse, hence the doubled '%%'.
    decompGroup.add_argument(
        "-A",
        "--alignmentSize",
        type=int,
        dest="alignment_size",
        metavar="N",
        default=None,
        help=("max alignment subset size of N "
              "[default: 10%% of the total number of taxa or the placement"
              " subset size if given]"))
    decompGroup.add_argument(
        "-P",
        "--placementSize",
        type=int,
        dest="placement_size",
        metavar="N",
        default=None,
        help=("max placement subset size of N "
              "[default: 10%% of the total number of taxa or the alignment "
              "length (whichever bigger)]"))
    decompGroup.add_argument(
        "-F",
        "--fragmentChunkSize",
        type=int,
        dest="max_chunk_size",
        metavar="N",
        default=20000,
        help=("maximum fragment chunk size of N. Helps controlling memory.  "
              "[default: 20000]"))
    decompGroup.add_argument(
        "-D",
        "--distance",
        type=float,
        dest="distance",
        metavar="DISTANCE",
        default=1,
        help=("minimum p-distance before stopping the decomposition "
              "[default: 1]"))
    # uym2 added #
    decompGroup.add_argument(
        "-M",
        "--diameter",
        type=float,
        dest="maxDiam",
        metavar="DIAMETER",
        default=None,
        help=("maximum tree diameter before stopping the decomposition "
              "[default: None]"))

    decompGroup.add_argument(
        "-S",
        "--decomp_strategy",
        type=valid_decomp_strategy,
        dest="decomp_strategy",
        metavar="DECOMP",
        default="normal",
        # default = "midpoint",
        help="decomposition strategy "
        "[default: using tree branch length]")
    # "[default: only include smallest subsets]")

    # ---- Output options -------------------------------------------------
    outputGroup = _parser.add_argument_group("Output Options".upper(),
                                             "These options control output.")
    _parser.groups['outputGroup'] = outputGroup

    outputGroup.add_argument(
        "-p",
        "--tempdir",
        dest="tempdir",
        metavar="DIR",
        type=valid_dir_path,
        default=get_default_temp_dir(),
        help=("Tempfile files will be written to DIR. Full-path required. "
              "[default: %(default)s]"))
    outputGroup.add_argument("-rt",
                             "--remtemp",
                             dest="remtemp",
                             action="store_true",
                             help=("Remove tempfile directory.  "
                                   "[default: disabled]"))
    outputGroup.set_defaults(remtemp=False)
    outputGroup.add_argument(
        "-o",
        "--output",
        dest="output",
        metavar="OUTPUT",
        default="output",
        type=valid_file_prefix,
        help="output files with prefix OUTPUT. [default: %(default)s]")
    outputGroup.add_argument(
        "-d",
        "--outdir",
        dest="outdir",
        metavar="OUTPUT_DIR",
        default=os.path.curdir,
        type=valid_dir_path,
        help=("output to OUTPUT_DIR directory. full-path required. "
              "[default: %(default)s]"))

    # ---- Input options --------------------------------------------------
    inputGroup = _parser.add_argument_group(
        "Input Options".upper(),
        ("These options control input. To run SEPP the following is "
         "required. A backbone tree (in newick format), a RAxML_info file "
         "(this is the file generated by RAxML during estimation of the "
         "backbone tree. Pplacer uses this info file to set model "
         "parameters), a backbone alignment file (in fasta format), and a"
         " fasta file including fragments.  The input sequences are assumed"
         " to be DNA unless specified otherwise."))
    _parser.groups['inputGroup'] = inputGroup

    # The file-valued options below use argparse.FileType('r'), so the
    # named files are opened for reading while arguments are parsed.
    inputGroup.add_argument(
        "-c",
        "--config",
        dest="config_file",
        metavar="CONFIG",
        type=argparse.FileType('r'),
        help=("A config file, including options used to run SEPP. Options"
              " provided as command line arguments overwrite config file "
              "values for those options. "
              "[default: %(default)s]"))
    inputGroup.add_argument(
        "-t",
        "--tree",
        dest="tree_file",
        metavar="TREE",
        type=argparse.FileType('r'),
        help="Input tree file (newick format) [default: %(default)s]")
    inputGroup.add_argument(
        "-r",
        "--raxml",
        dest="info_file",
        metavar="RAXML",
        type=argparse.FileType('r'),
        help=("RAxML_info file including model parameters, generated by "
              "RAxML. [default: %(default)s]"))
    inputGroup.add_argument("-a",
                            "--alignment",
                            dest="alignment_file",
                            metavar="ALIGN",
                            type=argparse.FileType('r'),
                            help="Aligned fasta file [default: %(default)s]")
    inputGroup.add_argument("-f",
                            "--fragment",
                            dest="fragment_file",
                            metavar="FRAG",
                            type=argparse.FileType('r'),
                            help="fragment file [default: %(default)s]")
    inputGroup.add_argument(
        "-m",
        "--molecule",
        dest="molecule",
        metavar="MOLECULE",
        type=valid_molecule,
        default="dna",
        help=("Molecule type of sequences. Can be amino, dna, or rna "
              "[default: %(default)s]"))

    # ---- Other options --------------------------------------------------
    otherGroup = _parser.add_argument_group(
        "Other options".upper(), "These options control how SEPP is run")
    _parser.groups['otherGroup'] = otherGroup
    otherGroup.add_argument(
        "-x",
        "--cpu",
        type=set_cpu,
        dest="cpu",
        metavar="N",
        default=set_cpu(cpu_count()),
        help=("Use N cpus "
              "[default: number of cpus available on the machine]"))
    otherGroup.add_argument("-cp",
                            "--checkpoint",
                            type=set_checkpoint,
                            dest="checkpoint",
                            metavar="CHCK_FILE",
                            default=set_checkpoint(None),
                            help="checkpoint file [default: no checkpointing]")
    otherGroup.add_argument(
        "-cpi",
        "--interval",
        type=int,
        dest="checkpoint_interval",
        metavar="N",
        default=3600,
        help=("Interval (in seconds) between checkpoint writes. Has effect "
              "only with -cp provided. [default: 3600]"))
    otherGroup.add_argument("-seed",
                            "--randomseed",
                            type=int,
                            dest="seed",
                            metavar="N",
                            default=297834,
                            help="random seed number. [default: 297834]")
    # inputGroup.add_argument("-p", "--package",
    #                  dest="package", metavar="PKG",
    #                  help="package directory"
    #                         "[default: %(default)s]")
    #

    return _parser
Esempio n. 2
0
File: config.py Progetto: kgori/sepp
def _init_parser():
    """Build the module-level SEPP command-line parser and return it.

    The parser is stored in the module global ``_parser``.  Options are
    registered in four argument groups, which are also exposed by name
    through the ``_parser.groups`` dict (keys ``decompGroup``,
    ``outputGroup``, ``inputGroup`` and ``otherGroup``).

    The defaults for ``--tempdir``, ``--cpu`` and ``--checkpoint`` are
    computed when this function runs, by calling ``get_default_temp_dir()``,
    ``set_cpu(cpu_count())`` and ``set_checkpoint(None)`` respectively.

    Returns:
        The newly created parser (the same object as ``_parser``).
    """
    global _parser
    # 'resolve' makes a later add_argument() that reuses an option string
    # replace the earlier definition instead of raising an error.
    _parser = ArgumentParser(
        description=("This script runs the SEPP algorithm on an input "
                     "tree, alignment, fragment file, and RAxML info file."),
        conflict_handler='resolve')

    _parser.add_argument("-v", "--version", action='version',
                         version="%(prog)s " + version)

    # ---- Decomposition options ------------------------------------------
    decompGroup = _parser.add_argument_group(
        "Decomposition Options".upper(),
        ' '.join([
            "These options determine the alignment decomposition size and",
            "taxon insertion size.  If None is given, then the default",
            "is to align/place at 10% of total taxa.  "
            "The alignment decomposition size must be",
            "less than the taxon insertion size.",
        ]))
    # Registry of groups by name, attached to the parser object itself.
    _parser.groups = dict()
    _parser.groups['decompGroup'] = decompGroup

    # Help strings are %-formatted by argparse, hence the doubled '%%'.
    decompGroup.add_argument(
        "-A", "--alignmentSize",
        type=int,
        dest="alignment_size",
        metavar="N",
        default=None,
        help=("max alignment subset size of N "
              "[default: 10%% of the total number of taxa]"))
    decompGroup.add_argument(
        "-P", "--placementSize",
        type=int,
        dest="placement_size",
        metavar="N",
        default=None,
        help=("max placement subset size of N "
              "[default: 10%% of the total number of taxa]"))

    decompGroup.add_argument(
        "-S", "--decomp_strategy",
        type=valid_decomp_strategy,
        dest="decomp_strategy",
        metavar="DECOMP",
        default="normal",
        help=("decomposition strategy "
              "[default: only include smallest subsets]"))

    # ---- Output options -------------------------------------------------
    outputGroup = _parser.add_argument_group(
        "Output Options".upper(),
        "These options control output.")
    _parser.groups['outputGroup'] = outputGroup

    outputGroup.add_argument(
        "-p", "--tempdir",
        dest="tempdir",
        metavar="DIR",
        type=valid_dir_path,
        default=get_default_temp_dir(),
        help=("Tempfile files will be written to DIR. Full-path required. "
              "[default: %(default)s]"))
    outputGroup.add_argument(
        "-o", "--output",
        dest="output",
        metavar="OUTPUT",
        default="output",
        type=valid_file_prefix,
        help=("output files with prefix OUTPUT. "
              "[default: %(default)s]"))
    outputGroup.add_argument(
        "-d", "--outdir",
        dest="outdir",
        metavar="OUTPUT_DIR",
        default=os.path.curdir,
        type=valid_dir_path,
        help=("output to OUTPUT_DIR directory. full-path required. "
              "[default: %(default)s]"))

    # ---- Input options --------------------------------------------------
    # The sentences are separate list items so that ' '.join() actually
    # inserts the spaces between them.
    inputGroup = _parser.add_argument_group(
        "Input Options".upper(),
        ' '.join([
            "These options control input. To run SEPP the following is "
            "required.",
            "A backbone tree (in newick format), a RAxML_info file (this is "
            "the file generated by RAxML during estimation of the backbone "
            "tree.",
            "Pplacer uses this info file to set model parameters),",
            "a backbone alignment file (in fasta format), and a fasta file "
            "including fragments.  The input sequences are assumed to be "
            "DNA unless specified otherwise.",
        ]))
    _parser.groups['inputGroup'] = inputGroup

    # The file-valued options below use argparse.FileType('r'), so the
    # named files are opened for reading while arguments are parsed.
    inputGroup.add_argument(
        "-c", "--config",
        dest="config_file",
        metavar="CONFIG",
        type=argparse.FileType('r'),
        help=("A config file, including options used to run SEPP. Options "
              "provided as command line arguments overwrite config file "
              "values for those options. "
              "[default: %(default)s]"))
    inputGroup.add_argument(
        "-t", "--tree",
        dest="tree_file",
        metavar="TREE",
        type=argparse.FileType('r'),
        help=("Input tree file (newick format) "
              "[default: %(default)s]"))
    inputGroup.add_argument(
        "-r", "--raxml",
        dest="info_file",
        metavar="RAXML",
        type=argparse.FileType('r'),
        help=("RAxML_info file including model parameters, generated by "
              "RAxML. [default: %(default)s]"))
    inputGroup.add_argument(
        "-a", "--alignment",
        dest="alignment_file",
        metavar="ALIGN",
        type=argparse.FileType('r'),
        help=("Aligned fasta file "
              "[default: %(default)s]"))
    inputGroup.add_argument(
        "-f", "--fragment",
        dest="fragment_file",
        metavar="FRAG",
        type=argparse.FileType('r'),
        help=("fragment file "
              "[default: %(default)s]"))
    inputGroup.add_argument(
        "-m", "--molecule",
        dest="molecule",
        metavar="MOLECULE",
        type=valid_molecule,
        default="dna",
        help=("Molecule type of sequences. Can be amino, dna, or rna "
              "[default: %(default)s]"))

    # ---- Other options --------------------------------------------------
    otherGroup = _parser.add_argument_group(
        "Other options".upper(),
        "These options control how SEPP is run")
    _parser.groups['otherGroup'] = otherGroup
    otherGroup.add_argument(
        "-x", "--cpu",
        type=set_cpu,
        dest="cpu",
        metavar="N",
        default=set_cpu(cpu_count()),
        help=("Use N cpus "
              "[default: number of cpus available on the machine]"))
    otherGroup.add_argument(
        "-cp", "--checkpoint",
        type=set_checkpoint,
        dest="checkpoint",
        metavar="CHCK_FILE",
        default=set_checkpoint(None),
        help=("checkpoint file "
              "[default: no checkpointing]"))
    otherGroup.add_argument(
        "-cpi", "--interval",
        type=int,
        dest="checkpoint_interval",
        metavar="N",
        default=3600,
        help=("Interval (in seconds) between checkpoint writes. Has effect "
              "only with -cp provided. [default: 3600]"))
    # inputGroup.add_argument("-p", "--package",
    #                  dest = "package", metavar = "PKG",
    #                  help = "package directory"
    #                         "[default: %(default)s]")
    #

    return _parser