Example #1
0
def _main():
    """Download, unpack and compile the latest 'ext_apps' tools bundle.

    Command line flags:
        -v          verbose compilation output
        --debug     parsed but not used here (forwarded nowhere) -- TODO confirm
        --dir PATH  target directory for the download/unpack
        --cpu N     number of cores passed to the compiler driver
        targets     optional list of specific tools to build

    Returns the status value produced by ``compile_all.compile_all()``.
    """
    parser = ArgumentParser()
    parser.add_argument("-v", dest="verbose", action="store_true")
    parser.add_argument("--debug", dest="debug", action="store_true")
    parser.add_argument("--dir", dest='directory', type=str, default='')
    parser.add_argument("--cpu", dest='cores', type=int, default=1)
    parser.add_argument(dest="targets", nargs="*")
    args = parser.parse_args()

    # NOTE(review): APPSPATH is computed here but never read in this
    # function -- kept for parity with the other entry points; confirm
    # whether it can be dropped.
    APPSPATH = os.path.expanduser("~/.etetoolkit/ext_apps-latest/")
    ETEHOMEDIR = os.path.expanduser("~/.etetoolkit/")

    if pexist(pjoin('/etc/etetoolkit/', 'ext_apps-latest')):
        # a system-wide copy shipped with the ete distro takes precedence
        APPSPATH = pjoin('/etc/etetoolkit/', 'ext_apps-latest')
        ETEHOMEDIR = '/etc/etetoolkit/'
    else:
        # otherwise fall back to a per-user copy
        APPSPATH = pjoin(ETEHOMEDIR, 'ext_apps-latest')

    TARGET_DIR = args.directory

    # keep prompting until an existing directory is given; an empty
    # answer selects the (possibly not-yet-existing) ete home directory
    while not pexist(TARGET_DIR):
        TARGET_DIR = input('target directory? [%s]:' % ETEHOMEDIR).strip()
        if TARGET_DIR == '':
            TARGET_DIR = ETEHOMEDIR
            break

    if TARGET_DIR == ETEHOMEDIR:
        try:
            os.mkdir(ETEHOMEDIR)
        except OSError:
            # directory already exists -- reuse it
            pass

    version_file = "latest.tar.gz"
    print(colorify('Downloading latest version of tools...', "green"),
          file=sys.stderr)
    sys.stderr.flush()

    urlretrieve(
        "https://github.com/jhcepas/ext_apps/archive/%s" % version_file,
        pjoin(TARGET_DIR, version_file))
    print(colorify('Decompressing...', "green"), file=sys.stderr)
    # FIX: use a context manager so the tarball handle is closed
    # deterministically (it was previously leaked)
    with tarfile.open(pjoin(TARGET_DIR, version_file), 'r:gz') as tfile:
        tfile.extractall(TARGET_DIR)
    print(colorify('Compiling tools...', "green"), file=sys.stderr)
    sys.path.insert(0, pjoin(TARGET_DIR, 'ext_apps-latest'))
    import compile_all
    s = compile_all.compile_all(targets=args.targets,
                                verbose=args.verbose,
                                cores=args.cores)
    return s
Example #2
0
def _main():
    """Fetch, extract and compile the latest external tools archive.

    Flags: ``-v`` (verbose build), ``--debug`` (currently unused here --
    TODO confirm), ``--dir`` (install target), ``--cpu`` (build cores),
    plus optional positional tool names in ``targets``.

    Returns whatever ``compile_all.compile_all()`` returns.
    """
    parser = ArgumentParser()
    parser.add_argument("-v", dest="verbose", action="store_true")
    parser.add_argument("--debug", dest="debug", action="store_true")
    parser.add_argument("--dir", dest='directory', type=str, default='')
    parser.add_argument("--cpu", dest='cores', type=int, default=1)
    parser.add_argument(dest="targets", nargs="*")
    args = parser.parse_args()

    # NOTE(review): APPSPATH is assigned but never consumed in this
    # function; retained to match sibling implementations -- verify.
    APPSPATH = os.path.expanduser("~/.etetoolkit/ext_apps-latest/")
    ETEHOMEDIR = os.path.expanduser("~/.etetoolkit/")

    if pexist(pjoin('/etc/etetoolkit/', 'ext_apps-latest')):
        # prefer the distro-wide copy of the apps when present
        APPSPATH = pjoin('/etc/etetoolkit/', 'ext_apps-latest')
        ETEHOMEDIR = '/etc/etetoolkit/'
    else:
        # no distro copy: use the per-user location
        APPSPATH = pjoin(ETEHOMEDIR, 'ext_apps-latest')

    TARGET_DIR = args.directory

    # prompt until we have an existing directory; blank answer means
    # "use the ete home dir" (created below if needed)
    while not pexist(TARGET_DIR):
        TARGET_DIR = input('target directory? [%s]:' %ETEHOMEDIR).strip()
        if TARGET_DIR == '':
            TARGET_DIR = ETEHOMEDIR
            break

    if TARGET_DIR == ETEHOMEDIR:
        try:
            os.mkdir(ETEHOMEDIR)
        except OSError:
            # already exists -- nothing to do
            pass

    version_file = "latest.tar.gz"
    print(colorify('Downloading latest version of tools...', "green"), file=sys.stderr)
    sys.stderr.flush()

    urlretrieve("https://github.com/jhcepas/ext_apps/archive/%s" %version_file, pjoin(TARGET_DIR, version_file))
    print(colorify('Decompressing...', "green"), file=sys.stderr)
    # FIX: the tar handle was previously left open; close it via ``with``
    with tarfile.open(pjoin(TARGET_DIR, version_file), 'r:gz') as tfile:
        tfile.extractall(TARGET_DIR)
    print(colorify('Compiling tools...', "green"), file=sys.stderr)
    sys.path.insert(0, pjoin(TARGET_DIR, 'ext_apps-latest'))
    import compile_all
    s = compile_all.compile_all(targets=args.targets, verbose=args.verbose, cores=args.cores)
    return s
Example #3
0
def _main():
    """Command-line entry point for the ete-build tool.

    First dispatches the utility sub-commands taken from ``sys.argv``
    (install_tools, check, workflows/wl, apps, show, dump, validate,
    version), each of which exits the process. Otherwise it builds the
    full argparse parser, validates the environment (external apps dir,
    graphics support, input files), populates the ``GLOBALS`` shared
    state and finally hands control to ``app_wrapper(main, args)``.

    Mutates the module globals ``BASEPATH``, ``APPSPATH`` and ``args``.
    """
    global BASEPATH, APPSPATH, args
    APPSPATH = os.path.expanduser("~/.etetoolkit/ext_apps-latest/")
    ETEHOMEDIR = os.path.expanduser("~/.etetoolkit/")

    if os.path.exists(pjoin('/etc/etetoolkit/', 'ext_apps-latest')):
        # if a copy of apps is part of the ete distro, use it by default
        APPSPATH = pjoin('/etc/etetoolkit/', 'ext_apps-latest')
        ETEHOMEDIR = '/etc/etetoolkit/'
    else:
        # if not, try a user local copy
        APPSPATH = pjoin(ETEHOMEDIR, 'ext_apps-latest')

    if len(sys.argv) == 1:
        if not pexist(APPSPATH):
            print(colorify('\nWARNING: external applications directory are not found at %s' %APPSPATH, "yellow"), file=sys.stderr)
            print(colorify('Use "ete build install_tools" to install or upgrade tools', "orange"), file=sys.stderr)

    elif len(sys.argv) > 1:
        _config_path = pjoin(BASEPATH, 'phylobuild.cfg')

        if sys.argv[1] == "install_tools":
            # FIX: ``urllib.urlretrieve`` does not exist in Python 3 (this
            # file already uses Python-3-only ``print(..., file=...)``);
            # the function lives in ``urllib.request``.
            from urllib.request import urlretrieve
            import tarfile
            print(colorify('Downloading latest version of tools...', "green"), file=sys.stderr)
            if len(sys.argv) > 2:
                TARGET_DIR = sys.argv[2]
            else:
                TARGET_DIR = ''
            # prompt until an existing dir is given; empty answer selects
            # the ete home dir (created below if missing)
            while not pexist(TARGET_DIR):
                TARGET_DIR = input('target directory? [%s]:' %ETEHOMEDIR).strip()
                if TARGET_DIR == '':
                    TARGET_DIR = ETEHOMEDIR
                    break
            if TARGET_DIR == ETEHOMEDIR:
                try:
                    os.mkdir(ETEHOMEDIR)
                except OSError:
                    # already exists -- reuse
                    pass

            version_file = "latest.tar.gz"
            urlretrieve("https://github.com/jhcepas/ext_apps/archive/%s" %version_file, pjoin(TARGET_DIR, version_file))
            print(colorify('Decompressing...', "green"), file=sys.stderr)
            # FIX: close the tarball handle deterministically (was leaked).
            # NOTE(review): extractall() trusts member paths inside the
            # archive; acceptable here since the tarball is first-party.
            with tarfile.open(pjoin(TARGET_DIR, version_file), 'r:gz') as tfile:
                tfile.extractall(TARGET_DIR)
            print(colorify('Compiling tools...', "green"), file=sys.stderr)
            sys.path.insert(0, pjoin(TARGET_DIR, 'ext_apps-latest'))
            import compile_all
            s = compile_all.compile_all()
            sys.exit(s)

        elif sys.argv[1] == "check":
            if not pexist(APPSPATH):
                print(colorify('\nWARNING: external applications directory are not found at %s' %APPSPATH, "yellow"), file=sys.stderr)
                print(colorify('Use "ete build install_tools" to install or upgrade', "orange"), file=sys.stderr)
            # setup portable apps
            config = {}
            for k in apps.builtin_apps:
                cmd = apps.get_call(k, APPSPATH, "/tmp", "1")
                config[k] = cmd
            apps.test_apps(config)
            sys.exit(0)

        elif sys.argv[1] in ("workflows", "wl"):
            if sys.argv[1] == "wl":
                print(colorify("WARNING: 'wl' is obsolete and will be removed in the future, use 'workflows' instead", "orange"), file=sys.stderr)

            base_config = check_config(_config_path)
            list_workflows(base_config)
            sys.exit(0)

        elif sys.argv[1] == "apps":
            base_config = check_config(_config_path)
            list_apps(base_config, set(sys.argv[2:]))
            sys.exit(0)

        elif sys.argv[1] == "show":
            base_config = check_config(_config_path)
            try:
                block = sys.argv[2]
            except IndexError:
                print("Expected a block name, found none")
                sys.exit(1)

            block_detail(block, base_config)
            sys.exit(0)

        elif sys.argv[1] == "dump":
            if len(sys.argv) > 2:
                base_config = check_config(_config_path)
                block_detail(sys.argv[2], base_config, color=False)
            else:
                print(open(_config_path).read())
            sys.exit(0)

        elif sys.argv[1] == "validate":
            # FIX: previously crashed with IndexError when the file
            # argument was missing; fail with a clear message instead.
            if len(sys.argv) < 3:
                print("Expected a configuration file, found none")
                sys.exit(1)
            print('Validating configuration file ', sys.argv[2])
            if pexist(sys.argv[2]):
                base_config = check_config(sys.argv[2])
                print('Everything ok')
            else:
                print('File does not exist')
                sys.exit(-1)
            sys.exit(0)

        elif sys.argv[1] == "version":
            print(__VERSION__, '(%s)' %__DATE__)
            sys.exit(0)

    parser = argparse.ArgumentParser(description=__DESCRIPTION__ + __EXAMPLES__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)

    # Input data related flags
    input_group = parser.add_argument_group('==== Input Options ====')

    input_group.add_argument('[check | workflows | apps | show | dump | validate | version | install_tools]',
                             nargs='?',
                             help=("Utility commands:\n"
                                   "check: check that external applications are executable.\n"
                                   "wl: show a list of available workflows.\n"
                                   "show [name]: show the configuration parameters of a given workflow or application config block.\n"
                                   "dump [name]: dump the configuration parameters of the specified block (allows to modify predefined config).\n"
                                   "validate [configfile]: Validate a custom configuration file.\n"
                                   "version: Show current version.\n"
                                   ))

    input_group.add_argument("-c", "--config", dest="configfile",
                             type=is_file, default=BASEPATH+'/phylobuild.cfg',
                             help="Custom configuration file.")

    input_group.add_argument("--tools-dir", dest="tools_dir",
                             type=str,
                             help="Custom path where external software is avaiable.")

    input_group.add_argument("-w", dest="workflow",
                             required=True,
                             nargs='+',
                             help="One or more gene-tree workflow names. All the specified workflows will be executed using the same input data.")

    input_group.add_argument("-m", dest="supermatrix_workflow",
                             required=False,
                             nargs='+',
                             help="One or more super-matrix workflow names. All the specified workflows will be executed using the same input data.")

    input_group.add_argument("-a", dest="aa_seed_file",
                             type=is_file,
                             help="Initial multi sequence file with"
                             " protein sequences.")

    input_group.add_argument("-n", dest="nt_seed_file",
                             type=is_file,
                             help="Initial multi sequence file with"
                             " nucleotide sequences")

    input_group.add_argument("--dealign", dest="dealign",
                             action="store_true",
                             help="when used, gaps in the orginal fasta file will"
                             " be removed, thus allowing to use alignment files as input.")

    # FIX: raw string -- '\s' in a plain literal is an invalid escape
    # sequence (DeprecationWarning, error in future Python versions).
    # The runtime value is unchanged.
    input_group.add_argument("--seq-name-parser", dest="seq_name_parser",
                             type=str,
                             help=("A Perl regular expression containing a matching group, which is"
                                   " used to parse sequence names from the input files. Use this option to"
                                   " customize the names that should be shown in the output files."
                                   " The matching group (the two parentheses) in the provided regular"
                                   " expression will be assumed as sequence name. By default, all "
                                   " characthers until the first blank space or tab delimiter are "
                                   " used as the sequence names."),
                             default=r'^([^\s]+)')

    input_group.add_argument("--no-seq-rename", dest="seq_rename",
                             action="store_false",
                             help="If used, sequence names will NOT be"
                             " internally translated to 10-character-"
                             "identifiers.")

    input_group.add_argument("--no-seq-checks", dest="no_seq_checks",
                            action="store_true",
                            help="Skip consistency sequence checks for not allowed symbols, etc.")
    input_group.add_argument("--no-seq-correct", dest="no_seq_correct",
                            action="store_true",
                            help="Skip sequence compatibility changes: i.e. U, J and O symbols are converted into X by default.")

    dup_names_group = input_group.add_mutually_exclusive_group()

    dup_names_group.add_argument("--ignore-dup-seqnames", dest="ignore_dup_seqnames",
                                 action = "store_true",
                                 help=("If duplicated sequence names exist in the input"
                                       " fasta file, a single random instance will be used."))

    dup_names_group.add_argument("--rename-dup-seqnames", dest="rename_dup_seqnames",
                                 action = "store_true",
                                 help=("If duplicated sequence names exist in the input"
                                       " fasta file, duplicates will be renamed."))

    input_group.add_argument("--seqdb", dest="seqdb",
                             type=str,
                             help="Uses a custom sequence database file")

    # supermatrix workflow

    input_group.add_argument("--cogs", dest="cogs_file",
                             type=is_file,
                             help="A file defining clusters of orthologous groups."
                             " One per line. Tab delimited sequence ids. ")

    input_group.add_argument("--lineages", dest="lineages_file",
                             type=is_file,
                             help="A file containing the (sorted) lineage "
                                  "track of each species. It enables "
                                  "NPR algorithm to fix what taxonomic "
                                  "levels should be optimized."
                                  "Note that linage tracks must consist in "
                                  "a comma separated list of taxonomic levels "
                                  "sorted from deeper to swallower clades "
                                  "(i.e. 9606 [TAB] Eukaryotes,Mammals,Primates)"
                             )

    input_group.add_argument("--spname-delimiter", dest="spname_delimiter",
                             type=str, default="_",
                             help="spname_delimiter is used to split"
                             " the name of sequences into species code and"
                             " sequence identifier (i.e. HUMAN_p53 = HUMAN, p53)."
                             " Note that species name must always precede seq.identifier.")

    input_group.add_argument("--spfile", dest="spfile",
                             type=is_file,
                             help="If specified, only the sequences and ortholog"
                             " pairs matching the group of species in this file"
                             " (one species code per line) will be used. ")

    npr_group = parser.add_argument_group('==== NPR options ====')
    npr_group.add_argument("-r", "--recursive", dest="npr_workflows",
                           required=False,
                           nargs="*",
                           help="Enables recursive NPR capabilities (Nested Phylogenetic Reconstruction)"
                           " and specifies custom workflows and filters for each NPR iteration.")
    npr_group.add_argument("--nt_switch_thr", dest="nt_switch_thr",
                           required=False,
                           type=float,
                           default = 0.95,
                           help="Sequence similarity at which nucleotide based alignments should be used"
                           " instead of amino-acids. ")
    npr_group.add_argument("--max-iters", dest="max_iters",
                           required=False,
                           type=int,
                           default=99999999,
                           help="Set a maximum number of NPR iterations allowed.")
    npr_group.add_argument("--first-split-outgroup", dest="first_split",
                           type=str,
                           default='midpoint',
                           help=("When used, it overrides first_split option"
                                 " in any tree merger config block in the"
                                 " config file. Default: 'midpoint' "))

    # Output data related flags
    output_group = parser.add_argument_group('==== Output Options ====')
    output_group.add_argument("-o", "--outdir", dest="outdir",
                              type=str, required=True,
                              help="""Output directory for results.""")

    output_group.add_argument("--scratch-dir", dest="scratch_dir",
                              type=is_dir,
                              help="""If provided, ete-build will run on the scratch folder and all files will be transferred to the output dir when finished. """)

    output_group.add_argument("--db-dir", dest="db_dir",
                              type=is_dir,
                              help="""Alternative location of the database directory""")

    output_group.add_argument("--tasks-dir", dest="tasks_dir",
                              type=is_dir,
                              help="""Output directory for the executed processes (intermediate files).""")

    output_group.add_argument("--compress", action="store_true",
                              help="Compress all intermediate files when"
                              " a workflow is finished.")

    output_group.add_argument("--logfile", action="store_true",
                              help="Log messages will be saved into a file named npr.log within the output directory.")

    output_group.add_argument("--noimg", action="store_true",
                              help="Tree images will not be generated when a workflow is finished.")

    output_group.add_argument("--email", dest="email",
                              type=str,
                              help="Send an email when errors occur or a workflow is done.")

    output_group.add_argument("--email_report_time", dest="email_report_time",
                              type=int, default = 0,
                              help="How often (in minutes) an email reporting the status of the execution should be sent. 0=No reports")

    # Task execution related flags
    exec_group = parser.add_argument_group('==== Execution Mode Options ====')

    exec_group.add_argument("-C", "--cpu", dest="maxcores", type=int,
                            default=1, help="Maximum number of CPU cores"
                            " available in the execution host. If higher"
                            " than 1, tasks with multi-threading"
                            " capabilities will enabled. Note that this"
                            " number will work as a hard limit for all applications,"
                            "regardless of their specific configuration.")

    exec_group.add_argument("-t", "--schedule-time", dest="schedule_time",
                            type=float, default=2,
                            help="""How often (in secs) tasks should be checked for available results.""")

    exec_group.add_argument("--launch-time", dest="launch_time",
                            type=float, default=3,
                            help="""How often (in secs) queued jobs should be checked for launching""")

    exec_type_group = exec_group.add_mutually_exclusive_group()

    exec_type_group.add_argument("--noexec", dest="no_execute",
                                 action="store_true",
                                 help=("Prevents launching any external application."
                                       " Tasks will be processed and intermediate steps will"
                                       " run, but no real computation will be performed."))

    exec_type_group.add_argument("--sge", dest="sge_execute",
                                 action="store_true", help="EXPERIMENTAL!: Jobs will be"
                                 " launched using the Sun Grid Engine"
                                 " queue system.")

    exec_group.add_argument("--monitor", dest="monitor",
                            action="store_true",
                            help="Monitor mode: pipeline jobs will be"
                            " detached from the main process. This means that"
                            " when npr execution is interrupted, all currently"
                            " running jobs will keep running. Use this option if you"
                            " want to stop and recover an execution thread or"
                            " if jobs are expected to be executed remotely."
                            )

    exec_group.add_argument("--override", dest="override",
                            action="store_true",
                            help="Override workflow configuration file if a previous version exists." )

    exec_group.add_argument("--clearall", dest="clearall",
                            action="store_true",
                            help="Erase all previous data in the output directory and start a clean execution.")

    exec_group.add_argument("--softclear", dest="softclear",
                            action="store_true",
                            help="Clear all precomputed data (data.db), but keeps task raw data in the directory, so they can be re-processed.")

    exec_group.add_argument("--clear-seqdb", dest="clearseqs",
                            action="store_true",
                            help="Reload sequences deleting previous database if necessary.")

    exec_group.add_argument("--nochecks", dest="nochecks",
                            action="store_true",
                            help="Skip basic checks (i.e. tools available) everytime the application starts.")

    # Interface related flags
    ui_group = parser.add_argument_group("==== Program Interface Options ====")

    ui_group.add_argument("-v", dest="verbosity",
                          default=0,
                          type=int, choices=[0,1,2,3,4],
                          help="Verbosity level: 0=very quiet, 4=very "
                          " verbose.")

    ui_group.add_argument("--debug", nargs="?",
                          const="all",
                          help="Start debugging"
                          " A taskid can be provided, so"
                          " debugging will start from such task on.")

    args = parser.parse_args()
    if args.tools_dir:
        APPSPATH = args.tools_dir

    if not pexist(APPSPATH):
        print(colorify('\nWARNING: external applications directory are not found at %s' %APPSPATH, "yellow"), file=sys.stderr)
        print(colorify('Use "ete build install_tools" to install or upgrade tools', "orange"), file=sys.stderr)

    args.enable_ui = False
    if not args.noimg:
        print('Testing ETE-build graphics support...')
        print('X11 DISPLAY = %s' %colorify(os.environ.get('DISPLAY', 'not detected!'), 'yellow'))
        print('(You can use --noimg to disable graphical capabilities)')
        try:
            from .. import Tree
            Tree().render('/tmp/etenpr_img_test.png')
        except Exception as e:
            # FIX: was a bare ``except:`` which also swallowed SystemExit
            # and KeyboardInterrupt; chain the cause for debuggability.
            raise ConfigError('img generation not supported') from e

    if not args.aa_seed_file and not args.nt_seed_file:
        parser.error('At least one input file argument (-a, -n) is required')

    outdir = os.path.abspath(args.outdir)
    final_dir, runpath = os.path.split(outdir)
    if not runpath:
        raise ValueError("Invalid outdir")

    GLOBALS["output_dir"] = os.path.abspath(args.outdir)

    if args.scratch_dir:
        # set paths for scratch folder for sqlite files
        print("Creating temporary scratch dir...", file=sys.stderr)
        base_scratch_dir = os.path.abspath(args.scratch_dir)
        scratch_dir = tempfile.mkdtemp(prefix='npr_tmp', dir=base_scratch_dir)
        GLOBALS["scratch_dir"] = scratch_dir
        GLOBALS["basedir"] = scratch_dir
    else:
        GLOBALS["basedir"] = GLOBALS["output_dir"]

    GLOBALS["first_split_outgroup"] = args.first_split

    GLOBALS["email"] = args.email
    GLOBALS["verbosity"] = args.verbosity
    GLOBALS["email_report_time"] = args.email_report_time * 60
    GLOBALS["launch_time"] = args.launch_time
    GLOBALS["cmdline"] = ' '.join(sys.argv)

    GLOBALS["threadinfo"] = defaultdict(dict)
    GLOBALS["seqtypes"] = set()
    GLOBALS["target_species"] = set()
    GLOBALS["target_sequences"] = set()
    GLOBALS["spname_delimiter"] = args.spname_delimiter
    GLOBALS["color_shell"] = True
    GLOBALS["citator"] = Citator()

    GLOBALS["lineages"] = None
    GLOBALS["cogs_file"] = None

    GLOBALS["citator"].add("ETE")

    if not pexist(GLOBALS["basedir"]):
        os.makedirs(GLOBALS["basedir"])

    # when killed, translate signal into exception so program can exit cleanly
    def raise_control_c(_signal, _frame):
        if GLOBALS.get('_background_scheduler', None):
            GLOBALS['_background_scheduler'].terminate()
        raise KeyboardInterrupt
    signal.signal(signal.SIGTERM, raise_control_c)

    # Start the application
    app_wrapper(main, args)
Example #4
0
def _main(arguments):
    """Entry point for the ``ete build`` command-line tool.

    First resolves the external-applications directory (system-wide copy
    under /etc/etetoolkit/ takes precedence over the per-user copy under
    ~/.etetoolkit/).  If the first CLI token is a utility sub-command
    (install_tools, check, workflows/wl, apps, show, dump, validate,
    version) it is handled immediately and the process exits.  Otherwise
    the full argument parser is built, arguments are parsed, the GLOBALS
    registry is populated and the pipeline is started via app_wrapper().

    :param arguments: full argv-style list of command line tokens.
    """
    global BASEPATH, APPSPATH, args
    APPSPATH = os.path.expanduser("~/.etetoolkit/ext_apps-latest/")
    ETEHOMEDIR = os.path.expanduser("~/.etetoolkit/")

    if os.path.exists(pjoin('/etc/etetoolkit/', 'ext_apps-latest')):
        # if a copy of apps is part of the ete distro, use if by default
        APPSPATH = pjoin('/etc/etetoolkit/', 'ext_apps-latest')
        ETEHOMEDIR = '/etc/etetoolkit/'
    else:
        # if not, try a user local copy
        APPSPATH = pjoin(ETEHOMEDIR, 'ext_apps-latest')

    if len(arguments) == 1:
        # No sub-command/args at all: just warn if tools are missing.
        if not pexist(APPSPATH):
            print(colorify('\nWARNING: external applications directory are not found at %s' %APPSPATH, "yellow"), file=sys.stderr)
            print(colorify('Use "ete build install_tools" to install or upgrade tools', "orange"), file=sys.stderr)

    elif len(arguments) > 1:
        _config_path = pjoin(BASEPATH, 'phylobuild.cfg')

        if arguments[1] == "install_tools":
            # Download, unpack and compile the external tools bundle.
            # NOTE: urlretrieve lives in urllib.request on Python 3;
            # the old ``urllib.urlretrieve`` (Python 2) would raise
            # AttributeError here.
            from urllib.request import urlretrieve
            import tarfile
            print(colorify('Downloading latest version of tools...', "green"), file=sys.stderr)
            if len(arguments) > 2:
                TARGET_DIR = arguments[2]
            else:
                TARGET_DIR = ''
            # Keep asking until an existing directory is given; empty
            # answer falls back to the ETE home dir (created below).
            while not pexist(TARGET_DIR):
                TARGET_DIR = input('target directory? [%s]:' %ETEHOMEDIR).strip()
                if TARGET_DIR == '':
                    TARGET_DIR = ETEHOMEDIR
                    break
            if TARGET_DIR == ETEHOMEDIR:
                try:
                    os.mkdir(ETEHOMEDIR)
                except OSError:
                    # Already exists (or not creatable); extraction below
                    # will fail loudly if the dir is truly unusable.
                    pass

            version_file = "latest.tar.gz"
            urlretrieve("https://github.com/jhcepas/ext_apps/archive/%s" %version_file, pjoin(TARGET_DIR, version_file))
            print(colorify('Decompressing...', "green"), file=sys.stderr)
            tfile = tarfile.open(pjoin(TARGET_DIR, version_file), 'r:gz')
            tfile.extractall(TARGET_DIR)
            print(colorify('Compiling tools...', "green"), file=sys.stderr)
            sys.path.insert(0, pjoin(TARGET_DIR, 'ext_apps-latest'))
            import compile_all
            s = compile_all.compile_all()
            sys.exit(s)

        elif arguments[1] == "check":
            if not pexist(APPSPATH):
                print(colorify('\nWARNING: external applications directory are not found at %s' %APPSPATH, "yellow"), file=sys.stderr)
                print(colorify('Use "ete build install_tools" to install or upgrade', "orange"), file=sys.stderr)
            # setup portable apps
            config = {}
            for k in apps.builtin_apps:
                cmd = apps.get_call(k, APPSPATH, "/tmp", "1")
                config[k] = cmd
            apps.test_apps(config)
            sys.exit(0)

        elif arguments[1] in ("workflows", "wl"):
            if arguments[1] == "wl":
                print(colorify("WARNING: 'wl' is obsolete and will be removed in the future, use 'workflows' instead", "orange"), file=sys.stderr)

            base_config = check_config(_config_path)
            list_workflows(base_config)
            sys.exit(0)

        elif arguments[1] == "apps":
            base_config = check_config(_config_path)
            list_apps(base_config, set(arguments[2:]))
            sys.exit(0)

        elif arguments[1] == "show":
            base_config = check_config(_config_path)
            try:
                block = arguments[2]
            except IndexError:
                print("Expected a block name, found none")
                sys.exit(1)

            block_detail(block, base_config)
            sys.exit(0)

        elif arguments[1] == "dump":
            if len(arguments) > 2:
                base_config = check_config(_config_path)
                block_detail(arguments[2], base_config, color=False)
            else:
                # No block given: dump the raw config file verbatim.
                print(open(_config_path).read())
            sys.exit(0)

        elif arguments[1] == "validate":
            print('Validating configuration file ', arguments[2])
            if pexist(arguments[2]):
                base_config = check_config(arguments[2])
                print('Everything ok')
            else:
                print('File does not exist')
                sys.exit(-1)
            sys.exit(0)

        elif arguments[1] == "version":
            print(__VERSION__, '(%s)' %__DATE__)
            sys.exit(0)

    parser = argparse.ArgumentParser(description=__DESCRIPTION__ + __EXAMPLES__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)

    # Input data related flags
    input_group = parser.add_argument_group('==== Input Options ====')

    # Dummy positional documenting the utility sub-commands handled above.
    input_group.add_argument('[check | workflows | apps | show | dump | validate | version | install_tools]',
                             nargs='?',
                             help=("Utility commands:\n"
                                   "check: check that external applications are executable.\n"
                                   "wl: show a list of available workflows.\n"
                                   "show [name]: show the configuration parameters of a given workflow or application config block.\n"
                                   "dump [name]: dump the configuration parameters of the specified block (allows to modify predefined config).\n"
                                   "validate [configfile]: Validate a custom configuration file.\n"
                                   "version: Show current version.\n"
                                   ))

    input_group.add_argument("-c", "--custom-config", dest="custom_config",
                             type=is_file,
                             help="Custom configuration file.")

    input_group.add_argument("--base-config", dest="base_config",
                             type=is_file, default=BASEPATH+'/phylobuild.cfg',
                             help="Base configuration file.")

    input_group.add_argument("--tools-dir", dest="tools_dir",
                             type=str,
                             help="Custom path where external software is avaiable.")

    input_group.add_argument("-w", dest="workflow",
                             required=True,
                             nargs='+',
                             help="One or more gene-tree workflow names. All the specified workflows will be executed using the same input data.")

    input_group.add_argument("-m", dest="supermatrix_workflow",
                             required=False,
                             nargs='+',
                             help="One or more super-matrix workflow names. All the specified workflows will be executed using the same input data.")

    input_group.add_argument("-a", dest="aa_seed_file",
                             type=is_file,
                             help="Initial multi sequence file with"
                             " protein sequences.")


    input_group.add_argument("-n", dest="nt_seed_file",
                             type=is_file,
                             help="Initial multi sequence file with"
                             " nucleotide sequences")

    # input_group.add_argument("--seqformat", dest="seqformat",
    #                          choices=["fasta", "phylip", "iphylip", "phylip_relaxed", "iphylip_relaxed"],
    #                          default="fasta",
    #                          help="")

    input_group.add_argument("--dealign", dest="dealign",
                             action="store_true",
                             help="when used, gaps in the orginal fasta file will"
                             " be removed, thus allowing to use alignment files as input.")

    # Raw string: the pattern contains a backslash escape (\s) that would
    # otherwise be an invalid escape sequence in a plain string literal.
    input_group.add_argument("--seq-name-parser", dest="seq_name_parser",
                             type=str,
                             help=("A Perl regular expression containing a matching group, which is"
                                   " used to parse sequence names from the input files. Use this option to"
                                   " customize the names that should be shown in the output files."
                                   " The matching group (the two parentheses) in the provided regular"
                                   " expression will be assumed as sequence name. By default, all "
                                   " characthers until the first blank space or tab delimiter are "
                                   " used as the sequence names."),
                             default=r'^([^\s]+)')

    input_group.add_argument("--no-seq-rename", dest="seq_rename",
                             action="store_false",
                             help="If used, sequence names will NOT be"
                             " internally translated to 10-character-"
                             "identifiers.")

    input_group.add_argument("--no-seq-checks", dest="no_seq_checks",
                            action="store_true",
                            help="Skip consistency sequence checks for not allowed symbols, etc.")
    input_group.add_argument("--no-seq-correct", dest="no_seq_correct",
                            action="store_true",
                            help="Skip sequence compatibility changes: i.e. U, J and O symbols are converted into X by default.")

    dup_names_group = input_group.add_mutually_exclusive_group()

    dup_names_group.add_argument("--ignore-dup-seqnames", dest="ignore_dup_seqnames",
                                 action = "store_true",
                                 help=("If duplicated sequence names exist in the input"
                                       " fasta file, a single random instance will be used."))

    dup_names_group.add_argument("--rename-dup-seqnames", dest="rename_dup_seqnames",
                                 action = "store_true",
                                 help=("If duplicated sequence names exist in the input"
                                       " fasta file, duplicates will be renamed."))

    input_group.add_argument("--seqdb", dest="seqdb",
                             type=str,
                             help="Uses a custom sequence database file")


    # supermatrix workflow

    input_group.add_argument("--cogs", dest="cogs_file",
                             type=is_file,
                             help="A file defining clusters of orthologous groups."
                             " One per line. Tab delimited sequence ids. ")

    input_group.add_argument("--lineages", dest="lineages_file",
                             type=is_file,
                             help="EXPERIMENTAL:A file containing the (sorted) lineage "
                                  "track of each species. It enables "
                                  "NPR algorithm to fix what taxonomic "
                                  "levels should be optimized."
                                  "Note that linage tracks must consist in "
                                  "a comma separated list of taxonomic levels "
                                  "sorted from deeper to swallower clades "
                                  "(i.e. 9606 [TAB] Eukaryotes,Mammals,Primates)"
                             )

    input_group.add_argument("--spname-delimiter", dest="spname_delimiter",
                             type=str, default="_",
                             help="spname_delimiter is used to split"
                             " the name of sequences into species code and"
                             " sequence identifier (i.e. HUMAN_p53 = HUMAN, p53)."
                             " Note that species name must always precede seq.identifier.")

    input_group.add_argument("--spfile", dest="spfile",
                             type=is_file,
                             help="If specified, only the sequences and ortholog"
                             " pairs matching the group of species in this file"
                             " (one species code per line) will be used. ")

    npr_group = parser.add_argument_group('==== NPR options ====')
    npr_group.add_argument("-r", "--recursive", dest="npr_workflows",
                           required=False,
                           nargs="*",
                           help="EXPERIMENTAL:Enables recursive NPR capabilities (Nested Phylogenetic Reconstruction)"
                           " and specifies custom workflows and filters for each NPR iteration.")
    npr_group.add_argument("--nt-switch-threshold", dest="nt_switch_thr",
                           required=False,
                           type=float,
                           default = 0.95,
                           help="Sequence similarity at which nucleotide based alignments should be used"
                           " instead of amino-acids. ")
    npr_group.add_argument("--max-iters", dest="max_iters",
                           required=False,
                           type=int,
                           default=99999999,
                           help="EXPERIMENTAL:Set a maximum number of NPR iterations allowed.")
    npr_group.add_argument("--first-split-outgroup", dest="first_split",
                           type=str,
                           default='midpoint',
                           help=("EXPERIMENTAL:When used, it overrides first_split option"
                                 " in any tree merger config block in the"
                                 " config file. Default: 'midpoint' "))


    # Output data related flags
    output_group = parser.add_argument_group('==== Output Options ====')
    output_group.add_argument("-o", "--outdir", dest="outdir",
                              type=str, required=True,
                              help="""Output directory for results.""")

    output_group.add_argument("--scratch-dir", dest="scratch_dir",
                              type=is_dir,
                              help="""If provided, ete-build will run on the scratch folder and all files will be transferred to the output dir when finished. """)

    output_group.add_argument("--db-dir", dest="db_dir",
                              type=is_dir,
                              help="""Alternative location of the database directory""")

    output_group.add_argument("--tasks-dir", dest="tasks_dir",
                              type=is_dir,
                              help="""Output directory for the executed processes (intermediate files).""")

    output_group.add_argument("--compress", action="store_true",
                              help="Compress all intermediate files when"
                              " a workflow is finished.")

    output_group.add_argument("--logfile", action="store_true",
                              help="Log messages will be saved into a file named npr.log within the output directory.")

    output_group.add_argument("--noimg", action="store_true",
                              help="Tree images will not be generated when a workflow is finished.")

    output_group.add_argument("--email", dest="email",
                              type=str,
                              help="EXPERIMENTAL:Send an email when errors occur or a workflow is done.")

    output_group.add_argument("--email-report-time", dest="email_report_time",
                              type=int, default = 0,
                              help="EXPERIMENTAL:How often (in minutes) an email reporting the status of the execution should be sent. 0=No reports")


    # Task execution related flags
    exec_group = parser.add_argument_group('==== Execution Mode Options ====')

    exec_group.add_argument("-C", "--cpu", dest="maxcores", type=int,
                            default=1, help="Maximum number of CPU cores"
                            " available in the execution host. If higher"
                            " than 1, tasks with multi-threading"
                            " capabilities will enabled. Note that this"
                            " number will work as a hard limit for all applications,"
                            "regardless of their specific configuration.")

    exec_group.add_argument("-t", "--schedule-time", dest="schedule_time",
                            type=float, default=2,
                            help="""How often (in secs) tasks should be checked for available results.""")

    exec_group.add_argument("--launch-time", dest="launch_time",
                            type=float, default=3,
                            help="""How often (in secs) queued jobs should be checked for launching""")

    exec_type_group = exec_group.add_mutually_exclusive_group()

    exec_type_group.add_argument("--noexec", dest="no_execute",
                                 action="store_true",
                                 help=("Prevents launching any external application."
                                       " Tasks will be processed and intermediate steps will"
                                       " run, but no real computation will be performed."))

    # exec_type_group.add_argument("--sge", dest="sge_execute",
    #                              action="store_true", help="EXPERIMENTAL!: Jobs will be"
    #                              " launched using the Sun Grid Engine"
    #                              " queue system.")

    exec_group.add_argument("--monitor", dest="monitor",
                            action="store_true",
                            help="Monitor mode: pipeline jobs will be"
                            " detached from the main process. This means that"
                            " when npr execution is interrupted, all currently"
                            " running jobs will keep running. Use this option if you"
                            " want to stop and recover an execution thread or"
                            " if jobs are expected to be executed remotely."
                            )

    exec_group.add_argument("--resume", dest="resume",
                            action="store_true",
                            help="If output directory exists, reuse data from it if possible. ")

    exec_group.add_argument("--clearall", dest="clearall",
                            action="store_true",
                            help="If output directory exists, erase all previous data and start a clean execution.")


    exec_group.add_argument("--softclear", dest="softclear",
                            action="store_true",
                            help="Clear all precomputed data (data.db), but keeps task raw data in the directory, so they can be re-processed.")

    exec_group.add_argument("--clear-seqdb", dest="clearseqs",
                            action="store_true",
                            help="Reload sequences deleting previous database if necessary.")

    # exec_group.add_argument("--arch", dest="arch",
    #                         choices=["auto", "32", "64"],
    #                         default="auto", help="Set the architecture of"
    #                         " execution hosts (needed only when using"
    #                         " built-in applications.)")

    exec_group.add_argument("--nochecks", dest="nochecks",
                            action="store_true",
                            help="Skip basic checks (i.e. tools available) everytime the application starts.")

    # Interface related flags
    ui_group = parser.add_argument_group("==== Program Interface Options ====")
    # ui_group.add_argument("-u", dest="enable_ui",
    #                     action="store_true", help="When used, a color"
    #                     " based interface is launched to monitor NPR"
    #                     " processes. This feature is EXPERIMENTAL and"
    #                     " requires NCURSES libraries installed in your"
    #                     " system.")

    ui_group.add_argument("-v", dest="verbosity",
                          default=0,
                          type=int, choices=[0,1,2,3,4],
                          help="Verbosity level: 0=very quiet, 4=very "
                          " verbose.")

    ui_group.add_argument("--debug", nargs="?",
                          const="all",
                          help="Start debugging"
                          " A taskid can be provided, so"
                          " debugging will start from such task on.")

    # NOTE(review): the full ``arguments`` list (including arguments[0]) is
    # parsed here; arguments[0] presumably fills the dummy positional above.
    # Verify against callers whether arguments[1:] was intended.
    args = parser.parse_args(arguments)
    if args.tools_dir:
        APPSPATH = args.tools_dir

    if not pexist(APPSPATH):
        print(colorify('\nWARNING: external applications directory are not found at %s' %APPSPATH, "yellow"), file=sys.stderr)
        print(colorify('Use "ete build install_tools" to install or upgrade tools', "orange"), file=sys.stderr)

    args.enable_ui = False
    if not args.noimg:
        # Probe graphical support by rendering a throw-away tree image.
        print('Testing ETE-build graphics support...')
        print('X11 DISPLAY = %s' %colorify(os.environ.get('DISPLAY', 'not detected!'), 'yellow'))
        print('(You can use --noimg to disable graphical capabilities)')
        try:
            from .. import Tree
            Tree().render('/tmp/etenpr_img_test.png')
        # Narrowed from a bare ``except:`` which would also swallow
        # KeyboardInterrupt/SystemExit.
        except Exception:
            raise ConfigError('img generation not supported')

    if not args.aa_seed_file and not args.nt_seed_file:
        parser.error('At least one input file argument (-a, -n) is required')

    outdir = os.path.abspath(args.outdir)
    final_dir, runpath = os.path.split(outdir)
    if not runpath:
        # A trailing-slash/root path would yield an empty run name.
        raise ValueError("Invalid outdir")

    GLOBALS["output_dir"] = os.path.abspath(args.outdir)

    if args.scratch_dir:
        # set paths for scratch folder for sqlite files
        print("Creating temporary scratch dir...", file=sys.stderr)
        base_scratch_dir = os.path.abspath(args.scratch_dir)
        scratch_dir = tempfile.mkdtemp(prefix='npr_tmp', dir=base_scratch_dir)
        GLOBALS["scratch_dir"] = scratch_dir
        GLOBALS["basedir"] = scratch_dir
    else:
        GLOBALS["basedir"] = GLOBALS["output_dir"]


    GLOBALS["first_split_outgroup"] = args.first_split

    GLOBALS["email"] = args.email
    GLOBALS["verbosity"] = args.verbosity
    # CLI value is in minutes; internal consumers expect seconds.
    GLOBALS["email_report_time"] = args.email_report_time * 60
    GLOBALS["launch_time"] = args.launch_time
    GLOBALS["cmdline"] = ' '.join(arguments)

    GLOBALS["threadinfo"] = defaultdict(dict)
    GLOBALS["seqtypes"] = set()
    GLOBALS["target_species"] = set()
    GLOBALS["target_sequences"] = set()
    GLOBALS["spname_delimiter"] = args.spname_delimiter
    GLOBALS["color_shell"] = True
    GLOBALS["citator"] = Citator()


    GLOBALS["lineages"] = None
    GLOBALS["cogs_file"] = None

    GLOBALS["citator"].add("ETE")

    if not pexist(GLOBALS["basedir"]):
        os.makedirs(GLOBALS["basedir"])

    # when killed, translate signal into exception so program can exit cleanly
    def raise_control_c(_signal, _frame):
        if GLOBALS.get('_background_scheduler', None):
            GLOBALS['_background_scheduler'].terminate()
        raise KeyboardInterrupt
    signal.signal(signal.SIGTERM, raise_control_c)

    # Start the application
    app_wrapper(main, args)