def get_btax_blastdb(btax_dict, db_dir, btr_profiles=None, num_threads=None, config_path=None):
    if config_path:
        conf_constants.update_by_config(config_path=config_path)
        conf_constants_db.update_by_config(config_path=config_path)
    if not num_threads:
        num_threads = conf_constants.num_threads
    else:
        conf_constants.num_threads = num_threads

    # Can be parallel
    for btax_name in btax_dict:
        btax_info = BtaxInfo.load_from_dict(btax_dict[btax_name])
        btax_info.btax_fna, btax_info.fna_id, downloaded_fna = get_btax_fna(btax_genomes=btax_info.genomes,
                                                                            btax_name=btax_name,
                                                                            db_dir=db_dir)
        for i, btax_genome in enumerate(btax_info.genomes):
            genome_info = GenomeInfo.load_from_dict(btax_genome)
            if genome_info.genome_id in downloaded_fna:
                genome_info.fna_path = downloaded_fna[genome_info.genome_id]
                btax_info.genomes[i] = genome_info.get_json()
        btax_info.blastdb = create_btax_blastdb(btax_fna_path=btax_info.btax_fna,
                                                btax_name=btax_name,
                                                db_dir=db_dir,
                                                blast_inst_dir=conf_constants.blast_inst_dir,
                                                logger=eagle_logger)
        if btr_profiles is not None:
            # create repr profile
            pass
        btax_dict[btax_name] = btax_info.get_json()
    return btax_dict
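# Illustrative sketch, not part of the original module: get_btax_blastdb expects
# btax_dict to map base-taxon names to JSON-serializable dicts and round-trips
# every entry through BtaxInfo (load_from_dict -> mutate -> get_json). The helper
# below is hypothetical and only documents that pattern for a single entry.
def _example_set_btax_blastdb_path(btax_dict, btax_name, blastdb_path):
    """Hypothetical helper: update one base-taxon entry the same way get_btax_blastdb does."""
    btax_info = BtaxInfo.load_from_dict(btax_dict[btax_name])
    btax_info.blastdb = blastdb_path  # same field that create_btax_blastdb fills above
    btax_dict[btax_name] = btax_info.get_json()
    return btax_dict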
def _parse_cmd_args(*args):
    parser = argparse.ArgumentParser()
    parser.add_argument("-dbt",
                        "--dbtype",
                        help="The type of database to create (bacteria, archaea or eukaryota)",
                        required=True)
    parser.add_argument("-irefseq",
                        "--input-table-refseq",
                        help="Path to a table with organisms to download from NCBI refseq",
                        required=False,
                        default=None)
    parser.add_argument("-igenbank",
                        "--input-table-genbank",
                        help="Path to a table with organisms to download from NCBI genbank",
                        required=False,
                        default=None)
    parser.add_argument("-icustom",
                        "--input-table-custom",
                        help="Path to a table with custom genomes and their taxonomy (not implemented yet). "
                             "The table consists of two required columns (1 - genome path; 2 - genome taxonomy) "
                             "and one optional column (3 - organism name)",
                        required=False,
                        default=None)
    parser.add_argument("-btl",
                        "--btax-level",
                        help="The taxonomic level to split input genomes into base taxons "
                             "(1 - species, 2 - genus, 3 - family, etc)",
                        required=False,
                        default=0)
    parser.add_argument("-btcp",
                        "--btax-class-profile",
                        help="Path to an HMM profile of sequences to use for base taxon classification "
                             "during database construction (not implemented yet)",
                        required=False,
                        default=None)
    parser.add_argument("-btrp",
                        "--btax-rep-profile",
                        help="Path to an HMM profile of sequences to use as a base taxon representative profile "
                             "during essential and advantageous genes exploration (not implemented yet)",
                        required=False,
                        default=None)
    parser.add_argument("-d",
                        "--db-dir",
                        help="Path to a directory to collect database files",
                        required=False,
                        default=None)
    parser.add_argument("-nt",
                        "--num-threads",
                        help="Number of threads (can be set in a config file)",
                        required=False,
                        default=conf_constants.num_threads)
    parser.add_argument("-po",
                        "--prepared-organisms",
                        help='Path to a json listing organisms that are already prepared and should be skipped. '
                             'Format as follows: {"org_name": true}',
                        required=False,
                        default=None)
    parser.add_argument("-poinf",
                        "--prepared-organisms-info",
                        help="Path to a json with info for already prepared organisms",
                        required=False,
                        default=None)
    parser.add_argument("-c",
                        "--config-path",
                        help="Path to a config file",
                        required=False,
                        default=None)
    cmd_args = parser.parse_args(args)
    if cmd_args.config_path:
        conf_constants.update_by_config(config_path=cmd_args.config_path)
        conf_constants_db.update_by_config(config_path=cmd_args.config_path)
        cmd_args.num_threads = conf_constants.num_threads
    return cmd_args.__dict__
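# Hedged usage sketch (assumed wiring, not taken from the original sources):
# _parse_cmd_args returns cmd_args.__dict__, i.e. a plain dict keyed by option
# names, which the package entry point presumably forwards to create_bactdb
# defined below. Values stay strings unless a type is set, so "-nt 4" yields "4";
# create_bactdb applies int() itself. The argv below is illustrative only.
#
#     cmd_kwargs = _parse_cmd_args("-dbt", "bacteria", "-d", "bacteria_db", "-nt", "4")
#     # cmd_kwargs["dbtype"] == "bacteria"; cmd_kwargs["num_threads"] == "4"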
def create_bactdb(input_table_refseq=None,
                  input_table_genbank=None,
                  input_table_custom=None,
                  btax_level=0,
                  btax_class_profile=None,
                  btax_rep_profile=None,
                  db_dir=DEFAULT_BACTDB_DIR,
                  num_threads=None,
                  prepared_genomes=PREPARED_BACTERIA_F_NAME,
                  prepared_genomes_info=None,
                  config_path=None,
                  **kwargs):
    if config_path:
        conf_constants.update_by_config(config_path=config_path)
        conf_constants_db.update_by_config(config_path=config_path)
    if not btax_level:
        btax_level = conf_constants_db.btax_level
    else:
        conf_constants_db.btax_level = btax_level
    if not db_dir:
        db_dir = DEFAULT_BACTDB_DIR
    if num_threads:
        num_threads = int(num_threads)
        conf_constants.num_threads = num_threads
    else:
        num_threads = conf_constants.num_threads
    if not prepared_genomes:
        prepared_genomes = PREPARED_BACTERIA_F_NAME

    if btax_class_profile is not None:
        # TODO: implement loading btc_profiles from custom profiles
        eagle_logger.warning("custom btax classification profiles are not implemented currently - "
                             "default will be used")
    # else:
    btc_profiles = [SeqProfileInfo(name="16S_rRNA", seq_type="nucl").get_json()]  # TODO: include it to 'else' block
    if btax_rep_profile is not None:
        # TODO: implement loading btr_profiles from custom profiles
        eagle_logger.warning("custom btax representative profiles are not implemented currently - "
                             "default will be used")
    # else:
    btr_profiles = None  # TODO: include it to 'else' block

    # TODO: this code should not get the btax classification sequence (16S rRNA)
    if input_table_custom is None and input_table_refseq is None and input_table_genbank is None:
        input_table_refseq = DEFAULT_REFSEQ_BACTERIA_TABLE
        input_table_genbank = DEFAULT_GENBANK_BACTERIA_TABLE
    bacteria_list = list()
    if input_table_refseq is not None or input_table_genbank is not None:
        bacteria_list = get_bacteria_from_ncbi(refseq_bacteria_table=input_table_refseq,
                                               genbank_bacteria_table=input_table_genbank,
                                               bactdb_dir=db_dir,
                                               num_threads=num_threads,
                                               prepared_bacteria_f_path=prepared_genomes)
    if input_table_custom is not None:
        eagle_logger.warning("custom genomes input is not implemented yet")  # TODO: implement custom genomes input
        # bacteria_list.extend()
    if prepared_genomes_info:
        with open(prepared_genomes_info) as prep_genomes_info_f:
            bacteria_list = join_genomes_lists(genomes_list_1=bacteria_list,
                                               genomes_list_2=json.load(prep_genomes_info_f))

    # TODO: implement code to obtain btax classification sequence from fna with hmm profile
    #       profiles input should be a list of SeqProfileInfo objects
    #       result - btc_seqs_path field of GenomeInfo objects in bacteria_list filled
    #       currently it is filled during get_bacteria_from_ncbi run - not good
    btax_dict = get_btax_dict(genomes_list=bacteria_list,
                              btax_level=btax_level,
                              btc_profiles=btc_profiles,
                              db_dir=db_dir,
                              num_threads=num_threads,
                              build_tree=not bool(btr_profiles))

    btax_dict = get_btax_blastdb(btax_dict,
                                 db_dir=db_dir,
                                 btr_profiles=btr_profiles,
                                 num_threads=num_threads)

    repr_profiles_path = create_profiles_db(btax_dict,
                                            db_dir=db_dir,
                                            profiles_db_name=PROFILES_DB_NAME,
                                            method="hmmer",
                                            hmmer_inst_dir=conf_constants.hmmer_inst_dir,
                                            config_path=config_path,
                                            logger=eagle_logger)
    with open(os.path.join(db_dir, BTAX_JSON_NAME), "w") as btax_json_f:
        json.dump(btax_dict, btax_json_f, indent=2)  # maybe btax_dict will be dumped in get_btax_dict

    db_info = DBInfo(all_genomes=os.path.join(db_dir, BACTERIA_LIST_F_NAME),
                     btax_json=os.path.join(db_dir, BTAX_JSON_NAME),
                     repr_profiles=repr_profiles_path,
                     global_dist_matrix=os.path.join(db_dir, BACTERIA_GLOBAL_DIST_MATRIX),
                     all_org_full_names=os.path.join(db_dir, BACTERIA_SHORT_TO_FULL_ORG_NAMES)).get_json()
    with open(os.path.join(db_dir, DB_INFO_NAME), "w") as db_info_f:
        json.dump(db_info, db_info_f, indent=2)
    return db_info
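# Hedged usage sketch, not part of the original module: when no input tables are
# given, create_bactdb falls back to the default tables (DEFAULT_REFSEQ_BACTERIA_TABLE
# and DEFAULT_GENBANK_BACTERIA_TABLE), so a minimal call only needs an output
# directory. The wrapper, directory name and thread count below are hypothetical.
def _example_create_default_bactdb():
    """Hypothetical wrapper: build the default bacterial database and return its description."""
    db_info = create_bactdb(db_dir="bacteria_db", num_threads=4)
    # db_info is the dict dumped to DB_INFO_NAME above; its keys presumably mirror
    # the DBInfo constructor arguments (btax_json, repr_profiles, global_dist_matrix, ...)
    return db_info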
def get_families_dict(bacteria_list, db_dir, num_threads=None, only_repr=False, config_path=None):
    if config_path:
        conf_constants.update_by_config(config_path=config_path)
        conf_constants_db.update_by_config(config_path=config_path)
    if not num_threads:
        num_threads = conf_constants.num_threads
    else:
        conf_constants.num_threads = num_threads
    if not only_repr:
        only_repr = conf_constants_db.only_repr
    else:
        conf_constants_db.only_repr = only_repr

    families_dict = dict()
    for bacterium in bacteria_list:
        if not bacterium:
            continue
        if not os.path.exists(bacterium["16S_rRNA_file"]):
            continue
        bacterium_data = {"download_prefix": bacterium["download_prefix"],
                          "16S_rRNA_file": bacterium["16S_rRNA_file"],
                          "fna_file": None,
                          "source_db": bacterium["source_db"],
                          "repr": bacterium["repr"]}
        if only_repr and not bacterium["repr"]:
            continue
        if families_dict.get(bacterium["family"], None):
            if families_dict[bacterium["family"]].get(bacterium["genus"], None):
                if families_dict[bacterium["family"]][bacterium["genus"]].get(bacterium["species"], None):
                    families_dict[bacterium["family"]][bacterium["genus"]][bacterium["species"]][bacterium["strain"]] = \
                        bacterium_data
                else:
                    families_dict[bacterium["family"]][bacterium["genus"]][bacterium["species"]] = \
                        {bacterium["strain"]: bacterium_data}
            else:
                families_dict[bacterium["family"]][bacterium["genus"]] = \
                    {bacterium["species"]: {bacterium["strain"]: bacterium_data}}
        else:
            families_dict[bacterium["family"]] = \
                {bacterium["genus"]: {bacterium["species"]: {bacterium["strain"]: bacterium_data}}}
            # Additional per-family fields kept for reference (not filled here):
            # "16S_rRNA_tree": None,
            # "WGS_tree": None,
            # "16S_rRNA_gtf": os.path.join(db_dir, bacterium['family']+"_16S_rRNA.gtf"),
            # "WGS_gtf": os.path.join(db_dir, bacterium['family']+"_WGS.gtf"),
            # "16S_rRNA_profile": None,
            # "WGS_profile": None,

    bact_fam_f_path = os.path.join(db_dir, BACT_FAM_F_NAME)
    prepare_families(families_dict, db_dir, bact_fam_f_path, num_threads=num_threads)

    with open(bact_fam_f_path) as bact_fam_f:
        return json.load(bact_fam_f)
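# Hedged sketch, not part of the original module: the nested if/else above fills
# a family -> genus -> species -> strain mapping. The hypothetical helper below
# performs a functionally similar insertion with dict.setdefault and is kept here
# only to document the intended shape of families_dict.
def _example_insert_bacterium(families_dict, bacterium, bacterium_data):
    """Hypothetical helper: insert one strain record into the nested families_dict."""
    genus_dict = families_dict.setdefault(bacterium["family"], dict())
    species_dict = genus_dict.setdefault(bacterium["genus"], dict())
    strain_dict = species_dict.setdefault(bacterium["species"], dict())
    strain_dict[bacterium["strain"]] = bacterium_data
    return families_dict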
def get_btax_dict(genomes_list,
                  btax_level,
                  btc_profiles,
                  db_dir,
                  num_threads=None,
                  build_tree=False,
                  config_path=None,
                  **kwargs):
    if config_path:
        conf_constants.update_by_config(config_path=config_path)
        conf_constants_db.update_by_config(config_path=config_path)
    if not num_threads:
        num_threads = conf_constants.num_threads
    else:
        conf_constants.num_threads = num_threads

    btax_dict = defaultdict(BtaxInfo)
    btc_fasta_dict = defaultdict(dict)
    seq_ids_to_orgs = dict()
    for genome_dict in genomes_list:
        if not genome_dict:
            continue
        genome_info = GenomeInfo.load_from_dict(genome_dict)
        if not genome_info.btc_seqs_id:
            continue
        try:
            btax_name = genome_info.taxonomy[-btax_level]
        except IndexError:
            btax_name = genome_info.taxonomy[0]
        btax_dict[btax_name].genomes.append(genome_info.get_json())
        if btax_dict[btax_name].name is None:
            btax_dict[btax_name].name = btax_name
        btc_seqs_fasta_dict = load_fasta_to_dict(genome_info.btc_seqs_fasta)
        for btc_seq_id in genome_info.btc_seqs_id:
            seq_ids_to_orgs[btc_seq_id] = genome_info.org_name
            btc_fasta_dict[genome_info.btc_seqs_id[btc_seq_id]][btc_seq_id] = btc_seqs_fasta_dict[btc_seq_id]

    btc_profile_types = dict()
    for btc_profile_dict in btc_profiles:
        btc_profile_info = SeqProfileInfo.load_from_dict(btc_profile_dict)
        btc_profile_types[btc_profile_info.name] = btc_profile_info.seq_type

    btc_dist_dict = dict()
    btc_aln_dict = dict()
    short_to_full_seq_names = dict()
    for btc_profile_name in btc_fasta_dict:
        btc_mult_aln = construct_mult_aln(seq_dict=btc_fasta_dict[btc_profile_name],
                                          aln_type=btc_profile_types[btc_profile_name],
                                          aln_name=btc_profile_name + "_aln",
                                          tmp_dir=kwargs.get("aln_tmp_dir", "mult_aln_tmp"),
                                          method=conf_constants_db.btc_profile_aln_method,
                                          num_threads=num_threads,
                                          logger=eagle_logger,
                                          op=5.0,
                                          ep=0.5,
                                          **kwargs)  # low_memory can be set through kwargs

        # TODO: only the code from the else block should remain after 16S rRNA obtaining
        #       is moved out of get_bacteria_from_ncbi
        if btc_profile_name == "16S_rRNA":
            btc_mult_aln.short_to_full_seq_names = \
                reduce_seq_names({re.sub(r"lcl\|(N(C|Z)_)?", "", seq_name): seq_name for seq_name in btc_mult_aln},
                                 num_letters=10,
                                 num_words=1)[0]
        else:
            btc_mult_aln.short_to_full_seq_names = short_to_full_seq_names.copy()

        btc_mult_aln.remove_paralogs(seq_ids_to_orgs=seq_ids_to_orgs, inplace=True)
        btc_mult_aln.improve_aln(inplace=True)
        btc_dist_dict[btc_profile_name] = btc_mult_aln.get_distance_matrix()  # TODO: implement specific positions method
        short_to_full_seq_names.update(btc_mult_aln.short_to_full_seq_names)
        if kwargs.get("save_alignments", False):
            btc_mult_aln.dump_alignment(aln_fasta_path=os.path.join(db_dir, btc_mult_aln.aln_name + ".fasta"))
        btc_mult_aln.rename_seqs(seq_ids_to_orgs)
        btc_aln_dict[btc_profile_name] = deepcopy(btc_mult_aln)

    global_dist_matr = get_global_dist(btc_dist_dict, btc_profiles, seq_ids_to_orgs)
    global_dist_matr_path = os.path.join(db_dir, BACTERIA_GLOBAL_DIST_MATRIX)
    short_to_full_seq_names_path = os.path.join(db_dir, BACTERIA_SHORT_TO_FULL_ORG_NAMES)
    short_to_full_seq_names = global_dist_matr.dump(matrix_path=global_dist_matr_path, matr_format="phylip")
    with open(short_to_full_seq_names_path, "w") as short_to_full_org_names_f:
        json.dump(short_to_full_seq_names, short_to_full_org_names_f, indent=2)

    eagle_logger.info("base taxons standardisation started")
    btax_dict = standardize_btax(btax_dict=btax_dict, global_dist_matr=global_dist_matr)
    eagle_logger.info("base taxons standardisation finished")

    full_to_short_seq_names = {v: k for k, v in short_to_full_seq_names.items()}
    for btax_name in btax_dict:
        btax_orgs = set(GenomeInfo.load_from_dict(genome).org_name for genome in btax_dict[btax_name].genomes)
        if build_tree:
            btax_dict[btax_name].mean_d = global_dist_matr[btax_orgs].mean_dist
            btax_dict[btax_name].median_d = global_dist_matr[btax_orgs].median_dist
            if len(btax_orgs) > 2:
                btax_dict[btax_name].ref_tree_newick = build_tree_by_dist(global_dist_matr[btax_orgs],
                                                                          tree_name=btax_name + "_tree").newick
        btax_btc_aln_dict = dict()
        for btc_profile_name, btc_aln in btc_aln_dict.items():
            btax_btc_aln = btc_aln[btax_orgs].improve_aln(inplace=False)
            btax_btc_aln.aln_name = btax_name + "_" + btc_profile_name
            btax_btc_aln_dict[btc_profile_name] = deepcopy(btax_btc_aln)
        btax_dict[btax_name].repr_profiles = generate_btax_profiles(btax_btc_aln_dict,
                                                                    db_dir=db_dir,
                                                                    btax_name=btax_name,
                                                                    method="hmmer")
        btax_dict[btax_name].ref_tree_full_names = \
            {full_to_short_seq_names[btax_org]: btax_org for btax_org in btax_orgs}
        btax_dict[btax_name] = btax_dict[btax_name].get_json()
    return btax_dict
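# Hedged note, not part of the original module: base-taxon names are taken from
# the end of each genome's taxonomy list with negative indexing, so btax_level
# counts ranks upwards from the most specific one (matching the '-btl' help:
# 1 - species, 2 - genus, 3 - family); an out-of-range level falls back to
# taxonomy[0]. The taxonomy list below is purely illustrative.
#
#     taxonomy = ["Bacteria", "Proteobacteria", "Gammaproteobacteria", "Enterobacterales",
#                 "Enterobacteriaceae", "Escherichia", "Escherichia coli"]
#     # btax_level=1 -> "Escherichia coli"; btax_level=3 -> "Enterobacteriaceae"
#     # btax_level=10 -> IndexError -> "Bacteria"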
def get_bacteria_from_ncbi(refseq_bacteria_table=None,
                           genbank_bacteria_table=None,
                           bactdb_dir=DEFAULT_BACTDB_DIR,
                           num_threads=None,
                           first_bact=None,
                           last_bact=None,
                           prepared_bacteria_f_path=PREPARED_BACTERIA_F_NAME,
                           remove_bact_list_f=False,
                           config_path=None):
    if config_path:
        conf_constants.update_by_config(config_path=config_path)
        conf_constants_db.update_by_config(config_path=config_path)
    if not num_threads:
        num_threads = conf_constants.num_threads
    else:
        conf_constants.num_threads = num_threads
    if refseq_bacteria_table is None and genbank_bacteria_table is None:
        refseq_bacteria_table = DEFAULT_REFSEQ_BACTERIA_TABLE
        genbank_bacteria_table = DEFAULT_GENBANK_BACTERIA_TABLE

    try:
        os.makedirs(bactdb_dir)
    except OSError:
        eagle_logger.warning("bactdb directory exists")
    prepared_bacteria = mp.Manager().dict()
    if os.path.exists(prepared_bacteria_f_path):
        eagle_logger.info("loading prepared bacteria from '%s'" % prepared_bacteria_f_path)
        with open(prepared_bacteria_f_path) as prepared_bacteria_f:
            prepared_bacteria.update(json.load(prepared_bacteria_f))
        eagle_logger.info("prepared bacteria loaded")
    bacteria_list_f_path = os.path.join(bactdb_dir, BACTERIA_LIST_F_NAME)
    with io.open(bacteria_list_f_path, 'w', newline="\n") as bacteria_list_f:
        bacteria_list_f.write(u"[\n")

    refseq_df = pandas.read_csv(refseq_bacteria_table, sep="\t", dtype=str).sort_values(by="ncbi_link")
    genbank_df = pandas.read_csv(genbank_bacteria_table, sep="\t", dtype=str).sort_values(by="ncbi_link")

    def _bacterium_params(table_row, source_db):
        # Worker task parameters for downloading and preparing a single genome
        return {
            'function': get_bacterium,
            'prepared_bacteria': prepared_bacteria,
            'logger_name': eagle_logger.name,
            'ncbi_db_link': table_row["ncbi_link"],
            'bacterium_name': table_row["org_name"],
            'is_repr': bool_from_str(table_row["repr"]),
            'db_dir': bactdb_dir,
            'source_db': source_db,
            'try_err_message': "%s is not prepared: " % table_row["org_name"],
        }

    n = 1
    i = 0
    j = 0
    params_list = list()
    # Merge the two tables sorted by 'ncbi_link', preferring the RefSeq entry
    # when the same assembly is present in both RefSeq and GenBank
    while i < refseq_df.shape[0] or j < genbank_df.shape[0]:
        if first_bact and n < first_bact:
            n += 1
            continue
        if last_bact and n > last_bact:
            break
        if i >= refseq_df.shape[0] or j >= genbank_df.shape[0]:
            if i >= refseq_df.shape[0]:
                params_list.append(_bacterium_params(genbank_df.iloc[j], "genbank"))
                j += 1
            else:
                params_list.append(_bacterium_params(refseq_df.iloc[i], "refseq"))
                i += 1
        else:
            if genbank_df.iloc[j]["ncbi_link"].replace("GCA", "GCF") < refseq_df.iloc[i]["ncbi_link"]:
                params_list.append(_bacterium_params(genbank_df.iloc[j], "genbank"))
                j += 1
            else:
                params_list.append(_bacterium_params(refseq_df.iloc[i], "refseq"))
                i += 1
                if genbank_df.iloc[j]["ncbi_link"].replace("GCA", "GCF") == refseq_df.iloc[i - 1]["ncbi_link"]:
                    j += 1
        n += 1

    eagle_logger.info("got download links for %s bacteria" % len(params_list))
    pool = mp.Pool(num_threads)
    pool.map(worker, params_list)
    pool.close()
    pool.join()
    with open(os.path.join(bactdb_dir, PREPARED_BACTERIA_F_NAME), "w") as prepared_bacteria_f:
        json.dump(dict(prepared_bacteria), prepared_bacteria_f)
    # Close the JSON list that the workers have been appending to
    with io.open(bacteria_list_f_path, 'a', newline="\n") as bacteria_list_f:
        bacteria_list_f.write(u" {}\n]")
    with open(bacteria_list_f_path) as bacteria_list_f:
        return json.load(bacteria_list_f)
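# Hedged usage sketch, not part of the original module: the input tables are read
# with pandas.read_csv(sep="\t"), so they are expected to be tab-separated and to
# contain at least the 'ncbi_link', 'org_name' and 'repr' columns referenced
# above. The wrapper, directory name and limits below are hypothetical.
def _example_download_bacteria_subset():
    """Hypothetical wrapper: prepare roughly the first 100 genomes of the merged default tables."""
    return get_bacteria_from_ncbi(bactdb_dir="bacteria_db",
                                  num_threads=4,
                                  last_bact=100)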
def explore_orfs(in_fasta,
                 db_json,
                 out_dir="",
                 min_orf_l=None,
                 btax_name=None,
                 num_threads=None,
                 btax_det_method="hmmer",
                 config_path=None,
                 **kwargs):
    if config_path:
        conf_constants.update_by_config(config_path)
    if num_threads:
        conf_constants.num_threads = int(num_threads)
    num_threads = conf_constants.num_threads
    if min_orf_l:
        conf_constants.min_orf_l = min_orf_l
    min_orf_l = conf_constants.min_orf_l

    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    if kwargs.get("save_alignments", False) and not os.path.exists(os.path.join(out_dir, ORF_ALNS_DIR)):
        os.makedirs(os.path.join(out_dir, ORF_ALNS_DIR))
    if kwargs.get("save_trees", False) and not os.path.exists(os.path.join(out_dir, ORF_TREES_DIR)):
        os.makedirs(os.path.join(out_dir, ORF_TREES_DIR))

    if isinstance(db_json, str):
        with open(db_json) as db_json_f:
            db_info = DBInfo.load_from_dict(json.load(db_json_f))
    elif isinstance(db_json, dict):
        db_info = DBInfo.load_from_dict(db_json)
    else:
        eagle_logger.error("Unsupported type of value for 'db_json' argument")
        return
    with open(db_info.btax_json) as btax_dict_f:
        btax_dict = json.load(btax_dict_f)
    if btax_name is None:
        btax_name = get_btax_name(in_fasta,
                                  db_info.repr_profiles,
                                  btax_names=btax_dict.keys(),
                                  work_dir=out_dir,
                                  num_threads=conf_constants.num_threads,
                                  method=btax_det_method,
                                  hmmer_inst_dir=conf_constants.hmmer_inst_dir,
                                  config_path=config_path)

    orfs_fasta_path = os.path.join(out_dir, os.path.basename(in_fasta) + ".orfs")
    res_gtf_json = get_orfs(in_fasta_path=in_fasta,
                            out_fasta_path=orfs_fasta_path,
                            minsize=min_orf_l)
    blast_handler = BlastHandler(inst_dir=conf_constants.blast_inst_dir,
                                 config_path=config_path,
                                 logger=eagle_logger)
    if btax_name == "Unclassified":
        eagle_logger.warning("The family was not detected - cannot run further analysis")
    else:
        btax_info = BtaxInfo.load_from_dict(btax_dict[btax_name])
        eagle_logger.info("Family '%s' will be used for the sequence from %s" % (btax_name, in_fasta))
        tblastn_out_path = kwargs.get("tblastn_result_path", None)  # for debug and testing
        if tblastn_out_path is None:
            tblastn_out_path = os.path.join(out_dir, os.path.basename(in_fasta) + ".bl")
            blast_handler.run_blast_search(blast_type="tblastn",
                                           query=orfs_fasta_path,
                                           db=btax_info.blastdb,
                                           out=tblastn_out_path,
                                           num_threads=num_threads)
        res_gtf_json = analyze_tblastn_out(tblastn_out_path=tblastn_out_path,
                                           orfs_fasta_path=orfs_fasta_path,
                                           in_fasta=in_fasta,
                                           btax_data=btax_dict[btax_name],
                                           res_gtf_json=res_gtf_json,
                                           num_threads=conf_constants.num_threads,
                                           work_dir=out_dir,
                                           save_alignments=kwargs.get("save_alignments", False),
                                           save_trees=kwargs.get("save_trees", False))

    res_gtf_df = pd.DataFrame(res_gtf_json.values())
    res_gtf_df.sort_values("start", inplace=True)
    res_gtf_df = res_gtf_df[["seqid", "source", "type", "start", "end", "score", "strand", "frame", "attribute"]]
    res_gtf_df.to_csv(os.path.join(out_dir, os.path.basename(in_fasta) + ".gtf"),
                      sep="\t",
                      index=False,
                      quotechar="'")
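# Hedged usage sketch, not part of the original module: explore_orfs writes
# '<input>.orfs', '<input>.bl' and '<input>.gtf' into out_dir and returns None,
# so results are picked up from disk afterwards. The wrapper and all paths below
# are hypothetical; db_json should point to the description json produced by
# create_bactdb (its exact file name is defined by DB_INFO_NAME).
def _example_explore_orfs():
    """Hypothetical wrapper: annotate ORFs of one genome against a prepared eagledb."""
    explore_orfs(in_fasta="genome.fasta",
                 db_json="bacteria_db/db_info.json",
                 out_dir="eagle_out",
                 num_threads=4)
    return os.path.join("eagle_out", "genome.fasta.gtf")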
def _parse_cmd_args(*args):
    parser = argparse.ArgumentParser()
    parser.add_argument("-i",
                        "--in-fasta",
                        help="Path to input fasta file",
                        required=True)
    parser.add_argument("-db",
                        "--db-json",
                        help="Path to the json describing the eagledb to use",
                        required=True)
    parser.add_argument("-o",
                        "--out-dir",
                        help="Path to the directory for output",
                        required=False,
                        default="")
    parser.add_argument("-l",
                        "--min-orf-l",
                        help="Minimal length of an ORF to analyze",
                        required=False)
    parser.add_argument("-btn",
                        "--btax-name",
                        help="The name of the base taxon. If specified, eagle will not scan the eagledb and "
                             "will work directly with this base taxon. Applicable only with 'genome' mode",
                        required=False,
                        default=None)
    parser.add_argument("-nt",
                        "--num-threads",
                        help="Number of threads",
                        required=False,
                        default=conf_constants.num_threads)
    parser.add_argument("-btd",
                        "--btax-det-method",
                        help="Name of the method used to detect the base taxon for the input sequence "
                             "(default: 'hmmer')",
                        required=False,
                        default="hmmer")
    parser.add_argument("-c",
                        "--config-path",
                        help="Path to a config file",
                        required=False,
                        default=None)
    parser.add_argument("-tbnr",
                        "--tblastn-result-path",
                        help="Path to a tblastn result (outfmt 7) if it already exists",
                        required=False,
                        default=None)
    parser.add_argument("-sa",
                        "--save-alignments",
                        help="Set to '1' to save ORF multiple alignments",
                        required=False,
                        default=False)
    parser.add_argument("-st",
                        "--save-trees",
                        help="Set to '1' to save ORF phylogenetic trees",
                        required=False,
                        default=False)
    cmd_args = parser.parse_args(args)
    if cmd_args.config_path:
        conf_constants.update_by_config(config_path=cmd_args.config_path)
        cmd_args.num_threads = conf_constants.num_threads
    return cmd_args.__dict__
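# Hedged sketch of a command-line entry point (assumed, not from the original
# sources): the parsed dict can be forwarded to explore_orfs as-is, since the
# extra keys (tblastn_result_path, save_alignments, save_trees) are consumed
# through **kwargs. Requires 'import sys' if uncommented.
#
#     if __name__ == "__main__":
#         explore_orfs(**_parse_cmd_args(*sys.argv[1:]))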