def create_scientific_table(nodes_ncbi, names_ncbi):
    """Build a lineage table with one column per rank, phylum through species.

    The table is grown one rank at a time.  At every step
    ``get_bind_ids_bact`` is called with ``is_get_child_id=True`` and with
    ``rank_ex`` set to every value of ``nodes_ncbi['rank']`` except the rank
    being collected; the ``'id'`` entry of its result becomes the new column.
    Once all six rank columns exist, each one is handed to
    ``replace_id_by_name`` together with the rows of ``names_ncbi`` whose
    ``'class'`` equals ``'scientific name'``.

    Args:
        nodes_ncbi: Table with a ``'rank'`` column; forwarded unchanged to
            ``get_bind_ids_bact``.
        names_ncbi: Table with a ``'class'`` column; only its
            ``'scientific name'`` rows are used.

    Returns:
        The result of the last ``replace_id_by_name`` call (presumably the
        lineage table with ids replaced by scientific names -- confirm
        against that helper).

    Raises:
        ValueError: From ``pd.concat`` when a step has no rows to expand
            (for example, no phylum ids were found).
    """
    all_ranks = nodes_ncbi['rank'].unique()

    def child_ids(parent_ids, wanted_rank):
        # Exclude every rank except the one being collected.
        excluded = all_ranks[all_ranks != wanted_rank]
        found = get_bind_ids_bact(
            parent_ids, nodes_ncbi, is_get_child_id=True, rank_ex=excluded
        )
        return found['id']

    def expand(table, known_cols, parent_cols, new_rank):
        # One frame per existing row: the row's lineage values are broadcast
        # against the ids found for its parent(s).
        frames = []
        for _, row in table.iterrows():
            record = {col: row[col] for col in known_cols}
            record[new_rank] = child_ids(
                [row[col] for col in parent_cols], new_rank
            )
            frames.append(pd.DataFrame(record))
        return pd.concat(frames)

    # Seed ids 2 and 2323 are hard-coded.
    # NOTE(review): presumably NCBI taxonomy ids for the bacterial roots -- confirm.
    phylum_ids = child_ids([2, 2323], 'phylum')

    class_frames = []
    for phylum in phylum_ids.values:
        class_frames.append(
            pd.DataFrame({
                'phylum': phylum,
                'class': child_ids([phylum], 'class'),
            })
        )
    lineage = pd.concat(class_frames)

    # NOTE(review): the order step is the only one that passes two parent ids
    # (class and phylum); kept as-is, but confirm this is intentional.
    lineage = expand(lineage, ['phylum', 'class'], ['class', 'phylum'], 'order')
    lineage = expand(lineage, ['phylum', 'class', 'order'], ['order'], 'family')
    lineage = expand(
        lineage, ['phylum', 'class', 'order', 'family'], ['family'], 'genus'
    )
    lineage = expand(
        lineage,
        ['phylum', 'class', 'order', 'family', 'genus'],
        ['genus'],
        'species',
    )

    scientific_names = names_ncbi[names_ncbi['class'] == 'scientific name']
    # Columns are processed from the most specific rank to the most general.
    for rank in ('species', 'genus', 'family', 'order', 'class', 'phylum'):
        lineage = replace_id_by_name(lineage, scientific_names, rank)
    return lineage
def create_all_bact_catalog(nodes_ncbi):
    """Return the ``get_bind_ids_bact`` result for seed id 2.

    The helper is called with ``is_get_child_id=True`` and no rank filter;
    its return value is passed back unchanged.

    Args:
        nodes_ncbi: Node table forwarded unchanged to ``get_bind_ids_bact``.

    Returns:
        Whatever ``get_bind_ids_bact`` returns for ``[2]``.
    """
    # NOTE(review): 2 is presumably the NCBI taxonomy id for Bacteria -- confirm.
    return get_bind_ids_bact([2], nodes_ncbi, is_get_child_id=True)