def add_mesh_parents(bio_ontology: BioOntology):
    """Add root-level MeSH nodes and connect existing nodes to them.

    One node is added for every entry of ``mesh_roots_map`` (labelled with
    the entry's key in the ``MESH`` namespace and named with its value).
    Every node of the ontology is then inspected and up to two ``isa``
    edges are collected for it:

    * if ``is_mesh_subroot_node`` returns a non-``None`` value for the
      node, an edge from the node to the ``MESH`` node labelled with that
      value;
    * if the node is in the ``MESH`` namespace and its ID starts with
      ``'C'`` without being exactly ``'C'``, an edge from the node to the
      ``MESH`` node labelled ``'S'``.

    All collected edges are added to the ontology in one call at the end.

    Parameters
    ----------
    bio_ontology : BioOntology
        The ontology to extend. It is modified in place.
    """
    for root_letter, root_name in mesh_roots_map.items():
        root_label = bio_ontology.label('MESH', root_letter)
        bio_ontology.add_node(root_label, name=root_name)

    pending_edges = []
    for node in bio_ontology.nodes():
        # Subtree root nodes get attached to the root of their subtree
        subtree_id = is_mesh_subroot_node(bio_ontology, node)
        if subtree_id is not None:
            subtree_root = bio_ontology.label('MESH', subtree_id)
            pending_edges.append((node, subtree_root, {'type': 'isa'}))

        # Supplementary concepts are recognized by their 'C' ID prefix;
        # the bare 'C' ID is excluded because that is one of the root
        # nodes added above, not a supplementary concept
        namespace, identifier = bio_ontology.get_ns_id(node)
        is_supplementary = (
            namespace == 'MESH'
            and identifier.startswith('C')
            and identifier != 'C'
        )
        if is_supplementary:
            supplementary_root = bio_ontology.label('MESH', 'S')
            pending_edges.append(
                (node, supplementary_root, {'type': 'isa'})
            )

    bio_ontology.add_edges_from(pending_edges)
def add_ido_parents(bio_ontology: BioOntology):
    """Add an IDO root node and link every parentless IDO node to it.

    A node labelled with ID ``'0'`` in the ``IDO`` namespace is added with
    the name ``'infectious disease concept'``. Every other node whose
    namespace is ``IDO`` and for which ``get_parents`` returns nothing is
    then given an ``isa`` edge pointing to that root.

    The root node itself is never linked: it is added before the scan, so
    without an explicit exclusion it would be picked up as a parentless
    IDO node and receive an ``isa`` edge to itself.

    Parameters
    ----------
    bio_ontology : BioOntology
        The ontology to extend. It is modified in place.
    """
    ido_root = bio_ontology.label('IDO', '0')
    bio_ontology.add_node(ido_root, name='infectious disease concept')
    edges_to_add = []
    for node in bio_ontology.nodes():
        # Never link the root to itself (would create a self-loop)
        if node == ido_root:
            continue
        if bio_ontology.get_ns(node) != 'IDO':
            continue
        # Nodes that already have a parent stay where they are
        if bio_ontology.get_parents(*bio_ontology.get_ns_id(node)):
            continue
        edges_to_add.append((node, ido_root, {'type': 'isa'}))
    bio_ontology.add_edges_from(edges_to_add)
def add_efo_parents(bio_ontology):
    """Link every parentless EFO node to the EFO root ``EFO:0000001``.

    Every node whose namespace is ``EFO`` and for which ``get_parents``
    returns nothing is given an ``isa`` edge pointing to ``EFO:0000001``.
    The number of edges being added is printed before they are added.

    The root node itself is never linked: as a parentless EFO node it
    would otherwise receive an ``isa`` edge to itself.

    Parameters
    ----------
    bio_ontology : BioOntology
        The ontology to extend. It is modified in place.
    """
    edges_to_add = []
    efo_root = 'EFO:0000001'
    for node in bio_ontology.nodes():
        # Never link the root to itself (would create a self-loop)
        if node == efo_root:
            continue
        if bio_ontology.get_ns(node) != 'EFO':
            continue
        # Nodes that already have a parent stay where they are
        if bio_ontology.get_parents(*bio_ontology.get_ns_id(node)):
            continue
        edges_to_add.append((node, efo_root, {'type': 'isa'}))
    print('Adding %d EFO isa edges.' % len(edges_to_add))
    bio_ontology.add_edges_from(edges_to_add)
def add_drugbank_parents(bio_ontology):
    """Add a DrugBank root node and link unconnected DrugBank nodes to it.

    A node ``DRUGBANK:DB00000`` named ``'Drugs'`` is added. Every other
    node whose namespace is ``DRUGBANK`` and for which
    ``bio_ontology[node]`` is empty is then given an ``isa`` edge pointing
    to that root. The number of edges being added is printed before they
    are added.

    The root node itself is never linked: it is added before the scan and
    has nothing attached yet, so without an explicit exclusion it would
    receive an ``isa`` edge to itself.

    Parameters
    ----------
    bio_ontology : BioOntology
        The ontology to extend. It is modified in place.
    """
    edges_to_add = []
    drugbank_root = 'DRUGBANK:DB00000'
    bio_ontology.add_node(drugbank_root, name='Drugs')
    for node in bio_ontology.nodes():
        # Never link the root to itself (would create a self-loop)
        if node == drugbank_root:
            continue
        if bio_ontology.get_ns(node) != 'DRUGBANK':
            continue
        # NOTE(review): presumably ``bio_ontology[node]`` is the set of
        # outgoing edges of any type (networkx-style adjacency), so a drug
        # with only non-isa edges is not linked to the root; the sibling
        # functions check ``get_parents`` instead -- confirm this
        # difference is intended.
        if bio_ontology[node]:
            continue
        edges_to_add.append((node, drugbank_root, {'type': 'isa'}))
    print('Adding %d DRUGBANK isa edges.' % len(edges_to_add))
    bio_ontology.add_edges_from(edges_to_add)
def add_protein_parents(bio_ontology):
    """Add root and category parent nodes for proteins and link to them.

    Two root nodes are added (``INDRA:HUMAN_PROTEIN`` and
    ``INDRA:NON_HUMAN_PROTEIN``), plus one node per entry of
    ``category_map``, each of which is linked to the human protein root.
    Every node of the ontology is then classified and linked via ``isa``
    edges:

    * protein families without further FamPlex parents are linked to the
      node of each category returned by ``get_categories``, or to the
      human protein root if there are none;
    * human proteins without FamPlex parents are linked to the value
      returned by ``get_category``, or to the human protein root if that
      value is empty;
    * non-human proteins are linked to the non-human protein root.

    All collected edges are added to the ontology in one call at the end.

    NOTE(review): another ``add_protein_parents`` with the same signature
    appears later in this source; if both live in the same module, the
    later definition replaces this one at import time.

    Parameters
    ----------
    bio_ontology : BioOntology
        The ontology to extend. It is modified in place.
    """
    human_root = 'INDRA:HUMAN_PROTEIN'
    non_human_root = 'INDRA:NON_HUMAN_PROTEIN'

    def isa_edge(child, parent):
        # Each edge carries its own attribute dict
        return child, parent, {'type': 'isa'}

    # Root nodes for human and non-human proteins
    bio_ontology.add_node(human_root, name='Human protein')
    bio_ontology.add_node(non_human_root, name='Non-human protein')

    # Every category becomes a node directly below the human protein root
    pending_edges = []
    for category_name, category_label in category_map.items():
        bio_ontology.add_node(category_label, name=category_name)
        pending_edges.append(isa_edge(category_label, human_root))

    # Walk the whole ontology and collect the extra edges
    for node in bio_ontology.nodes():
        if is_protein_family(bio_ontology, node):
            # Only families at the top of the FamPlex hierarchy are
            # linked; those with further FPLX parents are left alone
            if has_fplx_parents(bio_ontology, node):
                continue
            family_categories = get_categories(node)
            if family_categories:
                # Link the family to each of its categories
                parents = [category_map[name] for name in family_categories]
            else:
                # No categories: link directly to the human protein root
                parents = [human_root]
            pending_edges.extend(isa_edge(node, parent) for parent in parents)
        elif is_human_protein(bio_ontology, node):
            # Specific human proteins are only linked if they have no
            # FamPlex parents
            if has_fplx_parents(bio_ontology, node):
                continue
            # If there is a category, link to that, otherwise to the
            # human protein root.
            # NOTE(review): the value of get_category is used as the
            # parent node directly, whereas the family branch maps
            # category names through category_map -- confirm that
            # get_category returns a node label.
            parent = get_category(node) or human_root
            pending_edges.append(isa_edge(node, parent))
        elif is_non_human_protein(bio_ontology, node):
            pending_edges.append(isa_edge(node, non_human_root))

    bio_ontology.add_edges_from(pending_edges)
def add_protein_parents(bio_ontology):
    """Add human and non-human protein root nodes and link proteins to them.

    Two root nodes are added (``INDRA:HUMAN_PROTEIN`` and
    ``INDRA:NON_HUMAN_PROTEIN``). Every node for which
    ``is_human_protein`` holds is given an ``isa`` edge to the human root;
    otherwise, every node for which ``is_non_human_protein`` holds is
    given an ``isa`` edge to the non-human root. All edges are added to
    the ontology in one call at the end.

    NOTE(review): an earlier, category-aware ``add_protein_parents`` with
    the same signature appears above in this source; if both live in the
    same module, this later definition is the one that takes effect.

    Parameters
    ----------
    bio_ontology : BioOntology
        The ontology to extend. It is modified in place.
    """
    human_root = 'INDRA:HUMAN_PROTEIN'
    non_human_root = 'INDRA:NON_HUMAN_PROTEIN'
    for root_label, root_name in ((human_root, 'Human protein'),
                                  (non_human_root, 'Non-human protein')):
        bio_ontology.add_node(root_label, name=root_name)

    pending_edges = []
    for node in bio_ontology.nodes():
        # The human check takes precedence over the non-human one
        if is_human_protein(bio_ontology, node):
            parent = human_root
        elif is_non_human_protein(bio_ontology, node):
            parent = non_human_root
        else:
            # Not a protein node: nothing to link
            continue
        pending_edges.append((node, parent, {'type': 'isa'}))

    bio_ontology.add_edges_from(pending_edges)