def _make_famplex_lookup():
    """Build a lookup from HGNC member gene names to FamPlex IDs.

    Every node of the bio ontology in the 'FPLX' namespace is visited and
    the names of its children in the 'HGNC' namespace are collected.

    Returns
    -------
    dict
        Maps a sorted tuple of HGNC gene names to the ID of the FamPlex
        entry having exactly those HGNC children. If several FamPlex
        entries share the same set of HGNC children, the one visited last
        is the one kept.
    """
    bio_ontology.initialize()
    lookup = {}
    for node in bio_ontology.nodes:
        db_ns, db_id = bio_ontology.get_ns_id(node)
        # Only FamPlex entries are of interest here
        if db_ns != 'FPLX':
            continue
        gene_names = []
        for child in bio_ontology.get_children(db_ns, db_id):
            # Each child is indexable with the namespace in first position
            if child[0] == 'HGNC':
                gene_names.append(bio_ontology.get_name(*child))
        # Sort so the key does not depend on the order children are returned
        gene_names.sort()
        lookup[tuple(gene_names)] = db_id
    return lookup
def normalize_sif_names(sif_df: DataFrame):
    """Try to normalize names in the sif dump dataframe

    This function tries to normalize the names of the entities in the sif
    dump. The 'bio_ontology' is the arbiter of what constitutes a normalized
    name. If no name exists, no further attempt to change the name is made.

    The dataframe is modified in place (columns 'agA_name' and 'agB_name');
    nothing is returned.

    Parameters
    ----------
    sif_df :
        The sif dataframe. The columns 'agA_ns', 'agA_id', 'agA_name',
        'agB_ns', 'agB_id' and 'agB_name' are read. The index may be
        arbitrary: new names are written back by row position, not by
        index label.

    Raises
    ------
    AssertionError
        If one of the sanity checks run after the renaming fails, e.g.
        if any agent name is missing.
    """
    from indra.ontology.bio import bio_ontology
    bio_ontology.initialize()

    def _ns_id_name_tuples():
        # The set of (ns, id, name) tuples over both agents of every row
        return set(
            zip(sif_df.agA_ns, sif_df.agA_id, sif_df.agA_name)
        ).union(zip(sif_df.agB_ns, sif_df.agB_id, sif_df.agB_name))

    logger.info('Getting ns, id, name tuples')
    # Get the set of grounded entities
    ns_id_name_tups = _ns_id_name_tuples()

    # Get the ontology name, if it exists, and check if the name in the
    # dataframe needs update
    logger.info('Checking which names need updating')
    inserted_set = set()
    for ns_, id_, cur_name in tqdm(ns_id_name_tups):
        oname = bio_ontology.get_name(ns_, id_)
        # If there is a name in the ontology and it is different than the
        # original, insert it
        if oname and oname != cur_name:
            inserted_set.add((ns_, id_, oname))

    if not inserted_set:
        logger.info('No names need renaming')
        return

    logger.info('Found %d names in dataframe that need renaming',
                len(inserted_set))

    # Make dataframe of the (ns, id) -> new name mapping
    logger.info('Making rename dataframe')
    rename_df = pd.DataFrame(list(inserted_set),
                             columns=['ns', 'id', 'name'])

    # Do merge with the relevant columns from sif for both A and B
    logger.info('Getting temporary dataframes for renaming')
    for prefix in ('agA', 'agB'):
        ns_col = f'{prefix}_ns'
        id_col = f'{prefix}_id'
        name_col = f'{prefix}_name'

        # A left merge keeps one output row per sif_df row, in the same
        # order, because each (ns, id) pair occurs once in rename_df
        merged = sif_df[[ns_col, id_col]].merge(
            right=rename_df, left_on=[ns_col, id_col],
            right_on=['ns', 'id'], how='left'
        )

        # The merge result carries a fresh 0..n-1 index that in general
        # does not match the index of sif_df. Work with plain arrays so
        # both the mask and the new values are applied by row position
        # instead of being aligned on (mismatching) index labels.
        new_names = merged['name'].to_numpy()
        needs_rename = pd.notna(new_names)
        sif_df.loc[needs_rename, name_col] = new_names[needs_rename]

    # Check that there are no missing names
    logger.info('Performing sanity checks')
    assert sum(pd.isna(sif_df.agA_name)) == 0
    assert sum(pd.isna(sif_df.agB_name)) == 0

    # Get the set of ns, id, name tuples and check diff
    ns_id_name_tups_after = _ns_id_name_tuples()
    # Check that rename took place
    assert ns_id_name_tups_after != ns_id_name_tups
    # Check that all new names are used
    assert set(rename_df['name']).issubset(
        {n for _, _, n in ns_id_name_tups_after})
    logger.info('Sif dataframe renamed successfully')
"""This service implements all base functions of the ontology graph as a REST service. The three key functions that most ontology methods rely on are child_rel, parent_rel, and get_node_property. There are a few other bookkeeping functions that also need to be implemented here since they access ontology attributes directly.""" import argparse from flask import Flask, request, jsonify from indra.ontology.bio import bio_ontology app = Flask(__name__) bio_ontology.initialize() ontologies = {'bio': bio_ontology} @app.route('/child_rel', methods=['GET']) def child_rel(): ont = request.json.get('ontology') ontology = ontologies.get(ont) kwargs = ('ns', 'id', 'rel_types') return jsonify(list(ontology.child_rel( **{k: v for k, v in request.json.items() if k in kwargs}))) @app.route('/parent_rel', methods=['GET']) def parent_rel(): ont = request.json.get('ontology') ontology = ontologies.get(ont) kwargs = ('ns', 'id', 'rel_types') return jsonify(list(ontology.parent_rel(