Example #1
0
def _make_famplex_lookup():
    """Build a dictionary for looking up FamPlex IDs by member genes.

    Every node in the bio ontology whose namespace is 'FPLX' contributes
    one entry: the key is the sorted tuple of the names of its children in
    the 'HGNC' namespace, and the value is the FamPlex ID of the node.

    Returns
    -------
    dict
        Mapping from a sorted tuple of HGNC gene names to a FamPlex ID.
        If several FamPlex nodes yield the same tuple, the one visited
        last wins.
    """
    bio_ontology.initialize()
    lookup = {}
    for node in bio_ontology.nodes:
        db_ns, db_id = bio_ontology.get_ns_id(node)
        # Only FamPlex entries are of interest here
        if db_ns != 'FPLX':
            continue
        # Sorting makes the key independent of the order in which the
        # ontology returns the children
        gene_names = sorted(
            bio_ontology.get_name(*child)
            for child in bio_ontology.get_children(db_ns, db_id)
            if child[0] == 'HGNC'
        )
        lookup[tuple(gene_names)] = db_id
    return lookup
Example #2
0
def normalize_sif_names(sif_df: DataFrame):
    """Try to normalize names in the sif dump dataframe

    This function tries to normalize the names of the entities in the sif
    dump. The 'bio_ontology' is the arbiter of what constitutes a normalized
    name. If no name exists, no further attempt to change the name is made.
    The dataframe is modified in place and nothing is returned.

    Parameters
    ----------
    sif_df :
        The sif dataframe. The columns agA_ns, agA_id, agA_name, agB_ns,
        agB_id and agB_name are read, and agA_name and agB_name are
        overwritten where the ontology provides a different name. The
        dataframe may have any index; renaming is done by row position.

    Raises
    ------
    AssertionError
        If one of the sanity checks performed after renaming fails.
    """
    from indra.ontology.bio import bio_ontology
    bio_ontology.initialize()
    logger.info('Getting ns, id, name tuples')

    # Get the set of grounded entities
    ns_id_name_tups = _sif_ns_id_name_tuples(sif_df)

    # Get the ontology name, if it exists, and check if the name in the
    # dataframe needs update
    logger.info('Checking which names need updating')
    inserted_set = set()
    for ns_, id_, cur_name in tqdm(ns_id_name_tups):
        oname = bio_ontology.get_name(ns_, id_)
        # If there is a name in the ontology and it is different than the
        # original, insert it (the set takes care of duplicates)
        if oname and oname != cur_name:
            inserted_set.add((ns_, id_, oname))

    if not inserted_set:
        logger.info('No names need renaming')
        return

    logger.info(f'Found {len(inserted_set)} names in dataframe that need '
                f'renaming')

    # Make dataframe of the renames, one row per (ns, id) pair
    logger.info('Making rename dataframe')
    rename_df = pd.DataFrame(list(inserted_set),
                             columns=['ns', 'id', 'name'])

    # Do merge on with relevant columns from sif for both A and B
    logger.info('Getting temporary dataframes for renaming')
    _rename_sif_agent_names(sif_df, rename_df, 'agA')
    _rename_sif_agent_names(sif_df, rename_df, 'agB')

    # Check that there are no missing names
    logger.info('Performing sanity checks')
    assert not sif_df.agA_name.isna().any()
    assert not sif_df.agB_name.isna().any()

    # Get the set of ns, id, name tuples and check diff
    ns_id_name_tups_after = _sif_ns_id_name_tuples(sif_df)
    # Check that rename took place
    assert ns_id_name_tups_after != ns_id_name_tups
    # Check that all new names are used
    assert set(rename_df.name).issubset(
        {n for _, _, n in ns_id_name_tups_after})
    logger.info('Sif dataframe renamed successfully')


def _sif_ns_id_name_tuples(sif_df):
    """Return the set of (ns, id, name) tuples over both agent columns."""
    return set(
        zip(sif_df.agA_ns, sif_df.agA_id, sif_df.agA_name)).union(
        zip(sif_df.agB_ns, sif_df.agB_id, sif_df.agB_name)
    )


def _rename_sif_agent_names(sif_df, rename_df, prefix):
    """Overwrite the '<prefix>_name' column of sif_df using rename_df."""
    ns_col, id_col, name_col = \
        f'{prefix}_ns', f'{prefix}_id', f'{prefix}_name'

    # Left merge keeps one output row per sif_df row, in the same order,
    # because each (ns, id) pair occurs at most once in rename_df
    merged = sif_df[[ns_col, id_col]].merge(
        right=rename_df,
        left_on=[ns_col, id_col],
        right_on=['ns', 'id'], how='left'
    )
    new_names = merged['name']

    # The merge result carries a fresh 0..n-1 index, so it only lines up
    # with sif_df by position. Use plain arrays for both the mask and the
    # values so that pandas does not try to align them on sif_df's index.
    has_new_name = new_names.notna().to_numpy()
    sif_df.loc[has_new_name, name_col] = new_names.to_numpy()[has_new_name]
Example #3
0
"""This service implements all base functions of the ontology graph as a REST
service. The three key functions that most ontology methods rely on are
child_rel, parent_rel, and get_node_property. There are a few other bookkeeping
functions that also need to be implemented here since they access ontology
attributes directly."""
import argparse
from flask import Flask, request, jsonify
from indra.ontology.bio import bio_ontology

# Flask application object on which the REST endpoints below are registered
app = Flask(__name__)


# Initialize the ontology once, when this module is imported, rather than
# inside a request handler
bio_ontology.initialize()
# Ontologies served by this service, keyed by the name that requests pass
# in the 'ontology' field of their JSON payload
ontologies = {'bio': bio_ontology}


@app.route('/child_rel', methods=['GET'])
def child_rel():
    """Return the result of the ontology's child_rel as a JSON list.

    The request must carry a JSON payload with an 'ontology' entry naming
    one of the ontologies registered in `ontologies`. Of the remaining
    entries only 'ns', 'id' and 'rel_types' are used; they are passed on
    as keyword arguments to the ontology's child_rel method.
    """
    payload = request.json
    # NOTE(review): an ontology name that is not registered yields None
    # here and the call below then fails - confirm this is acceptable
    ontology = ontologies.get(payload.get('ontology'))
    # Forward only the arguments that child_rel is meant to receive
    call_kwargs = {
        key: payload[key]
        for key in ('ns', 'id', 'rel_types')
        if key in payload
    }
    relations = ontology.child_rel(**call_kwargs)
    return jsonify(list(relations))


@app.route('/parent_rel', methods=['GET'])
def parent_rel():
    ont = request.json.get('ontology')
    ontology = ontologies.get(ont)
    kwargs = ('ns', 'id', 'rel_types')
    return jsonify(list(ontology.parent_rel(