Example #1
import logging

from ontobio.lexmap import LexicalMapEngine
from ontobio.ontol import Ontology  # later lexmap examples also use ontobio.ontol.Synonym

def test_awe_match_pairs():
    """
    Test axiom weight estimation
    """
    ont = Ontology()
    assert ont.nodes() == []
    lexmap = LexicalMapEngine(
        config={
            'match_weights': [{
                'prefix1': 'X',
                'prefix2': 'Y',
                'weights': [1.0, -1.0, 2.0, 0.0]
            }]
        })

    ont.add_node('X:1', 'foo 1')
    ont.add_node('Y:1', 'foo 1')

    lexmap.index_ontology(ont)
    xg = lexmap.get_xref_graph()
    df = lexmap.as_dataframe(xg)
    print(df.to_csv(sep="\t"))

    P_XY = lexmap.weighted_axioms('X:1', 'Y:1', xg)
    P_YX = lexmap.weighted_axioms('Y:1', 'X:1', xg)
    logging.info('P_XY={} P_YX={}'.format(P_XY, P_YX))
    assert P_XY[0] > P_XY[1]
    assert P_XY[0] == P_YX[1]
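To interpret the four scores returned by weighted_axioms, here is a minimal sketch; the position-to-axiom mapping (subClassOf, superClassOf, equivalentTo, other) is an assumption consistent with the assertions in these tests, and the helper name is hypothetical:

def describe_axiom_weights(p):
    # Assumed ordering of the four scores returned by weighted_axioms()
    labels = ('subClassOf', 'superClassOf', 'equivalentTo', 'other')
    return dict(zip(labels, p))

# e.g. describe_axiom_weights(P_XY)['subClassOf'] is P_XY[0]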
Example #2
def test_merge():
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('tests/resources/lexmap_test.json')
    ont2 = Ontology()
    ont2.merge([ont])
    assert ont2.xref_graph is not None
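A small sketch of what merge does beyond populating xref_graph, under the assumption (inferred, not confirmed by this test) that it copies the nodes of each source ontology into the target:

# Assumption: merge() composes the node graphs of the sources into self.
o1 = Ontology()
o1.add_node('A:1', 'label a')
o2 = Ontology()
o2.add_node('B:1', 'label b')
merged = Ontology()
merged.merge([o1, o2])
print(merged.nodes())  # expected to contain both 'A:1' and 'B:1'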
Example #3
def test_awe_1_to_1():
    """
    Test axiom weight estimation
    """
    ont = Ontology()
    assert ont.nodes() == []
    lexmap = LexicalMapEngine(
        config={
            'cardinality_weights': [{
                'prefix1': 'X',
                'prefix2': 'Y',
                'cardinality': '11',
                'weights': [-1.0, -1.0, 2.0, 0.0]
            }]
        })

    ont.add_node('X:1', 'foo 1')
    ont.add_node('Y:1', 'foo 1')
    ont.add_node('Z:1a', 'foo 1')
    ont.add_node('Z:1b', 'foo 1')

    lexmap.index_ontology(ont)
    xg = lexmap.get_xref_graph()
    df = lexmap.as_dataframe(xg)
    print(df.to_csv(sep="\t"))
    P_XY = lexmap.weighted_axioms('X:1', 'Y:1', xg)
    P_XZ = lexmap.weighted_axioms('X:1', 'Z:1a', xg)
    logging.info('P_XY={} P_XZ={}'.format(P_XY, P_XZ))
    assert P_XY[2] > P_XZ[2]
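The cardinality key selects which mapping shape a weight vector applies to. A hedged sketch of a config covering the shapes that appear in these tests ('11', '1m', 'm1'); the 'mm' entry and all weight values are illustrative assumptions:

config = {
    'cardinality_weights': [
        {'cardinality': '11', 'weights': [-1.0, -1.0, 2.0, 0.0]},  # one-to-one
        {'cardinality': '1m', 'weights': [-1.0, 1.0, -2.0, 0.0]},  # one-to-many
        {'cardinality': 'm1', 'weights': [1.0, -1.0, -2.0, 0.0]},  # many-to-one
        {'cardinality': 'mm', 'weights': [0.0, 0.0, 0.0, 0.0]},    # assumed many-to-many shape
    ]
}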
Example #4
def create_study_terms():
    print('Creating study terms')
    handle = os.path.join(FIXTURE_DIR, 'edda.json')
    with open(handle, 'r', encoding='utf-8') as f:
        edda_json = f.read()
        g = obograph_util.convert_json_object(json.loads(edda_json))
        ont = Ontology(handle=handle, payload=g)
        study_terms = []
        for class_node in get_classes(ont):
            study_terms.append(
                StudyTerm(term_id=class_node[0], label=class_node[1]))
        StudyTerm.objects.bulk_create(study_terms)
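get_classes is not shown in this listing; a plausible sketch, inferred from the CLASS filter used in Example #5 and the (term_id, label) pairs consumed above (not necessarily the project's actual helper):

def get_classes(ont):
    # Yield (id, label) pairs for every CLASS node in the ontology.
    for n_id in ont.nodes():
        if ont.node_type(n_id) == 'CLASS':
            yield (n_id, ont.label(n_id))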
Example #5
def create_gene_terms():
    print('Creating gene terms')
    handle = os.path.join(FIXTURE_DIR, 'hgnc.json')
    with open(handle, 'r', encoding='utf-8') as f:
        hgnc_json = f.read()
        g = obograph_util.convert_json_object(json.loads(hgnc_json))
        ont = Ontology(handle=handle, payload=g)
        gene_terms = []
        for n_id in ont.nodes():
            n_dict = ont.node(n_id)
            if 'type' in n_dict and ont.node_type(n_id) == 'CLASS':
                for t in n_dict['meta']['basicPropertyValues']:
                    if t['pred'] == 'http://ncicb.nci.nih.gov/xml/owl/EVS/Hugo.owl#Approved_Symbol':
                        symbol = t['val']
                        if not symbol.endswith('~withdrawn'):
                            # print('{}   {}'.format(n_id, symbol))
                            gene_terms.append(
                                GeneTerm(term_id=n_id, label=symbol))
                            break
        GeneTerm.objects.bulk_create(gene_terms)
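For a large ontology such as HGNC, Django's bulk_create accepts a batch_size argument to bound the size of each INSERT; a small variant of the last line (the batch size of 1000 is an arbitrary choice):

GeneTerm.objects.bulk_create(gene_terms, batch_size=1000)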
Example #6
def create_phenotype_terms():
    print('Creating phenotype terms')
    handle = os.path.join(FIXTURE_DIR, 'hpo.json')
    with open(handle, 'r', encoding='utf-8') as f:
        hpo_json = f.read()
        g = obograph_util.convert_json_object(json.loads(hpo_json))
        ont = Ontology(handle=handle, payload=g)
        phenotype_terms = []
        for class_node in get_classes(ont):
            phenotype_terms.append(
                PhenotypeTerm(term_id=class_node[0], label=class_node[1]))
        PhenotypeTerm.objects.bulk_create(phenotype_terms)
Example #7
def create_disease_terms():
    print('Removing existing disease terms')
    with connection.cursor() as cursor:
        cursor.execute("truncate core_diseaseterm cascade")
    print('Creating hdo disease terms')
    handle = os.path.join(FIXTURE_DIR, 'hdo.json')
    with open(handle, 'r', encoding='utf-8') as f:
        hdo_json = f.read()
        g = obograph_util.convert_json_object(json.loads(hdo_json))
        ont = Ontology(handle=handle, payload=g)
        disease_terms = []
        for class_node in get_classes(ont):
            disease_terms.append(
                DiseaseTerm(term_id=class_node[0], label=class_node[1]))
        DiseaseTerm.objects.bulk_create(disease_terms)
Example #8
def find_set_covering(
        subsets: List[CommonAncestor],
        ontology: Ontology = None,
        value: List[float] = None,
        max_num_subsets: int = None
) -> Union[None, List[Tuple[str, Set[str]]]]:
    """greedy algorithm to solve set covering problem on subsets of trimming candidates

    Args:
        ontology: optional ontology to avoid parent-child relationships in the final result
        subsets (List[Tuple[str, str, Set[str]]]): list of subsets, each of which must contain a tuple with the first
        element being the ID of the subset, the second being the name, and the third the actual set of elements
        value (List[float]): list of costs of the subsets
        max_num_subsets (int): maximum number of subsets in the final list
    Returns:
        Union[None, List[str]]: the list of IDs of the subsets that maximize coverage with respect to the elements
                                in the element universe
    """
    logger.debug("starting set covering optimization")
    elem_to_process = {subset.node_id for subset in subsets}
    if value and len(value) != len(elem_to_process):
        return None
    universe = {e for subset in subsets
                for e in subset.covered_starting_nodes}
    included_elmts = set()
    included_sets = []
    while len(elem_to_process) > 0 and included_elmts != universe and (
            not max_num_subsets or len(included_sets) < max_num_subsets):
        if value:
            effect_sets = sorted(
                [(v * len(s.covered_starting_nodes - included_elmts),
                  s.covered_starting_nodes, s.node_label, s.node_id)
                 for s, v in zip(subsets, value) if s.node_id in elem_to_process],
                key=lambda x: (-x[0], x[2]))
        else:
            effect_sets = sorted(
                [(len(s.covered_starting_nodes - included_elmts),
                  s.covered_starting_nodes, s.node_label, s.node_id)
                 for s in subsets if s.node_id in elem_to_process],
                key=lambda x: (-x[0], x[2]))
        elem_to_process.remove(effect_sets[0][3])
        if ontology:
            # iterate over a copy, since we remove ancestors of the newly selected subset
            for elem in included_sets[:]:
                if effect_sets[0][3] in ontology.ancestors(elem[0]):
                    included_sets.remove(elem)
        included_elmts |= effect_sets[0][1]
        included_sets.append((effect_sets[0][3], effect_sets[0][1]))
    logger.debug("finished set covering optimization")
    return included_sets
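A minimal usage sketch with a stand-in for CommonAncestor, assumed here to be a named tuple carrying exactly the three fields the function reads:

from collections import namedtuple

# Stand-in with the fields find_set_covering actually accesses.
CA = namedtuple('CA', ['node_id', 'node_label', 'covered_starting_nodes'])

subsets = [
    CA('T:1', 'broad term', {'a', 'b', 'c'}),
    CA('T:2', 'narrow term', {'a'}),
]
# The greedy pass picks T:1 first, which already covers the whole universe.
result = find_set_covering(subsets)
assert result[0][0] == 'T:1'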
Example #9
def test_awe_1_to_many_hier():
    """
    Test axiom weight estimation
    """
    ont = Ontology()
    assert ont.nodes() == []
    lexmap = LexicalMapEngine()

    ont.add_node('X:1', 'foo 1')
    ont.add_node('Z:1a', 'foo 1')
    ont.add_node('Z:1b', 'foo 1')
    ont.add_parent('Z:1b', 'Z:1a')

    lexmap.index_ontology(ont)
    xg = lexmap.get_xref_graph()
    df = lexmap.as_dataframe(xg)
    print(df.to_csv(sep="\t"))
    P_a = lexmap.weighted_axioms('X:1', 'Z:1a', xg)
    P_b = lexmap.weighted_axioms('X:1', 'Z:1b', xg)
    logging.info('P_a={} P_b={}'.format(P_a, P_b))
    assert P_a[0] > P_a[1]
    assert P_b[0] < P_b[1]
    assert P_a[0] > P_b[0]
Example #10
def split_assocs(self, root_target: TargetClass, ontology: Ontology = None):
    logging.info('Splitting assocs on: {} // {}'.format(root_target, ontology))
    aset = self.assocs
    if ontology is None:
        ontology = aset.ontology
    fmap = {}
    tmap = {}
    for subj in aset.subjects:
        targets = set()
        features = set()
        for c in aset.annotations(subj):
            if root_target in ontology.ancestors(c, reflexive=True):
                targets.add(c)
            else:
                features.add(c)
        fmap[subj] = features
        tmap[subj] = targets
    self.assocs = AssociationSet(ontology=ontology, association_map=fmap)
    self.target_assocs = AssociationSet(ontology=ontology, association_map=tmap)
    logging.info('Split; f={} t={}'.format(self.assocs, self.target_assocs))
Example #11
def test_awe_xref_weights():
    """
    Test axiom weight estimation, when provided with defaults
    """
    ont = Ontology()
    assert ont.nodes() == []
    lexmap = LexicalMapEngine(
        config={
            'xref_weights': [
                {
                    'left': 'X:1',
                    'right': 'Y:1',
                    'weights': [100.0, 0.0, 0.0, 0.0]
                },
                {
                    'left': 'Z:1',
                    'right': 'Y:1',
                    'weights': [0.0, 100.0, 0.0, 0.0]
                },
            ]
        })
    ont.add_node('X:1', 'foo')
    ont.add_node('Y:1', 'foo')
    ont.add_node('Z:1', 'foo')

    lexmap.index_ontology(ont)
    xg = lexmap.get_xref_graph()
    df = lexmap.as_dataframe(xg)
    print(df.to_csv(sep="\t"))
    P_XY = lexmap.weighted_axioms('X:1', 'Y:1', xg)
    P_YZ = lexmap.weighted_axioms('Y:1', 'Z:1', xg)
    logging.info('P_XY={} P_YZ={}'.format(P_XY, P_YZ))
    assert P_XY[0] > P_XY[1]
    assert P_XY[0] > P_XY[2]
    assert P_XY[0] > P_XY[3]
    assert P_YZ[0] > P_YZ[1]
    assert P_YZ[0] > P_YZ[2]
    assert P_YZ[0] > P_YZ[3]
Example #12
def test_awe_scope_map():
    """
    Test axiom weight estimation, synonym scopes
    """
    ont = Ontology()
    assert ont.nodes() == []
    lexmap = LexicalMapEngine()
    ont.add_node('X:1', 'x1')
    ont.add_node('Y:1', 'y1')
    ont.add_node('Z:1', 'z1')
    ont.add_synonym(Synonym('X:1', val='related', pred='hasRelatedSynonym'))
    ont.add_synonym(Synonym('Y:1', val='related', pred='hasRelatedSynonym'))

    ont.add_synonym(Synonym('Y:1', val='exact', pred='hasExactSynonym'))
    ont.add_synonym(Synonym('Z:1', val='exact', pred='hasExactSynonym'))

    lexmap.index_ontology(ont)
    xg = lexmap.get_xref_graph()
    df = lexmap.as_dataframe(xg)
    print(df.to_csv(sep="\t"))
    P_XY = lexmap.weighted_axioms('X:1', 'Y:1', xg)
    P_YZ = lexmap.weighted_axioms('Y:1', 'Z:1', xg)
    logging.info('P_XY={} P_YZ={}'.format(P_XY, P_YZ))
    assert P_XY[2] > P_XY[0]
    assert P_XY[2] > P_XY[1]
    assert P_XY[2] > P_XY[3]
    assert P_XY[2] < P_YZ[2]
Example #13
def test_awe_1_to_many_default():
    """
    As previous test, but with defaults
    """
    ont = Ontology()
    lexmap = LexicalMapEngine(
        config={
            'cardinality_weights': [{
                'cardinality': 'm1',
                'weights': [1.0, -1.0, -2.0, 0.0]
            }]
        })

    ont.add_node('X:1', 'foo 1')
    ont.add_node('Y:1a', 'foo 1a')
    ont.add_synonym(Synonym('Y:1a', val='foo 1', pred='hasRelatedSynonym'))
    ont.add_node('Y:1b', 'foo 1b')
    ont.add_synonym(Synonym('Y:1b', val='foo 1', pred='hasExactSynonym'))
    lexmap.index_ontology(ont)
    xg = lexmap.get_xref_graph()
    df = lexmap.as_dataframe(xg)
    print(df.to_csv(sep="\t"))
    P = lexmap.weighted_axioms('X:1', 'Y:1a', xg)
    logging.info('P={}'.format(P))
    assert P[0] < P[1]
    assert P[1] > P[2]
Example #14
def _get_single_sentence(node_ids: List[str],
                         ontology: Ontology,
                         aspect: str,
                         evidence_group: str,
                         qualifier: str,
                         prepostfix_sentences_map: Dict[Tuple[str, str, str],
                                                        Tuple[str, str]],
                         terms_merged: bool = False,
                         add_others: bool = False,
                         truncate_others_generic_word: str = "several",
                         truncate_others_aspect_words: Dict[str, str] = None,
                         ancestors_with_multiple_children: Set[str] = None,
                         rename_cell: bool = False,
                         trimmed: bool = False) -> Union[Sentence, None]:
    """build a sentence object

    Args:
        node_ids (List[str]): list of ids for the terms to be combined in the sentence
        ontology (Ontology): the ontology containing the nodes
        aspect (str): aspect
        evidence_group (str): evidence group
        qualifier (str): qualifier
        prepostfix_sentences_map (Dict[Tuple[str, str, str], Tuple[str, str]]): map for prefix and postfix phrases
        terms_merged (bool): whether the terms set has been merged to reduce its size
        add_others (bool): whether to say that there are other terms which have been omitted from the sentence
        truncate_others_generic_word (str): a generic word to indicate that the set of terms reported in the sentence is
            only a subset of the original terms, e.g., 'several'
        truncate_others_aspect_words (Dict[str, str]): one word for each aspect describing the kind of terms that are
            included in the aspect
        ancestors_with_multiple_children (Set[str]): set containing the labels of terms that cover more than one
            child term in the original set and that will appear with the label '(multiple)'
        rename_cell (bool): whether to rename the term 'cell'
        trimmed (bool): whether the term set has been trimmed
    Returns:
        Union[Sentence, None]: the combined GO sentence
    """
    if len(node_ids) > 0:
        prefix = prepostfix_sentences_map[(aspect, evidence_group,
                                           qualifier)][0]
        additional_prefix = ""
        others_word = "entities"
        if truncate_others_aspect_words and aspect in truncate_others_aspect_words:
            others_word = truncate_others_aspect_words[aspect]
        if add_others:
            additional_prefix += " " + truncate_others_generic_word + " " + others_word + ", including"
        if aspect == "C":
            additional_prefix += " the"
        postfix = prepostfix_sentences_map[(aspect, evidence_group,
                                            qualifier)][1]
        term_labels = [
            ontology.label(node_id, id_if_null=True) for node_id in node_ids
        ]
        return Sentence(
            prefix=prefix,
            terms_ids=node_ids,
            postfix=postfix,
            text=compose_sentence(prefix=prefix,
                                  term_names=term_labels,
                                  postfix=postfix,
                                  additional_prefix=additional_prefix,
                                  ancestors_with_multiple_children=
                                  ancestors_with_multiple_children,
                                  rename_cell=rename_cell),
            aspect=aspect,
            evidence_group=evidence_group,
            terms_merged=terms_merged,
            additional_prefix=additional_prefix,
            qualifier=qualifier,
            ancestors_covering_multiple_terms=ancestors_with_multiple_children,
            trimmed=trimmed)
    else:
        return None
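The shape of prepostfix_sentences_map follows from the docstring: keys are (aspect, evidence_group, qualifier) triples and values are (prefix, postfix) phrase pairs. A hedged sketch, with phrase contents invented purely for illustration:

prepostfix_sentences_map = {
    ("F", "EXPERIMENTAL", ""): ("exhibits", ""),
    ("C", "EXPERIMENTAL", ""): ("localizes to", ""),
}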
Example #15
    def get_all_paths_to_root(node_id: str,
                              ontology: Ontology,
                              min_distance_from_root: int = 0,
                              relations: List[str] = None,
                              nodeids_blacklist: List[str] = None,
                              previous_path: Union[None, List[str]] = None,
                              root_node=None) -> Set[Tuple[str]]:
        """get all possible paths connecting a go term to its root terms

        Args:
            node_id (str): a valid GO id for the starting term
            ontology (Ontology): the go ontology
            min_distance_from_root (int): return only terms at a specified minimum distance from root terms
            relations (List[str]): the list of relations to be used
            nodeids_blacklist (List[str]): a list of node ids to exclude from the paths
            previous_path (Union[None, List[str]]): the path to get to the current node
            root_node: if provided, only follow parents whose 'OIO:hasOBONamespace' value matches this root
        Returns:
            Set[Tuple[str]]: the set of paths connecting the specified term to its root terms, each of which contains a
            sequence of terms ids
        """
        if previous_path is None:
            previous_path = []
        new_path = previous_path[:]
        if not nodeids_blacklist or node_id not in nodeids_blacklist:
            new_path.append(node_id)
        parents = [
            parent
            for parent in ontology.parents(node=node_id, relations=relations)
            if ontology.node(parent)["depth"] >= min_distance_from_root
        ]
        parents_same_root = []
        if root_node:
            for parent in parents:
                parent_node = ontology.node(parent)
                parent_root = None
                if "meta" in parent_node and "basicPropertyValues" in parent_node[
                        "meta"]:
                    for basic_prop_val in parent_node["meta"][
                            "basicPropertyValues"]:
                        if basic_prop_val["pred"] == "OIO:hasOBONamespace":
                            parent_root = basic_prop_val["val"]
                if parent_root and parent_root == root_node:
                    parents_same_root.append(parent)
            parents = parents_same_root

        if len(parents) > 0:
            # go up the tree, following a depth first visit
            paths_to_return = set()
            for parent in parents:
                for path in TrimmingAlgorithmNaive.get_all_paths_to_root(
                        node_id=parent,
                        ontology=ontology,
                        previous_path=new_path,
                        min_distance_from_root=min_distance_from_root,
                        relations=relations,
                        nodeids_blacklist=nodeids_blacklist,
                        root_node=root_node):
                    paths_to_return.add(path)
            return paths_to_return
        if len(new_path) == 0:
            return {(node_id, )}
        else:
            return {tuple(new_path)}
Example #16
    def get_ontology(self, data_type: DataType, provider=None):
        """Get Ontology"""

        ontology = Ontology()
        terms_pairs = []
        if data_type == DataType.GO:
            terms_pairs = Neo4jHelper.run_single_parameter_query(
                self.get_ontology_pairs_query.format("GO", "GO"),
                None)
        elif data_type == DataType.DO:
            terms_pairs = Neo4jHelper.run_single_parameter_query(
                self.get_ontology_pairs_query.format("DO", "DO"),
                None)
        elif data_type == DataType.EXPR:
            if provider in EXPRESSION_PRVD_SUBTYPE_MAP:
                terms_pairs = Neo4jHelper.run_single_parameter_query(
                    self.get_ontology_pairs_query.format(EXPRESSION_PRVD_SUBTYPE_MAP[provider],
                                                         EXPRESSION_PRVD_SUBTYPE_MAP[provider]),
                    None)
        for terms_pair in terms_pairs:
            self.add_neo_term_to_ontobio_ontology_if_not_exists(
                terms_pair["term1.primaryKey"], terms_pair["term1.name"], terms_pair["term1.type"],
                terms_pair["term1.isObsolete"], ontology)
            self.add_neo_term_to_ontobio_ontology_if_not_exists(
                terms_pair["term2.primaryKey"], terms_pair["term2.name"], terms_pair["term2.type"],
                terms_pair["term2.isObsolete"], ontology)
            ontology.add_parent(terms_pair["term1.primaryKey"], terms_pair["term2.primaryKey"],
                                relation="subClassOf" if terms_pair["rel_type"] == "IS_A" else "BFO:0000050")
        if data_type == DataType.EXPR and provider == "MGI":
            self.add_neo_term_to_ontobio_ontology_if_not_exists("EMAPA_ARTIFICIAL_NODE:99999",
                                                                "embryo",
                                                                "anatomical_structure",
                                                                False,
                                                                ontology)
            ontology.add_parent("EMAPA_ARTIFICIAL_NODE:99999", "EMAPA:0", relation="subClassOf")
            self.add_neo_term_to_ontobio_ontology_if_not_exists("EMAPA_ARTIFICIAL_NODE:99998",
                                                                "head",
                                                                "anatomical_structure",
                                                                False,
                                                                ontology)
            ontology.add_parent("EMAPA_ARTIFICIAL_NODE:99998", "EMAPA:0", relation="subClassOf")
            GeneDescriptionsETL.add_neo_term_to_ontobio_ontology_if_not_exists(
                "EMAPA_ARTIFICIAL_NODE:99997",
                "gland",
                "anatomical_structure",
                False,
                ontology)
            ontology.add_parent("EMAPA_ARTIFICIAL_NODE:99997", "EMAPA:0", relation="subClassOf")
        elif data_type == DataType.EXPR and provider == "FB":
            GeneDescriptionsETL.add_neo_term_to_ontobio_ontology_if_not_exists(
                "FBbt_ARTIFICIAL_NODE:99999",
                "organism",
                "",
                False,
                ontology)
            ontology.add_parent("FBbt_ARTIFICIAL_NODE:99999",
                                "FBbt:10000000",
                                relation="subClassOf")

        return ontology
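add_neo_term_to_ontobio_ontology_if_not_exists is defined elsewhere in the ETL; a minimal sketch of the behavior its name and call sites imply, not the project's actual implementation:

def add_neo_term_to_ontobio_ontology_if_not_exists(term_id, label, term_type, is_obsolete, ontology):
    # Sketch only: skipping obsolete terms and storing term_type as the
    # node type are assumptions about the real ETL helper.
    if not is_obsolete and not ontology.has_node(term_id):
        ontology.add_node(term_id, label, type=term_type)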
Example #17
def test_mutable():
    """
    Test mutability of ontology class
    """
    ont = Ontology()
    ont.add_node('TEST:1', 'foo bar')
    ont.add_node('TEST:2', 'bar foo')
    ont.add_node('TEST:3', 'foo bar')
    ont.add_node('TEST:4', 'wiz')
    syn = Synonym('TEST:4', val='bar foo', pred='hasExactSynonym')
    ont.add_synonym(syn)
    w = GraphRenderer.create('obo')
    w.write(ont)
    for n in ont.nodes():
        meta = ont._meta(n)
        print('{} -> {}'.format(n, meta))

    assert ont.label('TEST:1') == 'foo bar'
    assert ont.synonyms('TEST:1') == []
    assert ont.synonyms('TEST:4')[0].val == 'bar foo'
Example #18
def test_awe_1_to_many_flat():
    """
    Test axiom weight estimation for a 1-to-many situation where the many are not inter-related
    """
    ont = Ontology()
    lexmap = LexicalMapEngine(
        config={
            'cardinality_weights': [{
                'prefix1': 'X',
                'prefix2': 'Y',
                'cardinality': '1m',
                'weights': [-1.0, 1.0, -2.0, 0.0]
            }]
        })

    ont.add_node('X:1', 'foo 1')
    ont.add_node('Y:1a', 'foo 1a')
    ont.add_synonym(Synonym('Y:1a', val='foo 1', pred='hasRelatedSynonym'))
    ont.add_node('Y:1b', 'foo 1b')
    ont.add_synonym(Synonym('Y:1b', val='foo 1', pred='hasExactSynonym'))
    lexmap.index_ontology(ont)
    xg = lexmap.get_xref_graph()
    df = lexmap.as_dataframe(xg)
    print(df.to_csv(sep="\t"))
    P = lexmap.weighted_axioms('X:1', 'Y:1a', xg)
    logging.info('P={}'.format(P))
    assert P[0] < P[1]
    assert P[1] > P[2]