def test_awe_match_pairs():
    """
    Test axiom weight estimation
    """
    ont = Ontology()
    assert ont.nodes() == []
    lexmap = LexicalMapEngine(
        config={
            'match_weights': [{
                'prefix1': 'X',
                'prefix2': 'Y',
                'weights': [1.0, -1.0, 2.0, 0.0]
            }]
        })
    ont.add_node('X:1', 'foo 1')
    ont.add_node('Y:1', 'foo 1')
    lexmap.index_ontology(ont)
    xg = lexmap.get_xref_graph()
    df = lexmap.as_dataframe(xg)
    print(df.to_csv(sep="\t"))
    P_XY = lexmap.weighted_axioms('X:1', 'Y:1', xg)
    P_YX = lexmap.weighted_axioms('Y:1', 'X:1', xg)
    logging.info('P_XY={} P_YX={}'.format(P_XY, P_YX))
    assert P_XY[0] > P_XY[1]
    assert P_XY[0] == P_YX[1]
def test_merge():
    factory = OntologyFactory()
    print("Creating ont")
    ont = factory.create('tests/resources/lexmap_test.json')
    ont2 = Ontology()
    ont2.merge([ont])
    assert ont2.xref_graph is not None
def test_awe_1_to_1():
    """
    Test axiom weight estimation
    """
    ont = Ontology()
    assert ont.nodes() == []
    lexmap = LexicalMapEngine(
        config={
            'cardinality_weights': [{
                'prefix1': 'X',
                'prefix2': 'Y',
                'cardinality': '11',
                'weights': [-1.0, -1.0, 2.0, 0.0]
            }]
        })
    ont.add_node('X:1', 'foo 1')
    ont.add_node('Y:1', 'foo 1')
    ont.add_node('Z:1a', 'foo 1')
    ont.add_node('Z:1b', 'foo 1')
    lexmap.index_ontology(ont)
    xg = lexmap.get_xref_graph()
    df = lexmap.as_dataframe(xg)
    print(df.to_csv(sep="\t"))
    P_XY = lexmap.weighted_axioms('X:1', 'Y:1', xg)
    P_XZ = lexmap.weighted_axioms('X:1', 'Z:1a', xg)
    logging.info('P_XY={} P_XZ={}'.format(P_XY, P_XZ))
    assert P_XY[2] > P_XZ[2]
def create_study_terms():
    print('Creating study terms')
    handle = os.path.join(FIXTURE_DIR, 'edda.json')
    with open(handle, 'r', encoding='utf-8') as f:
        edda_json = f.read()
    g = obograph_util.convert_json_object(json.loads(edda_json))
    ont = Ontology(handle=handle, payload=g)
    study_terms = []
    for class_node in get_classes(ont):
        study_terms.append(
            StudyTerm(term_id=class_node[0], label=class_node[1]))
    StudyTerm.objects.bulk_create(study_terms)
def create_gene_terms():
    print('Creating gene terms')
    handle = os.path.join(FIXTURE_DIR, 'hgnc.json')
    with open(handle, 'r', encoding='utf-8') as f:
        hgnc_json = f.read()
    g = obograph_util.convert_json_object(json.loads(hgnc_json))
    ont = Ontology(handle=handle, payload=g)
    gene_terms = []
    for n_id in ont.nodes():
        n_dict = ont.node(n_id)
        if 'type' in n_dict:
            if ont.node_type(n_id) == 'CLASS':
                for t in n_dict['meta']['basicPropertyValues']:
                    if t['pred'] == 'http://ncicb.nci.nih.gov/xml/owl/EVS/Hugo.owl#Approved_Symbol':
                        symbol = t['val']
                        if not symbol.endswith('~withdrawn'):
                            # print('{} {}'.format(n_id, symbol))
                            gene_terms.append(
                                GeneTerm(term_id=n_id, label=symbol))
                        break
    GeneTerm.objects.bulk_create(gene_terms)
def create_phenotype_terms():
    print('Creating phenotype terms')
    handle = os.path.join(FIXTURE_DIR, 'hpo.json')
    with open(handle, 'r', encoding='utf-8') as f:
        hpo_json = f.read()
    g = obograph_util.convert_json_object(json.loads(hpo_json))
    ont = Ontology(handle=handle, payload=g)
    phenotype_terms = []
    for class_node in get_classes(ont):
        phenotype_terms.append(
            PhenotypeTerm(term_id=class_node[0], label=class_node[1]))
    PhenotypeTerm.objects.bulk_create(phenotype_terms)
def create_disease_terms():
    print('Removing existing disease terms')
    with connection.cursor() as cursor:
        cursor.execute("truncate core_diseaseterm cascade")
    print('Creating hdo disease terms')
    handle = os.path.join(FIXTURE_DIR, 'hdo.json')
    with open(handle, 'r', encoding='utf-8') as f:
        hdo_json = f.read()
    g = obograph_util.convert_json_object(json.loads(hdo_json))
    ont = Ontology(handle=handle, payload=g)
    disease_terms = []
    for class_node in get_classes(ont):
        disease_terms.append(
            DiseaseTerm(term_id=class_node[0], label=class_node[1]))
    DiseaseTerm.objects.bulk_create(disease_terms)
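# The create_*_terms loaders above all rely on a get_classes helper that is not
# shown in this excerpt. Below is a minimal sketch of what such a helper might
# look like; the name is taken from the loaders above, but the body is an
# assumption (yield (term_id, label) pairs for non-obsolete CLASS nodes), not
# the project's actual implementation.
def get_classes(ont):
    """Yield (term_id, label) pairs for every named class in the ontology."""
    for n_id in ont.nodes():
        n_dict = ont.node(n_id)
        # skip nodes without a recorded type, e.g. property nodes
        if 'type' not in n_dict:
            continue
        if ont.node_type(n_id) != 'CLASS':
            continue
        # skip obsolete classes when deprecation metadata is present
        if n_dict.get('meta', {}).get('deprecated', False):
            continue
        yield n_id, ont.label(n_id)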
def find_set_covering(
        subsets: List[CommonAncestor], ontology: Ontology = None,
        value: List[float] = None, max_num_subsets: int = None
) -> Union[None, List[Tuple[str, Set[str]]]]:
    """greedy algorithm to solve the set covering problem on subsets of trimming candidates

    Args:
        subsets (List[CommonAncestor]): list of candidate subsets, each exposing the ID of the
            subset (node_id), its label (node_label), and the actual set of covered elements
            (covered_starting_nodes)
        ontology: optional ontology used to avoid parent-child relationships in the final result
        value (List[float]): list of costs of the subsets
        max_num_subsets (int): maximum number of subsets in the final list
    Returns:
        Union[None, List[Tuple[str, Set[str]]]]: the list of (ID, covered elements) pairs for
            the subsets that maximize coverage with respect to the element universe
    """
    logger.debug("starting set covering optimization")
    elem_to_process = {subset.node_id for subset in subsets}
    if value and len(value) != len(elem_to_process):
        return None
    universe = set(
        [e for subset in subsets for e in subset.covered_starting_nodes])
    included_elmts = set()
    included_sets = []
    while len(elem_to_process) > 0 and included_elmts != universe and (
            not max_num_subsets or len(included_sets) < max_num_subsets):
        if value:
            effect_sets = sorted([
                (v * len(s.covered_starting_nodes - included_elmts),
                 s.covered_starting_nodes, s.node_label, s.node_id)
                for s, v in zip(subsets, value) if s.node_id in elem_to_process
            ], key=lambda x: (-x[0], x[2]))
        else:
            effect_sets = sorted(
                [(len(s.covered_starting_nodes - included_elmts),
                  s.covered_starting_nodes, s.node_label, s.node_id)
                 for s in subsets if s.node_id in elem_to_process],
                key=lambda x: (-x[0], x[2]))
        elem_to_process.remove(effect_sets[0][3])
        if ontology:
            # iterate over a copy so that removals do not skip elements
            for elem in list(included_sets):
                if effect_sets[0][3] in ontology.ancestors(elem[0]):
                    included_sets.remove(elem)
        included_elmts |= effect_sets[0][1]
        included_sets.append((effect_sets[0][3], effect_sets[0][1]))
    logger.debug("finished set covering optimization")
    return included_sets
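# A minimal sketch of how find_set_covering can be exercised in isolation.
# CommonAncestor in the surrounding module is assumed to expose node_id,
# node_label and covered_starting_nodes; a stand-in namedtuple with the same
# field names is used here so the sketch stays self-contained.
def example_find_set_covering():
    from collections import namedtuple
    CommonAncestorStub = namedtuple(
        'CommonAncestorStub', ['node_id', 'node_label', 'covered_starting_nodes'])
    candidates = [
        CommonAncestorStub('GO:A', 'ancestor A', {'GO:1', 'GO:2'}),
        CommonAncestorStub('GO:B', 'ancestor B', {'GO:2', 'GO:3'}),
        CommonAncestorStub('GO:C', 'ancestor C', {'GO:3'}),
    ]
    # greedy cover of {'GO:1', 'GO:2', 'GO:3'}: 'GO:A' is picked first (two new
    # elements), then 'GO:B' (covers the remaining 'GO:3'); 'GO:C' is never needed
    covering = find_set_covering(candidates, max_num_subsets=2)
    assert [node_id for node_id, _ in covering] == ['GO:A', 'GO:B']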
def test_awe_1_to_many_hier():
    """
    Test axiom weight estimation
    """
    ont = Ontology()
    assert ont.nodes() == []
    lexmap = LexicalMapEngine()
    ont.add_node('X:1', 'foo 1')
    ont.add_node('Z:1a', 'foo 1')
    ont.add_node('Z:1b', 'foo 1')
    ont.add_parent('Z:1b', 'Z:1a')
    lexmap.index_ontology(ont)
    xg = lexmap.get_xref_graph()
    df = lexmap.as_dataframe(xg)
    print(df.to_csv(sep="\t"))
    P_a = lexmap.weighted_axioms('X:1', 'Z:1a', xg)
    P_b = lexmap.weighted_axioms('X:1', 'Z:1b', xg)
    logging.info('P_a={} P_b={}'.format(P_a, P_b))
    assert P_a[0] > P_a[1]
    assert P_b[0] < P_b[1]
    assert P_a[0] > P_b[0]
def split_assocs(self, root_target: TargetClass, ontology: Ontology = None):
    logging.info('Splitting assocs on: {} // {}'.format(root_target, ontology))
    aset = self.assocs
    if ontology is None:
        ontology = aset.ontology
    fmap = {}
    tmap = {}
    for subj in aset.subjects:
        targets = set()
        features = set()
        for c in aset.annotations(subj):
            if root_target in ontology.ancestors(c, reflexive=True):
                targets.add(c)
            else:
                features.add(c)
        fmap[subj] = features
        tmap[subj] = targets
    self.assocs = AssociationSet(ontology=ontology, association_map=fmap)
    self.target_assocs = AssociationSet(ontology=ontology, association_map=tmap)
    logging.info('Split; f={} t={}'.format(self.assocs, self.target_assocs))
def test_awe_xref_weights():
    """
    Test axiom weight estimation, when explicit xref weights are provided in the config
    """
    ont = Ontology()
    assert ont.nodes() == []
    lexmap = LexicalMapEngine(
        config={
            'xref_weights': [
                {
                    'left': 'X:1',
                    'right': 'Y:1',
                    'weights': [100.0, 0.0, 0.0, 0.0]
                },
                {
                    'left': 'Z:1',
                    'right': 'Y:1',
                    'weights': [0.0, 100.0, 0.0, 0.0]
                },
            ]
        })
    ont.add_node('X:1', 'foo')
    ont.add_node('Y:1', 'foo')
    ont.add_node('Z:1', 'foo')
    lexmap.index_ontology(ont)
    xg = lexmap.get_xref_graph()
    df = lexmap.as_dataframe(xg)
    print(df.to_csv(sep="\t"))
    P_XY = lexmap.weighted_axioms('X:1', 'Y:1', xg)
    P_YZ = lexmap.weighted_axioms('Y:1', 'Z:1', xg)
    logging.info('P_XY={} P_YZ={}'.format(P_XY, P_YZ))
    assert P_XY[0] > P_XY[1]
    assert P_XY[0] > P_XY[2]
    assert P_XY[0] > P_XY[3]
    assert P_YZ[0] > P_YZ[1]
    assert P_YZ[0] > P_YZ[2]
    assert P_YZ[0] > P_YZ[3]
def test_awe_scope_map():
    """
    Test axiom weight estimation, synonym scopes
    """
    ont = Ontology()
    assert ont.nodes() == []
    lexmap = LexicalMapEngine()
    ont.add_node('X:1', 'x1')
    ont.add_node('Y:1', 'y1')
    ont.add_node('Z:1', 'z1')
    ont.add_synonym(Synonym('X:1', val='related', pred='hasRelatedSynonym'))
    ont.add_synonym(Synonym('Y:1', val='related', pred='hasRelatedSynonym'))
    ont.add_synonym(Synonym('Y:1', val='exact', pred='hasExactSynonym'))
    ont.add_synonym(Synonym('Z:1', val='exact', pred='hasExactSynonym'))
    lexmap.index_ontology(ont)
    xg = lexmap.get_xref_graph()
    df = lexmap.as_dataframe(xg)
    print(df.to_csv(sep="\t"))
    P_XY = lexmap.weighted_axioms('X:1', 'Y:1', xg)
    P_YZ = lexmap.weighted_axioms('Y:1', 'Z:1', xg)
    logging.info('P_XY={} P_YZ={}'.format(P_XY, P_YZ))
    assert P_XY[2] > P_XY[0]
    assert P_XY[2] > P_XY[1]
    assert P_XY[2] > P_XY[3]
    assert P_XY[2] < P_YZ[2]
def test_awe_1_to_many_default():
    """
    As previous test, but with defaults
    """
    ont = Ontology()
    lexmap = LexicalMapEngine(
        config={
            'cardinality_weights': [{
                'cardinality': 'm1',
                'weights': [1.0, -1.0, -2.0, 0.0]
            }]
        })
    ont.add_node('X:1', 'foo 1')
    ont.add_node('Y:1a', 'foo 1a')
    ont.add_synonym(Synonym('Y:1a', val='foo 1', pred='hasRelatedSynonym'))
    ont.add_node('Y:1b', 'foo 1b')
    ont.add_synonym(Synonym('Y:1b', val='foo 1', pred='hasExactSynonym'))
    lexmap.index_ontology(ont)
    xg = lexmap.get_xref_graph()
    df = lexmap.as_dataframe(xg)
    print(df.to_csv(sep="\t"))
    P = lexmap.weighted_axioms('X:1', 'Y:1a', xg)
    logging.info('P={}'.format(P))
    assert P[0] < P[1]
    assert P[1] > P[2]
def _get_single_sentence(
        node_ids: List[str], ontology: Ontology, aspect: str,
        evidence_group: str, qualifier: str,
        prepostfix_sentences_map: Dict[Tuple[str, str, str], Tuple[str, str]],
        terms_merged: bool = False, add_others: bool = False,
        truncate_others_generic_word: str = "several",
        truncate_others_aspect_words: Dict[str, str] = None,
        ancestors_with_multiple_children: Set[str] = None,
        rename_cell: bool = False,
        trimmed: bool = False) -> Union[Sentence, None]:
    """build a sentence object

    Args:
        node_ids (List[str]): list of ids for the terms to be combined in the sentence
        ontology (Ontology): the ontology containing the nodes
        aspect (str): aspect
        evidence_group (str): evidence group
        qualifier (str): qualifier
        prepostfix_sentences_map (Dict[Tuple[str, str, str], Tuple[str, str]]): map for prefix
            and postfix phrases
        terms_merged (bool): whether the terms set has been merged to reduce its size
        add_others (bool): whether to say that there are other terms which have been omitted
            from the sentence
        truncate_others_generic_word (str): a generic word to indicate that the set of terms
            reported in the sentence is only a subset of the original terms, e.g., 'several'
        truncate_others_aspect_words (Dict[str, str]): one word for each aspect describing the
            kind of terms that are included in the aspect
        ancestors_with_multiple_children (Set[str]): set containing labels of terms that cover
            more than one child term in the original set and which will appear with the label
            '(multiple)'
        rename_cell (bool): whether to rename the term 'cell'
        trimmed (bool): whether the term set has been trimmed
    Returns:
        Union[Sentence, None]: the combined go sentence
    """
    if len(node_ids) > 0:
        prefix = prepostfix_sentences_map[(aspect, evidence_group, qualifier)][0]
        additional_prefix = ""
        others_word = "entities"
        if truncate_others_aspect_words and aspect in truncate_others_aspect_words:
            others_word = truncate_others_aspect_words[aspect]
        if add_others:
            additional_prefix += " " + truncate_others_generic_word + " " + others_word + \
                                 ", including"
            if aspect == "C":
                additional_prefix += " the"
        postfix = prepostfix_sentences_map[(aspect, evidence_group, qualifier)][1]
        term_labels = [
            ontology.label(node_id, id_if_null=True) for node_id in node_ids
        ]
        return Sentence(
            prefix=prefix, terms_ids=node_ids, postfix=postfix,
            text=compose_sentence(
                prefix=prefix, term_names=term_labels, postfix=postfix,
                additional_prefix=additional_prefix,
                ancestors_with_multiple_children=ancestors_with_multiple_children,
                rename_cell=rename_cell),
            aspect=aspect, evidence_group=evidence_group, terms_merged=terms_merged,
            additional_prefix=additional_prefix, qualifier=qualifier,
            ancestors_covering_multiple_terms=ancestors_with_multiple_children,
            trimmed=trimmed)
    else:
        return None
def get_all_paths_to_root(node_id: str, ontology: Ontology,
                          min_distance_from_root: int = 0,
                          relations: List[str] = None,
                          nodeids_blacklist: List[str] = None,
                          previous_path: Union[None, List[str]] = None,
                          root_node=None) -> Set[Tuple[str]]:
    """get all possible paths connecting a GO term to its root terms

    Args:
        node_id (str): a valid GO id for the starting term
        ontology (Ontology): the GO ontology
        min_distance_from_root (int): return only terms at a specified minimum distance from
            root terms
        relations (List[str]): the list of relations to be used
        nodeids_blacklist (List[str]): a list of node ids to exclude from the paths
        previous_path (Union[None, List[str]]): the path to get to the current node
        root_node: if provided, only follow parents whose OBO namespace
            (OIO:hasOBONamespace) equals this value
    Returns:
        Set[Tuple[str]]: the set of paths connecting the specified term to its root terms,
            each of which is a sequence of term ids
    """
    if previous_path is None:
        previous_path = []
    new_path = previous_path[:]
    if not nodeids_blacklist or node_id not in nodeids_blacklist:
        new_path.append(node_id)
    parents = [
        parent for parent in ontology.parents(node=node_id, relations=relations)
        if ontology.node(parent)["depth"] >= min_distance_from_root
    ]
    parents_same_root = []
    if root_node:
        for parent in parents:
            parent_node = ontology.node(parent)
            parent_root = None
            if "meta" in parent_node and "basicPropertyValues" in parent_node["meta"]:
                for basic_prop_val in parent_node["meta"]["basicPropertyValues"]:
                    if basic_prop_val["pred"] == "OIO:hasOBONamespace":
                        parent_root = basic_prop_val["val"]
            if parent_root and parent_root == root_node:
                parents_same_root.append(parent)
        parents = parents_same_root

    if len(parents) > 0:
        # go up the tree, following a depth first visit
        paths_to_return = set()
        for parent in parents:
            for path in TrimmingAlgorithmNaive.get_all_paths_to_root(
                    node_id=parent, ontology=ontology, previous_path=new_path,
                    min_distance_from_root=min_distance_from_root, relations=relations,
                    nodeids_blacklist=nodeids_blacklist, root_node=root_node):
                paths_to_return.add(path)
        return paths_to_return
    if len(new_path) == 0:
        return {(node_id,)}
    else:
        return {tuple(new_path)}
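# A minimal sketch of how get_all_paths_to_root can be exercised on a toy
# ontology. It assumes the function is a static method of TrimmingAlgorithmNaive
# (as the recursive call above suggests) and that every node carries a
# precomputed "depth" attribute, which this codebase normally sets elsewhere;
# here the depths are assigned by hand.
def example_paths_to_root():
    ont = Ontology()
    ont.add_node('GO:root', 'root')
    ont.add_node('GO:mid', 'mid')
    ont.add_node('GO:leaf', 'leaf')
    ont.add_parent('GO:mid', 'GO:root')
    ont.add_parent('GO:leaf', 'GO:mid')
    ont.add_parent('GO:leaf', 'GO:root')  # the leaf has two paths to the root
    for node_id, depth in [('GO:root', 0), ('GO:mid', 1), ('GO:leaf', 2)]:
        ont.node(node_id)["depth"] = depth
    paths = TrimmingAlgorithmNaive.get_all_paths_to_root(
        node_id='GO:leaf', ontology=ont)
    # expected: {('GO:leaf', 'GO:mid', 'GO:root'), ('GO:leaf', 'GO:root')}
    print(paths)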
def get_ontology(self, data_type: DataType, provider=None):
    """Get Ontology"""
    ontology = Ontology()
    terms_pairs = []
    if data_type == DataType.GO:
        terms_pairs = Neo4jHelper.run_single_parameter_query(
            self.get_ontology_pairs_query.format("GO", "GO"), None)
    elif data_type == DataType.DO:
        terms_pairs = Neo4jHelper.run_single_parameter_query(
            self.get_ontology_pairs_query.format("DO", "DO"), None)
    elif data_type == DataType.EXPR:
        if provider in EXPRESSION_PRVD_SUBTYPE_MAP:
            terms_pairs = Neo4jHelper.run_single_parameter_query(
                self.get_ontology_pairs_query.format(
                    EXPRESSION_PRVD_SUBTYPE_MAP[provider],
                    EXPRESSION_PRVD_SUBTYPE_MAP[provider]), None)
    for terms_pair in terms_pairs:
        self.add_neo_term_to_ontobio_ontology_if_not_exists(
            terms_pair["term1.primaryKey"], terms_pair["term1.name"],
            terms_pair["term1.type"], terms_pair["term1.isObsolete"], ontology)
        self.add_neo_term_to_ontobio_ontology_if_not_exists(
            terms_pair["term2.primaryKey"], terms_pair["term2.name"],
            terms_pair["term2.type"], terms_pair["term2.isObsolete"], ontology)
        ontology.add_parent(
            terms_pair["term1.primaryKey"], terms_pair["term2.primaryKey"],
            relation="subClassOf" if terms_pair["rel_type"] == "IS_A" else "BFO:0000050")
    if data_type == DataType.EXPR and provider == "MGI":
        self.add_neo_term_to_ontobio_ontology_if_not_exists(
            "EMAPA_ARTIFICIAL_NODE:99999", "embryo", "anatomical_structure", False, ontology)
        ontology.add_parent("EMAPA_ARTIFICIAL_NODE:99999", "EMAPA:0", relation="subClassOf")
        self.add_neo_term_to_ontobio_ontology_if_not_exists(
            "EMAPA_ARTIFICIAL_NODE:99998", "head", "anatomical_structure", False, ontology)
        ontology.add_parent("EMAPA_ARTIFICIAL_NODE:99998", "EMAPA:0", relation="subClassOf")
        GeneDescriptionsETL.add_neo_term_to_ontobio_ontology_if_not_exists(
            "EMAPA_ARTIFICIAL_NODE:99997", "gland", "anatomical_structure", False, ontology)
        ontology.add_parent("EMAPA_ARTIFICIAL_NODE:99997", "EMAPA:0", relation="subClassOf")
    elif data_type == DataType.EXPR and provider == "FB":
        GeneDescriptionsETL.add_neo_term_to_ontobio_ontology_if_not_exists(
            "FBbt_ARTIFICIAL_NODE:99999", "organism", "", False, ontology)
        ontology.add_parent("FBbt_ARTIFICIAL_NODE:99999", "FBbt:10000000", relation="subClassOf")
    return ontology
def test_mutable():
    """
    Test mutability of ontology class
    """
    ont = Ontology()
    ont.add_node('TEST:1', 'foo bar')
    ont.add_node('TEST:2', 'bar foo')
    ont.add_node('TEST:3', 'foo bar')
    ont.add_node('TEST:4', 'wiz')
    syn = Synonym('TEST:4', val='bar foo', pred='hasExactSynonym')
    ont.add_synonym(syn)
    w = GraphRenderer.create('obo')
    w.write(ont)
    for n in ont.nodes():
        meta = ont._meta(n)
        print('{} -> {}'.format(n, meta))
    assert ont.label('TEST:1') == 'foo bar'
    assert ont.synonyms('TEST:1') == []
    assert ont.synonyms('TEST:4')[0].val == 'bar foo'
def test_awe_1_to_many_flat():
    """
    Test axiom weight estimation, for a 1-to-many situation where the many are not inter-related
    """
    ont = Ontology()
    lexmap = LexicalMapEngine(
        config={
            'cardinality_weights': [{
                'prefix1': 'X',
                'prefix2': 'Y',
                'cardinality': '1m',
                'weights': [-1.0, 1.0, -2.0, 0.0]
            }]
        })
    ont.add_node('X:1', 'foo 1')
    ont.add_node('Y:1a', 'foo 1a')
    ont.add_synonym(Synonym('Y:1a', val='foo 1', pred='hasRelatedSynonym'))
    ont.add_node('Y:1b', 'foo 1b')
    ont.add_synonym(Synonym('Y:1b', val='foo 1', pred='hasExactSynonym'))
    lexmap.index_ontology(ont)
    xg = lexmap.get_xref_graph()
    df = lexmap.as_dataframe(xg)
    print(df.to_csv(sep="\t"))
    P = lexmap.weighted_axioms('X:1', 'Y:1a', xg)
    logging.info('P={}'.format(P))
    assert P[0] < P[1]
    assert P[1] > P[2]