def expand_curie_to_iri(curie_id: str, curie_to_iri_map: list) -> Optional[str]:
    """Expand a CURIE into an IRI using the supplied prefix maps.

    CURIEs starting with ``UMLS:CN`` are first rewritten to ``medgen:CN``
    (see GitHub issue 810). The (possibly rewritten) CURIE is then passed to
    ``prefixcommons.expand_uri``.

    :param curie_id: the CURIE to expand
    :param curie_to_iri_map: prefix maps handed to ``prefixcommons.expand_uri``
    :return: the expanded IRI, or ``None`` when the expansion comes back equal
        to the CURIE that was looked up
    """
    # see GitHub issue 810
    lookup_id = (curie_id.replace('UMLS:CN', 'medgen:CN')
                 if curie_id.startswith('UMLS:CN')
                 else curie_id)
    expanded = prefixcommons.expand_uri(lookup_id, curie_to_iri_map)
    return None if expanded == lookup_id else expanded
def label(curie: str, graph: Graph) -> str:
    """
    Look up the label of the term identified by a CURIE.

    The CURIE is expanded to an IRI with ``expand_uri(..., strict=True)``,
    wrapped in a ``URIRef`` and passed to ``graph.label``; whatever that call
    returns is returned unchanged.
    """
    term_iri = expand_uri(curie, strict=True)
    term_ref = URIRef(term_iri)
    return graph.label(term_ref)
def get_node_curie_id_from_ontology_node_id(ontology_node_id: str,
                                            ontology: ontobio.ontol.Ontology,
                                            uri_to_curie_shortener: callable):
    """Derive the CURIE ID to use for an ontology node ID.

    Resolution rules, in order:

    * IDs starting with ``http:`` / ``https:`` are shortened with
      ``uri_to_curie_shortener``;
    * IDs starting with ``OBO:`` are expanded with ``prefixcommons.expand_uri``
      and the result is shortened;
    * IDs starting with ``UMLS:C`` are re-prefixed with ``CUI_PREFIX``;
    * anything else is used as-is.

    When shortening yields ``None`` a message is logged to stderr and the
    original node ID is used. Finally, any ID that ``is_cui_id`` recognises
    but that does not carry ``CUI_PREFIX`` is re-prefixed (part of the fix
    for issue #565).

    :param ontology_node_id: the node ID as it appears in the ontology
    :param ontology: the ontology the node came from (only ``ontology.id`` is
        read, for logging)
    :param uri_to_curie_shortener: callable mapping an IRI to a CURIE or None
    :return: the CURIE ID chosen for the node
    """
    if ontology_node_id.startswith(('http:', 'https:')):
        curie = uri_to_curie_shortener(ontology_node_id)
    elif ontology_node_id.startswith('OBO:'):
        expanded_iri = prefixcommons.expand_uri(ontology_node_id)
        curie = uri_to_curie_shortener(expanded_iri)
    elif ontology_node_id.startswith('UMLS:C'):
        curie = CUI_PREFIX + ':' + ontology_node_id.split('UMLS:')[1]
    else:
        curie = ontology_node_id

    if curie is None:
        # The shortener could not produce a CURIE; fall back to the raw ID.
        kg2_util.log_message(message="could not shorten this IRI to a CURIE",
                             ontology_name=ontology.id,
                             node_curie_id=ontology_node_id,
                             output_stream=sys.stderr)
        curie = ontology_node_id

    # Ensure that all CUI CURIE IDs use the "CUI:" prefix (part of fix for issue #565)
    needs_cui_prefix = (is_cui_id(curie)
                        and get_prefix_from_curie_id(curie) != CUI_PREFIX)
    if needs_cui_prefix:
        curie = CUI_PREFIX + ":" + get_local_id_from_curie_id(curie)
    return curie
def test_prefixes_cmaps():
    """Exercise contract_uri / expand_uri with an explicit list of prefix maps."""
    go_map = {'GO': 'http://purl.obolibrary.org/obo/GO_'}
    obo_map = {'OBO': 'http://purl.obolibrary.org/obo/'}
    cmaps = [go_map, obo_map]

    # The default contraction returns only the single expected CURIE.
    assert contract_uri(bp_iri, cmaps) == [bp_id]

    # With shortest=False both candidate CURIEs are returned.
    all_curies = contract_uri(bp_iri, cmaps, shortest=False)
    assert len(all_curies) == 2
    for expected_curie in (obo_bp_id, bp_id):
        assert expected_curie in all_curies

    # Both CURIE forms expand back to the same IRI.
    for curie in (bp_id, obo_bp_id):
        assert expand_uri(curie, cmaps) == bp_iri

    # An unknown identifier contracts to nothing in lenient mode ...
    assert contract_uri("FAKE", cmaps, strict=False) == []

    # ... and must raise NoPrefix in strict mode.
    strict_raised = False
    try:
        contract_uri("FAKE", cmaps, strict=True)
    except NoPrefix:
        strict_raised = True
    assert strict_raised
def test_prefixes():
    """Exercise contract_uri / expand_uri with the default prefix maps."""
    contracted = contract_uri(bp_iri)
    assert contracted == [bp_id]

    expanded = expand_uri(bp_id)
    assert expanded == bp_iri

    # An unknown identifier contracts to nothing in lenient mode ...
    assert contract_uri("FAKE", strict=False) == []

    # ... and must raise NoPrefix in strict mode.
    strict_raised = False
    try:
        contract_uri("FAKE", strict=True)
    except NoPrefix:
        strict_raised = True
    assert strict_raised
def get_descendants(graph: Graph,
                    node: str,
                    edge: Optional[URIRef] = RDFS['subClassOf'],
                    reflexive: Optional[bool] = True) -> Set[str]:
    """Collect the CURIEs of everything reachable from ``node`` as a subject.

    ``node`` is expanded strictly to an IRI and ``graph.transitive_subjects``
    is walked along ``edge``. Literals are ignored, and the start node itself
    is left out unless ``reflexive`` is truthy. Each remaining term is
    contracted strictly and its first CURIE is kept.

    :param graph: graph to traverse
    :param node: CURIE of the start node
    :param edge: predicate to follow
    :param reflexive: whether the start node may appear in the result
    :return: set of CURIEs
    """
    start = URIRef(expand_uri(node, strict=True))
    return {
        contract_uri(str(candidate), strict=True)[0]
        for candidate in graph.transitive_subjects(edge, start)
        if (reflexive or not (start == candidate))
        and not isinstance(candidate, Literal)
    }
def get_ancestors(graph: Graph,
                  node: str,
                  edge: Optional[URIRef] = RDFS['subClassOf'],
                  root: Optional[str] = None,
                  reflexive: Optional[bool] = True) -> Set[str]:
    """Collect the CURIEs of everything reachable from ``node`` as an object.

    ``node`` (and ``root`` when given) are expanded strictly to IRIs.
    ``graph.transitive_objects`` is walked along ``edge``; when a root is
    given it is pre-seeded into the dict passed as the third argument of that
    call. Literals and blank nodes are ignored, and the start node itself is
    left out unless ``reflexive`` is truthy. When a root is given, its CURIE
    is always added to the result.

    :param graph: graph to traverse
    :param node: CURIE of the start node
    :param edge: predicate to follow
    :param root: optional CURIE of a root term
    :param reflexive: whether the start node may appear in the result
    :return: set of CURIEs
    """
    start = URIRef(expand_uri(node, strict=True))
    if root is None:
        already_seen = {}
    else:
        root = URIRef(expand_uri(root, strict=True))
        already_seen = {root: 1}

    ancestors = {
        contract_uri(str(candidate), strict=True)[0]
        for candidate in graph.transitive_objects(start, edge, already_seen)
        if not isinstance(candidate, (Literal, BNode))
        and (reflexive or not (start == candidate))
    }

    # Add root to graph
    if root is not None:
        ancestors.add(contract_uri(str(root), strict=True)[0])

    return ancestors
def get_leaf_nodes(graph: Graph,
                   node: str,
                   edge: Optional[URIRef] = RDFS['subClassOf']) -> Set[str]:
    """Yield the CURIEs of the leaf terms found below ``node``.

    This is a generator (despite the ``Set[str]`` annotation): a term with no
    subjects pointing at it via ``edge`` is a leaf and its first strictly
    contracted CURIE is yielded; otherwise the function recurses into each
    subject. A leaf reachable through several paths is yielded once per path.

    :param graph: graph to traverse
    :param node: CURIE of the start node, or an already-built ``URIRef``
    :param edge: predicate to follow
    """
    target = node if isinstance(node, URIRef) else URIRef(expand_uri(node, strict=True))
    children = list(graph.subjects(edge, target))

    if not children:
        yield contract_uri(str(target), strict=True)[0]
        return

    for child in children:
        yield from get_leaf_nodes(graph, child, edge)
def make_rel(preds_dict: dict,
             subject_curie: str,
             object_curie: str,
             predicate: str,
             pmid: str,
             pub_date: str,
             sentence: str,
             subject_score: str,
             object_score: str,
             negated: bool):
    """Record one predication in ``preds_dict``, merging repeated triples.

    Edges are keyed by ``subject-predicate-object``. If the key is already
    present, only the publication CURIE and its info dict are added to the
    existing edge. Otherwise a new edge is built with ``kg2_util.make_edge``
    and stored under the key.

    :param preds_dict: accumulator mapping triple keys to edge dicts; mutated
    :param subject_curie: CURIE of the subject
    :param object_curie: CURIE of the object
    :param predicate: predicate name; lower-cased to form the relation type
    :param pmid: local PubMed ID; ``'PMID:'`` is prepended
    :param pub_date: stored under ``'publication date'``
    :param sentence: stored under ``'sentence'``
    :param subject_score: stored under ``'subject score'``
    :param object_score: stored under ``'object score'``
    :param negated: stored on a newly created edge under ``'negated'``
    """
    key = subject_curie + '-' + predicate + '-' + object_curie
    existing_edge = preds_dict.get(key, None)
    publication_curie = 'PMID:' + pmid
    publication_info_dict = {
        'publication date': pub_date,
        'sentence': sentence,
        'subject score': subject_score,
        'object score': object_score,
    }

    if existing_edge is not None:
        # Triple already known: just attach this publication to it.
        existing_edge['publications info'][publication_curie] = publication_info_dict
        existing_edge['publications'] = existing_edge['publications'] + [publication_curie]
        return

    relation_type = predicate.lower()
    if relation_type == 'xref':
        relation_curie = 'OBO:xref'
        relation_iri = prefixcommons.expand_uri(relation_curie)
    else:
        camel_cased = kg2_util.convert_snake_case_to_camel_case(
            relation_type.replace(' ', '_'))
        # Force a lower-case first letter on the IRI fragment.
        fragment = camel_cased[0].lower() + camel_cased[1:]
        relation_iri = SEMMEDDB_IRI + '#' + fragment
        relation_curie = 'SEMMEDDB:' + relation_type

    new_edge = kg2_util.make_edge(subject_curie,
                                  object_curie,
                                  relation_iri,
                                  relation_curie,
                                  relation_type,
                                  SEMMEDDB_IRI,
                                  curr_timestamp)
    new_edge['publications'] = [publication_curie]
    new_edge['publications info'] = {publication_curie: publication_info_dict}
    new_edge['negated'] = negated
    preds_dict[key] = new_edge
def _process_hpo_data(file_path: str) -> Dict[str, List[str]]:
    """Map HPO disease-phenotype annotations onto MONDO disease IDs.

    The MONDO ontology is loaded from ``../data/mondo.owl.gz``. The
    tab-separated annotation file is read from ``file_path`` (fetched with
    ``requests`` when the path starts with ``http``, opened locally
    otherwise). Each row's disease ID is replaced by the MONDO class found
    via ``owl:equivalentClass``. Rows are skipped when they have too few
    columns, have no MONDO equivalent, map to a MONDO CURIE containing
    ``hgnc``, or are Orphanet diseases with no OMIM equivalent whose MONDO
    class has at least one ``rdfs:subClassOf`` object (treated as disease
    groups).

    :param file_path: URL or local path of the annotation file
    :return: dict keyed by ``"<mondo curie>-<phenotype id>"`` whose values are
        ``[onset, frequency, severity]``; when a key occurs more than once,
        empty fields are filled in from later rows and real conflicts keep
        the first value seen
    """
    logger.info("loading mondo into memory")
    mondo = Graph()
    # Close the gzip handle once parsing is done instead of leaking it.
    with gzip.open("../data/mondo.owl.gz", 'rb') as mondo_file:
        mondo.parse(mondo_file, format='xml')
    logger.info("finished loading mondo")

    # Each entry is (mondo_curie, pheno_id, onset, freq, severity).
    mondo_merged_lines: List[tuple] = []
    disease_info: Dict[str, List[str]] = {}

    if file_path.startswith("http"):
        context_manager = closing(requests.get(file_path))
    else:
        context_manager = open(file_path, "r")

    # https://stackoverflow.com/a/35371451
    with context_manager as file:
        if file_path.startswith("http"):
            file = file.content.decode('utf-8').splitlines()
        reader = csv.reader(file, delimiter='\t', quotechar='\"')
        counter = 0
        for row in reader:
            try:
                (db, num, name, severity, pheno_id, publist, eco, onset, freq) = row[0:9]
            except ValueError:
                logger.warning("Too few values in row {}".format(row))
                continue

            # Align Id prefixes
            if db == 'MIM':
                db = 'OMIM'
            if db == 'ORPHA':
                db = 'Orphanet'
            if db == 'ORPHANET':
                db = 'Orphanet'

            disease_id = "{}:{}".format(db, num)
            disease_iri = URIRef(expand_uri(disease_id, strict=True))

            # Find the first MONDO class declared equivalent to this disease.
            mondo_curie = None
            mondo_iri = None
            for subj in mondo.subjects(OWL['equivalentClass'], disease_iri):
                curie = contract_uri(str(subj), strict=True)[0]
                if curie.startswith('MONDO'):
                    mondo_curie = curie
                    mondo_iri = subj
                    break

            if mondo_curie is None:
                # Logger.warn is a deprecated alias; use warning().
                logger.warning("No mondo id for {}".format(disease_id))
                continue

            # Does the MONDO class also have an OMIM equivalent?
            has_omim = False
            for obj in mondo.objects(mondo_iri, OWL['equivalentClass']):
                try:
                    curie = contract_uri(str(obj), strict=True)[0]
                except NoPrefix:
                    continue
                if curie.startswith('OMIM'):
                    has_omim = True

            # use scigraph instead of the above
            # mondo_node = monarch.get_clique_leader(disease_id)
            # mondo_curie = mondo_node['id']

            if mondo_curie is not None and 'hgnc' in mondo_curie:
                # to keep these, likely decipher IDs
                # mondo_curie = disease_id
                continue

            if disease_id.startswith('Orphanet') \
                    and has_omim is False \
                    and any(True for _ in mondo.objects(mondo_iri, RDFS['subClassOf'])):
                # disease is a disease group, skip
                logger.info(
                    "{} is a disease group, skipping".format(disease_id))
                continue

            mondo_merged_lines.append(
                (mondo_curie, pheno_id, onset, freq, severity))

            counter += 1
            if counter % 10000 == 0:
                logger.info("processed {} rows".format(counter))

    logger.info("processed {} rows".format(counter))

    for line in mondo_merged_lines:
        key = "{}-{}".format(line[0], line[1])
        values = [line[2], line[3], line[4]]
        if key in disease_info and disease_info[key] != values:
            logger.warning("Metadata for {} and {} mismatch: {} vs {}".format(
                line[0], line[1], values, disease_info[key]))
            # attempt to merge by collapsing freq, onset, severity
            # that is empty in one disease but not another
            # conflicts will defer to the disease first inserted
            merged_disease_info = disease_info[key]  # alias: edited in place
            for index, val in enumerate(values):
                if val == disease_info[key][index] \
                        or val == '' and disease_info[key][index] != '':
                    continue
                elif val != '' and disease_info[key][index] == '':
                    merged_disease_info[index] = val
                else:
                    logger.warning("Cannot merge {} and {} for {}".format(
                        values, disease_info[key], line[0]))
        else:
            disease_info[key] = values

    return disease_info
edge_dict['edge label'] = 'INVERTED:' + edge_label new_object = edge_dict['subject'] edge_dict['subject'] = edge_dict['object'] edge_dict['object'] = new_object edge_dict['simplified edge label'] = simplified_edge_label if drop_self_edges_except is not None and \ edge_dict['subject'] == edge_dict['object'] and \ simplified_edge_label not in drop_self_edges_except: continue # see issue 743 edge_dict['simplified relation curie'] = simplified_relation_curie if simplified_relation_curie in nodes_dict: simplified_relation = nodes_dict[simplified_relation_curie]['iri'] else: simplified_relation_curie_prefix = simplified_relation_curie.split( ':')[0] simplified_relation_uri_prefix = prefixcommons.expand_uri( simplified_relation_curie_prefix + ':', curies_to_uri_map) if simplified_relation_uri_prefix != simplified_relation_curie_prefix: simplified_relation = kg2_util.predicate_label_to_iri_and_curie( simplified_edge_label, simplified_relation_curie_prefix, simplified_relation_uri_prefix)[0] else: simplified_relation = relation relation_curies_not_in_nodes.add(simplified_relation_curie) edge_dict['simplified relation'] = simplified_relation edge_dict['provided by'] = [edge_dict['provided by']] edge_key = edge_dict[ 'subject'] + ' /// ' + simplified_edge_label + ' /// ' + edge_dict[ 'object'] existing_edge = new_edges.get(edge_key, None) if existing_edge is not None: existing_edge['provided by'] = list(
def get_rels_dict(nodes: dict,
                  owl_file_information_dict_list: list,
                  uri_to_curie_shortener: callable,
                  map_of_node_ontology_ids_to_curie_ids: dict) -> dict:
    """Build the dictionary of edges (relations) for a list of loaded ontologies.

    For every edge in every ontology graph, the subject and object IDs are
    mapped to node CURIEs, a predicate CURIE / IRI / label is derived, and an
    edge is created with ``kg2_util.make_edge`` unless one with the same
    relation key already exists. ``UMLS:hasSTY`` edges are not emitted;
    instead the semantic type is appended to the subject node's description
    (this mutates ``nodes``). Afterwards, an ``xref`` edge is added for every
    node xref that points at another node present in ``nodes``.

    :param nodes: node CURIE -> node dict; read, and descriptions may be
        updated in place
    :param owl_file_information_dict_list: one dict per ontology, read for its
        ``'ontology'`` and ``'id'`` entries
    :param uri_to_curie_shortener: callable mapping an IRI to a CURIE or None
    :param map_of_node_ontology_ids_to_curie_ids: ontology-level ID -> node
        CURIE ID
    :return: dict mapping relation keys (from ``make_rel_key``) to edge dicts
    """
    rels_dict = dict()

    for owl_file_information_dict in owl_file_information_dict_list:
        ontology = owl_file_information_dict['ontology']
        ontology_id = owl_file_information_dict['id']
        ont_graph = ontology.get_graph()
        ontology_curie_id = map_of_node_ontology_ids_to_curie_ids[ontology_id]
        # Each graph edge is unpacked as (object, subject, attribute-dict).
        for (object_id, subject_id, predicate_dict) in ont_graph.edges(data=True):
            assert type(predicate_dict) == dict

            # NOTE(review): ontology_update_date is only (re)assigned when the
            # ontology node exists; otherwise it keeps a previous value or is
            # unbound when first used below -- confirm that is intended.
            ontology_node = nodes.get(ontology_curie_id, None)
            if ontology_node is not None:
                ontology_update_date = ontology_node['update date']

            if subject_id == OWL_BASE_CLASS or object_id == OWL_BASE_CLASS:
                continue

            if subject_id.startswith(MYSTERIOUS_BASE_NODE_ID_TO_FILTER) or \
               object_id.startswith(MYSTERIOUS_BASE_NODE_ID_TO_FILTER):
                continue

            # subject_id and object_id are IDs from the original ontology objects; these may not
            # always be the node curie IDs (e.g., for SNOMED terms). Need to map them
            subject_curie_id = map_of_node_ontology_ids_to_curie_ids.get(
                subject_id, None)
            if subject_curie_id is None:
                kg2_util.log_message(
                    message="ontology node ID has no curie ID in the map",
                    ontology_name=ontology.id,
                    node_curie_id=subject_id,
                    output_stream=sys.stderr)
                continue
            object_curie_id = map_of_node_ontology_ids_to_curie_ids.get(
                object_id, None)
            if object_curie_id is None:
                kg2_util.log_message(
                    message="ontology node ID has no curie ID in the map",
                    ontology_name=ontology.id,
                    node_curie_id=object_id,
                    output_stream=sys.stderr)
                continue

            predicate_label = None
            edge_pred_string = predicate_dict['pred']

            # subClassOf edges between two TUI nodes are dropped.
            if subject_curie_id.startswith(
                    'TUI:') and object_curie_id.startswith(
                        'TUI:') and edge_pred_string == 'subClassOf':
                continue

            if not edge_pred_string.startswith(
                    'http:') and not edge_pred_string.startswith('https'):
                # edge_pred_string is not a URI; this is the most common case
                if ':' not in edge_pred_string:
                    # edge_pred_string is not a CURIE; this is the most common subcase
                    if edge_pred_string != 'subClassOf':
                        predicate_curie = 'owl:' + edge_pred_string
                    else:
                        predicate_curie = 'rdfs:subClassOf'
                    predicate_label = kg2_util.convert_camel_case_to_snake_case(
                        edge_pred_string)
                else:
                    # edge_pred_string is a CURIE
                    predicate_curie = edge_pred_string
                    predicate_node = nodes.get(predicate_curie, None)
                    if predicate_node is not None:
                        predicate_label = predicate_node['name']
                    else:
                        # predicate has no node object defined; just pull the label out of the CURIE
                        if edge_pred_string.startswith('OBO:'):
                            # Try e.g. "OBO:RO_0000001" as "RO:0000001".
                            test_curie = edge_pred_string.replace('OBO:', '').replace(
                                '_', ':')
                            predicate_node = nodes.get(test_curie, None)
                            if predicate_node is None:
                                predicate_label = edge_pred_string.split(
                                    ':')[1].split('#')[-1]
                            else:
                                # Label stays None here; it is looked up from
                                # the node further down.
                                predicate_curie = test_curie
                        else:
                            predicate_label = edge_pred_string
                predicate_iri = prefixcommons.expand_uri(predicate_curie)
                # Round-trip through the shortener and prefer its CURIE when
                # it returns one.
                predicate_curie_new = uri_to_curie_shortener(predicate_iri)
                if predicate_curie_new is not None:
                    predicate_curie = predicate_curie_new
            else:
                # edge_pred_string is already an IRI
                predicate_iri = edge_pred_string
                predicate_curie = uri_to_curie_shortener(predicate_iri)

            if predicate_curie is None:
                kg2_util.log_message(message="predicate IRI has no CURIE: " +
                                     predicate_iri,
                                     ontology_name=ontology.id,
                                     output_stream=sys.stderr)
                continue

            # Self-referencing xref edges are dropped.
            if subject_curie_id == object_curie_id and predicate_label == 'xref':
                continue

            if predicate_curie == 'UMLS:hasSTY':
                # Fold the semantic type into the subject's description
                # instead of emitting an edge.
                subject_node = nodes[subject_curie_id]
                object_node = nodes[object_curie_id]
                subject_description = subject_node['description']
                if subject_description is None:
                    subject_description = ''
                subject_node['description'] = '; '.join(
                    list(
                        filter(None, [
                            subject_description,
                            'UMLS Semantic Type: ' + object_node['id']
                        ])))
                continue

            rel_key = make_rel_key(subject_curie_id, predicate_curie,
                                   object_curie_id, ontology_curie_id)

            if predicate_label is None and ':' in predicate_curie:
                pred_node = nodes.get(predicate_curie, None)
                if pred_node is not None:
                    predicate_label = pred_node['name']
                    # Lower-case a leading capital letter.
                    if predicate_label[0].isupper():
                        predicate_label = predicate_label[0].lower(
                        ) + predicate_label[1:]

            assert predicate_label is not None
            predicate_label = predicate_label.replace(' ', '_')
            # Only tested on Food and Efo ontologies
            predicate_label = kg2_util.convert_camel_case_to_snake_case(
                predicate_label)
            # The first edge seen for a given relation key wins.
            if rels_dict.get(rel_key, None) is None:
                edge = kg2_util.make_edge(subject_curie_id, object_curie_id,
                                          predicate_iri, predicate_curie,
                                          predicate_label, ontology_id,
                                          ontology_update_date)
                rels_dict[rel_key] = edge
        # Add an xref edge for every node xref that points at a different
        # node that exists in `nodes`.
        # NOTE(review): the nesting of this loop (inside the per-ontology
        # loop) was reconstructed from flattened source -- confirm. It uses
        # whatever ontology_update_date was last set above.
        for node_id, node_dict in nodes.items():
            xrefs = node_dict['xrefs']
            if xrefs is not None:
                for xref_node_id in xrefs:
                    if xref_node_id in nodes and node_id != xref_node_id:
                        provided_by = nodes[node_id]['provided by']
                        key = make_rel_key(node_id, CURIE_OBO_XREF,
                                           xref_node_id, provided_by)
                        if rels_dict.get(key, None) is None:
                            edge = kg2_util.make_edge(node_id, xref_node_id,
                                                      IRI_OBO_XREF,
                                                      CURIE_OBO_XREF, 'xref',
                                                      provided_by,
                                                      ontology_update_date)
                            rels_dict[key] = edge

    return rels_dict
def make_nodes_dict_from_ontologies_list(
        ontology_info_list: list,
        curies_to_categories: dict,
        uri_to_curie_shortener: callable,
        category_label_to_iri_mapper: callable) -> dict:
    """Build the dictionary of nodes for a list of loaded ontologies.

    For each ontology a "data source" node is created for the ontology
    itself, then one node per ontology term. Term metadata (definition,
    synonyms, xrefs, basic property values, comments) is folded into the node
    dict. Extra nodes are created for ``UMLS:cui`` and
    ``HGNC:ENTREZGENE_ID`` property values and cross-referenced from the
    term's node. Nodes whose definition starts with ``OBSOLETE:`` or
    ``Obsolete.`` are skipped. When a CURIE is seen more than once the node
    dicts are merged with ``kg2_util.merge_two_dicts``.

    :param ontology_info_list: one dict per ontology; read for ``'ontology'``,
        ``'id'``, ``'title'``, ``'description'``,
        ``'file last modified timestamp'`` and optionally ``'umls-sver'``
    :param curies_to_categories: passed through to
        ``get_biolink_category_for_node``
    :param uri_to_curie_shortener: callable mapping an IRI to a CURIE or None
    :param category_label_to_iri_mapper: callable mapping a category label to
        its IRI
    :return: dict mapping node CURIE IDs to node dicts
    """
    ret_dict = dict()
    ontologies_iris_to_curies = dict()

    for ontology_info_dict in ontology_info_list:
        ontology = ontology_info_dict['ontology']
        iri_of_ontology = ontology_info_dict['id']
        assert iri_of_ontology is not None
        ontology_curie_id = uri_to_curie_shortener(iri_of_ontology)
        # Fall back to the IRI itself when no CURIE can be made for it.
        if ontology_curie_id is None or len(ontology_curie_id) == 0:
            ontology_curie_id = iri_of_ontology
        umls_sver = ontology_info_dict.get('umls-sver', None)
        updated_date = None
        if umls_sver is not None:
            # if you can, parse sver string into a date string
            updated_date = parse_umls_sver_date(umls_sver)
        if updated_date is None:
            updated_date = ontology_info_dict['file last modified timestamp']
        # Node representing the ontology itself.
        ontology_node = kg2_util.make_node(ontology_curie_id, iri_of_ontology,
                                           ontology_info_dict['title'],
                                           'data source', updated_date,
                                           iri_of_ontology)
        ontology_node['description'] = ontology_info_dict['description']
        ontology_node['ontology node ids'] = [iri_of_ontology]
        ontology_node['xrefs'] = []
        ret_dict[ontology_curie_id] = ontology_node

        ontologies_iris_to_curies[iri_of_ontology] = ontology_curie_id

        for ontology_node_id in ontology.nodes():
            onto_node_dict = ontology.node(ontology_node_id)
            assert onto_node_dict is not None

            if ontology_node_id.startswith(MYSTERIOUS_BASE_NODE_ID_TO_FILTER):
                continue

            node_curie_id = get_node_curie_id_from_ontology_node_id(
                ontology_node_id, ontology, uri_to_curie_shortener)

            assert not node_curie_id.startswith('UMLS:C')  # :DEBUG:

            # --- work out the node's IRI ---
            iri = onto_node_dict.get('id', None)
            if iri is None:
                iri = ontology_node_id

            # Ensure all CUI nodes use a 'umls/cui' IRI (part of fix for #565)
            if is_cui_id(node_curie_id):
                iri = CUI_BASE_IRI + '/' + get_local_id_from_curie_id(
                    node_curie_id)

            if not iri.startswith('http:') and not iri.startswith('https:'):
                iri = prefixcommons.expand_uri(iri)

            if node_curie_id.startswith(
                    'NCBIGene:') or node_curie_id.startswith('HGNC:'):
                iri = prefixcommons.expand_uri(node_curie_id)

            # Prefer the IRI generated from the CURIE when the expansion
            # changed the string, looks like an http(s) IRI, and differs from
            # the IRI chosen so far.
            generated_iri = prefixcommons.expand_uri(node_curie_id)
            if generated_iri != node_curie_id:
                if (generated_iri.startswith('http:') or generated_iri.startswith('https:')) and \
                   generated_iri != iri:
                    iri = generated_iri

            node_name = onto_node_dict.get('label', None)
            node_full_name = None

            [node_category_label, ontology_id_of_node_with_category
             ] = get_biolink_category_for_node(ontology_node_id, node_curie_id,
                                               ontology, curies_to_categories,
                                               uri_to_curie_shortener, set(),
                                               False)

            # Per-node defaults; filled in from the node metadata below.
            node_deprecated = False
            node_description = None
            node_creation_date = None
            node_update_date = None
            node_replaced_by_curie = None
            node_full_name = None
            node_publications = set()
            node_synonyms = set()
            node_xrefs = set()
            node_tui = None
            node_has_cui = False
            node_tui_category_label = None

            node_meta = onto_node_dict.get('meta', None)
            if node_meta is not None:
                node_deprecated = node_meta.get('deprecated', False)
                node_definition = node_meta.get('definition', None)
                if node_definition is not None:
                    node_description = node_definition['val']
                    # Skip nodes whose definition marks them as obsolete.
                    if node_description.startswith(
                            'OBSOLETE:') or node_description.startswith(
                                'Obsolete.'):
                        continue

                    # Definition xrefs that look like publications are kept.
                    node_definition_xrefs = node_definition.get('xrefs', None)
                    if node_definition_xrefs is not None:
                        assert type(node_definition_xrefs) == list
                        for xref in node_definition_xrefs:
                            xref_pub = xref_as_a_publication(xref)
                            if xref_pub is not None:
                                node_publications.add(xref_pub)

                # Only exact synonyms are kept, along with any publications
                # cited for them.
                node_synonyms_list = node_meta.get('synonyms', None)
                if node_synonyms_list is not None:
                    for syn_dict in node_synonyms_list:
                        syn_pred = syn_dict['pred']
                        if syn_pred == 'hasExactSynonym':
                            node_synonyms.add(syn_dict['val'])
                            syn_xrefs = syn_dict['xrefs']
                            if len(syn_xrefs) > 0:
                                for syn_xref in syn_xrefs:
                                    syn_xref_pub = xref_as_a_publication(
                                        syn_xref)
                                    if syn_xref_pub is not None:
                                        node_publications.add(syn_xref_pub)

                # Normalise xref prefixes: MESH: -> MSH:, UMLS:C... -> CUI prefix.
                node_xrefs_list = node_meta.get('xrefs', None)
                if node_xrefs_list is not None:
                    for xref_dict in node_xrefs_list:
                        xref_curie = xref_dict['val']
                        if xref_curie.startswith('MESH:'):
                            xref_curie = xref_curie.replace('MESH:', 'MSH:')
                        elif xref_curie.startswith('UMLS:C'):
                            xref_curie = CUI_PREFIX + ':' + xref_curie.split(
                                'UMLS:')[1]
                        node_xrefs.add(xref_curie)
                basic_property_values = node_meta.get('basicPropertyValues',
                                                      None)
                if basic_property_values is not None:
                    node_tui_list = []
                    for basic_property_value_dict in basic_property_values:
                        bpv_pred = basic_property_value_dict['pred']
                        bpv_pred_curie = uri_to_curie_shortener(bpv_pred)
                        # Compare against the raw predicate when it cannot be
                        # shortened.
                        if bpv_pred_curie is None:
                            bpv_pred_curie = bpv_pred
                        bpv_val = basic_property_value_dict['val']
                        if bpv_pred_curie in [
                                'OIO:creation_date', 'dcterms:issued',
                                'HGNC:DATE_CREATED'
                        ]:
                            node_creation_date = bpv_val
                        elif bpv_pred_curie == 'HGNC:DATE_LAST_MODIFIED':
                            node_update_date = bpv_val
                        elif bpv_pred_curie == 'IAL:0100001':
                            # This predicate is only expected on deprecated
                            # nodes; its value is the replacement's IRI.
                            assert node_deprecated
                            node_replaced_by_uri = bpv_val
                            node_replaced_by_curie = uri_to_curie_shortener(
                                node_replaced_by_uri)
                        elif bpv_pred_curie == 'UMLS:STY':  # STY_BASE_IRI:
                            node_tui_list.append(bpv_val)
                        elif bpv_pred_curie == 'skos:prefLabel':
                            # For HGNC nodes the prefLabel is the full name;
                            # for everything else it is the node name.
                            if not node_curie_id.startswith('HGNC:'):
                                node_name = bpv_val
                            else:
                                node_full_name = bpv_val
                                if node_name is None:
                                    node_name = node_full_name
                        elif bpv_pred_curie == 'skos:altLabel':
                            node_synonyms.add(bpv_val)
                        elif bpv_pred_curie == 'skos:definition':
                            node_description = kg2_util.strip_html(bpv_val)
                        elif bpv_pred_curie == 'HGNC:GENESYMBOL':
                            node_name = bpv_val
                            node_synonyms.add(bpv_val)
                        elif bpv_pred_curie == 'UMLS:cui':
                            node_has_cui = True
                    # A TUI-derived category is only looked up when exactly
                    # one semantic type was listed.
                    if len(node_tui_list) == 1:
                        node_tui = node_tui_list[0]
                        node_tui_uri = posixpath.join(
                            'https://identifiers.org/umls/STY', node_tui)
                        node_tui_curie = uri_to_curie_shortener(node_tui_uri)
                        assert node_tui_curie is not None
                        [node_tui_category_label,
                         _] = get_biolink_category_for_node(
                             node_tui_uri, node_tui_curie, ontology,
                             curies_to_categories, uri_to_curie_shortener,
                             set(), True)

                # Comments are appended to (or become) the description.
                node_comments = node_meta.get('comments', None)
                if node_comments is not None:
                    comments_str = 'COMMENTS: ' + (' // '.join(node_comments))
                    if node_description is not None:
                        node_description += ' // ' + comments_str
                    else:
                        node_description = comments_str

            # --- category fallbacks ---
            if node_category_label is None:
                node_type = onto_node_dict.get('type', None)
                if node_type is not None and node_type == 'PROPERTY':
                    node_category_label = 'property'

            if node_category_label is None:
                if not node_deprecated:
                    kg2_util.log_message("Node does not have a category",
                                         ontology.id,
                                         node_curie_id,
                                         output_stream=sys.stderr)
                    node_category_label = 'unknown category'
                else:
                    node_category_label = 'deprecated node'

            if node_has_cui:
                assert node_tui is not None or len(node_tui_list) > 0

                if node_tui_category_label is None:
                    node_tui_category_label = 'unknown category'
                    if node_tui is not None:
                        kg2_util.log_message(
                            message='Node ' + ontology_node_id +
                            ' has CUI whose TUI cannot be mapped to category: '
                            + node_tui)
                    else:
                        kg2_util.log_message(
                            message='Node ' + ontology_node_id +
                            ' has CUI with multiple associated TUIs: ' +
                            ', '.join(node_tui_list))
                else:
                    # NOTE(review): node_category_label was given a non-None
                    # value above, so this branch looks unreachable -- confirm.
                    if node_category_label is None:
                        node_category_label = node_tui_category_label  # override the node category label if we have a TUI
                node_tui_category_iri = category_label_to_iri_mapper(
                    node_tui_category_label)
            ontology_curie_id = ontologies_iris_to_curies[iri_of_ontology]
            source_ontology_information = ret_dict.get(ontology_curie_id, None)
            if source_ontology_information is None:
                kg2_util.log_message(
                    message=
                    "ontology IRI has no information dictionary available",
                    ontology_name=iri_of_ontology,
                    output_stream=sys.stderr)
                assert False
            # Nodes without their own update date inherit the ontology's.
            source_ontology_update_date = source_ontology_information[
                'update date']
            if node_update_date is None:
                node_update_date = source_ontology_update_date

            # Harvest xrefs and publication CURIEs embedded in the description.
            if node_description is not None:
                node_description_xrefs_match = REGEX_XREF_END_DESCRIP.match(
                    node_description)
                if node_description_xrefs_match is not None:
                    node_description_xrefs_str = node_description_xrefs_match[
                        1]
                    node_description_xrefs_list = node_description_xrefs_str.split(
                        ',')
                    for node_description_xref_str in node_description_xrefs_list:
                        node_description_xref_str = node_description_xref_str.strip(
                        )
                        # Only CURIE-looking entries are kept.
                        if ':' in node_description_xref_str:
                            node_xrefs.add(node_description_xref_str)
                node_description_pubs = REGEX_PUBLICATIONS.findall(
                    node_description)
                for pub_curie in node_description_pubs:
                    node_publications.add(pub_curie)

            # deal with node names that are ALLCAPS
            if node_name is not None and node_name.isupper():
                node_name = kg2_util.allcaps_to_only_first_letter_capitalized(
                    node_name)

            node_dict = kg2_util.make_node(node_curie_id, iri, node_name,
                                           node_category_label,
                                           node_update_date, iri_of_ontology)
            node_dict['full name'] = node_full_name
            node_dict['description'] = node_description
            node_dict[
                'creation date'] = node_creation_date  # slot name is not biolink standard
            node_dict[
                'deprecated'] = node_deprecated  # slot name is not biolink standard
            node_dict[
                'replaced by'] = node_replaced_by_curie  # slot name is not biolink standard
            node_dict['ontology node ids'] = [
                ontology_node_id
            ]  # slot name is not biolink standard
            node_dict['xrefs'] = list(
                node_xrefs)  # slot name is not biolink standard
            node_dict['synonym'] = list(
                node_synonyms)  # slot name is not biolink standard
            node_dict['publications'] = list(node_publications)

            # check if we need to make a CUI node
            if node_meta is not None and basic_property_values is not None:
                for basic_property_value_dict in basic_property_values:
                    bpv_pred = basic_property_value_dict['pred']
                    bpv_pred_curie = uri_to_curie_shortener(bpv_pred)
                    bpv_val = basic_property_value_dict['val']
                    if bpv_pred_curie == 'UMLS:cui':  # CUI_BASE_IRI:
                        # Start from a shallow copy of the term's node and
                        # override the CUI-specific fields.
                        cui_node_dict = dict(node_dict)
                        cui_uri = bpv_pred + '/' + bpv_val
                        cui_curie = uri_to_curie_shortener(cui_uri)
                        assert cui_curie is not None
                        assert not cui_curie.startswith('UMLS:C')  # :DEBUG:
                        # Skip this CUI if it's identical to the ontology node itself (happens with files created
                        # using 'load_on_cuis' - part of fix for issue #565)
                        if get_local_id_from_curie_id(
                                cui_curie) == get_local_id_from_curie_id(
                                    node_curie_id):
                            continue
                        cui_node_dict['id'] = cui_curie
                        cui_node_dict['iri'] = cui_uri
                        cui_node_dict['synonym'] = []
                        cui_node_dict['category'] = node_tui_category_iri
                        cui_node_dict[
                            'category label'] = node_tui_category_label.replace(
                                ' ', '_')
                        cui_node_dict['ontology node ids'] = []
                        cui_node_dict['provided by'] = CUI_BASE_IRI
                        cui_node_dict['xrefs'] = [
                        ]  # blanking the "xrefs" here is *vital* in order to avoid issue #395
                        cui_node_dict_existing = ret_dict.get(cui_curie, None)
                        if cui_node_dict_existing is not None:
                            cui_node_dict = kg2_util.merge_two_dicts(
                                cui_node_dict, cui_node_dict_existing)
                        ret_dict[cui_curie] = cui_node_dict
                        # Cross-reference the CUI node from the term's node
                        # (de-duplicated via a set).
                        node_dict_xrefs = node_dict['xrefs']
                        node_dict_xrefs.append(cui_curie)
                        node_dict['xrefs'] = list(set(node_dict_xrefs))
                    elif bpv_pred_curie == 'HGNC:ENTREZGENE_ID':
                        # Create an NCBIGene node as a shallow copy of the
                        # term's node and cross-reference it.
                        entrez_gene_id = bpv_val
                        entrez_node_dict = dict(node_dict)
                        entrez_curie = 'NCBIGene:' + entrez_gene_id
                        entrez_node_dict['id'] = entrez_curie
                        entrez_node_dict[
                            'iri'] = 'https://identifiers.org/NCBIGene/' + entrez_gene_id
                        ret_dict[entrez_curie] = entrez_node_dict
                        node_dict_xrefs = node_dict['xrefs']
                        node_dict_xrefs.append(entrez_curie)
                        node_dict['xrefs'] = list(set(node_dict_xrefs))
            # Merge with any node already stored under the same CURIE.
            if node_curie_id in ret_dict:
                node_dict = kg2_util.merge_two_dicts(ret_dict[node_curie_id],
                                                     node_dict)
            ret_dict[node_curie_id] = node_dict
    return ret_dict
for result_item_list in edges_result ] for edge_dict in edges_list: del edge_dict['is_defined_by'] del edge_dict['seed_node_uuid'] del edge_dict['source_node_uuid'] del edge_dict['target_node_uuid'] predicate_label = edge_dict['relation'] edge_dict['edge label'] = predicate_label del edge_dict['relation'] [relation, relation_curie] = kg2_util.predicate_label_to_iri_and_curie( predicate_label, KG1_RELATION_CURIE_PREFIX, KG1_RELATION_IRI_PREFIX) if relation_curie == 'BioLink:subclass_of': relation_curie = 'rdfs:subClassOf' relation = prefixcommons.expand_uri(relation_curie) edge_dict['relation'] = relation edge_dict['relation curie'] = relation_curie edge_dict['negated'] = False publications = edge_dict.get('publications', None) if publications is not None and publications != '': publications = publications.split(',') else: publications = [] edge_dict['publications'] = publications edge_dict['update date'] = None provided_by = edge_dict['provided_by'] if provided_by.startswith('DGIdb;'): provided_by = 'DGIdb' provided_by_kg2 = KG1_PROVIDED_BY_TO_KG2_IRIS.get(provided_by, None) edge_dict['provided by'] = provided_by_kg2