def format_edge(subject_id: str,
                object_id: str,
                predicate_label: str):
    """Build an edge from ``subject_id`` to ``object_id`` via ``kg2_util``.

    The relation CURIE is derived from ``predicate_label`` and
    ``REACTOME_RELATION_CURIE_PREFIX``.  ``REACTOME_KB_CURIE_ID`` is passed
    to the edge constructor in both branches (presumably as the edge's
    source/provided-by value -- confirm against ``kg2_util``).

    :param subject_id: identifier of the edge's subject node
    :param object_id: identifier of the edge's object node
    :param predicate_label: label of the predicate linking the two nodes
    :return: whatever ``kg2_util.make_edge_biolink`` (for the Biolink
        "same as" label) or ``kg2_util.make_edge`` (otherwise) returns
    """
    # Computed up front, before the branch, exactly as the call order
    # has always been; only the non-"same as" branch consumes it.
    curie_for_relation = kg2_util.predicate_label_to_curie(
        predicate_label,
        REACTOME_RELATION_CURIE_PREFIX)
    is_same_as = predicate_label == kg2_util.EDGE_LABEL_BIOLINK_SAME_AS

    if not is_same_as:
        return kg2_util.make_edge(subject_id,
                                  object_id,
                                  curie_for_relation,
                                  predicate_label,
                                  REACTOME_KB_CURIE_ID)

    # "same as" edges go through the Biolink-specific constructor; the
    # trailing None fills its final positional argument.
    return kg2_util.make_edge_biolink(subject_id,
                                      object_id,
                                      predicate_label,
                                      REACTOME_KB_CURIE_ID,
                                      None)
def format_edge(subject_id, object_id, predicate, update_date):
    """Build an edge from ``subject_id`` to ``object_id`` via ``kg2_util``.

    The relation CURIE is derived from ``predicate`` and
    ``DRUGCENTRAL_RELATION_CURIE_PREFIX``; ``DRUGCENTRAL_SOURCE`` and
    ``update_date`` are forwarded to the edge constructor in both branches.

    :param subject_id: identifier of the edge's subject node
    :param object_id: identifier of the edge's object node
    :param predicate: label of the predicate linking the two nodes
    :param update_date: value forwarded unchanged as the constructor's
        final argument
    :return: whatever ``kg2_util.make_edge_biolink`` (for the Biolink
        "same as" label) or ``kg2_util.make_edge`` (otherwise) returns
    """
    curie = kg2_util.predicate_label_to_curie(
        predicate,
        DRUGCENTRAL_RELATION_CURIE_PREFIX)
    same_as = predicate == kg2_util.EDGE_LABEL_BIOLINK_SAME_AS

    if same_as:
        # The Biolink constructor takes no separate relation CURIE.
        return kg2_util.make_edge_biolink(subject_id,
                                          object_id,
                                          predicate,
                                          DRUGCENTRAL_SOURCE,
                                          update_date)

    return kg2_util.make_edge(subject_id,
                              object_id,
                              curie,
                              predicate,
                              DRUGCENTRAL_SOURCE,
                              update_date)
def format_edge(subject_id: str,
                object_id: str,
                predicate_label: str,
                description: str,
                publications: list = None):
    """Build an edge via ``kg2_util.make_edge`` and attach optional fields.

    The relation CURIE is derived from ``predicate_label`` and
    ``DRUGBANK_RELATION_CURIE_PREFIX``; ``DRUGBANK_KB_CURIE_ID`` and a
    trailing ``None`` are passed to ``kg2_util.make_edge``.

    :param subject_id: identifier of the edge's subject node
    :param object_id: identifier of the edge's object node
    :param predicate_label: label of the predicate linking the two nodes
    :param description: if not ``None``, stored on the edge as
        ``{"sentence": description}`` under ``"publications_info"``
    :param publications: if not ``None``, stored on the edge under
        ``"publications"``
    :return: the edge object returned by ``kg2_util.make_edge``, with the
        optional keys set where applicable
    """
    curie = kg2_util.predicate_label_to_curie(
        predicate_label,
        DRUGBANK_RELATION_CURIE_PREFIX)
    drugbank_edge = kg2_util.make_edge(subject_id,
                                       object_id,
                                       curie,
                                       predicate_label,
                                       DRUGBANK_KB_CURIE_ID,
                                       None)

    # Optional fields, in assignment order.  A None value means "leave the
    # key as make_edge produced it".
    sentence_info = None if description is None else {"sentence": description}
    optional_fields = (
        ("publications_info", sentence_info),
        ("publications", publications),
    )
    for field_name, field_value in optional_fields:
        if field_value is not None:
            drugbank_edge[field_name] = field_value

    return drugbank_edge
def make_hmdb_edge(subject_id: str,
                   object_id: str,
                   subject_prefix: str,
                   object_prefix: str,
                   predicate_label: str,
                   update_date: str,
                   publications_info: dict):
    """Build an edge via ``kg2_util`` and attach ``publications_info``.

    The subject identifier is always ``subject_prefix + ":" + subject_id``.
    The object identifier is prefixed the same way only when
    ``object_prefix`` is not ``None``; otherwise ``object_id`` is used as
    given.

    :param subject_id: local part of the subject identifier
    :param object_id: local part of the object identifier, or the complete
        identifier when ``object_prefix`` is ``None``
    :param subject_prefix: prefix joined to ``subject_id`` with ``":"``;
        must not be ``None`` (the concatenation would raise ``TypeError``)
    :param object_prefix: prefix joined to ``object_id`` with ``":"``, or
        ``None`` to use ``object_id`` unchanged
    :param predicate_label: label of the predicate linking the two nodes
    :param update_date: value forwarded unchanged to the edge constructor
    :param publications_info: stored on the edge under
        ``"publications_info"`` (always, even when ``None``)
    :return: the edge object returned by ``kg2_util.make_edge_biolink``
        (for the Biolink "same as" label) or ``kg2_util.make_edge``
        (otherwise), with ``"publications_info"`` set
    """
    relation_curie = kg2_util.predicate_label_to_curie(predicate_label,
                                                       CURIE_PREFIX_HMDB)

    # Named *_curie rather than `subject` / `object` so the builtin
    # `object` is not shadowed inside this function.
    subject_curie = subject_prefix + ":" + subject_id
    object_curie = (object_id if object_prefix is None
                    else object_prefix + ":" + object_id)

    if predicate_label == kg2_util.EDGE_LABEL_BIOLINK_SAME_AS:
        # The Biolink constructor takes no separate relation CURIE.
        edge = kg2_util.make_edge_biolink(subject_curie,
                                          object_curie,
                                          predicate_label,
                                          HMDB_PROVIDED_BY_CURIE_ID,
                                          update_date)
    else:
        edge = kg2_util.make_edge(subject_curie,
                                  object_curie,
                                  relation_curie,
                                  predicate_label,
                                  HMDB_PROVIDED_BY_CURIE_ID,
                                  update_date)

    edge["publications_info"] = publications_info
    return edge
kg2_util.merge_two_dicts( { 'subject': result_item_list[0], 'object': result_item_list[2] }, result_item_list[1]['data']) for result_item_list in edges_result ] for edge_dict in edges_list: del edge_dict['is_defined_by'] del edge_dict['seed_node_uuid'] del edge_dict['source_node_uuid'] del edge_dict['target_node_uuid'] predicate_label = edge_dict['relation'] edge_dict['edge_label'] = predicate_label del edge_dict['relation'] relation_curie = kg2_util.predicate_label_to_curie( predicate_label, KG1_RELATION_CURIE_PREFIX) if relation_curie == 'bioLink:subclass_of': relation_curie = kg2_util.CURIE_ID_RDFS_SUBCLASS_OF edge_dict['relation'] = relation_curie edge_dict['negated'] = False publications = edge_dict.get('publications', None) if publications is not None and publications != '': publications = publications.split(',') else: publications = [] edge_dict['publications'] = publications edge_dict['update_date'] = None provided_by = edge_dict['provided_by'] if provided_by.startswith('DGIdb;'): provided_by = 'DGIdb' provided_by_kg2 = KG1_PROVIDED_BY_TO_KG2_PROVIDED_BY_CURIE_IDS.get(
return [{"nodes": nodes, "edges": edges}, update_date] if __name__ == '__main__': print("Starting Script:", date()) args = get_args() print("Starting PubMed ID Load:", date()) pmids = set(json.load(open(args.kg2PMIDs))) print("Finishing PubMedID Load:", date(), ",", len(pmids), "PMIDs in KG2") pubmed_dir = args.inputDirectory nodes = [] edges = [] latest_date = 0 mesh_predicate_label = "references" mesh_relation_curie = kg2_util.predicate_label_to_curie( mesh_predicate_label, kg2_util.CURIE_PREFIX_PMID) for filename in os.listdir(pubmed_dir): if ".gz" in filename: print("Starting Load of", filename, ":", date()) xml_file = gzip.open(pubmed_dir + filename) data = xmltodict.parse(xml_file.read()) print("Finished Load of", filename, ":", date()) articles = data["PubmedArticleSet"]["PubmedArticle"] for article in articles: [data, update_date] = make_node_and_edges(article, mesh_predicate_label, mesh_relation_curie) for node in data["nodes"]: nodes.append(node)