Ejemplo n.º 1
0
def format_edge(subject_id: str, object_id: str, predicate_label: str):
    """Build a Reactome edge from ``subject_id`` to ``object_id``.

    The relation CURIE is derived from ``predicate_label`` with
    ``kg2_util.predicate_label_to_curie`` and the Reactome relation prefix.
    If ``predicate_label`` equals ``kg2_util.EDGE_LABEL_BIOLINK_SAME_AS`` the
    edge is produced by ``kg2_util.make_edge_biolink`` (with ``None`` as its
    trailing argument); any other predicate is handed to
    ``kg2_util.make_edge`` together with the derived relation CURIE.

    Returns:
        Whatever the selected ``kg2_util`` edge constructor returns.
    """
    # The CURIE is computed up front, exactly as before, even though only the
    # generic (non "same as") path consumes it.
    curie = kg2_util.predicate_label_to_curie(predicate_label,
                                              REACTOME_RELATION_CURIE_PREFIX)
    is_same_as = predicate_label == kg2_util.EDGE_LABEL_BIOLINK_SAME_AS
    if not is_same_as:
        return kg2_util.make_edge(subject_id, object_id, curie,
                                  predicate_label, REACTOME_KB_CURIE_ID)
    return kg2_util.make_edge_biolink(subject_id, object_id, predicate_label,
                                      REACTOME_KB_CURIE_ID, None)
Ejemplo n.º 2
0
def format_edge(subject_id, object_id, predicate, update_date):
    """Build a DrugCentral edge from ``subject_id`` to ``object_id``.

    Args:
        subject_id: identifier passed through as the edge subject.
        object_id: identifier passed through as the edge object.
        predicate: predicate label; also used to derive the relation CURIE
            via ``kg2_util.predicate_label_to_curie`` with the DrugCentral
            relation prefix.
        update_date: forwarded unchanged to the edge constructor.

    Returns:
        The result of ``kg2_util.make_edge_biolink`` when ``predicate``
        equals ``kg2_util.EDGE_LABEL_BIOLINK_SAME_AS``, otherwise the result
        of ``kg2_util.make_edge``.
    """
    curie = kg2_util.predicate_label_to_curie(
        predicate, DRUGCENTRAL_RELATION_CURIE_PREFIX)
    if predicate == kg2_util.EDGE_LABEL_BIOLINK_SAME_AS:
        # The "same as" constructor does not take the relation CURIE.
        built_edge = kg2_util.make_edge_biolink(subject_id, object_id,
                                                predicate,
                                                DRUGCENTRAL_SOURCE,
                                                update_date)
    else:
        built_edge = kg2_util.make_edge(subject_id, object_id, curie,
                                        predicate, DRUGCENTRAL_SOURCE,
                                        update_date)
    return built_edge
Ejemplo n.º 3
0
def format_edge(subject_id: str,
                object_id: str,
                predicate_label: str,
                description: str,
                publications: list = None):
    """Build a DrugBank edge and attach optional publication details.

    The edge is created by ``kg2_util.make_edge`` using a relation CURIE
    derived from ``predicate_label`` and the DrugBank relation prefix, with
    ``None`` passed as the trailing argument.

    Args:
        subject_id: identifier passed through as the edge subject.
        object_id: identifier passed through as the edge object.
        predicate_label: predicate label for the edge.
        description: when not ``None``, stored on the edge as
            ``{"sentence": description}`` under ``"publications_info"``.
        publications: when not ``None``, stored on the edge under
            ``"publications"``.

    Returns:
        The edge object returned by ``kg2_util.make_edge``, after the
        optional keys have been set on it.
    """
    curie = kg2_util.predicate_label_to_curie(predicate_label,
                                              DRUGBANK_RELATION_CURIE_PREFIX)
    new_edge = kg2_util.make_edge(subject_id, object_id, curie,
                                  predicate_label, DRUGBANK_KB_CURIE_ID, None)

    # Only overwrite these keys when the caller actually supplied a value;
    # otherwise whatever make_edge put there (if anything) is left alone.
    if description is not None:
        sentence_info = {"sentence": description}
        new_edge["publications_info"] = sentence_info
    if publications is not None:
        new_edge["publications"] = publications
    return new_edge
Ejemplo n.º 4
0
def make_hmdb_edge(subject_id: str, object_id: str, subject_prefix: str,
                   object_prefix: str, predicate_label: str, update_date: str,
                   publications_info: dict):
    """Build an HMDB edge between two prefixed identifiers.

    Args:
        subject_id: local part of the subject identifier; always joined to
            ``subject_prefix`` as ``"<subject_prefix>:<subject_id>"``.
        object_id: object identifier. Used as-is when ``object_prefix`` is
            ``None``; otherwise joined as ``"<object_prefix>:<object_id>"``.
        subject_prefix: prefix for the subject identifier (required).
        object_prefix: prefix for the object identifier, or ``None`` when
            ``object_id`` should be passed through unchanged.
        predicate_label: predicate label; also used to derive the relation
            CURIE with the HMDB CURIE prefix.
        update_date: forwarded unchanged to the edge constructor.
        publications_info: stored on the edge under ``"publications_info"``
            (unconditionally, even if it is ``None``).

    Returns:
        The edge object returned by ``kg2_util.make_edge_biolink`` (when
        ``predicate_label`` equals ``kg2_util.EDGE_LABEL_BIOLINK_SAME_AS``)
        or by ``kg2_util.make_edge`` (all other predicates), with
        ``"publications_info"`` set on it.
    """
    relation_curie = kg2_util.predicate_label_to_curie(predicate_label,
                                                       CURIE_PREFIX_HMDB)
    # Locals are named *_curie rather than ``object`` so the ``object``
    # builtin is not shadowed inside this function.
    subject_curie = subject_prefix + ":" + subject_id
    if object_prefix is None:
        object_curie = object_id
    else:
        object_curie = object_prefix + ":" + object_id

    if predicate_label == kg2_util.EDGE_LABEL_BIOLINK_SAME_AS:
        edge = kg2_util.make_edge_biolink(subject_curie, object_curie,
                                          predicate_label,
                                          HMDB_PROVIDED_BY_CURIE_ID,
                                          update_date)
    else:
        edge = kg2_util.make_edge(subject_curie, object_curie, relation_curie,
                                  predicate_label, HMDB_PROVIDED_BY_CURIE_ID,
                                  update_date)
    edge["publications_info"] = publications_info

    return edge
Ejemplo n.º 5
0
     kg2_util.merge_two_dicts(
         {
             'subject': result_item_list[0],
             'object': result_item_list[2]
         }, result_item_list[1]['data'])
     for result_item_list in edges_result
 ]
 for edge_dict in edges_list:
     del edge_dict['is_defined_by']
     del edge_dict['seed_node_uuid']
     del edge_dict['source_node_uuid']
     del edge_dict['target_node_uuid']
     predicate_label = edge_dict['relation']
     edge_dict['edge_label'] = predicate_label
     del edge_dict['relation']
     relation_curie = kg2_util.predicate_label_to_curie(
         predicate_label, KG1_RELATION_CURIE_PREFIX)
     if relation_curie == 'bioLink:subclass_of':
         relation_curie = kg2_util.CURIE_ID_RDFS_SUBCLASS_OF
     edge_dict['relation'] = relation_curie
     edge_dict['negated'] = False
     publications = edge_dict.get('publications', None)
     if publications is not None and publications != '':
         publications = publications.split(',')
     else:
         publications = []
     edge_dict['publications'] = publications
     edge_dict['update_date'] = None
     provided_by = edge_dict['provided_by']
     if provided_by.startswith('DGIdb;'):
         provided_by = 'DGIdb'
     provided_by_kg2 = KG1_PROVIDED_BY_TO_KG2_PROVIDED_BY_CURIE_IDS.get(
Ejemplo n.º 6
0
    return [{"nodes": nodes, "edges": edges}, update_date]


# Script entry point: load the set of PMIDs from the JSON file given by
# args.kg2PMIDs, then walk every file in args.inputDirectory whose name
# contains ".gz", parse it as XML and collect the nodes produced by
# make_node_and_edges() for each article.
if __name__ == '__main__':
    print("Starting Script:", date())
    args = get_args()
    print("Starting PubMed ID Load:", date())
    # NOTE(review): the handle returned by open() is never explicitly closed.
    pmids = set(json.load(open(args.kg2PMIDs)))
    print("Finishing PubMedID Load:", date(), ",", len(pmids), "PMIDs in KG2")
    pubmed_dir = args.inputDirectory
    # Accumulators shared across all input files.
    nodes = []
    edges = []
    # Initialised to 0 here; its later use is not visible in this portion.
    latest_date = 0
    # The same predicate label / relation CURIE pair is passed to
    # make_node_and_edges() for every article, so compute it once up front.
    mesh_predicate_label = "references"
    mesh_relation_curie = kg2_util.predicate_label_to_curie(
        mesh_predicate_label, kg2_util.CURIE_PREFIX_PMID)
    for filename in os.listdir(pubmed_dir):
        # Substring test: matches ".gz" anywhere in the name, not only as a
        # suffix.
        if ".gz" in filename:
            print("Starting Load of", filename, ":", date())
            # Plain string concatenation: presumably inputDirectory must end
            # with a path separator -- TODO confirm against get_args().
            # NOTE(review): the gzip handle is never explicitly closed.
            xml_file = gzip.open(pubmed_dir + filename)
            data = xmltodict.parse(xml_file.read())
            print("Finished Load of", filename, ":", date())
            # Presumably a list of article records; verify what xmltodict
            # returns when a file holds a single PubmedArticle element.
            articles = data["PubmedArticleSet"]["PubmedArticle"]

            for article in articles:
                # `data` is rebound here from the parsed XML document to the
                # per-article result; `articles` was already extracted above,
                # so the outer iteration is unaffected.
                [data,
                 update_date] = make_node_and_edges(article,
                                                    mesh_predicate_label,
                                                    mesh_relation_curie)
                for node in data["nodes"]:
                    nodes.append(node)