def __init__(self, entities):
    """Build the entity and relation index tables, then initialise the parent.

    Args:
        entities: Iterable of entity identifiers. It is stored unchanged as
            ``self.entity_list``; each item's position in iteration order
            becomes its index in ``self.entities``.
    """
    graph = KnowledgeGraph()

    self.entity_list = entities
    self.entities = {
        entity: position for position, entity in enumerate(entities)
    }

    # Three fixed relation names always come first, followed by every
    # predicate reported by the graph.
    relation_names = ['NO_OP', 'DUMMY_RELATION', 'DONE']
    relation_names.extend(
        record["predicate"] for record in graph.get_predicates()
    )
    self.relation_list = relation_names
    self.relations = {
        relation: position
        for position, relation in enumerate(self.relation_list)
    }

    super().__init__(
        graph,
        len(self.entities),
        len(self.relations),
        {"id": "neoenv"},
    )
import pandas as pd from reasoner.knowledge_graph.KnowledgeGraph import KnowledgeGraph from reasoner.knowledge_graph.SemmedDbTools import SemmedDbTools sdb_tools = SemmedDbTools() kg = KnowledgeGraph() sem2type = { 'dsyn': 'Disease', 'neop': 'Disease', 'fndg': 'Disease', 'sosy': 'Symptom', 'tisu': 'Tissue', 'bpoc': 'Tissue', 'blor': 'Tissue', 'cell': 'Cell', 'moft': 'Pathway', 'celf': 'Pathway' } terms = sdb_tools.get_terms() for term in terms: semtypes = set(term['semtype'].split(',')) st_raw = semtypes.copy() for st in st_raw: if st in sem2type: semtypes.add(sem2type[term['semtype']]) kg.add_umls_term("UMLS:" + term['cui'], term['name'], semtypes) triples = sdb_tools.get_triples() for triple in triples:
from reasoner.knowledge_graph.ChemblTools import ChemblTools
from reasoner.knowledge_graph.KnowledgeGraph import KnowledgeGraph
from reasoner.knowledge_graph.umls.UmlsQuery import UmlsQuery

# For every drug in the graph, fetch its indications and link the drug to
# the UMLS concept found for each indication's MeSH id.

kg = KnowledgeGraph()
uq = UmlsQuery()
ct = ChemblTools()

for chembl_id in kg.get_drug_chembl_ids():
    # The indication lookup is called with the 'CHEMBL:' prefix removed;
    # the relation itself is stored under the original id.
    bare_id = chembl_id.replace('CHEMBL:', '')
    for row in ct.get_indication(bare_id):
        result = uq.mesh2cui(row['mesh_id'])
        if not result:
            # No UMLS concept for this MeSH id: nothing to link.
            continue
        kg.add_indication_relation(chembl_id, 'UMLS:' + result[0]['cui'])
map_file = "../data/knowledge_graph/id_maps/umls2uberon.csv"
id_map_df = pandas.read_csv(map_file)

# Lookup table built from the mapping file: 'uberon_id' -> 'umls_id'.
id_map = {
    row['uberon_id']: row['umls_id'] for _, row in id_map_df.iterrows()
}

# Load the ontology and collect the classes to import.
owlready2.onto_path.append("/data/owlready")
onto = owlready2.get_ontology("http://purl.obolibrary.org/obo/uberon.owl")
onto.load()
obo = onto.get_namespace("http://purl.obolibrary.org/obo/")

ontology_classes = obo.UBERON_0001062.descendants()
ontology_classes.add(obo.UBERON_0001062)

kg = KnowledgeGraph()
uq = UmlsQuery()

# Resolve an id, a display name and (where mapped) a CUI for every class.
for current_class in ontology_classes:
    current_id = current_class.name.replace('_', ':')
    if current_id not in id_map:
        # Unmapped class: fall back to the ontology label, no CUI.
        name = current_class.label
        cui = None
    else:
        cui = id_map[current_id]
        umls_result = uq.cui2bestname(cui)
        # Prefer the UMLS name; use the ontology label when UMLS has none.
        name = umls_result[0]['name'] if umls_result else current_class.label
import pandas as pd from reasoner.knowledge_graph.ChemblTools import ChemblTools from reasoner.knowledge_graph.KnowledgeGraph import KnowledgeGraph from reasoner.knowledge_graph.umls.UmlsQuery import UmlsQuery cop_file = '../data/cop/cop_list_full.csv' outfile = '../data/knowledge_graph/ready_to_load/diseases.csv' cop = pd.read_csv(cop_file) unique_diseases = cop.Condition.unique() kg = KnowledgeGraph() uq = UmlsQuery() ct = ChemblTools() added = set() disease_data = pd.DataFrame(columns=["cui", "name", "hpo_id", "mesh_id"]) for term in unique_diseases: result = uq.meshterm2cui(term) if result and result[0]['mesh_id'] not in added: cui = result[0]['cui'] mesh_id = result[0]['mesh_id'] name = uq.cui2bestname(cui)[0]['name'] hpo_result = uq.cui2hpo(cui) if hpo_result: hpo_id = hpo_result[0]['hpo_id'] else: hpo_id = '' added.add(result[0]['mesh_id']) disease_data = disease_data.append( {
import pandas from reasoner.knowledge_graph.ChemblTools import ChemblTools from reasoner.knowledge_graph.KnowledgeGraph import KnowledgeGraph cop_drugs_file = '../data/neo4j/cop_chembl_ids_curated.csv' cop_drugs = pandas.read_csv(cop_drugs_file) ct = ChemblTools() kg = KnowledgeGraph() for index, row in cop_drugs.iterrows(): cui = row['cui'] if not pandas.isnull(row['chembl_id']): chembl_id = row['chembl_id'] else: chembl_id = None if not pandas.isnull(row['chebi_id']): chebi_id = row['chebi_id'] else: chebi_id = None if not pandas.isnull(row['drugbank_id']): drugbank_id = row['drugbank_id'] else: drugbank_id = None # print(cui, chembl_id, chebi_id, drugbank_id) kg.add_drug(cui, chembl_id, chebi_id, drugbank_id) # add targets if chembl_id is not None: targets = ct.get_targets(chembl_id) for target in targets:
from reasoner.knowledge_graph.KnowledgeGraph import KnowledgeGraph
from reasoner.knowledge_graph.umls.UmlsQuery import UmlsQuery

# Copy every SNOMED finding-site pair returned by the UMLS query into the
# graph as a disease -> location relation.

uq = UmlsQuery()
kg = KnowledgeGraph()

for row in uq.get_snomed_finding_sites():
    disease_cui = row['disease_cui']
    location_cui = row['location_cui']
    kg.add_disease_finding_site_relation(disease_cui, location_cui)
id_map_df = pandas.read_csv(map_file)

# Lookup table built from the mapping file: 'symp_id' -> 'umls_id'.
id_map = {
    row['symp_id']: row['umls_id'] for _, row in id_map_df.iterrows()
}

# Load the ontology and collect the classes to import.
owlready2.onto_path.append("/data/owlready")
onto = owlready2.get_ontology("http://purl.obolibrary.org/obo/symp.owl")
onto.load()
obo = onto.get_namespace("http://purl.obolibrary.org/obo/")

ontology_classes = obo.SYMP_0000462.descendants()
ontology_classes.add(obo.SYMP_0000462)

kg = KnowledgeGraph()
uq = UmlsQuery()

# Resolve an id, a display name and (where mapped) a CUI for every class.
for current_class in ontology_classes:
    current_id = current_class.name.replace('_', ':')
    if current_id not in id_map:
        # Unmapped class: fall back to the ontology label, no CUI.
        name = current_class.label
        cui = None
    else:
        cui = id_map[current_id]
        umls_result = uq.cui2bestname(cui)
        # Prefer the UMLS name; use the ontology label when UMLS has none.
        name = umls_result[0]['name'] if umls_result else current_class.label
id_map[row['chebi_id']] = 'UMLS:' + row['cui'] ## load ontology owlready2.onto_path.append("/data/owlready") onto = owlready2.get_ontology( "file:////home/mwawer/src/reasoner/data/knowledge_graph/primary/chebi.owl") onto.load() obo = onto.get_namespace("http://purl.obolibrary.org/obo/") ### ontology_classes = obo.CHEBI_24431.descendants() # chemical entity ontology_classes |= obo.CHEBI_50906.descendants() # role ontology_classes.add(obo.CHEBI_24431) ontology_classes.add(obo.CHEBI_50906) kg = KnowledgeGraph() uq = UmlsQuery() # add terms for current_class in ontology_classes: current_id = current_class.name.replace('_', ':') if current_id in id_map: cui = id_map[current_id] umls_result = uq.cui2bestname(cui) if umls_result: name = umls_result[0]['name'] else: name = current_class.label else: name = current_class.label cui = None
from reasoner.knowledge_graph.KnowledgeGraph import KnowledgeGraph

# Dump the 'chebi_id' of every ChEBI term in the graph, one id per line.

outfile = '../data/neo4j/all_chebi_terms.txt'

kg = KnowledgeGraph()

chebi_terms = [record['chebi_id'] for record in kg.get_chebi_terms()]

with open(outfile, 'w') as f:
    f.writelines("%s\n" % term for term in chebi_terms)
import pandas as pd

from reasoner.knowledge_graph.KnowledgeGraph import KnowledgeGraph

# Load pathway nodes, then the protein -> pathway relations, from the
# prepared CSV files.

pathway_file = '../data/knowledge_graph/ready_to_load/pathways.csv'
protein2pathway_file = '../data/knowledge_graph/ready_to_load/protein_to_pathways.csv'

kg = KnowledgeGraph()

# Missing cells are replaced by empty strings before loading.
pathways = pd.read_csv(pathway_file).fillna('')
for _, pathway in pathways.iterrows():
    kg.add_pathway(pathway['go_id'], pathway['name'], pathway['cui'])

protein2pathway = pd.read_csv(protein2pathway_file).fillna('')
for _, annotation in protein2pathway.iterrows():
    # Only rows whose 'db' column is 'UniProtKB' are loaded.
    if annotation['db'] != 'UniProtKB':
        continue
    kg.add_protein_pathway_relation(
        annotation['uniprot_id'],
        annotation['go_id'],
        annotation['evidence_code'],
    )
from reasoner.knowledge_graph.KnowledgeGraph import KnowledgeGraph
from reasoner.knowledge_graph.umls.UmlsQuery import UmlsQuery

# Annotate every CUI in the graph with the semantic types UMLS reports
# for it.

kg = KnowledgeGraph()
uq = UmlsQuery()

for cui in kg.get_cuis():
    for record in uq.get_semtype(cui):
        # 'type_name' is decoded before being stored (presumably it arrives
        # as bytes - confirm against UmlsQuery).
        type_name = record['type_name'].decode()
        kg.set_semtype(cui, type_name)
import csv

from reasoner.knowledge_graph.KnowledgeGraph import KnowledgeGraph

# Export the graph's edge list as a tab-separated file with one
# "start<TAB>end" row per edge.

outfile = 'data/knowledge_graph/export/edgelist.txt'

kg = KnowledgeGraph()

result = kg.get_edgelist()
edgelist = [[record['start'], record['end']] for record in result]

# newline='' is required for files handed to csv.writer: the writer emits
# its own line terminator, and without it platforms that translate
# newlines would write '\r\r\n' at the end of every row.
with open(outfile, 'w', newline='') as f:
    writer = csv.writer(f, delimiter='\t')
    writer.writerows(edgelist)
from reasoner.knowledge_graph.KnowledgeGraph import KnowledgeGraph
from reasoner.knowledge_graph.ChemblTools import ChemblTools

# For every drug in the graph, fetch its targets and store a drug -> protein
# relation, with activity data, for each SWISS-PROT protein target.

kg = KnowledgeGraph()
chembl = ChemblTools()

drugs = kg.get_drug_chembl_ids()
for chembl_id in drugs:
    for target in chembl.get_targets(chembl_id):
        # Keep only protein components sourced from SWISS-PROT.
        if (target['component_type'] != 'PROTEIN'
                or target['db_source'] != 'SWISS-PROT'):
            continue

        relation = {
            'drug_chembl_id': 'CHEMBL:' + target['chembl_id'],
            'target_type': 'Protein',
            'target_id_type': 'uniprot_id',
            'target_id': 'UNIPROT:' + target['accession'],
            'activity_value': float(target['standard_value']),
            'activity_type': target['standard_type'],
            'activity_unit': target['standard_units'],
        }
        kg.add_drug_target_relation(**relation)
trials = trials + 1 continue # ontology_classes = obo.HP_0031797.descendants() # ontology_classes = obo.HP_0012823.descendants() # ontology_classes = obo.HP_0040279.descendants() # ontology_classes = obo.HP_0000005.descendants() # ontology_classes = obo.UPHENO_0001001.descendants() # ontology_classes.add(obo.HP_0000001) # ontology_classes.add(obo.HP_0031797) # ontology_classes.add(obo.HP_0012823) # ontology_classes.add(obo.HP_0040279) # ontology_classes.add(obo.HP_0000005) # ontology_classes.add(obo.UPHENO_0001001) kg = KnowledgeGraph() uq = UmlsQuery() # add terms for current_class in ontology_classes: current_id = current_class.name.replace('_', ':') umls_result = uq.hpo2cui(current_id) if umls_result: name = umls_result[0]['name'] cui = 'UMLS:' + umls_result[0]['cui'] else: name = current_class.label cui = None kg.add_hpo_term(current_id, name, cui) # add relations
import pandas as pd

from reasoner.knowledge_graph.KnowledgeGraph import KnowledgeGraph

# Load the prepared disease table into the graph.

disease_file = '../data/knowledge_graph/ready_to_load/diseases.csv'

# Missing cells become empty strings, so "no HPO id" is the value ''.
disease = pd.read_csv(disease_file).fillna('')

kg = KnowledgeGraph()

for _, entry in disease.iterrows():
    has_hpo_id = entry['hpo_id'] != ''
    arguments = [entry['cui'], entry['name'], entry['mesh_id']]
    if has_hpo_id:
        # The HPO id is passed only when present; otherwise add_disease is
        # called with three arguments.
        arguments.append(entry['hpo_id'])
    kg.add_disease(*arguments)
np.random.seed(439572)


def get_preferred_label(labels):
    """Pick the single label used to prefix a node's term name.

    'Drug' wins over 'Disease'; when neither is present in *labels* the
    result is 'ChebiTerm'.
    """
    for candidate in ('Drug', 'Disease'):
        if candidate in labels:
            return candidate
    return "ChebiTerm"


def _term_for(node):
    """Return '<preferred label>_<name>' with the name's spaces, commas,
    apostrophes and hyphens removed."""
    return get_preferred_label(node.labels) + '_' + re.sub(
        r'[ ,\'-]', "", node['name'])


outfolder = 'translator_test_1'

kg = KnowledgeGraph()

# One row per relationship on every matched Drug/ChebiTerm/Disease path.
result = kg.query(""" MATCH path = (dr:Drug)-[:HAS_ROLE]->(t:ChebiTerm)--(dis:Disease)--(dr) UNWIND relationships(path) as r RETURN startNode(r) as start, r, endNode(r) as end """)

graph_triples = []
target_triples = []
for record in result:
    start_term = _term_for(record['start'])
    end_term = _term_for(record['end'])
    relation = record['r'].type
import csv

from reasoner.knowledge_graph.KnowledgeGraph import KnowledgeGraph

# Load genes, their proteins and the gene -> protein relation from the
# prepared HGNC CSV file.
#
# Columns are read by position: 0 = HGNC id, 1 = HGNC symbol, 2 = name,
# 3 = Entrez id, 4 = UniProt id.

gene_file = '../data/knowledge_graph/ready_to_load/hgnc_genes_proteins.csv'

kg = KnowledgeGraph()

# csv.reader replaces the manual split(','): a quoted field that contains
# a comma would otherwise shift every later column. newline='' lets the
# csv module handle line endings itself.
with open(gene_file, newline='') as f:
    reader = csv.reader(f)
    next(reader, None)  # skip the header row
    for items in reader:
        if not items:
            # Blank line: nothing to load.
            continue
        kg.add_gene(hgnc_id=items[0],
                    hgnc_symbol=items[1],
                    entrez_id=items[3],
                    name=items[2])
        kg.add_protein(uniprot_id=items[4], name=items[2])
        kg.add_gene_product_relation(hgnc_id=items[0], uniprot_id=items[4])
owlready2.onto_path.append("/data/owlready")
onto = owlready2.get_ontology("http://purl.obolibrary.org/obo/cl.owl")
onto.load()
obo = onto.get_namespace("http://purl.obolibrary.org/obo/")

# Union of the descendants of three branches, plus the CL_0000000 class
# itself.
ontology_classes = (
    obo.CL_0000003.descendants()
    | obo.CL_0001034.descendants()
    | obo.CL_0001061.descendants()
)
ontology_classes.add(obo.CL_0000000)

kg = KnowledgeGraph()
uq = UmlsQuery()

# Resolve an id, a display name and (where mapped) a CUI for every class.
for current_class in ontology_classes:
    current_id = current_class.name.replace('_', ':')
    if current_id not in id_map:
        # Unmapped class: fall back to the ontology label, no CUI.
        name = current_class.label
        cui = None
    else:
        cui = id_map[current_id]
        umls_result = uq.cui2bestname(cui)
        # Prefer the UMLS name; use the ontology label when UMLS has none.
        name = umls_result[0]['name'] if umls_result else current_class.label