Ejemplo n.º 1
0
 def __init__(self, entities):
     """Build the entity and relation vocabularies and initialise the base class.

     Args:
         entities: Collection of entity identifiers. It is stored as-is in
             ``entity_list`` and each item is mapped to its enumeration
             index in ``entities``.
     """
     graph = KnowledgeGraph()

     # Entity vocabulary: the caller's collection plus an item -> index lookup.
     self.entity_list = entities
     entity_index = {}
     for position, entity in enumerate(entities):
         entity_index[entity] = position
     self.entities = entity_index

     # Relation vocabulary: three fixed names first, then every predicate
     # reported by the graph, in the order the graph returns them.
     relation_names = ['NO_OP', 'DUMMY_RELATION', 'DONE']
     relation_names.extend(
         record["predicate"] for record in graph.get_predicates())
     self.relation_list = relation_names
     self.relations = dict(
         (name, position) for position, name in enumerate(relation_names))

     super().__init__(graph, len(self.entities), len(self.relations),
                      {"id": "neoenv"})
Ejemplo n.º 2
0
import pandas as pd
from reasoner.knowledge_graph.KnowledgeGraph import KnowledgeGraph
from reasoner.knowledge_graph.SemmedDbTools import SemmedDbTools

sdb_tools = SemmedDbTools()
kg = KnowledgeGraph()

# Maps a semantic-type abbreviation (one element of a term's comma-separated
# 'semtype' field) to the broader label that is added alongside it.
# NOTE(review): the keys look like UMLS semantic-type abbreviations - confirm.
sem2type = {
    'dsyn': 'Disease',
    'neop': 'Disease',
    'fndg': 'Disease',
    'sosy': 'Symptom',
    'tisu': 'Tissue',
    'bpoc': 'Tissue',
    'blor': 'Tissue',
    'cell': 'Cell',
    'moft': 'Pathway',
    'celf': 'Pathway'
}

# Register every term with its raw semantic types plus any broader labels
# those types map to.
terms = sdb_tools.get_terms()
for term in terms:
    semtypes = set(term['semtype'].split(','))
    # Look each abbreviation up individually. Indexing sem2type with the whole
    # comma-separated field raises KeyError as soon as a term carries more
    # than one semantic type.
    mapped_labels = {sem2type[st] for st in semtypes if st in sem2type}
    semtypes |= mapped_labels
    kg.add_umls_term("UMLS:" + term['cui'], term['name'], semtypes)

triples = sdb_tools.get_triples()
for triple in triples:
Ejemplo n.º 3
0
from reasoner.knowledge_graph.ChemblTools import ChemblTools
from reasoner.knowledge_graph.KnowledgeGraph import KnowledgeGraph
from reasoner.knowledge_graph.umls.UmlsQuery import UmlsQuery

kg = KnowledgeGraph()
uq = UmlsQuery()
ct = ChemblTools()

# For every drug id in the graph, fetch its indications (queried without the
# 'CHEMBL:' prefix) and add an indication relation for each one whose MeSH id
# yields at least one result from mesh2cui.
for chembl_id in kg.get_drug_chembl_ids():
    bare_id = chembl_id.replace('CHEMBL:', '')
    for indication in ct.get_indication(bare_id):
        matches = uq.mesh2cui(indication['mesh_id'])
        if not matches:
            continue
        # Only the first match is used.
        disease_cui = 'UMLS:' + matches[0]['cui']
        kg.add_indication_relation(chembl_id, disease_cui)
Ejemplo n.º 4
0
map_file = "../data/knowledge_graph/id_maps/umls2uberon.csv"
id_map_df = pandas.read_csv(map_file)
# 'uberon_id' -> 'umls_id' lookup; if an id occurs twice the later row wins.
id_map = {
    row['uberon_id']: row['umls_id']
    for _, row in id_map_df.iterrows()
}

## load ontology
owlready2.onto_path.append("/data/owlready")
onto = owlready2.get_ontology(
    "http://purl.obolibrary.org/obo/uberon.owl").load()
obo = onto.get_namespace("http://purl.obolibrary.org/obo/")

# Work on UBERON_0001062 together with all of its descendants.
root_class = obo.UBERON_0001062
ontology_classes = root_class.descendants()
ontology_classes.add(root_class)

kg = KnowledgeGraph()
uq = UmlsQuery()

# add terms
for current_class in ontology_classes:
    current_id = current_class.name.replace('_', ':')
    if current_id in id_map:
        cui = id_map[current_id]
        umls_result = uq.cui2bestname(cui)
        if umls_result:
            name = umls_result[0]['name']
        else:
            name = current_class.label
    else:
        name = current_class.label
        cui = None
Ejemplo n.º 5
0
import pandas as pd
from reasoner.knowledge_graph.ChemblTools import ChemblTools
from reasoner.knowledge_graph.KnowledgeGraph import KnowledgeGraph
from reasoner.knowledge_graph.umls.UmlsQuery import UmlsQuery

# Input table; only its 'Condition' column is read here.
cop_file = '../data/cop/cop_list_full.csv'
# NOTE(review): presumably where disease_data is written - the write is not
# visible in this part of the script.
outfile = '../data/knowledge_graph/ready_to_load/diseases.csv'

cop = pd.read_csv(cop_file)
# Distinct values of the 'Condition' column; each one is passed to
# uq.meshterm2cui in the loop that follows.
unique_diseases = cop.Condition.unique()

kg = KnowledgeGraph()
uq = UmlsQuery()
ct = ChemblTools()

# MeSH ids already handled, so each one is recorded at most once.
added = set()
# Accumulator for the resolved diseases, one row each.
disease_data = pd.DataFrame(columns=["cui", "name", "hpo_id", "mesh_id"])
for term in unique_diseases:
    result = uq.meshterm2cui(term)
    if result and result[0]['mesh_id'] not in added:
        cui = result[0]['cui']
        mesh_id = result[0]['mesh_id']
        name = uq.cui2bestname(cui)[0]['name']
        hpo_result = uq.cui2hpo(cui)
        if hpo_result:
            hpo_id = hpo_result[0]['hpo_id']
        else:
            hpo_id = ''
        added.add(result[0]['mesh_id'])
        disease_data = disease_data.append(
            {
Ejemplo n.º 6
0
import pandas
from reasoner.knowledge_graph.ChemblTools import ChemblTools
from reasoner.knowledge_graph.KnowledgeGraph import KnowledgeGraph

# Drug id table; the loop below reads its 'cui', 'chembl_id', 'chebi_id'
# and 'drugbank_id' columns row by row.
cop_drugs_file = '../data/neo4j/cop_chembl_ids_curated.csv'
cop_drugs = pandas.read_csv(cop_drugs_file)

# ct supplies targets per ChEMBL id; kg receives the drugs.
ct = ChemblTools()
kg = KnowledgeGraph()

for index, row in cop_drugs.iterrows():
    cui = row['cui']
    if not pandas.isnull(row['chembl_id']):
        chembl_id = row['chembl_id']
    else:
        chembl_id = None
    if not pandas.isnull(row['chebi_id']):
        chebi_id = row['chebi_id']
    else:
        chebi_id = None
    if not pandas.isnull(row['drugbank_id']):
        drugbank_id = row['drugbank_id']
    else:
        drugbank_id = None
    # print(cui, chembl_id, chebi_id, drugbank_id)
    kg.add_drug(cui, chembl_id, chebi_id, drugbank_id)
    
    # add targets
    if chembl_id is not None:
        targets = ct.get_targets(chembl_id)
    for target in targets:
Ejemplo n.º 7
0
from reasoner.knowledge_graph.KnowledgeGraph import KnowledgeGraph
from reasoner.knowledge_graph.umls.UmlsQuery import UmlsQuery

uq = UmlsQuery()
kg = KnowledgeGraph()

# Add one disease -> finding-site relation per row returned by the query.
for site in uq.get_snomed_finding_sites():
    disease_cui = site['disease_cui']
    location_cui = site['location_cui']
    kg.add_disease_finding_site_relation(disease_cui, location_cui)
Ejemplo n.º 8
0
id_map_df = pandas.read_csv(map_file)
# 'symp_id' -> 'umls_id' lookup; if an id occurs twice the later row wins.
id_map = {
    row['symp_id']: row['umls_id']
    for _, row in id_map_df.iterrows()
}

## load ontology
owlready2.onto_path.append("/data/owlready")
onto = owlready2.get_ontology(
    "http://purl.obolibrary.org/obo/symp.owl").load()
obo = onto.get_namespace("http://purl.obolibrary.org/obo/")

###
# Work on SYMP_0000462 together with all of its descendants.
root_class = obo.SYMP_0000462
ontology_classes = root_class.descendants()
ontology_classes.add(root_class)

kg = KnowledgeGraph()
uq = UmlsQuery()

# add terms
for current_class in ontology_classes:
    current_id = current_class.name.replace('_', ':')
    if current_id in id_map:
        cui = id_map[current_id]
        umls_result = uq.cui2bestname(cui)
        if umls_result:
            name = umls_result[0]['name']
        else:
            name = current_class.label
    else:
        name = current_class.label
        cui = None
Ejemplo n.º 9
0
    id_map[row['chebi_id']] = 'UMLS:' + row['cui']

## load ontology
owlready2.onto_path.append("/data/owlready")
onto = owlready2.get_ontology(
    "file:////home/mwawer/src/reasoner/data/knowledge_graph/primary/chebi.owl"
).load()
obo = onto.get_namespace("http://purl.obolibrary.org/obo/")

###
# Two branches are loaded: CHEBI_24431 (chemical entity) and CHEBI_50906
# (role), each together with all of its descendants.
chemical_entity = obo.CHEBI_24431
role = obo.CHEBI_50906
ontology_classes = chemical_entity.descendants() | role.descendants()
ontology_classes.update((chemical_entity, role))

kg = KnowledgeGraph()
uq = UmlsQuery()

# add terms
for current_class in ontology_classes:
    current_id = current_class.name.replace('_', ':')
    if current_id in id_map:
        cui = id_map[current_id]
        umls_result = uq.cui2bestname(cui)
        if umls_result:
            name = umls_result[0]['name']
        else:
            name = current_class.label
    else:
        name = current_class.label
        cui = None
Ejemplo n.º 10
0
from reasoner.knowledge_graph.KnowledgeGraph import KnowledgeGraph

outfile = '../data/neo4j/all_chebi_terms.txt'

kg = KnowledgeGraph()
chebi_terms = [entry['chebi_id'] for entry in kg.get_chebi_terms()]

# Write one ChEBI id per line.
with open(outfile, 'w') as handle:
    handle.writelines("%s\n" % term for term in chebi_terms)
Ejemplo n.º 11
0
import pandas as pd
from reasoner.knowledge_graph.KnowledgeGraph import KnowledgeGraph

pathway_file = '../data/knowledge_graph/ready_to_load/pathways.csv'
protein2pathway_file = '../data/knowledge_graph/ready_to_load/protein_to_pathways.csv'

kg = KnowledgeGraph()

# Pathway nodes. Missing cells are replaced by '' before being passed on.
pathways = pd.read_csv(pathway_file).fillna('')
for _, pathway in pathways.iterrows():
    kg.add_pathway(pathway['go_id'], pathway['name'], pathway['cui'])

# Protein -> pathway relations. Rows whose 'db' is not 'UniProtKB' are ignored.
protein2pathway = pd.read_csv(protein2pathway_file).fillna('')
for _, link in protein2pathway.iterrows():
    if link['db'] != 'UniProtKB':
        continue
    kg.add_protein_pathway_relation(link['uniprot_id'], link['go_id'],
                                    link['evidence_code'])
Ejemplo n.º 12
0
from reasoner.knowledge_graph.KnowledgeGraph import KnowledgeGraph
from reasoner.knowledge_graph.umls.UmlsQuery import UmlsQuery

kg = KnowledgeGraph()
uq = UmlsQuery()

# Attach every semantic type reported for a CUI to that CUI in the graph.
for cui in kg.get_cuis():
    for record in uq.get_semtype(cui):
        # 'type_name' is decoded before being stored.
        type_name = record['type_name'].decode()
        kg.set_semtype(cui, type_name)
Ejemplo n.º 13
0
import csv
from reasoner.knowledge_graph.KnowledgeGraph import KnowledgeGraph

outfile = 'data/knowledge_graph/export/edgelist.txt'

kg = KnowledgeGraph()
result = kg.get_edgelist()

# One [start, end] pair per edge record.
edgelist = [[record['start'], record['end']] for record in result]

# Write the pairs tab-separated, one edge per row. newline='' lets the csv
# module control line endings itself; without it the writer's '\r\n'
# terminator is translated a second time on platforms whose native line
# ending is not '\n', which shows up as an extra '\r' / blank rows.
with open(outfile, 'w', newline='') as f:
    writer = csv.writer(f, delimiter='\t')
    writer.writerows(edgelist)
Ejemplo n.º 14
0
from reasoner.knowledge_graph.KnowledgeGraph import KnowledgeGraph
from reasoner.knowledge_graph.ChemblTools import ChemblTools

kg = KnowledgeGraph()
chembl = ChemblTools()

# For each drug id in the graph, add a drug -> protein relation for every
# target whose component type is 'PROTEIN' and whose source is 'SWISS-PROT',
# carrying the target's activity value, type and unit.
drugs = kg.get_drug_chembl_ids()
for chembl_id in drugs:
    targets = chembl.get_targets(chembl_id)
    for target in targets:
        if target['component_type'] == 'PROTEIN' and target['db_source'] == 'SWISS-PROT':
            relation = dict(
                drug_chembl_id='CHEMBL:' + target['chembl_id'],
                target_type='Protein',
                target_id_type='uniprot_id',
                target_id='UNIPROT:' + target['accession'],
                activity_value=float(target['standard_value']),
                activity_type=target['standard_type'],
                activity_unit=target['standard_units'],
            )
            kg.add_drug_target_relation(**relation)
        # NOTE(review): `trials` is never defined in the visible script, so
        # the next line raises NameError on the first target. These two lines
        # look like they belong to a different script - confirm and remove.
        trials = trials + 1
        continue

# ontology_classes = obo.HP_0031797.descendants()
# ontology_classes = obo.HP_0012823.descendants()
# ontology_classes = obo.HP_0040279.descendants()
# ontology_classes = obo.HP_0000005.descendants()
# ontology_classes = obo.UPHENO_0001001.descendants()
# ontology_classes.add(obo.HP_0000001)
# ontology_classes.add(obo.HP_0031797)
# ontology_classes.add(obo.HP_0012823)
# ontology_classes.add(obo.HP_0040279)
# ontology_classes.add(obo.HP_0000005)
# ontology_classes.add(obo.UPHENO_0001001)

kg = KnowledgeGraph()
uq = UmlsQuery()

# add terms
for current_class in ontology_classes:
    # Ontology class names use '_' where the stored ids use ':'.
    current_id = current_class.name.replace('_', ':')
    matches = uq.hpo2cui(current_id)
    if not matches:
        # No result from hpo2cui: fall back to the ontology label, no CUI.
        kg.add_hpo_term(current_id, current_class.label, None)
        continue
    # Only the first match is used.
    best = matches[0]
    kg.add_hpo_term(current_id, best['name'], 'UMLS:' + best['cui'])

# add relations
Ejemplo n.º 16
0
import pandas as pd
from reasoner.knowledge_graph.KnowledgeGraph import KnowledgeGraph

disease_file = '../data/knowledge_graph/ready_to_load/diseases.csv'
# Missing cells become '' so that an absent HPO id can be detected below.
disease = pd.read_csv(disease_file).fillna('')

kg = KnowledgeGraph()
for _, row in disease.iterrows():
    # The HPO id is passed as a fourth argument only when the row has one.
    disease_args = [row['cui'], row['name'], row['mesh_id']]
    if row['hpo_id'] != '':
        disease_args.append(row['hpo_id'])
    kg.add_disease(*disease_args)
Ejemplo n.º 17
0
np.random.seed(439572)


def get_preferred_label(labels):
    """Return the single label used to prefix a node's term name.

    'Drug' takes precedence over 'Disease'; if ``labels`` contains neither,
    'ChebiTerm' is returned.
    """
    for candidate in ('Drug', 'Disease'):
        if candidate in labels:
            return candidate
    return "ChebiTerm"


# NOTE(review): presumably the output directory; it is not used in the
# visible part of the script.
outfolder = 'translator_test_1'

kg = KnowledgeGraph()

# Match closed paths Drug -[HAS_ROLE]-> ChebiTerm -- Disease -- (same Drug)
# and return every relationship on each path with its start and end nodes.
result = kg.query("""
         MATCH path = (dr:Drug)-[:HAS_ROLE]->(t:ChebiTerm)--(dis:Disease)--(dr)
         UNWIND relationships(path) as r
         RETURN startNode(r) as start, r, endNode(r) as end
         """)

# Accumulators filled by the loop over `result` that follows.
graph_triples = []
target_triples = []
for record in result:
    start_term = get_preferred_label(record['start'].labels) + '_' + re.sub(
        r'[ ,\'-]', "", record['start']['name'])
    end_term = get_preferred_label(record['end'].labels) + '_' + re.sub(
        r'[ ,\'-]', "", record['end']['name'])
    relation = record['r'].type
Ejemplo n.º 18
0
from reasoner.knowledge_graph.KnowledgeGraph import KnowledgeGraph

import csv

gene_file = '../data/knowledge_graph/ready_to_load/hgnc_genes_proteins.csv'

kg = KnowledgeGraph()
# Columns are read by position: 0 = HGNC id, 1 = HGNC symbol, 2 = name,
# 3 = Entrez id, 4 = UniProt id.
# The file is parsed with the csv module instead of splitting on ',' so that
# a quoted field with an embedded comma (e.g. in a name) does not shift the
# following columns. newline='' is what the csv module expects of its input.
with open(gene_file, newline='') as f:
    reader = csv.reader(f)
    next(reader, None)  # skip the header row
    for items in reader:
        if not items:
            # csv yields an empty list for a blank line; nothing to load.
            continue
        kg.add_gene(hgnc_id=items[0], hgnc_symbol=items[1], entrez_id=items[3], name=items[2])
        kg.add_protein(uniprot_id=items[4], name=items[2])
        kg.add_gene_product_relation(hgnc_id=items[0], uniprot_id=items[4])
Ejemplo n.º 19
0
owlready2.onto_path.append("/data/owlready")
onto = owlready2.get_ontology(
    "http://purl.obolibrary.org/obo/cl.owl").load()
obo = onto.get_namespace("http://purl.obolibrary.org/obo/")


###
# Union of the descendants of three branch classes; CL_0000000 is then
# added explicitly.
branch_roots = (obo.CL_0000003, obo.CL_0001034, obo.CL_0001061)
ontology_classes = branch_roots[0].descendants()
for branch_root in branch_roots[1:]:
    ontology_classes |= branch_root.descendants()
ontology_classes.add(obo.CL_0000000)

kg = KnowledgeGraph()
uq = UmlsQuery()

# add terms
for current_class in ontology_classes:
    current_id = current_class.name.replace('_', ':')
    if current_id in id_map:
        cui = id_map[current_id]
        umls_result = uq.cui2bestname(cui)
        if umls_result:
            name = umls_result[0]['name']
        else:
            name = current_class.label
    else:
        name = current_class.label
        cui = None