def main(path, output, model):
    if model is not None:
        bmt.load(model)

    t = JsonTransformer()
    t.parse(path)

    def curie_to_label(curie: str):
        """
        Uses the Biolink Model Toolkit to look up an element (on the tree
        rooted at `named thing` and `related to`) for a given curie. If none
        can be found, returns the original curie.
        """
        if isinstance(curie, (list, tuple, set)):
            return [curie_to_label(c) for c in curie]
        elif isinstance(curie, str):
            e = bmt.get_by_mapping(curie)
            return e if e is not None else curie
        else:
            return None

    # Relabel node categories and edge predicates from CURIEs to Biolink element names
    for n, attr in t.graph.nodes(data=True):
        attr['category'] = curie_to_label(attr.get('category'))

    for s, o, attr in t.graph.edges(data=True):
        attr['predicate'] = curie_to_label(attr.get('predicate'))

    t.save(output)
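# The main() above assumes module-level access to `bmt` and kgx's JsonTransformer
# and is presumably wired to a command-line entry point elsewhere. A minimal
# sketch of such a wrapper, assuming a click-based CLI; the `cli` function and
# its option names are illustrative, not the original entry point.
import click
import bmt
from kgx import JsonTransformer

@click.command()
@click.argument('path')
@click.argument('output')
@click.option('--model', default=None, help='Optional Biolink Model YAML to load via bmt.load()')
def cli(path, output, model):
    main(path, output, model)

if __name__ == '__main__':
    cli()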
def test_json_save():
    t = JsonTransformer()
    t.parse(os.path.join(resource_dir, 'valid.json'))
    assert t.graph.number_of_nodes() == 6
    assert t.graph.number_of_edges() == 5
    t.save(os.path.join(target_dir, 'graph.json'))
    assert os.path.exists(os.path.join(target_dir, 'graph.json'))
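# The tests in this section reference module-level `resource_dir` and `target_dir`
# paths plus standard imports that are not shown. A minimal setup sketch; the
# directory layout below is an assumption, not taken from the original test suite.
import os
from kgx import JsonTransformer

resource_dir = os.path.join(os.path.dirname(__file__), 'resources')
target_dir = os.path.join(os.path.dirname(__file__), 'target')
os.makedirs(target_dir, exist_ok=True)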
def main(path, output, model):
    if model is not None:
        bmt.load(model)
    # Parse a JSON graph and re-save it as CSV via PandasTransformer
    t = JsonTransformer()
    t.parse(path)
    t = PandasTransformer(t.graph)
    t.save(output)
def test_neo_to_graph_upload():
    """
    Load a graph from a JSON file and upload it to Neo4j
    """
    jt = JsonTransformer()
    jt.parse('resources/robodb2.json')
    nt = NeoTransformer(jt.graph, host='localhost', port='7474', username='******', password='******')
    nt.save_with_unwind()
    nt.neo4j_report()
def test_validate_json():
    """
    Validate against a valid, representative Biolink Model compliant JSON
    """
    json_file = os.path.join(resource_dir, 'valid.json')
    jt = JsonTransformer()
    jt.parse(json_file)
    validator = Validator()
    e = validator.validate(jt.graph)
    assert len(e) == 0
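# When test_validate_json() fails, it helps to see the individual errors rather
# than just the count. A minimal debugging sketch, assuming only what the test
# above already relies on: Validator.validate() returns a sized iterable of
# printable error objects.
def report_validation_errors(graph):
    validator = Validator()
    errors = validator.validate(graph)
    for err in errors:
        print(err)
    return errors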
def test_load():
    """
    Test for loading into JsonTransformer
    """
    json_file = os.path.join(resource_dir, 'semmed/gene.json')
    jt = JsonTransformer()
    jt.parse(json_file)
    edge_list = list(jt.graph.edges(data=True))
    assert edge_list[0][-1]['subject'] == 'UMLS:C0948075'
    assert edge_list[0][-1]['object'] == 'UMLS:C1290952'
def test_export():
    """
    Test export behavior of JsonTransformer
    """
    json_file = os.path.join(resource_dir, 'semmed/gene.json')
    output_file = os.path.join(target_dir, 'semmeddb_export.json')
    jt = JsonTransformer()
    jt.parse(json_file)
    jt.save(output_file)
    assert os.path.isfile(output_file)
def test_neo_to_graph_upload():
    """
    Load a graph from a JSON file and upload it to Neo4j
    """
    jt = JsonTransformer()
    jt.parse('resources/robodb2.json')
    nt = NeoTransformer(jt.graph, uri=DEFAULT_NEO4J_URL, username=DEFAULT_NEO4J_USERNAME, password=DEFAULT_NEO4J_PASSWORD)
    nt.save()
    nt.neo4j_report()
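# This variant of the upload test expects DEFAULT_NEO4J_URL, DEFAULT_NEO4J_USERNAME
# and DEFAULT_NEO4J_PASSWORD to be defined at module level. A sketch with plausible
# defaults for a local Neo4j instance; the actual values are assumptions and should
# match your own deployment.
DEFAULT_NEO4J_URL = 'bolt://localhost:7687'
DEFAULT_NEO4J_USERNAME = 'neo4j'
DEFAULT_NEO4J_PASSWORD = 'neo4j'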
def test_json_load():
    t = JsonTransformer()
    t.parse(os.path.join(resource_dir, 'valid.json'))
    assert t.graph.number_of_nodes() == 6
    assert t.graph.number_of_edges() == 5

    n = t.graph.nodes['MONDO:0017148']
    assert isinstance(n, dict)
    assert 'id' in n and n['id'] == 'MONDO:0017148'
    assert n['name'] == 'heritable pulmonary arterial hypertension'
    assert n['category'][0] == 'biolink:Disease'

    data = t.graph.get_edge_data('HGNC:11603', 'MONDO:0017148')
    assert len(data.keys()) == 1
    data = data.popitem()[1]
    assert data['subject'] == 'HGNC:11603'
    assert data['object'] == 'MONDO:0017148'
    assert data['edge_label'] == 'biolink:related_to'
    assert data['relation'] == 'RO:0004013'
from kgx import JsonTransformer, clique_merge

t = JsonTransformer()
t.parse('results/hp.json')
t.parse('results/mondo.json')
t.parse('results/hgnc.json')
t.parse('results/clinvar.json')
t.parse('results/omim.json')
t.parse('results/hpoa.json')
t.parse('results/orphanet.json')

# t = PandasTransformer(t.graph)
# t.parse('data/semmeddb_edges.csv')
# t.parse('data/semmeddb_nodes.csv')

t.graph = clique_merge(t.graph)
t.save('results/clique_merged.json')
""" This script prepares the clique_merged.json file for uploading to Neo4j - Removes nodes that cannot be categorized into the biolink model - Renames edge labels that don't matche the biolink model to "related_to" - Transforms into CSV format """ from kgx import JsonTransformer, PandasTransformer import bmt t = JsonTransformer() t.parse('results/clique_merged.json') t = PandasTransformer(t) G = t.graph size = len(G) nodes = [] for n, data in G.nodes(data=True): data['category'] = [ c for c in data.get('category', []) if bmt.get_class(c) is not None ] if data['category'] == []: if 'name' in data: data['category'] = ['named thing'] else: nodes.append(n) G.remove_nodes_from(nodes)
from kgx import JsonTransformer, clique_merge
import sys

path = sys.argv[1]

t = JsonTransformer()
t.parse(path)
t.graph = clique_merge(t.graph)
t.save('clique_merged.json')
"""
Loads all the turtle files with their required ontologies and transforms them
to JSON. Then loads all these JSON files, along with the semmeddb edges.csv and
nodes.csv files, into a single NetworkX graph, and performs `clique_merge` on
it. Finally, saves the resulting NetworkX graph as `clique_merged.csv`.
"""
from kgx import ObanRdfTransformer2, JsonTransformer, HgncRdfTransformer, RdfOwlTransformer2
from kgx import clique_merge, make_valid_types

t = RdfOwlTransformer2()
t.parse('data/hp.owl')
t = JsonTransformer(t)
t.save('results/hp.json')

t = RdfOwlTransformer2()
t.parse('data/mondo.owl')
t = JsonTransformer(t)
t.save('results/mondo.json')

t = HgncRdfTransformer()
t.parse('data/hgnc.ttl')
t = JsonTransformer(t)
t.save('results/hgnc.json')

t = ObanRdfTransformer2()
t.add_ontology('data/mondo.owl')
t.add_ontology('data/hp.owl')
t.parse('data/orphanet.ttl')
t = JsonTransformer(t)
t.save('results/orphanet.json')
from kgx import ObanRdfTransformer, JsonTransformer, HgncRdfTransformer
from kgx import clique_merge

t = JsonTransformer()
t.parse('hgnc.json')
t.parse('clinvar.json')
t.parse('omim.json')
t.parse('hpoa.json')
t.parse('orphanet.json')
t.save('merged.json')

t.graph = clique_merge(t.graph)
t.save('clique_merged.json')
from kgx import ObanRdfTransformer, JsonTransformer, HgncRdfTransformer
from kgx import clique_merge

t = JsonTransformer()
# t.parse('hgnc.json')
# t.parse('clinvar.json')
# t.parse('omim.json')
# t.parse('hpoa.json')
# t.parse('orphanet.json')
t.parse('semmeddb.json')
t.parse('merged.json')
t.save('merged.json')

t.graph = clique_merge(t.graph)
t.save('clique_merged.json')