Beispiel #1
0
def test_neo_to_graph_upload():
    """
    Parse a JSON graph file and push the resulting graph to Neo4j.

    The graph read from ``resources/robodb2.json`` is handed to a
    NeoTransformer configured for a local server, written with
    ``save_with_unwind()`` and followed by a call to ``neo4j_report()``.
    """
    json_transformer = JsonTransformer()
    json_transformer.parse('resources/robodb2.json')

    # Connection settings are collected first and expanded as keyword arguments.
    connection = dict(host='localhost', port='7474', username='******', password='******')
    neo_transformer = NeoTransformer(json_transformer.graph, **connection)
    neo_transformer.save_with_unwind()
    neo_transformer.neo4j_report()
Beispiel #2
0
def test_load():
    """
    Parse the SemMed gene JSON fixture and check the first edge's endpoints.
    """
    transformer = JsonTransformer()
    transformer.parse(os.path.join(resource_dir, 'semmed/gene.json'))

    # Each entry of edges(data=True) ends with the edge attribute dict.
    first_edge_attrs = list(transformer.graph.edges(data=True))[0][-1]
    assert first_edge_attrs['subject'] == 'UMLS:C0948075'
    assert first_edge_attrs['object'] == 'UMLS:C1290952'
Beispiel #3
0
def test_validate_json():
    """
    Check that a representative Biolink Model compliant JSON file
    produces no validation errors.
    """
    transformer = JsonTransformer()
    transformer.parse(os.path.join(resource_dir, 'valid.json'))

    errors = Validator().validate(transformer.graph)
    assert len(errors) == 0
Beispiel #4
0
def test_neo_to_graph_upload():
    """
    Parse a JSON graph file and push the resulting graph to Neo4j.

    The connection uses the module's DEFAULT_NEO4J_* settings; after
    ``save()`` the transformer's ``neo4j_report()`` is called.
    """
    json_transformer = JsonTransformer()
    json_transformer.parse('resources/robodb2.json')

    # Connection settings are collected first and expanded as keyword arguments.
    connection = dict(
        uri=DEFAULT_NEO4J_URL,
        username=DEFAULT_NEO4J_USERNAME,
        password=DEFAULT_NEO4J_PASSWORD,
    )
    neo_transformer = NeoTransformer(json_transformer.graph, **connection)
    neo_transformer.save()
    neo_transformer.neo4j_report()
Beispiel #5
0
def test_load():
    """
    Parse the BioGRID Turtle fixture and re-export it as CSV, GraphML and JSON.
    """
    rdf_transformer = ObanRdfTransformer()
    rdf_transformer.parse("tests/resources/monarch/biogrid_test.ttl")
    rdf_transformer.report()

    # Two CSV files are written: one with type='e' and one with type='n'.
    csv_writer = PandasTransformer(rdf_transformer)
    for csv_path, kind in (('target/biogrid-e.csv', 'e'),
                           ('target/biogrid-n.csv', 'n')):
        csv_writer.save(csv_path, type=kind)

    GraphMLTransformer(rdf_transformer).save("target/x1n.graphml")
    JsonTransformer(rdf_transformer).save("target/x1n.json")
Beispiel #6
0
def test_owl_load():
    """
    Parse the ``mody.ttl`` fixture and export its graph through
    PandasTransformer and JsonTransformer.
    """
    rdf_transformer = RdfOwlTransformer()
    rdf_transformer.parse(os.path.join(resource_dir, 'mody.ttl'), input_format='ttl')
    rdf_transformer.report()

    # Both exporters are fed the same parsed graph.
    parsed_graph = rdf_transformer.graph
    PandasTransformer(parsed_graph).save(os.path.join(target_dir, 'mondo_test'))
    JsonTransformer(parsed_graph).save(os.path.join(target_dir, 'mondo_test.json'))
Beispiel #7
0
def test_load():
    """
    Round-trip the BioGRID Turtle fixture and export it in other formats.

    The fixture is parsed and saved again as Turtle by ObanRdfTransformer.
    Source and output are then re-read with rdflib, and for every subject
    typed as an OBAN association the triples with that subject are compared.

    Raises:
        RuntimeError: if the triples for an association subject are not
            isomorphic between the source file and the saved output.
    """
    here = os.path.abspath(os.path.dirname(__file__))
    source_ttl = os.path.join(here, 'resources', 'monarch', 'biogrid_test.ttl')
    out_dir = os.path.join(here, 'target')
    os.makedirs(out_dir, exist_ok=True)
    dumped_ttl = os.path.join(out_dir, "test_output.ttl")

    # Parse the fixture and write it straight back out as Turtle.
    rdf_transformer = ObanRdfTransformer()
    rdf_transformer.parse(source_ttl, input_format="turtle")
    rdf_transformer.save(dumped_ttl, output_format="turtle")
    rdf_transformer.report()

    csv_writer = PandasTransformer(rdf_transformer.graph)
    for file_name, kind in (('biogrid-e.csv', 'e'), ('biogrid-n.csv', 'n')):
        csv_writer.save(os.path.join(out_dir, file_name), type=kind)

    # Independently re-read both the fixture and the freshly written file.
    original = rdflib.Graph()
    original.parse(source_ttl, format="turtle")
    dumped = rdflib.Graph()
    dumped.parse(dumped_ttl, format="turtle")

    # Compare, association by association, the triples sharing that subject.
    OBAN = Namespace('http://purl.org/oban/')
    for association in original.subjects(RDF.type, OBAN.association):
        expected = rdflib.Graph()
        expected += original.triples((association, None, None))
        actual = rdflib.Graph()
        actual += dumped.triples((association, None, None))
        if expected.isomorphic(actual):
            continue
        raise RuntimeError('The subgraphs whose subject is ' + str(association) +
                           ' are not isomorphic ones.')

    GraphMLTransformer(rdf_transformer.graph).save(os.path.join(out_dir, "x1n.graphml"))
    JsonTransformer(rdf_transformer.graph).save(os.path.join(out_dir, "x1n.json"))
def main(path, output, model):
    """
    Read a JSON graph from ``path`` and save it via PandasTransformer.

    :param path: location of the JSON graph passed to JsonTransformer.parse
    :param output: destination passed to PandasTransformer.save
    :param model: passed to ``bmt.load`` first when it is not None
    """
    if model is not None:
        bmt.load(model)

    json_transformer = JsonTransformer()
    json_transformer.parse(path)

    PandasTransformer(json_transformer.graph).save(output)
Beispiel #9
0
def test_json_load():
    """
    Parse ``valid.json`` and verify graph size, one node and one edge.
    """
    transformer = JsonTransformer()
    transformer.parse(os.path.join(resource_dir, 'valid.json'))
    graph = transformer.graph
    assert graph.number_of_nodes() == 6
    assert graph.number_of_edges() == 5

    node = graph.nodes['MONDO:0017148']
    assert isinstance(node, dict)
    assert 'id' in node and node['id'] == 'MONDO:0017148'
    assert node['name'] == 'heritable pulmonary arterial hypertension'
    assert node['category'][0] == 'biolink:Disease'

    edges = graph.get_edge_data('HGNC:11603', 'MONDO:0017148')
    assert len(edges.keys()) == 1
    # popitem() hands back (key, attributes); only the attributes are checked.
    _, edge = edges.popitem()
    expected_fields = (
        ('subject', 'HGNC:11603'),
        ('object', 'MONDO:0017148'),
        ('edge_label', 'biolink:related_to'),
        ('relation', 'RO:0004013'),
    )
    for field, expected in expected_fields:
        assert edge[field] == expected
Beispiel #10
0
def test_load():
    """
    Load a gzip-compressed Turtle file and export its graph as CSV and JSON.

    ``resources/mody.ttl.gz`` (relative to this file) is opened with gzip and
    parsed by RdfOwlTransformer. The resulting graph is written to the
    ``target`` directory next to this file, which is created if missing.
    """
    cwd = os.path.abspath(os.path.dirname(__file__))
    resdir = os.path.join(cwd, 'resources')
    tdir = os.path.join(cwd, 'target')
    os.makedirs(tdir, exist_ok=True)

    t = RdfOwlTransformer()
    fn = os.path.join(resdir, "mody.ttl.gz")
    # Use a context manager so the gzip handle is closed even when parsing
    # raises; previously the file object was never closed.
    with gzip.open(fn, 'rb') as f:
        t.parse(f, input_format='ttl')
    t.report()

    w1 = PandasTransformer(t.graph)
    w1.save(os.path.join(tdir, 'mondo-e.csv'), type='e')
    w1.save(os.path.join(tdir, 'mondo-n.csv'), type='n')
    w3 = JsonTransformer(t.graph)
    w3.save(os.path.join(tdir, "mondo.json"))
Beispiel #11
0
def test_load():
    """
    Round-trip the BioGRID Turtle fixture and export it in other formats.

    The fixture is parsed and saved again as Turtle by ObanRdfTransformer.
    Source and output are then re-read with rdflib and compared per OBAN
    association subject. A mismatch is only printed, not raised.
    """
    source_ttl = os.path.join(resource_dir, 'monarch/biogrid_test.ttl')
    dumped_ttl = os.path.join(target_dir, 'test_output.ttl')

    rdf_transformer = ObanRdfTransformer()
    rdf_transformer.parse(source_ttl, input_format="turtle")
    rdf_transformer.report()
    rdf_transformer.save(dumped_ttl, output_format="turtle")

    PandasTransformer(rdf_transformer.graph).save(
        os.path.join(target_dir, 'biogrid_test'))

    # Independently re-read both the fixture and the freshly written file.
    original = rdflib.Graph()
    original.parse(source_ttl, format="turtle")
    dumped = rdflib.Graph()
    dumped.parse(dumped_ttl, format="turtle")

    # Compare, association by association, the triples sharing that subject.
    OBAN = Namespace('http://purl.org/oban/')
    for association in original.subjects(RDF.type, OBAN.association):
        expected = rdflib.Graph()
        expected += original.triples((association, None, None))
        actual = rdflib.Graph()
        actual += dumped.triples((association, None, None))
        if expected.isomorphic(actual):
            continue
        print(
            'The subgraphs whose subject is {} are not isomorphic'.format(
                association))

    # The GraphML export is currently disabled; only JSON is written here.
    JsonTransformer(rdf_transformer.graph).save(
        os.path.join(target_dir, "biogrid_test.json"))
Beispiel #12
0
def main(path, output):
    """
    Infer node categories from edge labels shaped like "<predicate> <category>".

    For every edge, the ``edge_label`` is normalised (spaces become
    underscores) and split at its last underscore. When ``bmt`` recognises
    the first part as a predicate and the second as a class, that class is
    recorded in the ``category`` list of the edge's target node.

    :param path: passed to the JsonTransformer constructor
    :param output: not used by this function as written
    """
    G = JsonTransformer(path).graph

    for u, v, attr_dict in G.edges(data=True):
        edge_label = attr_dict['edge_label']

        try:
            predicate, category = edge_label.replace(' ', '_').rsplit('_', 1)
        except ValueError:
            # A label without any separator cannot be split in two; skip it.
            continue

        is_predicate = bmt.get_predicate(predicate) is not None
        is_category = bmt.get_class(category) is not None

        if is_predicate and is_category:
            # NOTE(review): `G.node` is the older networkx accessor; newer
            # releases only offer `G.nodes` - confirm the pinned version.
            if 'category' not in G.node[v]:
                G.node[v]['category'] = [category]
            elif category not in G.node[v]['category']:
                G.node[v]['category'].append(category)
                # The original passed the undefined name `p` here, which
                # raised NameError; the parsed predicate is what is meant.
                logging.info('from {u} {p} {v} found {v} is a {c}'.format(
                    u=u, p=predicate, v=v, c=category))
def main(path, output, model):
    """
    Replace curies in node ``category`` and edge ``predicate`` attributes
    with whatever ``bmt.get_by_mapping`` returns for them, then save.

    :param path: location of the JSON graph passed to JsonTransformer.parse
    :param output: destination passed to JsonTransformer.save
    :param model: passed to ``bmt.load`` first when it is not None
    """
    if model is not None:
        bmt.load(model)

    transformer = JsonTransformer()
    transformer.parse(path)

    def curie_to_label(curie:str):
        """
        Look a curie up with ``bmt.get_by_mapping``.

        A string yields the lookup result, or the string itself when the
        lookup returns None. A list, tuple or set is converted item by item
        into a list. Any other value yields None.
        """
        if isinstance(curie, str):
            element = bmt.get_by_mapping(curie)
            return curie if element is None else element
        if isinstance(curie, (list, tuple, set)):
            return list(map(curie_to_label, curie))
        return None

    graph = transformer.graph

    for _, node_attr in graph.nodes(data=True):
        node_attr['category'] = curie_to_label(node_attr.get('category'))

    for _, _, edge_attr in graph.edges(data=True):
        edge_attr['predicate'] = curie_to_label(edge_attr.get('predicate'))

    transformer.save(output)
Beispiel #14
0
def test_json_save():
    """
    Parse ``valid.json``, save it again and check the output file exists.
    """
    transformer = JsonTransformer()
    transformer.parse(os.path.join(resource_dir, 'valid.json'))
    assert transformer.graph.number_of_nodes() == 6
    assert transformer.graph.number_of_edges() == 5

    saved_path = os.path.join(target_dir, 'graph.json')
    transformer.save(saved_path)
    assert os.path.exists(saved_path)
Beispiel #15
0
def test_export():
    """
    Parse the SemMed gene JSON fixture, save it and check a file was written.
    """
    source_json = os.path.join(resource_dir, 'semmed/gene.json')
    exported_json = os.path.join(target_dir, 'semmeddb_export.json')

    transformer = JsonTransformer()
    transformer.parse(source_json)
    transformer.save(exported_json)

    assert os.path.isfile(exported_json)
Beispiel #16
0
"""
Loads all the turtle files with their required ontologies and transforms them to
json. Then loads all these json files, along with the semmeddb edges.csv and
nodes.csv files, into a single NetworkX graph, and performs `clique_merge` on it.
Finally, saves the resulting NetworkX graph as `clique_merged.csv`
"""

from kgx import ObanRdfTransformer2, JsonTransformer, HgncRdfTransformer, RdfOwlTransformer2
from kgx import clique_merge, make_valid_types

# Each OWL file is parsed and written back out as JSON.
for owl_path, json_path in (
        ('data/hp.owl', 'results/hp.json'),
        ('data/mondo.owl', 'results/mondo.json'),
):
    t = RdfOwlTransformer2()
    t.parse(owl_path)
    t = JsonTransformer(t)
    t.save(json_path)

# The HGNC Turtle file goes through its own transformer.
t = HgncRdfTransformer()
t.parse('data/hgnc.ttl')
t = JsonTransformer(t)
t.save('results/hgnc.json')

# Both ontologies are registered on the transformer before Orphanet is parsed.
t = ObanRdfTransformer2()
for ontology_path in ('data/mondo.owl', 'data/hp.owl'):
    t.add_ontology(ontology_path)
t.parse('data/orphanet.ttl')
t = JsonTransformer(t)
t.save('results/orphanet.json')
Beispiel #17
0
"""
This script prepares the clique_merged.json file for uploading to Neo4j
- Removes nodes that cannot be categorized into the biolink model
- Renames edge labels that don't match the biolink model to "related_to"
- Transforms into CSV format
"""

from kgx import JsonTransformer, PandasTransformer
import bmt

# Load the clique-merged graph and wrap the transformer in a PandasTransformer.
t = JsonTransformer()
t.parse('results/clique_merged.json')
t = PandasTransformer(t)

G = t.graph
size = len(G)

# Collects the nodes left with no valid category and no name.
nodes = []

for n, data in G.nodes(data=True):
    # Keep only the categories for which bmt.get_class returns something.
    data['category'] = list(
        filter(lambda c: bmt.get_class(c) is not None, data.get('category', []))
    )
    if data['category'] != []:
        continue
    if 'name' not in data:
        nodes.append(n)
    else:
        # A named node without a valid category gets the generic one.
        data['category'] = ['named thing']

G.remove_nodes_from(nodes)
Beispiel #18
0
from kgx import JsonTransformer
from pprint import pprint
from terminaltables import AsciiTable
import sys, numpy

# Optional second argument: the minimum frequency, 100 when not given.
min_frequency = int(sys.argv[2]) if len(sys.argv) >= 3 else 100

# Mandatory first argument: the knowledge graph to read.
if len(sys.argv) >= 2:
    path = sys.argv[1]
else:
    quit('Required argument: path to json knowledge graph')

t = JsonTransformer()
t.parse(path)

# Accumulators filled while walking over the graph's nodes.
category_list = []
uncategorized_example, uncategorized_frequency = {}, {}
for n in t.graph.nodes():
    c = t.graph.node[n].get('category')

    if c is None:
        iri = t.graph.node[n].get('iri')
        k = iri.split('/')
        if '_' in k[-1]:
            prefix, _ = k[-1].split('_', 1)
            k = tuple(k[:-1] + [prefix])
        else:
Beispiel #19
0
from kgx import ObanRdfTransformer, JsonTransformer, HgncRdfTransformer
from kgx import clique_merge

# Parse every source file into the same transformer, in this order.
t = JsonTransformer()
for json_path in (
        'hgnc.json',
        'clinvar.json',
        'omim.json',
        'hpoa.json',
        'orphanet.json',
):
    t.parse(json_path)
t.save('merged.json')

# Replace the graph with its clique-merged version and save it separately.
merged_graph = clique_merge(t.graph)
t.graph = merged_graph
t.save('clique_merged.json')

Beispiel #20
0
def test_neo_to_graph_download():
    """
    Download edges from a Neo4j server into a JsonTransformer's graph.

    NOTE: the test is currently disabled; the bare ``return`` right below
    makes every statement after it unreachable.

    The disabled body builds a Cypher ``match`` pattern from the optional
    subject label, object label and edge type, counts the matching rows,
    then fetches them page by page and adds the nodes and edges to the graph.
    """
    return

    # Filters for the query; None means "do not constrain".
    subject_label = 'gene'
    object_label = None
    edge_type = None
    # Paging stops once the graph holds more than this many edges.
    stop_after = 100

    output_transformer = JsonTransformer()
    G = output_transformer.graph

    driver = http_gdb('http://localhost:7474', username='', password='')

    # Turn each filter into a backtick-quoted ":`label`" fragment, or an
    # empty string when the filter is not a string.
    subject_label = ':`{}`'.format(subject_label) if isinstance(
        subject_label, str) else ''
    object_label = ':`{}`'.format(object_label) if isinstance(
        object_label, str) else ''
    edge_type = ':`{}`'.format(edge_type) if isinstance(edge_type, str) else ''

    match = 'match (n{})-[e{}]->(m{})'.format(subject_label, edge_type,
                                              object_label)

    results = driver.query('{} return count(*)'.format(match))

    print('Using cyper query: {} return n, e, m'.format(match))

    # Take the single value of the first result row as the total row count.
    # NOTE(review): `size` stays unbound if `results` yields nothing - confirm
    # the count query always returns a row.
    for a, in results:
        size = a
        break

    if size == 0:
        print('No data available')
        quit()

    page_size = 1_000

    # Ensures the "no id" warning is printed only once.
    skip_flag = False

    # Fetch the matches in pages of `page_size` rows using skip/limit.
    for i in range(0, size, page_size):
        q = '{} return n, e, m skip {} limit {}'.format(match, i, page_size)
        results = driver.query(q)

        for n, e, m in results:
            subject_attr = n['data']
            object_attr = m['data']
            edge_attr = e['data']

            # Records lacking an id on either end cannot be keyed in the graph.
            if 'id' not in subject_attr or 'id' not in object_attr:
                if not skip_flag:
                    print('Skipping records that have no id attribute')
                    skip_flag = True
                continue

            s = subject_attr['id']
            o = object_attr['id']

            # Fall back to the record metadata for missing label/category.
            if 'edge_label' not in edge_attr:
                edge_attr['edge_label'] = e['metadata']['type']

            if 'category' not in subject_attr:
                subject_attr['category'] = n['metadata']['labels']

            if 'category' not in object_attr:
                object_attr['category'] = m['metadata']['labels']

            # Nodes already present keep the attributes they were added with.
            if s not in G:
                G.add_node(s, **subject_attr)
            if o not in G:
                G.add_node(o, **object_attr)

            G.add_edge(s, o, key=edge_attr['edge_label'], **edge_attr)

        # The limit is checked once per page, so it may be overshot.
        if stop_after is not None and G.number_of_edges() > stop_after:
            break
Beispiel #21
0
from kgx import JsonTransformer, clique_merge

# Merge the per-source JSON graphs into one graph held by a single transformer.
t = JsonTransformer()
# Previously 'results/hp.owl': a JsonTransformer is given JSON input, and
# every other input parsed here is a JSON file in the results directory.
t.parse('results/hp.json')
t.parse('results/mondo.json')
t.parse('results/hgnc.json')
t.parse('results/clinvar.json')
t.parse('results/omim.json')
t.parse('results/hpoa.json')
t.parse('results/orphanet.json')

# Loading the semmeddb CSV files is currently disabled:
#t = PandasTransformer(t.graph)
#t.parse('data/semmeddb_edges.csv')
#t.parse('data/semmeddb_nodes.csv')

# Replace the combined graph with its clique-merged version and save it.
t.graph = clique_merge(t.graph)
t.save('results/clique_merged.json')
Beispiel #22
0
from kgx import JsonTransformer, clique_merge
import sys

# The knowledge graph to merge is given as the first command line argument.
path = sys.argv[1]

t = JsonTransformer()
t.parse(path)

# Replace the parsed graph with its clique-merged version before saving.
merged_graph = clique_merge(t.graph)
t.graph = merged_graph

t.save('clique_merged.json')
from kgx import ObanRdfTransformer, JsonTransformer, HgncRdfTransformer
from collections import Counter

#o = ObanRdfTransformer()
#o.add_ontology('data/mondo.owl')
#o.add_ontology('data/hp.owl')
#o.add_ontology('data/go.owl')
#o.add_ontology('data/so.owl')
#o.add_ontology('data/ordo.owl')

from rdflib import URIRef

# Convert the HGNC Turtle file to JSON.
t = HgncRdfTransformer()
t.parse('data/hgnc.ttl')
t = JsonTransformer(t)
t.save('hgnc.json')
# The script stops here: everything below this call is never executed.
quit()

# NOTE(review): the conversions below read `o.ontologies`, but `o` is only
# created in the commented-out lines near the top of this script. They would
# raise NameError if the quit() above were removed without restoring `o`.
t = ObanRdfTransformer()
t.ontologies = o.ontologies
t.parse('data/orphanet.ttl')
t = JsonTransformer(t)
t.save('orphanet.json')

t = ObanRdfTransformer()
t.ontologies = o.ontologies
t.parse('data/hpoa.ttl')
t = JsonTransformer(t)
t.save('hpoa.json')

t = ObanRdfTransformer()
Beispiel #24
0
from kgx import ObanRdfTransformer, JsonTransformer, HgncRdfTransformer
from kgx import clique_merge

# Parse the active inputs into one transformer; the other sources are
# currently switched off.
t = JsonTransformer()
for json_path in (
        # 'hgnc.json',
        # 'clinvar.json',
        # 'omim.json',
        # 'hpoa.json',
        # 'orphanet.json',
        'semmeddb.json',
        'merged.json',
):
    t.parse(json_path)
t.save('merged.json')

# Replace the graph with its clique-merged version and save it separately.
merged_graph = clique_merge(t.graph)
t.graph = merged_graph
t.save('clique_merged.json')