Beispiel #1
0
def test_save1():
    """
    Parse the BioGRID Turtle fixture and pass the resulting graph to ``save``
    under the name 'biogrid'.
    """
    transformer = ObanRdfTransformer()
    transformer.parse(
        "tests/resources/monarch/biogrid_test.ttl",
        input_format="turtle",
    )
    save(transformer.graph, 'biogrid')
Beispiel #2
0
def test_validator_rdf():
    """
    Run the validator over the graph parsed from the BioGRID Turtle fixture
    and write out the errors it collected.
    """
    # The fixture lives next to this test module, under resources/monarch.
    tests_dir = os.path.abspath(os.path.dirname(__file__))
    fixture = os.path.join(
        tests_dir, 'resources', 'monarch', 'biogrid_test.ttl')

    transformer = ObanRdfTransformer()
    transformer.parse(fixture, input_format="turtle")

    checker = Validator()
    checker.validate(transformer.graph)
    write_errors(checker)
Beispiel #3
0
def test_mapping():
    """
    create a random graph, apply a node-id mapping and save it in
    different formats

    Nodes are created from ``curie(i)`` and registered in ``mapping`` with
    ``mapped_curie(i)`` as the replacement id. After ``mapper.map_graph`` the
    test expects node ids to start with "Y:" and each node's
    ``source_curie`` attribute to start with "X:".
    """
    G = nx.MultiDiGraph()

    N = 100
    E = N * 3
    mapping = {}
    for i in range(0, N + 1):
        nid = curie(i)
        mapping[nid] = mapped_curie(i)
        G.add_node(nid, label="node {}".format(i))
    # The loop index is not needed; only the number of edges matters.
    for _ in range(1, E):
        s = random_curie(N)
        o = random_curie(N)
        G.add_edge(o, s, predicate='related_to')
    print('Nodes={}'.format(len(G.nodes())))
    mapper.map_graph(G, mapping)
    print("Mapped..")

    # Spot-check the first six nodes only.
    for checked, nid in enumerate(G.nodes()):
        if checked > 5:
            break
        # ``G.nodes[...]`` replaces the ``G.node[...]`` alias, which newer
        # NetworkX releases no longer provide.
        src = G.nodes[nid]['source_curie']
        assert nid.startswith("Y:")
        assert src.startswith("X:")

    print("Saving tsv")
    w = PandasTransformer(G)
    w.save("target/maptest.tar")
    w = ObanRdfTransformer(G)
    w.save("target/maptest.ttl")
def test_save_ttl():
    """
    Parse the HPOA Turtle fixture, check the graph size, then export the
    graph and confirm the export file exists.
    """
    transformer = ObanRdfTransformer()
    transformer.parse(os.path.join(resource_dir, 'hpoa_test.ttl'))
    assert transformer.graph.number_of_nodes() == 10
    assert transformer.graph.number_of_edges() == 5

    # Build the export path once and reuse it for saving and checking.
    export_file = os.path.join(target_dir, 'hpoa_test_export.ttl')
    transformer.save(export_file)
    assert os.path.exists(export_file)
def test_load_ttl():
    """
    Parse the HPOA Turtle fixture and check the graph size, one node's
    attributes and one edge's attributes.
    """
    transformer = ObanRdfTransformer()
    transformer.parse(os.path.join(resource_dir, 'hpoa_test.ttl'))
    assert transformer.graph.number_of_nodes() == 10
    assert transformer.graph.number_of_edges() == 5

    # Dump the parsed graph to make failures easier to diagnose.
    from pprint import pprint
    pprint(list(transformer.graph.nodes(data=True)))
    pprint(list(transformer.graph.edges(data=True)))

    node = transformer.graph.nodes(data=True)['HP:0000007']
    assert node['id'] == 'HP:0000007'
    assert node['provided_by'] == ['hpoa_test.ttl']
    assert node['category'] == ['biolink:NamedThing']

    edges_by_key = transformer.graph.get_edge_data(
        'Orphanet:93262', 'HP:0000505')
    # popitem() yields a (key, attributes) pair; only the attributes matter.
    _, edge = edges_by_key.popitem()
    expected_edge = {
        'subject': 'Orphanet:93262',
        'edge_label': 'biolink:has_phenotype',
        'object': 'HP:0000505',
        'relation': 'RO:0002200',
        'provided_by': ['hpoa_test.ttl'],
        'has_evidence': 'ECO:0000304',
    }
    for field, value in expected_edge.items():
        assert edge[field] == value
Beispiel #6
0
def test_load():
    """
    Round-trip the BioGRID Turtle fixture through ObanRdfTransformer and
    export the parsed graph as CSV, GraphML and JSON.

    Raises RuntimeError when, for any OBAN association subject in the source,
    the triples about that subject differ between source and re-saved file.
    """
    here = os.path.abspath(os.path.dirname(__file__))
    source_ttl = os.path.join(here, 'resources', 'monarch', 'biogrid_test.ttl')
    out_dir = os.path.join(here, 'target')
    os.makedirs(out_dir, exist_ok=True)
    roundtrip_ttl = os.path.join(out_dir, "test_output.ttl")

    def triples_about(graph, subject):
        """Return a new rdflib graph with only the triples about *subject*."""
        subgraph = rdflib.Graph()
        subgraph += graph.triples((subject, None, None))
        return subgraph

    # Parse the fixture and write it straight back out as Turtle.
    transformer = ObanRdfTransformer()
    transformer.parse(source_ttl, input_format="turtle")
    transformer.save(roundtrip_ttl, output_format="turtle")
    transformer.report()

    csv_writer = PandasTransformer(transformer.graph)
    csv_writer.save(os.path.join(out_dir, 'biogrid-e.csv'), type='e')
    csv_writer.save(os.path.join(out_dir, 'biogrid-n.csv'), type='n')

    # Load both the original and the re-saved file with plain rdflib.
    original = rdflib.Graph()
    original.parse(source_ttl, format="turtle")
    reloaded = rdflib.Graph()
    reloaded.parse(roundtrip_ttl, format="turtle")

    # Each association's triples must be isomorphic in both graphs.
    OBAN = Namespace('http://purl.org/oban/')
    for association in original.subjects(RDF.type, OBAN.association):
        before = triples_about(original, association)
        after = triples_about(reloaded, association)
        if before.isomorphic(after):
            continue
        raise RuntimeError('The subgraphs whose subject is ' + str(association) +
                           ' are not isomorphic ones.')

    GraphMLTransformer(transformer.graph).save(
        os.path.join(out_dir, "x1n.graphml"))
    JsonTransformer(transformer.graph).save(
        os.path.join(out_dir, "x1n.json"))
Beispiel #7
0
def test_load():
    """
    Round-trip the BioGRID Turtle fixture through ObanRdfTransformer and
    export the parsed graph via PandasTransformer and JsonTransformer.

    Association subgraphs that differ between source and re-saved file are
    only reported with print; they do not fail the test.
    """
    source_ttl = os.path.join(resource_dir, 'monarch/biogrid_test.ttl')
    roundtrip_ttl = os.path.join(target_dir, 'test_output.ttl')

    transformer = ObanRdfTransformer()
    transformer.parse(source_ttl, input_format="turtle")
    transformer.report()
    transformer.save(roundtrip_ttl, output_format="turtle")

    archive_base = os.path.join(target_dir, 'biogrid_test')
    PandasTransformer(transformer.graph).save(archive_base)

    def triples_about(graph, subject):
        """Return a new rdflib graph with only the triples about *subject*."""
        subgraph = rdflib.Graph()
        subgraph += graph.triples((subject, None, None))
        return subgraph

    # Load both the original and the re-saved file with plain rdflib.
    original = rdflib.Graph()
    original.parse(source_ttl, format="turtle")
    reloaded = rdflib.Graph()
    reloaded.parse(roundtrip_ttl, format="turtle")

    # Compare the triples of every OBAN association in both graphs.
    OBAN = Namespace('http://purl.org/oban/')
    for association in original.subjects(RDF.type, OBAN.association):
        before = triples_about(original, association)
        after = triples_about(reloaded, association)
        if before.isomorphic(after):
            continue
        print(
            'The subgraphs whose subject is {} are not isomorphic'.format(
                association))

    # GraphML export is intentionally not exercised here.
    json_writer = JsonTransformer(transformer.graph)
    json_writer.save(os.path.join(target_dir, "biogrid_test.json"))
Beispiel #8
0
def test_load():
    """
    Parse the BioGRID fixture, print its report, and export it through the
    Pandas, GraphML and JSON transformers.
    """
    transformer = ObanRdfTransformer()
    transformer.parse("tests/resources/monarch/biogrid_test.ttl")
    transformer.report()

    # One CSV per save type: 'e' and 'n'.
    csv_writer = PandasTransformer(transformer)
    csv_targets = (
        ('target/biogrid-e.csv', 'e'),
        ('target/biogrid-n.csv', 'n'),
    )
    for path, kind in csv_targets:
        csv_writer.save(path, type=kind)

    GraphMLTransformer(transformer).save("target/x1n.graphml")
    JsonTransformer(transformer).save("target/x1n.json")
Beispiel #9
0
def test_load():
    """
    Build a random multi-digraph, rename its nodes, and save it through
    PandasTransformer and ObanRdfTransformer.
    """
    graph = nx.MultiDiGraph()

    # Raise node_bound to test scalability.
    node_bound = 1000
    edge_bound = node_bound * 3

    for idx in range(1, node_bound):
        graph.add_node(curie(idx), label="node {}".format(idx))

    for _ in range(edge_bound - 1):
        # Draw the two endpoints in a fixed order so the sequence of
        # random_curie calls stays the same.
        first_pick = random_curie(node_bound)
        second_pick = random_curie(node_bound)
        graph.add_edge(second_pick, first_pick)

    print('Nodes={}'.format(len(graph.nodes())))
    rename_all(graph)

    print("Saving tsv")
    PandasTransformer(source=graph).save("target/random.tar")

    print("Saving ttl")
    ObanRdfTransformer(source=graph).save("target/random.ttl")
# Disabled ontology-loading setup. NOTE(review): these commented-out lines are
# the only place in the visible code where `o` is defined, yet the stanzas
# after quit() below still read `o.ontologies`.
#o = ObanRdfTransformer()
#o.add_ontology('data/mondo.owl')
#o.add_ontology('data/hp.owl')
#o.add_ontology('data/go.owl')
#o.add_ontology('data/so.owl')
#o.add_ontology('data/ordo.owl')

# NOTE(review): URIRef is not used in the visible part of this script.
from rdflib import URIRef

# Parse the HGNC Turtle file and save it through JsonTransformer.
t = HgncRdfTransformer()
t.parse('data/hgnc.ttl')
t = JsonTransformer(t)
t.save('hgnc.json')
# quit() ends the run here, so none of the statements below are executed.
quit()

# Unreachable while the quit() above is in place: Orphanet conversion.
t = ObanRdfTransformer()
t.ontologies = o.ontologies
t.parse('data/orphanet.ttl')
t = JsonTransformer(t)
t.save('orphanet.json')

# Unreachable while the quit() above is in place: HPOA conversion.
t = ObanRdfTransformer()
t.ontologies = o.ontologies
t.parse('data/hpoa.ttl')
t = JsonTransformer(t)
t.save('hpoa.json')

# Unreachable while the quit() above is in place: OMIM conversion.
t = ObanRdfTransformer()
t.ontologies = o.ontologies
t.parse('data/omim.ttl')
t = JsonTransformer(t)