예제 #1
0
def test_merge_no_preserve():
    """
    Merge two graphs with ``preserve=False`` (conflicting properties are
    overwritten) and verify counts and the properties of node ``x1``.
    """
    merge_dir = os.path.join(resource_dir, 'merge')

    first = PandasTransformer()
    for tsv_name in ('nodes1.tsv', 'edges1.tsv'):
        first.parse(os.path.join(merge_dir, tsv_name), input_format='tsv')

    second = PandasTransformer()
    for tsv_name in ('nodes2.tsv', 'edges2.tsv'):
        second.parse(os.path.join(merge_dir, tsv_name), input_format='tsv')

    merged = merge_all_graphs([first.graph, second.graph], preserve=False)
    assert len(merged.nodes()) == 6
    assert len(merged.edges()) == 8

    merged_x1 = merged.nodes['x1']
    print(merged_x1)
    assert merged_x1['name'] == 'node x1'

    # The merged category list must hold the first category of each input.
    assert isinstance(merged_x1['category'], list)
    for source in (first, second):
        first_category = list(source.graph.nodes['x1']['category'])[0]
        assert first_category in merged_x1['category']
    assert merged_x1['p1'] == 'a'
예제 #2
0
def test_merge():
    """
    Merge two graphs with ``preserve=True`` and verify counts plus the
    properties of nodes ``x1`` and ``x10``.
    """
    merge_dir = os.path.join(resource_dir, 'merge')

    first = PandasTransformer()
    for tsv_name in ('nodes1.tsv', 'edges1.tsv'):
        first.parse(os.path.join(merge_dir, tsv_name), input_format='tsv')

    second = PandasTransformer()
    for tsv_name in ('nodes2.tsv', 'edges2.tsv'):
        second.parse(os.path.join(merge_dir, tsv_name), input_format='tsv')

    merged = merge_all_graphs([first.graph, second.graph], preserve=True)
    assert len(merged.nodes()) == 6
    assert len(merged.edges()) == 8

    merged_x1 = merged.nodes['x1']
    assert merged_x1['name'] == 'node x1'

    assert isinstance(merged_x1['category'], list)
    # With preserve=True, 'p1' is expected to contain both values.
    for expected_value in ('a', '1'):
        assert expected_value in merged_x1['p1']

    merged_x10 = merged.nodes['x10']
    assert merged_x10['id'] == 'x10'
    assert merged_x10['name'] == 'node x10'
예제 #3
0
파일: main.py 프로젝트: todun/kgx
def main(force_transform):
    """
    Transform every non-CSV entry of ``data`` into a ``csv.tar`` archive, then
    load all archives into one PandasTransformer, run a clique merge, clean
    the categories and save the result as ``results/clique_merged.csv``.

    An existing archive is only regenerated when ``force_transform`` is truthy.
    """
    for source_path, make_transformer in data.items():
        # Inputs that are already CSV (plain or archived) are used as they are.
        if source_path.endswith(('.csv', '.csv.tar')):
            continue

        archive_path = change_extention(source_path, 'csv.tar')

        # Rebuild only when forced or when the archive does not exist yet.
        if force_transform or not os.path.isfile(archive_path):
            source = make_transformer()
            source.parse(source_path)
            PandasTransformer(source).save(archive_path)

    combined = PandasTransformer()

    # Accumulate every archive into the single combined transformer.
    for source_path in data:
        combined.parse(change_extention(source_path, 'csv.tar'))

    combined.merge_cliques()
    combined.clean_categories()
    combined.save('results/clique_merged.csv')
예제 #4
0
def test_mapping():
    """
    Build a random graph, remap its node IDs and save it in two formats.

    Creates ``N + 1`` nodes and ``E - 1`` random ``related_to`` edges, applies
    ``mapper.map_graph`` with a per-node mapping, spot-checks the first six
    nodes and then saves the graph through PandasTransformer and
    ObanRdfTransformer.
    """
    G = nx.MultiDiGraph()

    N = 100
    E = N * 3
    mapping = {}
    for i in range(0, N + 1):
        nid = curie(i)
        mapping[nid] = mapped_curie(i)
        G.add_node(nid, label="node {}".format(i))
    for i in range(1, E):
        s = random_curie(N)
        o = random_curie(N)
        G.add_edge(o, s, predicate='related_to')
    print('Nodes={}'.format(len(G.nodes())))
    mapper.map_graph(G, mapping)
    print("Mapped..")

    # Spot-check only the first few nodes rather than the whole graph.
    count = 0
    for nid in G.nodes():
        # ``G.node`` was removed in networkx 2.4; ``G.nodes[...]`` is the
        # supported accessor for node attributes.
        src = G.nodes[nid]['source_curie']
        assert nid.startswith("Y:")
        assert src.startswith("X:")
        count += 1
        if count > 5:
            break

    print("Saving tsv")
    w = PandasTransformer(G)
    w.save("target/maptest.tar")
    w = ObanRdfTransformer(G)
    w.save("target/maptest.ttl")
예제 #5
0
def test_clique_merge():
    """
    Run a (lenient) clique merge on the ``cm_*`` CSV fixtures and verify the
    two elected clique leaders and their aliases.
    """
    transformer = PandasTransformer()
    os.makedirs(target_dir, exist_ok=True)
    for csv_name in ('cm_nodes.csv', 'cm_edges.csv'):
        transformer.parse(os.path.join(resource_dir, csv_name))
    transformer.report()

    merger = CliqueMerge(prefix_prioritization_map)
    merger.build_cliques(transformer.graph)
    merger.elect_leader()
    consolidated = merger.consolidate_edges()

    # Sort the leader IDs so the two cliques are checked in a stable order.
    leader_ids = sorted(nx.get_node_attributes(consolidated, 'clique_leader').keys())
    assert len(leader_ids) == 2

    first_leader = consolidated.nodes[leader_ids[0]]
    assert first_leader['election_strategy'] == 'PREFIX_PRIORITIZATION'
    assert 'NCBIGene:100302240' in first_leader['aliases']
    assert 'ENSEMBL:ENSG00000284458' in first_leader['aliases']

    second_leader = consolidated.nodes[leader_ids[1]]
    assert second_leader['election_strategy'] == 'PREFIX_PRIORITIZATION'
    assert 'NCBIGene:8202' in second_leader['aliases']
    assert 'OMIM:601937' in second_leader['aliases']
    assert 'ENSEMBL:ENSG00000124151' not in second_leader['aliases']
예제 #6
0
def main(path, output, model):
    """
    Parse ``path`` with JsonTransformer and save the resulting graph to
    ``output`` through PandasTransformer.

    When ``model`` is not None it is first loaded via ``bmt.load``.
    """
    if model is not None:
        bmt.load(model)

    json_source = JsonTransformer()
    json_source.parse(path)

    PandasTransformer(json_source.graph).save(output)
예제 #7
0
def test_neo_to_graph_transform():
    """
    Load a graph through NeoTransformer and save it as ``neo_graph.csv`` in
    ``target_dir`` via PandasTransformer.
    """
    neo = NeoTransformer(
        host='localhost',
        port='7474',
        username='******',
        password='******',
    )
    neo.load()
    neo.report()

    output_path = os.path.join(target_dir, "neo_graph.csv")
    PandasTransformer(neo.graph).save(output_path)
예제 #8
0
def test_csv_to_neo_load():
    """
    Parse the ``cm_*`` CSV fixtures and push the resulting graph through
    NeoTransformer using ``save_with_unwind``.
    """
    csv_source = PandasTransformer()
    for csv_name in ("cm_nodes.csv", "cm_edges.csv"):
        csv_source.parse(os.path.join(resource_dir, csv_name))

    neo = NeoTransformer(
        csv_source.graph,
        host='localhost',
        port='7474',
        username='******',
        password='******',
    )
    neo.save_with_unwind()
    neo.neo4j_report()
예제 #9
0
def test_neo_to_graph_transform():
    """
    Load through a default-constructed NeoTransformer and save the result to
    ``target/neo_graph.csv`` via PandasTransformer.
    """
    neo = NeoTransformer()
    neo.load()
    neo.report()

    # The transformer itself (not its graph) is handed to PandasTransformer.
    csv_writer = PandasTransformer(neo)
    csv_writer.save("target/neo_graph.csv")
예제 #10
0
def test_load():
    """
    Parse the ``x1n``/``x1e`` CSV fixtures into a PandasTransformer, report,
    and save a copy as ``x1copy.csv`` under ``target_dir``.
    """
    transformer = PandasTransformer()
    os.makedirs(target_dir, exist_ok=True)

    for csv_name in ("x1n.csv", "x1e.csv"):
        transformer.parse(os.path.join(resource_dir, csv_name))
    transformer.report()

    copy_path = os.path.join(target_dir, 'x1copy.csv')
    transformer.save(copy_path)
예제 #11
0
def test_neo_to_graph_transform():
    """
    Load a graph through NeoTransformer (default URI and credentials) and
    save it as ``neo_graph.csv`` in ``target_dir`` via PandasTransformer.
    """
    connection = {
        'uri': DEFAULT_NEO4J_URL,
        'username': DEFAULT_NEO4J_USERNAME,
        'password': DEFAULT_NEO4J_PASSWORD,
    }
    neo = NeoTransformer(**connection)
    neo.load()
    neo.report()

    output_path = os.path.join(target_dir, "neo_graph.csv")
    PandasTransformer(neo.graph).save(output_path)
예제 #12
0
def test_load():
    """
    Parse the ``x1n``/``x1e`` CSV fixtures, save a CSV copy and export the
    same transformer as GraphML.
    """
    csv_transformer = PandasTransformer()
    for fixture in ("tests/resources/x1n.csv", "tests/resources/x1e.csv"):
        csv_transformer.parse(fixture)
    csv_transformer.report()
    csv_transformer.save('target/x1copy.csv')

    # The transformer itself (not its graph) is handed to GraphMLTransformer.
    graphml_writer = GraphMLTransformer(csv_transformer)
    graphml_writer.save("target/x1n.graphml")
예제 #13
0
def test_read_achive():
    """
    Parse the tar, tar.gz and tar.bz2 exports from ``target_dir``, each into a
    fresh PandasTransformer, and check that none of them ends up empty.
    """
    archive_names = (
        "semmeddb_test_export.tar",
        "semmeddb_test_export.tar.gz",
        "semmeddb_test_export.tar.bz2",
    )
    archive_paths = [os.path.join(target_dir, name) for name in archive_names]

    for archive_path in archive_paths:
        reader = PandasTransformer()
        reader.parse(archive_path)
        assert not reader.is_empty()
예제 #14
0
def test_load_tsv():
    """
    Parse the ``test_nodes``/``test_edges`` TSV fixtures and verify the graph
    size and the descriptions of two nodes.
    """
    transformer = PandasTransformer()
    for tsv_name in ('test_nodes.tsv', 'test_edges.tsv'):
        transformer.parse(os.path.join(resource_dir, tsv_name), input_format='tsv')

    graph = transformer.graph
    assert graph.number_of_nodes() == 3
    assert graph.number_of_edges() == 1

    # The expected descriptions keep their surrounding double quotes.
    expected_descriptions = {
        'CURIE:123': '"Node of type Gene, CURIE:123"',
        'CURIE:456': '"Node of type Disease, CURIE:456"',
    }
    for node_id, description in expected_descriptions.items():
        assert transformer.graph.nodes[node_id]['description'] == description
예제 #15
0
def test_clique_generation():
    """
    Build cliques from the ``cm_*`` CSV fixtures and check that the clique
    graph has exactly two connected components.
    """
    transformer = PandasTransformer()
    for csv_name in ('cm_nodes.csv', 'cm_edges.csv'):
        transformer.parse(os.path.join(resource_dir, csv_name))
    transformer.report()

    merger = CliqueMerge(prefix_prioritization_map)
    merger.build_cliques(transformer.graph)

    components = list(nx.connected_components(merger.clique_graph))
    assert len(components) == 2
예제 #16
0
def test_graph_to_neo_load():
    """
    Parse the ``x1n``/``x1e`` CSV fixtures and save them through a
    NeoTransformer built from the PandasTransformer.
    """
    csv_source = PandasTransformer()
    for fixture in ("tests/resources/x1n.csv", "tests/resources/x1e.csv"):
        csv_source.parse(fixture)
    csv_source.report()

    # The transformer itself (not its graph) is handed to NeoTransformer.
    neo = NeoTransformer(csv_source)
    neo.save()
    neo.neo4j_report()
예제 #17
0
def test_csv_to_neo_load():
    """
    Parse the ``cm_*`` CSV fixtures and save the resulting graph through
    NeoTransformer using the default URI and credentials.
    """
    csv_source = PandasTransformer()
    for csv_name in ("cm_nodes.csv", "cm_edges.csv"):
        csv_source.parse(os.path.join(resource_dir, csv_name))

    connection = {
        'uri': DEFAULT_NEO4J_URL,
        'username': DEFAULT_NEO4J_USERNAME,
        'password': DEFAULT_NEO4J_PASSWORD,
    }
    neo = NeoTransformer(csv_source.graph, **connection)
    neo.save()
    neo.neo4j_report()
예제 #18
0
 def parse(self, name: str, data_file: str, source: str) -> None:
     """Parse an Obograph JSON file and save its graph as TSV.

     Args:
         name: Name of the ontology; used as the output file name.
         data_file: Data file to parse.
         source: Source name, passed to the parser as ``provided_by``.

     Returns:
         None.
     """
     print(f"Parsing {data_file}")

     obograph = ObographJsonTransformer()
     obograph.parse(data_file, provided_by=source)

     # The output is written under self.output_dir, named after the ontology.
     destination = os.path.join(self.output_dir, f'{name}')
     writer = PandasTransformer(obograph.graph)
     writer.save(filename=destination, output_format='tsv', mode=None)
예제 #19
0
파일: test_rdf.py 프로젝트: putmantime/kgx
def test_load():
    """
    Parse the biogrid Turtle fixture with ObanRdfTransformer and export it as
    edge/node CSV, GraphML and JSON.
    """
    rdf_source = ObanRdfTransformer()
    rdf_source.parse("tests/resources/monarch/biogrid_test.ttl")
    rdf_source.report()

    # Each writer receives the transformer itself (not its graph).
    csv_writer = PandasTransformer(rdf_source)
    csv_writer.save('target/biogrid-e.csv', type='e')
    csv_writer.save('target/biogrid-n.csv', type='n')

    GraphMLTransformer(rdf_source).save("target/x1n.graphml")

    JsonTransformer(rdf_source).save("target/x1n.json")
예제 #20
0
def test_csv_to_neo_load():
    """
    Parse the ``x1n``/``x1e`` CSV files and push the resulting graph through
    NeoTransformer using ``save_with_unwind``.
    """
    csv_source = PandasTransformer()
    for fixture in ("resources/x1n.csv", "resources/x1e.csv"):
        csv_source.parse(fixture)

    connection = {
        'host': 'http://localhost',
        'port': '7474',
        'username': '******',
        'password': '******',
    }
    neo = NeoTransformer(csv_source.graph, **connection)
    neo.save_with_unwind()
    neo.neo4j_report()
예제 #21
0
def test_neo_to_graph_transform():
    """
    Load through NeoTransformer and save the graph as CSV.

    NOTE: the test is currently disabled by the unconditional ``return``
    below; everything after it is unreachable.
    """
    return

    # Unreachable while the early return above is in place.
    neo = NeoTransformer(host='localhost', port='7474', username='', password='')
    neo.load()
    neo.report()
    csv_writer = PandasTransformer(neo.graph)
    csv_writer.save("target/neo_graph.csv")
예제 #22
0
def test_filters(query):
    """
    Apply node and edge filters before parsing and check the graph size.

    ``query`` is indexed as: [0] node filters (mapping), [1] edge filters
    (mapping), [2] expected node count, [3] expected edge count.
    """
    nodes_path = os.path.join(resource_dir, 'test_nodes2.tsv')
    edges_path = os.path.join(resource_dir, 'test_edges2.tsv')
    transformer = PandasTransformer()

    # Filters are registered before parsing.
    for filter_name, filter_value in query[0].items():
        transformer.set_node_filter(filter_name, filter_value)

    for filter_name, filter_value in query[1].items():
        transformer.set_edge_filter(filter_name, filter_value)

    for tsv_path in (nodes_path, edges_path):
        transformer.parse(tsv_path, input_format='tsv')

    assert transformer.graph.number_of_nodes() == query[2]
    assert transformer.graph.number_of_edges() == query[3]
예제 #23
0
def test_semmeddb_csv_to_tsv():
    """
    Read the semmeddb node and edge CSV fixtures and export the resulting
    graph with ``extension='tsv'``.
    """
    transformer = PandasTransformer()
    csv_inputs = (
        os.path.join(resource_dir, "semmed/semmeddb_test_nodes.csv"),
        os.path.join(resource_dir, "semmed/semmeddb_test_edges.csv"),
    )
    export_path = os.path.join(target_dir, "semmeddb_test_tsv_export")

    for csv_path in csv_inputs:
        transformer.parse(csv_path)

    # Save the output as TSV in a tar archive.
    transformer.save(export_path, extension='tsv')
예제 #24
0
def test_csv_to_neo_load():
    """
    Parse node/edge CSV files and push them through NeoTransformer.

    NOTE: the test is currently disabled by the unconditional ``return``
    below; everything after it is unreachable.
    """
    return

    # Unreachable while the early return above is in place.
    csv_source = PandasTransformer()
    csv_source.parse("resources/nodes.csv")
    csv_source.parse("resources/edges.csv")
    neo = NeoTransformer(csv_source.graph, host='localhost', port='7474', username='', password='')
    neo.save_with_unwind()
    neo.neo4j_report()
예제 #25
0
def test_load():
    """
    Parse the ``x1n``/``x1e`` CSV fixtures located next to this file, save a
    CSV copy and export the graph as GraphML into the ``target`` directory.
    """
    csv_transformer = PandasTransformer()

    # Resolve the fixture and output directories relative to this file.
    here = os.path.abspath(os.path.dirname(__file__))
    fixtures = os.path.join(here, 'resources')
    out_dir = os.path.join(here, 'target')
    os.makedirs(out_dir, exist_ok=True)

    for csv_name in ("x1n.csv", "x1e.csv"):
        csv_transformer.parse(os.path.join(fixtures, csv_name))
    csv_transformer.report()
    csv_transformer.save(os.path.join(out_dir, 'x1copy.csv'))

    graphml_writer = GraphMLTransformer(csv_transformer.graph)
    graphml_writer.save(os.path.join(out_dir, "x1n.graphml"))
예제 #26
0
def test_owl_load():
    """
    Parse the ``mody.ttl`` fixture with RdfOwlTransformer and export the graph
    through PandasTransformer and as JSON.
    """
    ttl_path = os.path.join(resource_dir, 'mody.ttl')
    archive_path = os.path.join(target_dir, 'mondo_test')
    json_path = os.path.join(target_dir, 'mondo_test.json')

    owl = RdfOwlTransformer()
    owl.parse(ttl_path, input_format='ttl')
    owl.report()

    PandasTransformer(owl.graph).save(archive_path)

    JsonTransformer(owl.graph).save(json_path)
예제 #27
0
def test_load():
    """
    Round-trip the biogrid Turtle fixture through ObanRdfTransformer.

    The source is parsed and saved back as Turtle; then, for every OBAN
    association subject in the source, the triples about that subject must be
    isomorphic between the source and the dumped file. The graph is also
    exported as edge/node CSV, GraphML and JSON.
    """
    here = os.path.abspath(os.path.dirname(__file__))
    source_ttl = os.path.join(here, 'resources', 'monarch', 'biogrid_test.ttl')
    out_dir = os.path.join(here, 'target')
    os.makedirs(out_dir, exist_ok=True)

    dumped_ttl = os.path.join(out_dir, "test_output.ttl")

    # Exercise ObanRdfTransformer's parse and save functions.
    rdf_transformer = ObanRdfTransformer()
    rdf_transformer.parse(source_ttl, input_format="turtle")
    rdf_transformer.save(dumped_ttl, output_format="turtle")
    rdf_transformer.report()

    csv_writer = PandasTransformer(rdf_transformer.graph)
    csv_writer.save(os.path.join(out_dir, 'biogrid-e.csv'), type='e')
    csv_writer.save(os.path.join(out_dir, 'biogrid-n.csv'), type='n')

    # Re-read the original source with rdflib.
    original = rdflib.Graph()
    original.parse(source_ttl, format="turtle")

    # Re-read the file written by the transformer.
    dumped = rdflib.Graph()
    dumped.parse(dumped_ttl, format="turtle")

    # Compare, per association subject, the subgraphs of both files.
    OBAN = Namespace('http://purl.org/oban/')
    for subject in original.subjects(RDF.type, OBAN.association):
        pattern = (subject, None, None)

        expected_subgraph = rdflib.Graph()
        expected_subgraph += original.triples(pattern)
        actual_subgraph = rdflib.Graph()
        actual_subgraph += dumped.triples(pattern)

        # Both subgraphs must be identical (isomorphic).
        if expected_subgraph.isomorphic(actual_subgraph):
            continue
        message = ('The subgraphs whose subject is ' + str(subject) +
                   ' are not isomorphic ones.')
        raise RuntimeError(message)

    graphml_writer = GraphMLTransformer(rdf_transformer.graph)
    graphml_writer.save(os.path.join(out_dir, "x1n.graphml"))
    json_writer = JsonTransformer(rdf_transformer.graph)
    json_writer.save(os.path.join(out_dir, "x1n.json"))
예제 #28
0
def test_export(query):
    """
    Parse the TSV fixtures, save them as described by ``query`` and check
    that the expected output file(s) exist.

    ``query`` is indexed as: [0] output file name, [1] output format,
    [2] mode, [3] expected file name (str) or a pair of expected file names.
    """
    transformer = PandasTransformer()
    for tsv_name in ('test_nodes.tsv', 'test_edges.tsv'):
        transformer.parse(os.path.join(resource_dir, tsv_name), input_format='tsv')

    graph = transformer.graph
    assert graph.number_of_nodes() == 3
    assert graph.number_of_edges() == 1

    destination = os.path.join(target_dir, query[0])
    transformer.save(filename=destination, output_format=query[1], mode=query[2])

    expected = query[3]
    if not isinstance(expected, str):
        # A non-string expectation is treated as a pair of file names.
        assert os.path.exists(os.path.join(target_dir, expected[0]))
        assert os.path.exists(os.path.join(target_dir, expected[1]))
    else:
        assert os.path.exists(os.path.join(target_dir, expected))
예제 #29
0
def test_load():
    """
    Parse the gzipped ``mody.ttl.gz`` fixture with RdfOwlTransformer and
    export the graph as edge/node CSV and JSON into the ``target`` directory.
    """
    cwd = os.path.abspath(os.path.dirname(__file__))
    resdir = os.path.join(cwd, 'resources')
    tdir = os.path.join(cwd, 'target')
    os.makedirs(tdir, exist_ok=True)

    t = RdfOwlTransformer()
    fn = os.path.join(resdir, "mody.ttl.gz")
    # Use a context manager so the gzip handle is closed even if parsing
    # fails; previously the file object was never closed.
    with gzip.open(fn, 'rb') as f:
        t.parse(f, input_format='ttl')
    t.report()
    w1 = PandasTransformer(t.graph)
    w1.save(os.path.join(tdir, 'mondo-e.csv'), type='e')
    w1.save(os.path.join(tdir, 'mondo-n.csv'), type='n')
    w3 = JsonTransformer(t.graph)
    w3.save(os.path.join(tdir, "mondo.json"))
예제 #30
0
    def parse(self, name: str, data_file: str) -> None:
        """Parse an Obograph JSON file and save its graph as TSV.

        Args:
            name: Name of the ontology; used as the output file name.
            data_file: Data file to parse.

        Returns:
            None.

        """
        logging.info(f"Parsing {data_file}")

        obograph = ObographJsonTransformer()
        obograph.parse(data_file)

        # The output is written under self.output_dir, named after the ontology.
        destination = os.path.join(self.output_dir, f'{name}')
        writer = PandasTransformer(obograph.graph)
        writer.save(filename=destination, extension='tsv', mode=None)