Esempio n. 1
0
    def test_multiple_file_load_rdf(self):
        """
Coverage:

    * KnowledgeGraph.load_rdf() load RDF from multiple files using a wildcard expression
        """
        # create a KnowledgeGraph object
        kg = kglab.KnowledgeGraph()

        # load RDF from a file1 into KG
        kg.load_rdf("dat/gorm.ttl", format="ttl")

        # load RDF from a file2 into KG
        kg.load_rdf("dat/nom.ttl", format="ttl")

        measure = kglab.Measure()
        measure.measure_graph(kg)
        sequential_edge_count = measure.get_edge_count()
        sequential_node_count = measure.get_node_count()

        # load RDF from all files (file1 and file2) matching the
        # expression into KG
        kg_multifile = kglab.KnowledgeGraph()
        kg_multifile.load_rdf("dat/*m.ttl", format="ttl")

        measure.reset()
        measure.measure_graph(kg_multifile)
        multifile_edge_count = measure.get_edge_count()
        multifile_node_count = measure.get_node_count()

        # ic(multifile_edge_count)
        # ic(multifile_node_count)
        assert multifile_edge_count == sequential_edge_count
        assert multifile_node_count == sequential_node_count
Esempio n. 2
0
    def test_load_save_measure(self):
        """
Coverage:

    * KnowledgeGraph() constructor
    * KnowledgeGraph.load_rdf() from pathlib.Path, urlpath.URL
    * KnowledgeGraph.safe_rdf()
    * KnowledgeGraph.load_jsonld()
    * KnowledgeGraph.save_jsonld()

    * Measure() constructor
    * Measure.measure_graph()
    * Measure.get_node_count()
        """
        tmp = tempfile.NamedTemporaryFile(mode="w+b", delete=False)

        try:
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", message="unclosed file*")

                # load RDF from urlpath.URL
                kg = kglab.KnowledgeGraph()
                path = urlpath.URL(
                    "https://storage.googleapis.com/kglab-tutorial/foaf.rdf")
                kg.load_rdf(path, format="xml")
                tmp.close()

                # save RDF to local file reference
                kg.save_rdf(tmp.name)
                tmp.close()

                # load RDF from pathlib.Path
                kg = kglab.KnowledgeGraph()
                path = pathlib.Path(tmp.name)
                kg.load_rdf(path)
                tmp.close()

                # save JSON-LD to local file reference
                kg.save_jsonld(tmp.name)
                tmp.close()

                # load JSON-LD from pathlib.Path
                kg = kglab.KnowledgeGraph()
                path = pathlib.Path(tmp.name)
                kg.load_jsonld(path)

                # measure graph
                measure = kglab.Measure()
                measure.measure_graph(kg)

                # verify
                self.assertTrue(measure.get_node_count() == 35)
                self.assertTrue(measure.get_edge_count() == 62)
        finally:
            os.unlink(tmp.name)
            tmp.close()
Esempio n. 3
0
    def test_load_parquet_gs(self):
        """
Coverage:

    * KnowledgeGraph() constructor
    * KnowledgeGraph.load_parquet()
    * KnowledgeGraph.query_as_df()
    * KnowledgeGraph.query()
    * KnowledgeGraph.n3fy_row()
    * KnowledgeGraph.n3fy()
        """
        kg = kglab.KnowledgeGraph(
            namespaces={"doap": "http://usefulinc.com/ns/doap#"})

        path = "gs://kglab-tutorial/foaf.parquet"
        kg.load_parquet(path)

        sparql = """
            SELECT ?x ?name
            WHERE {
                ?x rdf:type doap:Project .
                ?x doap:name ?name
            }
        """

        df = kg.query_as_df(sparql)

        # handle `cuDF` dataframes (GPUs enabled)
        if not isinstance(df, pd.DataFrame):
            df = df.to_pandas()

        row = df.iloc[0]
        val = row["name"]

        self.assertTrue(val == "Fantasy Fame Game")
Esempio n. 4
0
    def test_import_roam(self):
        """
Coverage:

    * KnowledgeGraph.import_roam() import JSON from Roam Research export
        """
        # create a KnowledgeGraph object
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", message="unclosed file*")
            kg = kglab.KnowledgeGraph()

            # load JSON export into KG
            path = pathlib.Path("dat/roam.json")
            kg.import_roam(path)

            measure = kglab.Measure()
            measure.measure_graph(kg)

            node_count = measure.get_node_count()
            edge_count = measure.get_edge_count()

            # ic(node_count)
            # ic(edge_count)
            assert node_count == 44
            assert edge_count == 208
Esempio n. 5
0
    def test_load_parquet_gs(self):
        """
Coverage:

    * KnowledgeGraph() constructor
    * KnowledgeGraph.load_parquet()
    * KnowledgeGraph.query_as_df()
    * KnowledgeGraph.query()
    * KnowledgeGraph.n3fy_row()
    * KnowledgeGraph.n3fy()
        """
        kg = kglab.KnowledgeGraph(
            namespaces={"doap": "http://usefulinc.com/ns/doap#"})

        path = "gs://kglab-tutorial/foaf.parquet"
        kg.load_parquet(path)

        sparql = """
            SELECT ?x ?name
            WHERE {
                ?x rdf:type doap:Project .
                ?x doap:name ?name
            }
        """

        df = kg.query_as_df(sparql)
        row = df.iloc[0]
        self.assertTrue(df.iloc[0]["name"] == "Fantasy Fame Game")
Esempio n. 6
0
    def test_multiple_file_load_parquet(self):
        """
Coverage:

    * KnowledgeGraph.load_parquet() load jsonld from multiple files using a wildcard expression
        """
        # create a KnowledgeGraph object
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", message="unclosed file*")
            kg = kglab.KnowledgeGraph()

            # load parquet from a file1 into KG
            kg.load_parquet("dat/gorm.parquet")

            # load parquet from a file2 into KG
            kg.load_parquet("dat/nom.parquet")

            measure = kglab.Measure()
            measure.measure_graph(kg)
            sequential_edge_count = measure.get_edge_count()
            sequential_node_count = measure.get_node_count()
            # ic(sequential_edge_count)
            # ic(sequential_node_count)

            # load parquet from all files (file1 and file2) matching the
            # expression into KG
            kg_multifile = kglab.KnowledgeGraph()
            kg_multifile.load_parquet("dat/*m.parquet")

            measure.reset()
            measure.measure_graph(kg_multifile)
            multifile_edge_count = measure.get_edge_count()
            multifile_node_count = measure.get_node_count()

            # ic(multifile_edge_count)
            # ic(multifile_node_count)
            assert multifile_edge_count == sequential_edge_count
            assert multifile_node_count == sequential_node_count
Esempio n. 7
0
    def test_single_file_load_rdf(self):
        """
Coverage:

    * KnowledgeGraph.load_rdf() load RDF from a single local file (str)
        """
        # create a KnowledgeGraph object
        kg = kglab.KnowledgeGraph()

        # load RDF from a file
        kg.load_rdf("dat/gorm.ttl", format="ttl")
        measure = kglab.Measure()

        measure.measure_graph(kg)
        edge_count = measure.get_edge_count()
        node_count = measure.get_node_count()

        assert edge_count == 25
        assert node_count == 15
Esempio n. 8
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import kglab

# location of the RDF/XML document loaded below
FOAF_URL = "https://storage.googleapis.com/kglab-tutorial/foaf.rdf"

# build an empty graph and populate it from the URL
kg = kglab.KnowledgeGraph()
kg.load_rdf(FOAF_URL, format="xml")

# collect size statistics for the loaded graph
measure = kglab.Measure()
measure.measure_graph(kg)

# report the counts, each followed by a blank line
edge_count = measure.get_edge_count()
print("edges: {}\n".format(edge_count))

node_count = measure.get_node_count()
print("nodes: {}\n".format(node_count))

# serialize as a string in "Turtle" TTL format, then show only the
# first 999 characters wrapped in a Markdown code fence
ttl = kg.save_rdf_text()
excerpt = ttl[:999]

print("```")
print(excerpt)
print("```")