def load_statements():
    a = datetime.datetime.now()
    logger.info(f"start loading ds at: {a}")
    ds = Dataset()
    ds.parse(STATEMENTS, format=TYPE)
    b = datetime.datetime.now()
    logger.info(f"finished loading ds at: {b}")
    logger.info(f"ds loaded: {ds}")
    logger.info(f"ds loaded in {b - a}")
    return ds

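# load_statements() above relies on module-level setup that is not shown.
# A minimal sketch of what that setup could look like (STATEMENTS and TYPE
# are hypothetical values, not taken from the original module):
import datetime
import logging

from rdflib import Dataset

logger = logging.getLogger(__name__)
STATEMENTS = "statements.trig"  # hypothetical path to the statements file
TYPE = "trig"                   # hypothetical parse format
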
def test_load_from_file(self):
    ds = Dataset()
    ds.parse("geoStatements.trig", format="trig")

    async def f():
        await self.aiotest.addN(
            (i for i in ds.quads((None, None, None, None))))
        print("ds loaded")

    self.loop.run_until_complete(asyncio.gather(f()))

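# Note: ds.quads((None, None, None, None)) yields every (s, p, o, graph)
# 4-tuple in the Dataset, which is the shape the (project-specific) async
# addN wrapper above consumes, so the generator can be handed over unchanged.
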
def _get_single_graph_from_trig(trig_file: Optional[str] = None,
                                data: Optional[str] = None) -> rdflib.Graph:
    if trig_file is None and data is None:
        raise RuntimeError("One of trig_file OR data *must* be specified.")
    dataset = Dataset()
    dataset.parse(format="trig", source=trig_file, data=data)
    graphs_with_triples = [g for g in dataset.graphs() if len(g) > 0]
    assert (
        len(graphs_with_triples) == 1
    ), f"Found {len(graphs_with_triples)} non-trivial graphs in {trig_file}. Expected one."
    return graphs_with_triples[0]

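# Usage sketch for _get_single_graph_from_trig ("single_graph.trig" is a
# hypothetical file containing exactly one non-empty named graph; calling the
# function with neither argument raises RuntimeError):
graph = _get_single_graph_from_trig(trig_file="single_graph.trig")
print(f"graph {graph.identifier} holds {len(graph)} triples")
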
def test_roundtrip():
    d = Dataset()
    d.parse(Path(__file__).parent / "test_parser_hext_multigraph.ndjson",
            format="hext",
            publicID=d.default_context.identifier)
    d.default_union = True
    with open(str(
            Path(__file__).parent / "test_parser_hext_multigraph.ndjson")) as i:
        ordered_input = "".join(sorted(i.readlines())).strip()

    ordered_output = "\n".join(sorted(
        d.serialize(format="hext").split("\n"))).strip()

    assert ordered_output == ordered_input

def test_hext_dataset_linecount():
    d = Dataset()
    assert len(d) == 0
    d.parse(Path(__file__).parent / "test_parser_hext_multigraph.ndjson",
            format="hext",
            publicID=d.default_context.identifier)

    # count all the triples in the Dataset
    total_triples = 0
    for context in d.contexts():
        for triple in context.triples((None, None, None)):
            total_triples += 1
    assert total_triples == 18

    # count the number of serialized Hextuples, should be 22, as per the original file
    lc = len(d.serialize(format="hext").splitlines())
    assert lc == 22

def test_hext_json_representation():
    """Tests to see if every line in the ND-JSON Hextuple result is, in fact, JSON"""
    d = Dataset()
    trig_data = """
        PREFIX ex: <http://example.com/>
        PREFIX owl: <http://www.w3.org/2002/07/owl#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

        ex:g1 {
            ex:s1
                ex:p1 ex:o1 , ex:o2 ;
                ex:p2 [
                    a owl:Thing ;
                    rdf:value "thingy" ;
                ] ;
                ex:p3 "Object 3" , "Object 4 - English"@en ;
                ex:p4 "2021-12-03"^^xsd:date ;
                ex:p5 42 ;
                ex:p6 "42" ;
            .
        }

        ex:g2 {
            ex:s1
                ex:p1 ex:o1 , ex:o2 ;
            .

            ex:s11 ex:p11 ex:o11 , ex:o12 .
        }

        # default graph triples
        ex:s1 ex:p1 ex:o1 , ex:o2 .

        ex:s21 ex:p21 ex:o21 , ex:o22 .
        """
    d.parse(data=trig_data, format="trig")
    out = d.serialize(format="hext")
    for line in out.splitlines():
        j = json.loads(line)
        assert isinstance(j, list)

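# Each serialized Hextuple line is a six-element JSON array laid out as
# [subject, predicate, value, datatype, language, graph], which is why the
# test above can simply json.loads() every line and expect a list.
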
def performUpdate(options):
    endpoint = options['endpoint']
    inputFile = options['inputfile']
    updateCondition = options['updatecondition']
    preprocessupdate = options['preprocessupdate']
    limit = int(options['limit'])
    offset = int(options['offset'])

    inputData = Dataset()
    print("Parsing input data...")
    inputData.parse(inputFile, format='trig')
    print("Found %d named graphs" % len([
        d for d in list(inputData.contexts())
        if d.identifier.startswith("http")
    ]))

    headers = {'Accept': 'text/turtle'}

    # Query the endpoint and determine which graphs are new, changed, or unchanged
    graphs = {'new': [], 'changed': [], 'unchanged': []}
    queryTemplate = """
    CONSTRUCT { ?s ?p ?o } WHERE { GRAPH <%s> { ?s ?p ?o }}
    """
    print("Comparing with named graphs at endpoint %s" % endpoint)
    for context in tqdm([
            d for d in list(inputData.contexts())
            if d.identifier.startswith("http")
    ][offset:offset + limit]):
        r = requests.get(endpoint,
                         headers=headers,
                         params={"query": queryTemplate % context.identifier})
        if r.ok:
            remoteGraph = Graph()
            remoteGraph.parse(data=r.text, format='turtle')
            if not len(remoteGraph):
                graphs['new'].append((context, False))
            elif graphsAreTheSame(context, remoteGraph, preprocessupdate):
                graphs['unchanged'].append((context, remoteGraph))
            else:
                graphs['changed'].append((context, remoteGraph))
        else:
            print(r.text)

    # Output statistics
    print("\nComparison Result:")
    print("%d graph%s %s not exist at the endpoint and will be added" %
          (len(graphs['new']), "" if len(graphs['new']) == 1 else "s",
           "does" if len(graphs['new']) == 1 else "do"))
    print("%d graph%s already exist%s but %s different in the input file" %
          (len(graphs['changed']), "" if len(graphs['changed']) == 1 else "s",
           "s" if len(graphs['changed']) == 1 else "",
           "is" if len(graphs['changed']) == 1 else "are"))
    print("%d graph%s %s identical in both the input file and endpoint" %
          (len(graphs['unchanged']), "" if len(graphs['unchanged']) == 1 else "s",
           "is" if len(graphs['unchanged']) == 1 else "are"))

    # All new graphs should be included in the update
    graphsToUpdate = [d[0] for d in graphs['new']]

    # Only changed graphs whose new version matches the update condition should
    # be updated. If no update condition is set, all changed graphs are updated.
    if updateCondition:
        count = 0
        for graphPair in graphs['changed']:
            for result in graphPair[1].query(updateCondition):
                if result:
                    graphsToUpdate.append(graphPair[0])
                    count += 1
        print(
            "\n%d out of %d graph%s will be overwritten based on the update condition"
            % (count, len(graphs['changed']),
               "" if len(graphs['changed']) == 1 else "s"))
    else:
        graphsToUpdate += [d[0] for d in graphs['changed']]

    # Perform update
    for g in tqdm(graphsToUpdate):
        putGraph(g, endpoint)

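# performUpdate() calls putGraph() and graphsAreTheSame(), which are defined
# elsewhere in the original module. A minimal sketch of what putGraph() could
# look like, assuming the endpoint supports the SPARQL 1.1 Graph Store HTTP
# Protocol (an illustration, not the original implementation):
def putGraph(graph, endpoint):
    # Replace the remote named graph with the local one via HTTP PUT
    r = requests.put(
        endpoint,
        params={"graph": str(graph.identifier)},
        data=graph.serialize(format="turtle").encode("utf-8"),
        headers={"Content-Type": "text/turtle"},
    )
    r.raise_for_status()
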
app = Flask(__name__)
api = Api(app)

# set up a custom formatter to return JSON-LD to browsers by default
custom_formatter = flask_rdf.FormatSelector()
custom_formatter.wildcard_mimetype = 'application/ld+json'
custom_formatter.add_format('application/ld+json', 'json-ld')
custom_decorator = flask_rdf.flask.Decorator(custom_formatter)

ds = Dataset(default_union=True)
with open('./dectectorfinalstate.owl', "r") as f:
    result = ds.parse(f, format="application/rdf+xml")


class HelloWorld(Resource):
    @custom_decorator
    def get(self):
        return ds


api.add_resource(HelloWorld, '/detectorfinalstate')


@app.route("/")
def main():
    # send_file is cached, so for development it is better to use make_response
    # return send_file('templates/index.html')
    return make_response(open('templates/index.html').read())

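# Assumed entry point for running the service locally with Flask's built-in
# development server (not part of the original snippet):
if __name__ == '__main__':
    app.run(debug=True)
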
def test_hext_dataset():
    """Tests context-aware (multigraph) data"""
    d = Dataset()
    trig_data = """
        PREFIX ex: <http://example.com/>
        PREFIX owl: <http://www.w3.org/2002/07/owl#>
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>

        ex:g1 {
            ex:s1
                ex:p1 ex:o1 , ex:o2 ;
                ex:p2 [
                    a owl:Thing ;
                    rdf:value "thingy" ;
                ] ;
                ex:p3 "Object 3" , "Object 4 - English"@en ;
                ex:p4 "2021-12-03"^^xsd:date ;
                ex:p5 42 ;
                ex:p6 "42" ;
            .
        }

        ex:g2 {
            ex:s1
                ex:p1 ex:o1 , ex:o2 ;
            .

            ex:s11 ex:p11 ex:o11 , ex:o12 .
        }

        # default graph triples
        ex:s1 ex:p1 ex:o1 , ex:o2 .

        ex:s21 ex:p21 ex:o21 , ex:o22 .
        """
    d.parse(data=trig_data, format="trig", publicID=d.default_context.identifier)
    out = d.serialize(format="hext")

    # note: can't test for BNodes in the result as they will be different every time
    testing_lines = [
        [False, '["http://example.com/s21", "http://example.com/p21", "http://example.com/o21", "globalId", "", ""]'],
        [False, '["http://example.com/s21", "http://example.com/p21", "http://example.com/o22", "globalId", "", ""]'],
        [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "globalId", "", ""]'],
        [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o1", "globalId", "", ""]'],
        [False, '["http://example.com/s11", "http://example.com/p11", "http://example.com/o12", "globalId", "", "http://example.com/g2"]'],
        [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "globalId", "", "http://example.com/g2"]'],
        [False, '["http://example.com/s11", "http://example.com/p11", "http://example.com/o11", "globalId", "", "http://example.com/g2"]'],
        [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o1", "globalId", "", "http://example.com/g2"]'],
        [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o2", "globalId", "", "http://example.com/g1"]'],
        [False, '["http://example.com/s1", "http://example.com/p2"'],
        [False, '"http://www.w3.org/1999/02/22-rdf-syntax-ns#value", "thingy", "http://www.w3.org/2001/XMLSchema#string", "", "http://example.com/g1"]'],
        [False, '"http://www.w3.org/1999/02/22-rdf-syntax-ns#type", "http://www.w3.org/2002/07/owl#Thing", "globalId", "", "http://example.com/g1"]'],
        [False, '["http://example.com/s1", "http://example.com/p3", "Object 4 - English", "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString", "en", "http://example.com/g1"]'],
        [False, '["http://example.com/s1", "http://example.com/p6", "42", "http://www.w3.org/2001/XMLSchema#string", "", "http://example.com/g1"]'],
        [False, '["http://example.com/s1", "http://example.com/p4", "2021-12-03", "http://www.w3.org/2001/XMLSchema#date", "", "http://example.com/g1"]'],
        [False, '["http://example.com/s1", "http://example.com/p1", "http://example.com/o1", "globalId", "", "http://example.com/g1"]'],
        [False, '["http://example.com/s1", "http://example.com/p5", "42", "http://www.w3.org/2001/XMLSchema#integer", "", "http://example.com/g1"]'],
        [False, '["http://example.com/s1", "http://example.com/p3", "Object 3", "http://www.w3.org/2001/XMLSchema#string", "", "http://example.com/g1"]'],
    ]
    for line in out.splitlines():
        for test in testing_lines:
            if test[1] in line:
                test[0] = True

    assert all([x[0] for x in testing_lines])