Example 1
    def testIter(self):
        """PR 1382: adds __iter__ to Dataset"""
        d = Dataset()
        uri_a = URIRef("https://example.com/a")
        uri_b = URIRef("https://example.com/b")
        uri_c = URIRef("https://example.com/c")
        uri_d = URIRef("https://example.com/d")

        d.add_graph(URIRef("https://example.com/g1"))
        d.add((uri_a, uri_b, uri_c, URIRef("https://example.com/g1")))
        d.add((uri_a, uri_b, uri_c, URIRef("https://example.com/g1")
               ))  # pointless addition: duplicates above

        d.add_graph(URIRef("https://example.com/g2"))
        d.add((uri_a, uri_b, uri_c, URIRef("https://example.com/g2")))
        d.add((uri_a, uri_b, uri_d,
               URIRef("https://example.com/g1")))  # new, uri_d

        # traditional iterator
        i_trad = 0
        for t in d.quads((None, None, None)):
            i_trad += 1

        # new Dataset.__iter__ iterator
        i_new = 0
        for t in d:
            i_new += 1

        self.assertEqual(i_new, i_trad)  # both should be 3
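Here `Dataset.__iter__` (added in PR 1382) yields (s, p, o, g) quads, so iterating the Dataset directly is equivalent to `d.quads((None, None, None, None))`. A minimal sketch, assuming an rdflib release that includes the PR:

    from rdflib import Dataset, URIRef

    d = Dataset()
    d.add((URIRef("https://example.com/a"),
           URIRef("https://example.com/b"),
           URIRef("https://example.com/c"),
           URIRef("https://example.com/g1")))
    for s, p, o, g in d:  # each item is a quad, including the graph name
        print(s, p, o, g)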
Example 2
    def set_member(self, c_id, m_obj):
        if isinstance(m_obj, Model):
            m_obj = [m_obj]
        elif not isinstance(m_obj, list):
            raise ParseError()

        c_ldp_id = self.marmotta.ldp(encoder.encode(c_id))
        collection = self.get_collection(c_id).pop() # 404 if collection not found

        if len({m.id for m in m_obj}) != len(m_obj):
            raise ForbiddenError()
        if not collection.capabilities.membershipIsMutable:
            raise ForbiddenError()
        if collection.capabilities.restrictedToType:
            for m in m_obj:
                if not(hasattr(m,"datatype") and m.datatype in collection.capabilities.restrictedToType):
                    raise ForbiddenError()
        if collection.capabilities.maxLength >= 0:
            size = self.sparql.size(c_ldp_id).bindings.pop().get(Variable('size'))
            if int(size) > collection.capabilities.maxLength - len(m_obj):
                raise ForbiddenError()  # "Operation forbidden. Collection of maximum size {} is full."

        ds = Dataset()
        ldp = ds.graph(identifier=LDP.ns)
        for m in m_obj:
            m_id = self.marmotta.ldp(encoder.encode(c_id)+"/member/"+encoder.encode(m.id))
            member = ds.graph(identifier=m_id)
            member += self.RDA.object_to_graph(member.identifier,m)
            ldp += LDP.add_contains(c_ldp_id+"/member",m_id,False)
        res = self.sparql.insert(ds)
        if res.status_code != 200:
            raise DBError()
        return m_obj
Example 3
    def test_simple(self):
        a = datetime.datetime.now()
        seed = [(URIRef(f"urn:example.com/mock/id{i}"),
                 URIRef(f"urn:example.com/mock/rel{i}"),
                 Literal(f"mock-val{i}"),
                 URIRef(f"urn:example.com/mock/context{j}"))
                for i in range(100) for j in range(100)]

        async def seed_store():
            await self.aiotest.addN(seed)

        g, cg, ds = Graph(), ConjunctiveGraph(), Dataset(default_union=True)
        loop = asyncio.get_event_loop()
        loop.run_until_complete(asyncio.gather(seed_store()))

        b = datetime.datetime.now()
        print("seed time ->", b - a)

        async def f():
            for i in (g, cg, ds):
                await async_fill_graph(i, self.aiotest.statements())

        loop.run_until_complete(asyncio.gather(f()))

        for i in (g, cg, ds):
            print(len(i))

        # print("g", [i for i in g])
        # print("cg", [i for i in cg])
        # print("ds", [(i, g.identifier) for i in g for g in ds.graphs()])

        c = datetime.datetime.now()
        print("graph time ->", c - b)

        print("complete time ->", c - a)
Example 4
 def rdf(self):
     try:
         return self.conf['rdf.graph']
     except KeyError:
         if ALLOW_UNCONNECTED_DATA_USERS:
             return Dataset(default_union=True)
         raise DataUserUnconnected('No rdf.graph')
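Note that the fallback builds a fresh in-memory Dataset on every access, so anything written through it is discarded the next time rdf is read; only a configured rdf.graph persists.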
Example 5
    def setUp(self):
        try:
            self.graph = Dataset(store=self.store)
        except ImportError:
            raise SkipTest("Dependencies for store '%s' not available!" %
                           self.store)
        if self.store == "SQLite":
            _, self.tmppath = mkstemp(prefix='test',
                                      dir='/tmp',
                                      suffix='.sqlite')
        else:
            self.tmppath = mkdtemp()
        self.graph.open(self.tmppath, create=True)
        self.michel = URIRef(u'michel')
        self.tarek = URIRef(u'tarek')
        self.bob = URIRef(u'bob')
        self.likes = URIRef(u'likes')
        self.hates = URIRef(u'hates')
        self.pizza = URIRef(u'pizza')
        self.cheese = URIRef(u'cheese')

        self.c1 = URIRef(u'context-1')
        self.c2 = URIRef(u'context-2')

        # delete the graph for each test!
        self.graph.remove((None, None, None))
Example 6
 def open(self):
     # XXX: If we have a source that's read only, should we need to set the
     # store separately??
     g0 = Dataset('SPARQLUpdateStore', default_union=True)
     g0.open(tuple(self.conf['rdf.store_conf']))
     self.graph = g0
     return self.graph
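For a SPARQLUpdateStore, open() expects a (query_endpoint, update_endpoint) pair, so rdf.store_conf here is presumably a two-element sequence along these lines (the endpoints are assumptions):

    g0 = Dataset('SPARQLUpdateStore', default_union=True)
    g0.open(("http://localhost:3030/ds/sparql",   # query endpoint
             "http://localhost:3030/ds/update"))  # update endpoint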
Example 7
def get_fragment(request, subject, predicate, obj, page, graph):
    fragment = Dataset()
    tpf_url = urlparse(request.build_absolute_uri())
    tpf_url = TPF_URL.format(tpf_url.scheme, tpf_url.netloc, graph)
    licenses = []
    neo_licenses = LicenseModel.nodes.filter(graph__exact=graph)
    if subject and subject.startswith(LICENSE_SUBJECT_PREFIX):
        license_id = subject.split('/')[-1]
        neo_licenses = neo_licenses.filter(hashed_sets__exact=license_id)
    for neo_license in neo_licenses:
        license_object = ObjectFactory.objectLicense(neo_license)
        license_object = license_object.to_json()
        license_object['compatible_licenses'] = []
        for compatible_neo_license in neo_license.followings.all():
            compatible_license = ObjectFactory.objectLicense(
                compatible_neo_license)
            license_object['compatible_licenses'].append(
                compatible_license.hash())
        licenses.append(license_object)
    rdf_licenses = get_rdf(licenses, graph).triples((subject, predicate, obj))
    total_nb_triples = 0
    for s, p, o in rdf_licenses:
        fragment.add((s, p, o))
        total_nb_triples += 1
    last_result = True
    nb_triple_per_page = total_nb_triples
    _frament_fill_meta(subject, predicate, obj, page, graph, fragment,
                       last_result, total_nb_triples, nb_triple_per_page,
                       request, tpf_url)
    return fragment
Example 8
    def setUp(self):
        try:
            self.graph = Dataset(store=self.store)
        except ImportError:
            raise SkipTest("Dependencies for store '%s' not available!" % self.store)
        if self.store == "SQLite":
            _, self.tmppath = mkstemp(prefix="test", dir="/tmp", suffix=".sqlite")
        elif self.store == "SPARQLUpdateStore":
            root = HOST + DB
            self.graph.open((root + "sparql", root + "update"))
        else:
            self.tmppath = mkdtemp()

        if self.store != "SPARQLUpdateStore":
            self.graph.open(self.tmppath, create=True)
        self.michel = URIRef("urn:michel")
        self.tarek = URIRef("urn:tarek")
        self.bob = URIRef("urn:bob")
        self.likes = URIRef("urn:likes")
        self.hates = URIRef("urn:hates")
        self.pizza = URIRef("urn:pizza")
        self.cheese = URIRef("urn:cheese")

        # Use regular URIs because SPARQL endpoints like Fuseki alter short names
        self.c1 = URIRef("urn:context-1")
        self.c2 = URIRef("urn:context-2")

        # delete the graph for each test!
        self.graph.remove((None, None, None))
        for c in self.graph.contexts():
            c.remove((None, None, None))
            assert len(c) == 0
            self.graph.remove_graph(c)
Example 9
def proc_table_access_table(opts: argparse.Namespace) -> int:
    """
    Iterate over the table_access table emitting its entries
    :param opts: function arguments
    :return: Graph
    """
    logging.info("Iterating over table_access table")
    process_parsed_args(opts, FileAwareParser.error)
    queries = QueryTexts(I2B2Tables(opts))
    q = queries.ont_session.query(queries.tables.table_access)
    e: TableAccess
    nelements = 0
    for e in q.all():
        print(f"{e.c_table_cd}", end='')
        if not e.c_table_cd.startswith(
                TABLE_PREFIX) or e.c_table_cd in SKIP_TABLES:
            print(" skipped")
            continue
        g = Dataset()
        nelements = proc_table_access_row(queries, e, g)
        if nelements:
            print(f" {nelements} elements processed")
            dump_as_rdf(g, e.c_table_cd)
            if ONE_TABLE:
                break
    return nelements
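dump_as_rdf is defined elsewhere in that project; a hypothetical stand-in that matches how it is called here would simply serialize the per-table Dataset to a file named after the table code:

    def dump_as_rdf(g: Dataset, table_cd: str) -> None:
        # hypothetical: persist the table's graph as TriG next to the script
        g.serialize(destination=f"{table_cd}.trig", format="trig")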
Example 10
def validateRDF(file):

    center, line = 66, 70
    print(
        F"\n{'':>16}{'-' * line:^{center}}\n{'|':>16}\t{F'VALIDATING RDF FILE {fileSize(file)}':^{center}}|\n{'':>16}{'-' * line:^{center}}\n"
    )

    start = time()
    from pathlib import Path
    size = Path(file).stat().st_size

    try:

        print("\n\t1. Checking the RDF file.")
        start = time()
        Dataset().parse(file, format="trig")
        print(
            F"\n\t\t>>> ✅ The converted file \n\t\t[{file}] \n\t\tis in a valid RDF format! "
            F"\n\n\t\t>>> We therefore can highly ascertain that the original file "
            F"\n\t\t[{file}]\n\t\tis in a valid RDF format.")
        print(
            F"""\n\t2. {'Parsing time':.<50} {str(timedelta(seconds=time() - start))}"""
        )

    except Exception as err:
        print("\t\t\t>>> ❌ Invalid RDF")
        print(F"\t\t\t>>> [DETAIL ERROR FROM validate_RDF] {err}")

    finally:
        print(
            F"\n\t{'3. Done in':.<53} {str(timedelta(seconds=time() - start))}"
        )
Example 11
    def __init__(self, address=config.BRAIN_URL_LOCAL):
        """
        Interact with Triple store

        Parameters
        ----------
        address: str
            IP address and port of the Triple store
        """

        self.address = address
        self.namespaces = {}
        self.ontology_paths = {}
        self.format = 'trig'
        self.dataset = Dataset()
        self.query_prefixes = read_query('prefixes')

        self._define_namespaces()
        self._get_ontology_path()
        self._bind_namespaces()

        self.my_uri = None

        self._log = logger.getChild(self.__class__.__name__)
        self._log.debug("Booted")

        self._brain_log = config.BRAIN_LOG_ROOT.format(
            datetime.now().strftime('%Y-%m-%d-%H-%M'))

        # Launch first query
        self.count_statements()
Example 12
def open_db(path=DEFAULT_DATABASE_PATH):
    my_graph = Dataset('Sleepycat')
    store_state = my_graph.open(path, create=False)

    assert store_state != NO_STORE, 'Store does not exist'
    assert store_state == VALID_STORE, 'The underlying store is corrupt'

    return my_graph
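The Sleepycat store needs the berkeleydb (formerly bsddb3) Python bindings installed; a usage sketch with an assumed path:

    g = open_db("/tmp/rdf-db")  # path is an assumption
    print(len(g))
    g.close()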
Example 13
 def open(self):
     import logging
     # XXX: If we have a source that's read only, should we need to set the
     # store separately??
     g0 = Dataset('Sleepycat', default_union=True)
     self.conf['rdf.store'] = 'Sleepycat'
     g0.open(self.conf['rdf.store_conf'], create=True)
     self.graph = g0
     logging.debug("Opened SleepyCatSource")
Example 14
def get_ds0():
    update_endpoint = 'http://localhost:8890/sparql-auth'
    # query_endpoint = 'http://localhost:8890/sparql'
    store = SPARQLUpdateStore(update_endpoint,
                              update_endpoint,
                              autocommit=True)
    store.setHTTPAuth(DIGEST)
    store.setCredentials(user='******', passwd='admin')
    return Dataset(store)
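A quick usage sketch (the endpoint and credentials above, and the graph name here, are assumptions):

    from rdflib import URIRef, Literal

    ds = get_ds0()
    g = ds.graph(URIRef("urn:example:g1"))
    g.add((URIRef("urn:a"), URIRef("urn:b"), Literal("c")))  # autocommit sends the update immediately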
Example 15
 def test_ldp_access_with_ldp(self):
     with app.app_context():
         # todo: post collection to sparql, retrieve via LDP and compare
         c_obj = self.mock.collection()
         self.db.set_collection(c_obj)
         g = Dataset().parse(self.db.marmotta.ldp(encoder.encode(c_obj.id)),
                             format="n3")
         r_obj = self.db.RDA.graph_to_object(g).pop()
         self.assertDictEqual(c_obj.dict(), r_obj.dict())
Example 16
    def test_load_from_file(self):

        ds = Dataset()
        ds.parse("geoStatements.trig", format="trig")

        async def f():
            await self.aiotest.addN(
                ds.quads((None, None, None, None)))

        print("ds loaded")
        self.loop.run_until_complete(asyncio.gather(f()))
Example 17
def load_statements():

    a = datetime.datetime.now()
    logger.info(f"start loading ds at: {a}")
    ds = Dataset()
    ds.parse(STATEMENTS, format=TYPE)
    b = datetime.datetime.now()
    logger.info(f"finished loading ds at: {b}")
    logger.info(f"ds loaded: {ds}")
    logger.info(f"ds loaded in {b - a}")
    return ds
Example 18
 def set_service(self, s_obj):
     ds = Dataset()
     service = ds.graph(identifier=self.marmotta.ldp("service"))
     service += self.RDA.object_to_graph(service.identifier, s_obj)
     ldp = ds.graph(identifier=LDP.ns)
     ldp += LDP.add_contains(self.marmotta.ldp(), service.identifier, False)
     response = self.sparql.insert(ds)
     if response.status_code == 200:
         return s_obj
     else:
         raise DBError()
Example 19
 def __init__(self,
              namespace,
              showObjs=True,
              showClasses=False,
              showNamespace=True):
     self.ds = Dataset()
     self.d = UmlPygraphVizDiagram()
     self.show_objs = showObjs
     self.show_classes = showClasses
     self.namespace = namespace
     self.show_namespaces = showNamespace
     self.add_namespaces(self.namespace)
Example 20
def _get_single_graph_from_trig(trig_file: Optional[str] = None,
                                data: Optional[str] = None) -> rdflib.Graph:
    if trig_file is None and data is None:
        raise RuntimeError("One of trig_file OR data *must* be specified.")

    dataset = Dataset()
    dataset.parse(format="trig", source=trig_file, data=data)
    graphs_with_triples = [g for g in dataset.graphs() if len(g) > 0]
    assert (
        len(graphs_with_triples) == 1
    ), f"Found {len(graphs_with_triples)} non-trivial graphs in {trig_file}. Expected one."
    return graphs_with_triples[0]
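A usage sketch with inline data (the TriG snippet is an assumption):

    trig = '''
    @prefix ex: <http://example.com/> .
    ex:g1 { ex:s ex:p ex:o . }
    '''
    g = _get_single_graph_from_trig(data=trig)
    assert len(g) == 1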
Example 21
    def __init__(self, graph_identifier, dataset, variables, headers):
        self._headers = headers
        self._variables = variables

        # TODO: Family is now superseded by a full dataset description in the form of QBer

        # if 'family' in config:
        #     self._family = config['family']
        #     try:
        #         family_def = getattr(mappings, config['family'])
        #         self._nocode = family_def['nocode']
        #         self._integer = family_def['integer']
        #         self._mappings = family_def['mappings']
        #     except:
        #         logger.warning('No family definition found')
        #         self._nocode = []
        #         self._integer = []
        #         self._mappings = {}
        # else:
        #     self._family = None

        # TODO: number_observations is now superseded by a full dataset description in the form of QBer

        # if 'number_observations' in config:
        #     self._number_observations = config['number_observations']
        # else:
        #     self._number_observations = None

        # TODO: stop is now superseded by a full dataset description in the form of QBer
        # self._stop = config['stop']

        # TODO: Now setting these as simple defaults
        self._family = None
        self._number_observations = True
        self._stop = None

        # TODO: Think of what to do here...
        if self._family is None:
            self._VOCAB_URI_PATTERN = "{0}{{}}/{{}}".format(self._VOCAB_BASE)
            self._RESOURCE_URI_PATTERN = "{0}{{}}/{{}}".format(
                self._RESOURCE_BASE)
        else:
            self._VOCAB_URI_PATTERN = "{0}{1}/{{}}/{{}}".format(
                self._VOCAB_BASE, self._family)
            self._RESOURCE_URI_PATTERN = "{0}{1}/{{}}/{{}}".format(
                self._RESOURCE_BASE, self._family)

        self.ds = apply_default_namespaces(Dataset())
        self.g = self.ds.graph(URIRef(graph_identifier))

        self._dataset_name = dataset['name']
        self._dataset_uri = URIRef(dataset['uri'])
Example 22
    def __init__(self, config):
        """Initialize the graph store and a layout.

        NOTE: `rdflib.Dataset` requires an RDF 1.1-compliant store with support
        for the Graph Store HTTP Protocol
        (https://www.w3.org/TR/sparql11-http-rdf-update/). Blazegraph supports
        this only in the (currently unreleased) 2.2 branch. It works with Jena,
        which is currently the reference implementation.
        """
        self.config = config
        self.store = plugin.get('Lmdb', Store)(config['location'])
        self.ds = Dataset(self.store, default_union=True)
        self.ds.namespace_manager = nsm
Example 23
    def __init__(self):
        # type: () -> RdfBuilder

        self.ontology_paths = {}
        self.namespaces = {}
        self.dataset = Dataset()

        self._log = logger.getChild(self.__class__.__name__)
        self._log.debug("Booted")

        self._define_namespaces()
        self._bind_namespaces()
        self.define_named_graphs()
        self.load_ontology_integration()
Example 24
    def __init__(self, identifier, columns, schema, metadata_graph, encoding, output_format):
        self.ds = Dataset()
        # self.ds = apply_default_namespaces(Dataset())
        self.g = self.ds.graph(URIRef(identifier))

        self.columns = columns
        self.schema = schema
        self.metadata_graph = metadata_graph
        self.encoding = encoding
        self.output_format = output_format

        self.templates = {}

        self.aboutURLSchema = self.schema.csvw_aboutUrl
Example 25
def create_db(path=DATABASE_FOLDER, db_name='loadstar'):

    path = os.path.join(path, db_name)
    my_graph = Dataset('Sleepycat')
    store_state = my_graph.open(path, create=False)
    # the assert guarantees store_state == NO_STORE, so no other branch is reachable
    assert store_state == NO_STORE, 'There is a database in this path already.'
    my_graph.open(path, create=True)
    my_graph.commit()

    return my_graph
Example 26
def test_roundtrip():
    d = Dataset()
    d.parse(Path(__file__).parent / "test_parser_hext_multigraph.ndjson",
            format="hext",
            publicID=d.default_context.identifier)
    d.default_union = True
    with open(str(
            Path(__file__).parent /
            "test_parser_hext_multigraph.ndjson")) as i:
        ordered_input = "".join(sorted(i.readlines())).strip()

    ordered_output = "\n".join(sorted(
        d.serialize(format="hext").split("\n"))).strip()

    assert ordered_output == ordered_input
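The sort-before-compare step is there because Hextuples output follows quad iteration order, which is not guaranteed to match the input file; the same canonicalization works for any line-oriented format:

    def canonical(text: str) -> str:
        # illustrative helper: order-insensitive comparison of line-based output
        return "\n".join(sorted(text.splitlines())).strip()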
Example 27
def fetch(endpoint, timeout=0):
    store = SPARQLStore(endpoint)
    ds = Dataset(store)
    for rs_name, rs_uri in get_rule_sets(endpoint + rs_table_page):
        # TODO: maybe do not discard but try to merge? no.
        if rs_uri not in rule_sets:
            # TODO: handle possible query error?
            gr = ds.get_context(rs_uri)
            try:
                rs_triples = gr.query(q)
                yield rs_name, rs_uri, rs_triples
                time.sleep(timeout)
            except Exception:
                print('error with', rs_uri)
                other_rs.append(rs_uri)
Example 28
def test_small_string():
    s = """
        ["http://example.com/s01", "http://example.com/a", "http://example.com/Type1", "globalId", "", ""]
        ["http://example.com/s01", "http://example.com/label", "This is a Label", "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString", "en", ""]
        ["http://example.com/s01", "http://example.com/comment", "This is a comment", "http://www.w3.org/2001/XMLSchema#string", "", ""]
        ["http://example.com/s01", "http://example.com/creationDate", "2021-12-01", "http://www.w3.org/2001/XMLSchema#date", "", ""]
        ["http://example.com/s01", "http://example.com/creationTime", "2021-12-01T12:13:00", "http://www.w3.org/2001/XMLSchema#dateTime", "", ""]
        ["http://example.com/s01", "http://example.com/age", "42", "http://www.w3.org/2001/XMLSchema#integer", "", ""]
        ["http://example.com/s01", "http://example.com/trueFalse", "false", ",http://www.w3.org/2001/XMLSchema#boolean", "", ""]
        ["http://example.com/s01", "http://example.com/op1", "http://example.com/o1", "globalId", "", ""]
        ["http://example.com/s01", "http://example.com/op1", "http://example.com/o2", "globalId", "", ""]
        ["http://example.com/s01", "http://example.com/op2", "http://example.com/o3", "globalId", "", ""]
        """
    d = Dataset().parse(data=s, format="hext")
    assert len(d) == 10
Example 29
def test_hext_dataset_linecount():
    d = Dataset()
    assert len(d) == 0
    d.parse(Path(__file__).parent / "test_parser_hext_multigraph.ndjson",
            format="hext",
            publicID=d.default_context.identifier)
    total_triples = 0
    # count all the triples in the Dataset
    for context in d.contexts():
        for triple in context.triples((None, None, None)):
            total_triples += 1
    assert total_triples == 18

    # count the number of serialized Hextuples, should be 22, as per the original file
    lc = len(d.serialize(format="hext").splitlines())
    assert lc == 22
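The nested context loop can be collapsed into a single pass over quads, which visits every triple of every context once:

    total_triples = sum(1 for _ in d.quads((None, None, None, None)))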
Example 30
    def __init__(self, address=config.BRAIN_URL_LOCAL):
        """
        Interact with Triple store

        Parameters
        ----------
        address: str
            IP address and port of the Triple store
        """

        self.address = address
        self.namespaces = {}
        self.ontology_paths = {}
        self.format = 'trig'
        self.dataset = Dataset()
        self.query_prefixes = """
                    prefix gaf: <http://groundedannotationframework.org/gaf#> 
                    prefix grasp: <http://groundedannotationframework.org/grasp#> 
                    prefix leolaniInputs: <http://cltl.nl/leolani/inputs/>
                    prefix leolaniFriends: <http://cltl.nl/leolani/friends/> 
                    prefix leolaniTalk: <http://cltl.nl/leolani/talk/> 
                    prefix leolaniTime: <http://cltl.nl/leolani/time/> 
                    prefix leolaniWorld: <http://cltl.nl/leolani/world/> 
                    prefix n2mu: <http://cltl.nl/leolani/n2mu/> 
                    prefix ns1: <urn:x-rdflib:> 
                    prefix owl: <http://www.w3.org/2002/07/owl#> 
                    prefix prov: <http://www.w3.org/ns/prov#> 
                    prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 
                    prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
                    prefix sem: <http://semanticweb.cs.vu.nl/2009/11/sem/> 
                    prefix skos: <http://www.w3.org/2004/02/skos/core#> 
                    prefix time: <http://www.w3.org/TR/owl-time/#> 
                    prefix xml: <http://www.w3.org/XML/1998/namespace> 
                    prefix xml1: <https://www.w3.org/TR/xmlschema-2/#> 
                    prefix xsd: <http://www.w3.org/2001/XMLSchema#>
                    """

        self._define_namespaces()
        self._get_ontology_path()
        self._bind_namespaces()

        self.my_uri = None

        self._log = logger.getChild(self.__class__.__name__)
        self._log.debug("Booted")