def testIter(self):
    """PR 1382: adds __iter__ to Dataset"""
    d = Dataset()
    uri_a = URIRef("https://example.com/a")
    uri_b = URIRef("https://example.com/b")
    uri_c = URIRef("https://example.com/c")
    uri_d = URIRef("https://example.com/d")

    d.add_graph(URIRef("https://example.com/g1"))
    d.add((uri_a, uri_b, uri_c, URIRef("https://example.com/g1")))
    # pointless addition: duplicates above
    d.add((uri_a, uri_b, uri_c, URIRef("https://example.com/g1")))

    d.add_graph(URIRef("https://example.com/g2"))
    d.add((uri_a, uri_b, uri_c, URIRef("https://example.com/g2")))
    d.add((uri_a, uri_b, uri_d, URIRef("https://example.com/g1")))  # new, uri_d

    # traditional iterator
    i_trad = 0
    for t in d.quads((None, None, None)):
        i_trad += 1

    # new Dataset.__iter__ iterator
    i_new = 0
    for t in d:
        i_new += 1

    self.assertEqual(i_new, i_trad)  # both should be 3

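# A minimal standalone sketch (not part of the test above) of what Dataset.__iter__
# yields: (s, p, o, graph-identifier) quads across all named graphs. Assumes an
# rdflib release that includes PR 1382; the IRIs below are placeholders.
from rdflib import Dataset, URIRef

_d = Dataset()
_g1 = URIRef("https://example.com/g1")
_d.add((URIRef("https://example.com/a"),
        URIRef("https://example.com/b"),
        URIRef("https://example.com/c"),
        _g1))
for s, p, o, g in _d:  # each item is a quad, not a bare triple
    assert g == _g1
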
def set_member(self, c_id, m_obj):
    if isinstance(m_obj, Model):
        m_obj = [m_obj]
    elif not isinstance(m_obj, list):
        raise ParseError()
    c_ldp_id = self.marmotta.ldp(encoder.encode(c_id))
    collection = self.get_collection(c_id).pop()  # 404 if collection not found
    # note: use != rather than "is not" -- identity comparison of ints is a bug
    if len({m.id for m in m_obj}) != len(m_obj):
        raise ForbiddenError()
    if not collection.capabilities.membershipIsMutable:
        raise ForbiddenError()
    if collection.capabilities.restrictedToType:
        for m in m_obj:
            if not (hasattr(m, "datatype") and m.datatype in collection.capabilities.restrictedToType):
                raise ForbiddenError()
    if collection.capabilities.maxLength >= 0:
        size = self.sparql.size(c_ldp_id).bindings.pop().get(Variable('size'))
        if int(size) > collection.capabilities.maxLength - len(m_obj):
            # "Operation forbidden. Collection of maximum size {} is full."
            raise ForbiddenError()
    ds = Dataset()
    ldp = ds.graph(identifier=LDP.ns)
    for m in m_obj:
        m_id = self.marmotta.ldp(encoder.encode(c_id) + "/member/" + encoder.encode(m.id))
        member = ds.graph(identifier=m_id)
        member += self.RDA.object_to_graph(member.identifier, m)
        ldp += LDP.add_contains(c_ldp_id + "/member", m_id, False)
    res = self.sparql.insert(ds)
    if res.status_code != 200:
        raise DBError()
    return m_obj

def test_simple(self):
    a = datetime.datetime.now()
    seed = [(URIRef(f"urn:example.com/mock/id{i}"),
             URIRef(f"urn:example.com/mock/rel{i}"),
             Literal(f"mock-val{i}"),
             URIRef(f"urn:example.com/mock/context{j}"))
            for i in range(100) for j in range(100)]

    async def seed_store():
        await self.aiotest.addN(seed)

    g, cg, ds = Graph(), ConjunctiveGraph(), Dataset(default_union=True)
    loop = asyncio.get_event_loop()
    loop.run_until_complete(asyncio.gather(seed_store()))
    b = datetime.datetime.now()
    print("seed time ->", b - a)

    async def f():
        for i in (g, cg, ds):
            await async_fill_graph(i, self.aiotest.statements())

    loop.run_until_complete(asyncio.gather(f()))
    for i in (g, cg, ds):
        print(len(i))
    # print("g", [i for i in g])
    # print("cg", [i for i in cg])
    # print("ds", [(i, g.identifier) for i in g for g in ds.graphs()])
    c = datetime.datetime.now()
    print("graph time ->", c - b)
    print("complete time ->", c - a)

def rdf(self):
    try:
        return self.conf['rdf.graph']
    except KeyError:
        if ALLOW_UNCONNECTED_DATA_USERS:
            return Dataset(default_union=True)
        raise DataUserUnconnected('No rdf.graph')

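# Hedged sketch of what default_union=True means for the fallback Dataset above:
# triples placed in any named graph are also visible when reading the dataset as a
# whole. The IRIs below are illustrative only.
from rdflib import Dataset, URIRef

_ds = Dataset(default_union=True)
_ds.graph(URIRef("urn:example:g")).add(
    (URIRef("urn:example:s"), URIRef("urn:example:p"), URIRef("urn:example:o")))
# with default_union=False this union read would yield nothing
assert len(list(_ds.triples((None, None, None)))) == 1
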
def setUp(self):
    try:
        self.graph = Dataset(store=self.store)
    except ImportError:
        raise SkipTest("Dependencies for store '%s' not available!" % self.store)
    if self.store == "SQLite":
        _, self.tmppath = mkstemp(prefix='test', dir='/tmp', suffix='.sqlite')
    else:
        self.tmppath = mkdtemp()
    self.graph.open(self.tmppath, create=True)

    self.michel = URIRef('michel')
    self.tarek = URIRef('tarek')
    self.bob = URIRef('bob')
    self.likes = URIRef('likes')
    self.hates = URIRef('hates')
    self.pizza = URIRef('pizza')
    self.cheese = URIRef('cheese')

    self.c1 = URIRef('context-1')
    self.c2 = URIRef('context-2')

    # delete the graph for each test!
    self.graph.remove((None, None, None))

def open(self):
    # XXX: If we have a source that's read only, should we need to set the
    # store separately??
    g0 = Dataset('SPARQLUpdateStore', default_union=True)
    g0.open(tuple(self.conf['rdf.store_conf']))
    self.graph = g0
    return self.graph

def get_fragment(request, subject, predicate, obj, page, graph):
    fragment = Dataset()
    tpf_url = urlparse(request.build_absolute_uri())
    tpf_url = TPF_URL.format(tpf_url.scheme, tpf_url.netloc, graph)
    licenses = []
    neo_licenses = LicenseModel.nodes.filter(graph__exact=graph)
    if subject and subject.startswith(LICENSE_SUBJECT_PREFIX):
        license_id = subject.split('/')[-1]
        # filter() returns a new queryset; reassign so the filter takes effect
        neo_licenses = neo_licenses.filter(hashed_sets__exact=license_id)
    for neo_license in neo_licenses:
        license_object = ObjectFactory.objectLicense(neo_license)
        license_object = license_object.to_json()
        license_object['compatible_licenses'] = []
        for compatible_neo_license in neo_license.followings.all():
            compatible_license = ObjectFactory.objectLicense(compatible_neo_license)
            license_object['compatible_licenses'].append(compatible_license.hash())
        licenses.append(license_object)

    rdf_licenses = get_rdf(licenses, graph).triples((subject, predicate, obj))
    total_nb_triples = 0
    for s, p, o in rdf_licenses:
        fragment.add((s, p, o))
        total_nb_triples += 1
    last_result = True
    nb_triple_per_page = total_nb_triples
    _frament_fill_meta(subject, predicate, obj, page, graph, fragment,
                       last_result, total_nb_triples, nb_triple_per_page,
                       request, tpf_url)
    return fragment

def setUp(self):
    try:
        self.graph = Dataset(store=self.store)
    except ImportError:
        raise SkipTest("Dependencies for store '%s' not available!" % self.store)
    if self.store == "SQLite":
        _, self.tmppath = mkstemp(prefix="test", dir="/tmp", suffix=".sqlite")
    elif self.store == "SPARQLUpdateStore":
        root = HOST + DB
        self.graph.open((root + "sparql", root + "update"))
    else:
        self.tmppath = mkdtemp()

    if self.store != "SPARQLUpdateStore":
        self.graph.open(self.tmppath, create=True)

    self.michel = URIRef("urn:michel")
    self.tarek = URIRef("urn:tarek")
    self.bob = URIRef("urn:bob")
    self.likes = URIRef("urn:likes")
    self.hates = URIRef("urn:hates")
    self.pizza = URIRef("urn:pizza")
    self.cheese = URIRef("urn:cheese")

    # Use regular URIs because SPARQL endpoints like Fuseki alter short names
    self.c1 = URIRef("urn:context-1")
    self.c2 = URIRef("urn:context-2")

    # delete the graph for each test!
    self.graph.remove((None, None, None))
    for c in self.graph.contexts():
        c.remove((None, None, None))
        assert len(c) == 0
        self.graph.remove_graph(c)

def proc_table_access_table(opts: argparse.Namespace) -> int:
    """
    Iterate over the table_access table emitting its entries

    :param opts: function arguments
    :return: number of elements processed for the last emitted table
    """
    logging.info("Iterating over table_access table")
    process_parsed_args(opts, FileAwareParser.error)
    queries = QueryTexts(I2B2Tables(opts))
    q = queries.ont_session.query(queries.tables.table_access)
    e: TableAccess
    for e in q.all():
        print(f"{e.c_table_cd}", end='')
        if not e.c_table_cd.startswith(TABLE_PREFIX) or e.c_table_cd in SKIP_TABLES:
            print(" skipped")
            continue
        g = Dataset()
        nelements = proc_table_access_row(queries, e, g)
        if nelements:
            print(f" {nelements} elements processed")
            dump_as_rdf(g, e.c_table_cd)
        if ONE_TABLE:
            break
    else:
        # loop completed without a break (or was empty): nothing to report
        nelements = 0
    return nelements

def validateRDF(file):
    center, line = 66, 70
    print(
        F"\n{'':>16}{'-' * line:^{center}}\n"
        F"{'|':>16}\t{F'VALIDATING RDF FILE {fileSize(file)}':^{center}}|\n"
        F"{'':>16}{'-' * line:^{center}}\n")

    start = time()
    from pathlib import Path
    size = Path(file).stat().st_size

    try:
        print("\n\t1. Checking the RDF file.")
        start = time()
        Dataset().parse(file, format="trig")
        print(
            F"\n\t\t>>> ✅ The converted file \n\t\t[{file}] \n\t\tis in a valid RDF format! "
            F"\n\n\t\t>>> We can therefore be highly confident that the original file "
            F"\n\t\t[{file}]\n\t\tis in a valid RDF format.")
        print(
            "" if start is None else
            F"""\n\t2. {'Parsing time':.<50} {str(timedelta(seconds=time() - start))}""")

    except Exception as err:
        print("\t\t\t>>> ❌ Invalid RDF")
        print(F"\t\t\t>>> [DETAIL ERROR FROM validate_RDF] {err}")

    finally:
        print(F"\n\t{'3. Done in':.<53} {str(timedelta(seconds=time() - start))}")

def __init__(self, address=config.BRAIN_URL_LOCAL):
    """
    Interact with Triple store

    Parameters
    ----------
    address: str
        IP address and port of the Triple store
    """
    self.address = address
    self.namespaces = {}
    self.ontology_paths = {}
    self.format = 'trig'
    self.dataset = Dataset()
    self.query_prefixes = read_query('prefixes')

    self._define_namespaces()
    self._get_ontology_path()
    self._bind_namespaces()

    self.my_uri = None

    self._log = logger.getChild(self.__class__.__name__)
    self._log.debug("Booted")

    self._brain_log = config.BRAIN_LOG_ROOT.format(
        datetime.now().strftime('%Y-%m-%d-%H-%M'))

    # Launch first query
    self.count_statements()

def open_db(path=DEFAULT_DATABASE_PATH):
    my_graph = Dataset('Sleepycat')
    store_state = my_graph.open(path, create=False)
    assert store_state != NO_STORE, 'Store does not exist'
    assert store_state == VALID_STORE, 'The underlying store is corrupt'
    return my_graph

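# For context on the asserts above, a hedged sketch: NO_STORE, VALID_STORE and
# CORRUPTED_STORE are constants from rdflib.store returned by open(). The
# open-or-create helper below is hypothetical, not from the original module; the
# Sleepycat plugin also requires the Berkeley DB Python bindings to be installed.
from rdflib import Dataset
from rdflib.store import NO_STORE

def open_or_create(path):
    ds = Dataset('Sleepycat')
    if ds.open(path, create=False) == NO_STORE:
        ds.open(path, create=True)  # first run: create the store on disk
    return ds
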
def open(self):
    import logging
    # XXX: If we have a source that's read only, should we need to set the
    # store separately??
    g0 = Dataset('Sleepycat', default_union=True)
    self.conf['rdf.store'] = 'Sleepycat'
    g0.open(self.conf['rdf.store_conf'], create=True)
    self.graph = g0
    logging.debug("Opened SleepyCatSource")

def get_ds0():
    update_endpoint = 'http://localhost:8890/sparql-auth'
    # query_endpoint = 'http://localhost:8890/sparql'
    store = SPARQLUpdateStore(update_endpoint, update_endpoint, autocommit=True)
    store.setHTTPAuth(DIGEST)
    store.setCredentials(user='******', passwd='admin')
    return Dataset(store)

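# Hedged usage sketch for the Dataset returned by get_ds0(). It only runs against a
# live SPARQL 1.1 endpoint at the URL above (port 8890 suggests Virtuoso, but that
# is an assumption); the graph IRI and triple are placeholders. With
# autocommit=True, each add() is sent to the endpoint immediately.
from rdflib import URIRef, Literal

ds0 = get_ds0()
g = ds0.graph(URIRef("http://example.org/graph/demo"))
g.add((URIRef("http://example.org/s"), URIRef("http://example.org/p"), Literal("v")))
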
def test_ldp_access_with_ldp(self):
    with app.app_context():
        # todo: post collection to sparql, retrieve via LDP and compare
        c_obj = self.mock.collection()
        self.db.set_collection(c_obj)
        g = Dataset().parse(self.db.marmotta.ldp(encoder.encode(c_obj.id)), format="n3")
        r_obj = self.db.RDA.graph_to_object(g).pop()
        self.assertDictEqual(c_obj.dict(), r_obj.dict())

def test_load_from_file(self):
    ds = Dataset()
    ds.parse("geoStatements.trig", format="trig")

    async def f():
        # quads() already returns a generator, so no wrapping is needed
        await self.aiotest.addN(ds.quads((None, None, None, None)))
        print("ds loaded")

    self.loop.run_until_complete(asyncio.gather(f()))

def load_statements():
    a = datetime.datetime.now()
    logger.info(f"start loading ds at: {a}")
    ds = Dataset()
    ds.parse(STATEMENTS, format=TYPE)
    b = datetime.datetime.now()
    logger.info(f"finished loading ds at: {b}")
    logger.info(f"ds loaded: {ds}")
    logger.info(f"ds loaded in {b - a}")
    return ds

def set_service(self, s_obj):
    ds = Dataset()
    service = ds.graph(identifier=self.marmotta.ldp("service"))
    service += self.RDA.object_to_graph(service.identifier, s_obj)
    ldp = ds.graph(identifier=LDP.ns)
    ldp += LDP.add_contains(self.marmotta.ldp(), service.identifier, False)
    response = self.sparql.insert(ds)
    # compare with ==, not "is": identity checks on ints are unreliable
    if response.status_code == 200:
        return s_obj
    else:
        raise DBError()

def __init__(self, namespace, showObjs=True, showClasses=False, showNamespace=True):
    self.ds = Dataset()
    self.d = UmlPygraphVizDiagram()
    self.show_objs = showObjs
    self.show_classes = showClasses
    self.namespace = namespace
    self.show_namespaces = showNamespace
    self.add_namespaces(self.namespace)

def _get_single_graph_from_trig(trig_file: Optional[str] = None,
                                data: Optional[str] = None) -> rdflib.Graph:
    if trig_file is None and data is None:
        raise RuntimeError("One of trig_file OR data *must* be specified.")
    dataset = Dataset()
    dataset.parse(format="trig", source=trig_file, data=data)
    graphs_with_triples = [g for g in dataset.graphs() if len(g) > 0]
    assert (
        len(graphs_with_triples) == 1
    ), f"Found {len(graphs_with_triples)} non-trivial graphs in {trig_file}. Expected one."
    return graphs_with_triples[0]

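# Hedged usage sketch for the helper above, exercising the data= branch with an
# inline TriG document; the ex: IRIs are made up for illustration.
_trig = """
@prefix ex: <http://example.org/> .
ex:g { ex:s ex:p ex:o . }
"""
assert len(_get_single_graph_from_trig(data=_trig)) == 1
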
def __init__(self, graph_identifier, dataset, variables, headers):
    self._headers = headers
    self._variables = variables

    # TODO: Family is now superseded by a full dataset description in the form of QBer
    # if 'family' in config:
    #     self._family = config['family']
    #     try:
    #         family_def = getattr(mappings, config['family'])
    #         self._nocode = family_def['nocode']
    #         self._integer = family_def['integer']
    #         self._mappings = family_def['mappings']
    #     except:
    #         logger.warning('No family definition found')
    #         self._nocode = []
    #         self._integer = []
    #         self._mappings = {}
    # else:
    #     self._family = None

    # TODO: number_observations is now superseded by a full dataset description in the form of QBer
    # if 'number_observations' in config:
    #     self._number_observations = config['number_observations']
    # else:
    #     self._number_observations = None

    # TODO: stop is now superseded by a full dataset description in the form of QBer
    # self._stop = config['stop']

    # TODO: Now setting these as simple defaults
    self._family = None
    self._number_observations = True
    self._stop = None

    # TODO: Think of what to do here...
    if self._family is None:
        self._VOCAB_URI_PATTERN = "{0}{{}}/{{}}".format(self._VOCAB_BASE)
        self._RESOURCE_URI_PATTERN = "{0}{{}}/{{}}".format(self._RESOURCE_BASE)
    else:
        self._VOCAB_URI_PATTERN = "{0}{1}/{{}}/{{}}".format(self._VOCAB_BASE, self._family)
        self._RESOURCE_URI_PATTERN = "{0}{1}/{{}}/{{}}".format(self._RESOURCE_BASE, self._family)

    self.ds = apply_default_namespaces(Dataset())
    self.g = self.ds.graph(URIRef(graph_identifier))

    self._dataset_name = dataset['name']
    self._dataset_uri = URIRef(dataset['uri'])

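# Hedged sketch of the ds.graph(...) pattern this builder (and several snippets
# above) rely on: Dataset.graph(identifier) returns, creating if necessary, a named
# graph bound to the dataset's store, so triples added through self.g land in that
# graph. The IRIs and literal below are placeholders.
from rdflib import Dataset, URIRef, Literal

_ds = Dataset()
_g = _ds.graph(URIRef("http://example.org/graph/demo"))
_g.add((URIRef("http://example.org/s"), URIRef("http://example.org/p"), Literal("v")))
assert _g.identifier == URIRef("http://example.org/graph/demo")
assert len(_g) == 1
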
def __init__(self, config):
    """Initialize the graph store and a layout.

    NOTE: `rdflib.Dataset` requires an RDF 1.1 compliant store with support
    for the Graph Store HTTP protocol
    (https://www.w3.org/TR/sparql11-http-rdf-update/). Blazegraph supports
    this only in the (currently unreleased) 2.2 branch. It works with Jena,
    which is currently the reference implementation.
    """
    self.config = config
    self.store = plugin.get('Lmdb', Store)(config['location'])
    self.ds = Dataset(self.store, default_union=True)
    self.ds.namespace_manager = nsm

def __init__(self):
    # type: () -> RdfBuilder
    self.ontology_paths = {}
    self.namespaces = {}
    self.dataset = Dataset()

    self._log = logger.getChild(self.__class__.__name__)
    self._log.debug("Booted")

    self._define_namespaces()
    self._bind_namespaces()
    self.define_named_graphs()
    self.load_ontology_integration()

def __init__(self, identifier, columns, schema, metadata_graph, encoding, output_format):
    self.ds = Dataset()
    # self.ds = apply_default_namespaces(Dataset())
    self.g = self.ds.graph(URIRef(identifier))

    self.columns = columns
    self.schema = schema
    self.metadata_graph = metadata_graph
    self.encoding = encoding
    self.output_format = output_format

    self.templates = {}
    self.aboutURLSchema = self.schema.csvw_aboutUrl

def create_db(path=DATABASE_FOLDER, db_name='loadstar'):
    path = os.path.join(path, db_name)
    my_graph = Dataset('Sleepycat')
    # open with create=False first to check whether a store already exists;
    # the assert guarantees store_state == NO_STORE, so no other branch is needed
    store_state = my_graph.open(path, create=False)
    assert store_state == NO_STORE, 'There is a database in this path already.'
    my_graph.open(path, create=True)
    my_graph.commit()
    return my_graph

def test_roundtrip():
    d = Dataset()
    d.parse(Path(__file__).parent / "test_parser_hext_multigraph.ndjson",
            format="hext", publicID=d.default_context.identifier)
    d.default_union = True

    with open(str(Path(__file__).parent / "test_parser_hext_multigraph.ndjson")) as i:
        ordered_input = "".join(sorted(i.readlines())).strip()

    ordered_output = "\n".join(sorted(d.serialize(format="hext").split("\n"))).strip()

    assert ordered_output == ordered_input

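# Hedged aside on why comparing sorted lines works above: the "hext" serializer
# emits exactly one JSON array (one Hextuple) per quad. The toy quad below is
# illustrative; assumes rdflib 6+, where serialize() returns a str.
from rdflib import Dataset, URIRef

_d = Dataset()
_d.add((URIRef("http://example.com/s"), URIRef("http://example.com/p"),
        URIRef("http://example.com/o"), URIRef("http://example.com/g")))
assert len(_d.serialize(format="hext").strip().splitlines()) == 1
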
def fetch(endpoint, timeout=0):
    store = SPARQLStore(endpoint)
    ds = Dataset(store)
    for rs_name, rs_uri in get_rule_sets(endpoint + rs_table_page):
        # TODO: maybe do not discard but try to merge? no.
        if rs_uri not in rule_sets:
            # TODO: handle possible query error?
            gr = ds.get_context(rs_uri)
            try:
                rs_triples = gr.query(q)
                yield rs_name, rs_uri, rs_triples
                time.sleep(timeout)
            except Exception:  # avoid a bare except, which would swallow KeyboardInterrupt
                print('error with', rs_uri)
                other_rs.append(rs_uri)

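# Hedged sketch of the get_context(...) call above: with a read-only SPARQLStore,
# Dataset.get_context returns a Graph view scoped to one named graph at the remote
# endpoint, and queries on it are answered remotely. Endpoint and graph IRI are
# placeholders; the query line is commented out since it needs a live endpoint.
from rdflib import Dataset, URIRef
from rdflib.plugins.stores.sparqlstore import SPARQLStore

_ds = Dataset(SPARQLStore("http://example.org/sparql"))
_gr = _ds.get_context(URIRef("http://example.org/graph/rules"))
# _gr.query("SELECT * WHERE { ?s ?p ?o } LIMIT 1")  # would hit the endpoint
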
def test_small_string():
    s = """["http://example.com/s01", "http://example.com/a", "http://example.com/Type1", "globalId", "", ""]
["http://example.com/s01", "http://example.com/label", "This is a Label", "http://www.w3.org/1999/02/22-rdf-syntax-ns#langString", "en", ""]
["http://example.com/s01", "http://example.com/comment", "This is a comment", "http://www.w3.org/2001/XMLSchema#string", "", ""]
["http://example.com/s01", "http://example.com/creationDate", "2021-12-01", "http://www.w3.org/2001/XMLSchema#date", "", ""]
["http://example.com/s01", "http://example.com/creationTime", "2021-12-01T12:13:00", "http://www.w3.org/2001/XMLSchema#dateTime", "", ""]
["http://example.com/s01", "http://example.com/age", "42", "http://www.w3.org/2001/XMLSchema#integer", "", ""]
["http://example.com/s01", "http://example.com/trueFalse", "false", "http://www.w3.org/2001/XMLSchema#boolean", "", ""]
["http://example.com/s01", "http://example.com/op1", "http://example.com/o1", "globalId", "", ""]
["http://example.com/s01", "http://example.com/op1", "http://example.com/o2", "globalId", "", ""]
["http://example.com/s01", "http://example.com/op2", "http://example.com/o3", "globalId", "", ""]"""
    d = Dataset().parse(data=s, format="hext")
    assert len(d) == 10

def test_hext_dataset_linecount():
    d = Dataset()
    assert len(d) == 0
    d.parse(Path(__file__).parent / "test_parser_hext_multigraph.ndjson",
            format="hext", publicID=d.default_context.identifier)

    # count all the triples in the Dataset
    total_triples = 0
    for context in d.contexts():
        for triple in context.triples((None, None, None)):
            total_triples += 1
    assert total_triples == 18

    # count the number of serialized Hextuples, should be 22, as per the original file
    lc = len(d.serialize(format="hext").splitlines())
    assert lc == 22

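# Hedged aside: the nested contexts()/triples() loop above is equivalent to
# counting quads directly, since quads() spans the default and all named graphs.
# A self-contained toy Dataset is used below for illustration.
from rdflib import Dataset, URIRef

_d = Dataset()
_d.add((URIRef("urn:s"), URIRef("urn:p"), URIRef("urn:o")))
assert sum(1 for _ in _d.quads((None, None, None, None))) == 1
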
def __init__(self, address=config.BRAIN_URL_LOCAL):
    """
    Interact with Triple store

    Parameters
    ----------
    address: str
        IP address and port of the Triple store
    """
    self.address = address
    self.namespaces = {}
    self.ontology_paths = {}
    self.format = 'trig'
    self.dataset = Dataset()
    self.query_prefixes = """
    prefix gaf: <http://groundedannotationframework.org/gaf#>
    prefix grasp: <http://groundedannotationframework.org/grasp#>
    prefix leolaniInputs: <http://cltl.nl/leolani/inputs/>
    prefix leolaniFriends: <http://cltl.nl/leolani/friends/>
    prefix leolaniTalk: <http://cltl.nl/leolani/talk/>
    prefix leolaniTime: <http://cltl.nl/leolani/time/>
    prefix leolaniWorld: <http://cltl.nl/leolani/world/>
    prefix n2mu: <http://cltl.nl/leolani/n2mu/>
    prefix ns1: <urn:x-rdflib:>
    prefix owl: <http://www.w3.org/2002/07/owl#>
    prefix prov: <http://www.w3.org/ns/prov#>
    prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    prefix sem: <http://semanticweb.cs.vu.nl/2009/11/sem/>
    prefix skos: <http://www.w3.org/2004/02/skos/core#>
    prefix time: <http://www.w3.org/TR/owl-time/#>
    prefix xml: <http://www.w3.org/XML/1998/namespace>
    prefix xml1: <https://www.w3.org/TR/xmlschema-2/#>
    prefix xsd: <http://www.w3.org/2001/XMLSchema#>
    """

    self._define_namespaces()
    self._get_ontology_path()
    self._bind_namespaces()

    self.my_uri = None

    self._log = logger.getChild(self.__class__.__name__)
    self._log.debug("Booted")

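# Hedged sketch of what the _bind_namespaces() step referenced above typically
# does (its body is not shown in the original): binding a prefix on the Dataset
# lets serializations use the short names from the query_prefixes block. The two
# bindings below reuse IRIs from that block.
from rdflib import Dataset, Namespace

_ds = Dataset()
_ds.bind('gaf', Namespace('http://groundedannotationframework.org/gaf#'))
_ds.bind('n2mu', Namespace('http://cltl.nl/leolani/n2mu/'))
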