def setUp(self):
    try:
        self.graph = Dataset(store=self.store)
    except ImportError:
        raise SkipTest("Dependencies for store '%s' not available!" % self.store)
    if self.store == "SQLite":
        _, self.tmppath = mkstemp(prefix='test', dir='/tmp', suffix='.sqlite')
    else:
        self.tmppath = mkdtemp()
    self.graph.open(self.tmppath, create=True)
    self.michel = URIRef(u'michel')
    self.tarek = URIRef(u'tarek')
    self.bob = URIRef(u'bob')
    self.likes = URIRef(u'likes')
    self.hates = URIRef(u'hates')
    self.pizza = URIRef(u'pizza')
    self.cheese = URIRef(u'cheese')
    self.c1 = URIRef(u'context-1')
    self.c2 = URIRef(u'context-2')
    # delete the graph for each test!
    self.graph.remove((None, None, None))
def get_fragment(request, subject, predicate, obj, page, graph):
    fragment = Dataset()
    tpf_url = urlparse(request.build_absolute_uri())
    tpf_url = TPF_URL.format(tpf_url.scheme, tpf_url.netloc, graph)
    licenses = []
    neo_licenses = LicenseModel.nodes.filter(graph__exact=graph)
    if subject and subject.startswith(LICENSE_SUBJECT_PREFIX):
        license_id = subject.split('/')[-1]
        neo_licenses.filter(hashed_sets__exact=license_id)
    for neo_license in neo_licenses:
        license_object = ObjectFactory.objectLicense(neo_license)
        license_object = license_object.to_json()
        license_object['compatible_licenses'] = []
        for compatible_neo_license in neo_license.followings.all():
            compatible_license = ObjectFactory.objectLicense(compatible_neo_license)
            license_object['compatible_licenses'].append(compatible_license.hash())
        licenses.append(license_object)
    rdf_licenses = get_rdf(licenses, graph).triples((subject, predicate, obj))
    total_nb_triples = 0
    for s, p, o in rdf_licenses:
        fragment.add((s, p, o))
        total_nb_triples += 1
    last_result = True
    nb_triple_per_page = total_nb_triples
    _frament_fill_meta(subject, predicate, obj, page, graph, fragment,
                       last_result, total_nb_triples, nb_triple_per_page,
                       request, tpf_url)
    return fragment
def __init__(self, address=config.BRAIN_URL_LOCAL):
    """
    Interact with Triple store

    Parameters
    ----------
    address: str
        IP address and port of the Triple store
    """
    self.address = address
    self.namespaces = {}
    self.ontology_paths = {}
    self.format = 'trig'
    self.dataset = Dataset()
    self.query_prefixes = read_query('prefixes')

    self._define_namespaces()
    self._get_ontology_path()
    self._bind_namespaces()

    self.my_uri = None

    self._log = logger.getChild(self.__class__.__name__)
    self._log.debug("Booted")

    self._brain_log = config.BRAIN_LOG_ROOT.format(
        datetime.now().strftime('%Y-%m-%d-%H-%M'))

    # Launch first query
    self.count_statements()
def open(self):
    # XXX: If we have a source that's read only, should we need to set the
    # store separately??
    g0 = Dataset('SPARQLUpdateStore', default_union=True)
    g0.open(tuple(self.conf['rdf.store_conf']))
    self.graph = g0
    return self.graph
def set_member(self, c_id, m_obj):
    if isinstance(m_obj, Model):
        m_obj = [m_obj]
    elif not isinstance(m_obj, list):
        raise ParseError()
    c_ldp_id = self.marmotta.ldp(encoder.encode(c_id))
    collection = self.get_collection(c_id).pop()  # 404 if collection not found
    if len(set([m.id for m in m_obj])) != len(m_obj):
        raise ForbiddenError()
    if not collection.capabilities.membershipIsMutable:
        raise ForbiddenError()
    if collection.capabilities.restrictedToType:
        for m in m_obj:
            if not (hasattr(m, "datatype") and
                    m.datatype in collection.capabilities.restrictedToType):
                raise ForbiddenError()
    if collection.capabilities.maxLength >= 0:
        size = self.sparql.size(c_ldp_id).bindings.pop().get(Variable('size'))
        if int(size) > collection.capabilities.maxLength - len(m_obj):
            # "Operation forbidden. Collection of maximum size {} is full."
            raise ForbiddenError()
    ds = Dataset()
    ldp = ds.graph(identifier=LDP.ns)
    for m in m_obj:
        m_id = self.marmotta.ldp(encoder.encode(c_id) + "/member/" + encoder.encode(m.id))
        member = ds.graph(identifier=m_id)
        member += self.RDA.object_to_graph(member.identifier, m)
        ldp += LDP.add_contains(c_ldp_id + "/member", m_id, False)
    res = self.sparql.insert(ds)
    if res.status_code != 200:
        raise DBError()
    return m_obj
def setUp(self):
    try:
        self.graph = Dataset(store=self.store)
    except ImportError:
        raise SkipTest("Dependencies for store '%s' not available!" % self.store)
    if self.store == "SQLite":
        _, self.tmppath = mkstemp(prefix="test", dir="/tmp", suffix=".sqlite")
    elif self.store == "SPARQLUpdateStore":
        root = HOST + DB
        self.graph.open((root + "sparql", root + "update"))
    else:
        self.tmppath = mkdtemp()

    if self.store != "SPARQLUpdateStore":
        self.graph.open(self.tmppath, create=True)

    self.michel = URIRef("urn:michel")
    self.tarek = URIRef("urn:tarek")
    self.bob = URIRef("urn:bob")
    self.likes = URIRef("urn:likes")
    self.hates = URIRef("urn:hates")
    self.pizza = URIRef("urn:pizza")
    self.cheese = URIRef("urn:cheese")
    # Use regular URIs because SPARQL endpoints like Fuseki alter short names
    self.c1 = URIRef("urn:context-1")
    self.c2 = URIRef("urn:context-2")

    # delete the graph for each test!
    self.graph.remove((None, None, None))
    for c in self.graph.contexts():
        c.remove((None, None, None))
        assert len(c) == 0
        self.graph.remove_graph(c)
def open_db(path=DEFAULT_DATABASE_PATH):
    my_graph = Dataset('Sleepycat')
    store_state = my_graph.open(path, create=False)
    assert store_state != NO_STORE, 'Store does not exist'
    assert store_state == VALID_STORE, 'The underlying store is corrupt'
    return my_graph
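
# A minimal usage sketch for open_db above (an assumption for illustration: a
# Sleepycat/BerkeleyDB store already exists at DEFAULT_DATABASE_PATH and the
# bsddb3 bindings are installed; otherwise open() reports NO_STORE).
if __name__ == '__main__':
    db = open_db()
    try:
        named_graphs = sum(1 for _ in db.contexts())
        print('store reports %d triples in %d graphs' % (len(db), named_graphs))
    finally:
        db.close()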
def open(self):
    import logging
    # XXX: If we have a source that's read only, should we need to set the
    # store separately??
    g0 = Dataset('Sleepycat', default_union=True)
    self.conf['rdf.store'] = 'Sleepycat'
    g0.open(self.conf['rdf.store_conf'], create=True)
    self.graph = g0
    logging.debug("Opened SleepyCatSource")
def dump_as_rdf(g: Dataset, table_name: str) -> bool:
    """
    Dump the contents of Graph g in RDF turtle
    :param g: Dataset to dump
    :param table_name: name of the base table
    :return: success indicator
    """

    # Propagate the mapped concepts up the tree
    def add_to_ancestors(s: URIRef, vm: URIRef):
        g.add((s, ISO['enumeratedConceptualDomain.hasMember'], vm))
        for parent in g.objects(s, SKOS.broader):
            add_to_ancestors(parent, vm)

    if COMPUTE_MEMBERS and EXPLICIT_MEMBERS:
        for subj, obj in g.subject_objects(SKOS.exactMatch):
            add_to_ancestors(subj, obj)

        # TODO: this gives us a list of all concepts in the scheme... useful?
        for scheme, tc in g.subject_objects(SKOS.hasTopConcept):
            for member in g.objects(tc, ISO['enumeratedConceptualDomain.hasMember']):
                g.add((scheme, ISO['enumeratedConceptualDomain.hasMember'], member))

    for name, ns in namespaces.items():
        g.bind(name.lower(), ns)

    outfile = os.path.join(DATA_DIR, table_name + '.ttl')
    print(f"Saving output to {outfile}")
    g.serialize(outfile, format='turtle')
    print(f"{len(g)} triples written")
    return True
def test_load_from_file(self):
    ds = Dataset()
    ds.parse("geoStatements.trig", format="trig")

    async def f():
        await self.aiotest.addN(
            (i for i in ds.quads((None, None, None, None))))
        print("ds loaded")

    self.loop.run_until_complete(asyncio.gather(f()))
def set_service(self, s_obj):
    ds = Dataset()
    service = ds.graph(identifier=self.marmotta.ldp("service"))
    service += self.RDA.object_to_graph(service.identifier, s_obj)
    ldp = ds.graph(identifier=LDP.ns)
    ldp += LDP.add_contains(self.marmotta.ldp(), service.identifier, False)
    response = self.sparql.insert(ds)
    if response.status_code == 200:
        return s_obj
    else:
        raise DBError()
def load_statements():
    a = datetime.datetime.now()
    logger.info(f"start loading ds at: {a}")
    ds = Dataset()
    ds.parse(STATEMENTS, format=TYPE)
    b = datetime.datetime.now()
    logger.info(f"finished loading ds at: {b}")
    logger.info(f"ds loaded: {ds}")
    logger.info(f"ds loaded in {b - a}")
    return ds
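
# Hedged follow-up sketch: once load_statements() has parsed the TriG file, the
# named graphs can be inspected like this (logger and the helper above are assumed
# to live in the same module; log_graph_sizes is a made-up name for illustration).
def log_graph_sizes():
    ds = load_statements()
    for g in ds.graphs():
        logger.info(f"graph {g.identifier} holds {len(g)} triples")
    return ds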
def __init__(self, namespace, showObjs=True, showClasses=False, showNamespace=True):
    self.ds = Dataset()
    self.d = UmlPygraphVizDiagram()
    self.show_objs = showObjs
    self.show_classes = showClasses
    self.namespace = namespace
    self.show_namespaces = showNamespace
    self.add_namespaces(self.namespace)
def _get_single_graph_from_trig(trig_file: Optional[str] = None,
                                data: Optional[str] = None) -> rdflib.Graph:
    if trig_file is None and data is None:
        raise RuntimeError("One of trig_file OR data *must* be specified.")
    dataset = Dataset()
    dataset.parse(format="trig", source=trig_file, data=data)
    graphs_with_triples = [g for g in dataset.graphs() if len(g) > 0]
    assert (
        len(graphs_with_triples) == 1
    ), f"Found {len(graphs_with_triples)} non-trivial graphs in {trig_file}. Expected one."
    return graphs_with_triples[0]
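
# Hedged usage example for _get_single_graph_from_trig; the TriG payload and the
# helper name below are made up for illustration. Only one named graph carries
# triples, so the function returns exactly that graph.
_EXAMPLE_TRIG = """
@prefix ex: <http://example.com/> .
ex:g1 { ex:s ex:p ex:o . }
"""

def _example_single_graph() -> rdflib.Graph:
    g = _get_single_graph_from_trig(data=_EXAMPLE_TRIG)
    assert len(g) == 1
    return g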
def __init__(self, config):
    """Initialize the graph store and a layout.

    NOTE: `rdflib.Dataset` requires a RDF 1.1 compliant store with support
    for Graph Store HTTP protocol
    (https://www.w3.org/TR/sparql11-http-rdf-update/). Blazegraph supports
    this only in the (currently unreleased) 2.2 branch. It works with Jena,
    which is currently the reference implementation.
    """
    self.config = config
    self.store = plugin.get('Lmdb', Store)(config['location'])
    self.ds = Dataset(self.store, default_union=True)
    self.ds.namespace_manager = nsm
def __init__(self, identifier, columns, schema, metadata_graph, encoding, output_format):
    self.ds = Dataset()
    # self.ds = apply_default_namespaces(Dataset())
    self.g = self.ds.graph(URIRef(identifier))

    self.columns = columns
    self.schema = schema
    self.metadata_graph = metadata_graph
    self.encoding = encoding
    self.output_format = output_format

    self.templates = {}

    self.aboutURLSchema = self.schema.csvw_aboutUrl
def __init__(self):
    # type: () -> RdfBuilder

    self.ontology_paths = {}
    self.namespaces = {}
    self.dataset = Dataset()

    self._log = logger.getChild(self.__class__.__name__)
    self._log.debug("Booted")

    self._define_namespaces()
    self._bind_namespaces()
    self.define_named_graphs()
    self.load_ontology_integration()
def fetch(endpoint, timeout=0):
    store = SPARQLStore(endpoint)
    ds = Dataset(store)
    for rs_name, rs_uri in get_rule_sets(endpoint + rs_table_page):
        # TODO: maybe do not discard but try to merge? no.
        if rs_uri not in rule_sets:
            # TODO: handle possible query error?
            gr = ds.get_context(rs_uri)
            try:
                rs_triples = gr.query(q)
                yield rs_name, rs_uri, rs_triples
                time.sleep(timeout)
            except Exception:
                print('error with', rs_uri)
                other_rs.append(rs_uri)
def proc_table_access_table(opts: argparse.Namespace) -> int:
    """
    Iterate over the table_access table emitting its entries
    :param opts: function arguments
    :return: Graph
    """
    logging.info("Iterating over table_access table")
    process_parsed_args(opts, FileAwareParser.error)
    queries = QueryTexts(I2B2Tables(opts))
    q = queries.ont_session.query(queries.tables.table_access)
    e: TableAccess
    for e in q.all():
        print(f"{e.c_table_cd}", end='')
        if not e.c_table_cd.startswith(TABLE_PREFIX) or e.c_table_cd in SKIP_TABLES:
            print(" skipped")
            continue
        g = Dataset()
        nelements = proc_table_access_row(queries, e, g)
        if nelements:
            print(f" {nelements} elements processed")
            dump_as_rdf(g, e.c_table_cd)
        if ONE_TABLE:
            break
    else:
        nelements = 0
    return nelements
class DefaultSource(RDFSource):
    """ Reads from and queries against a configured database.

        The default configuration.

        The database store is configured with::

            "rdf.source" = "default"
            "rdf.store" = <your rdflib store name here>
            "rdf.store_conf" = <your rdflib store configuration here>

        Leaving unconfigured simply gives an in-memory data store.
    """

    def open(self):
        self.graph = Dataset(self.conf['rdf.store'], default_union=True)
        self.graph.open(self.conf['rdf.store_conf'], create=True)
def rdf(self):
    try:
        return self.conf['rdf.graph']
    except KeyError:
        if ALLOW_UNCONNECTED_DATA_USERS:
            return Dataset(default_union=True)
        raise DataUserUnconnected('No rdf.graph')
def test_simple(self):
    a = datetime.datetime.now()
    seed = [(URIRef(f"urn:example.com/mock/id{i}"),
             URIRef(f"urn:example.com/mock/rel{i}"),
             Literal(f"mock-val{i}"),
             URIRef(f"urn:example.com/mock/context{j}"))
            for i in range(100) for j in range(100)]

    async def seed_store():
        await self.aiotest.addN(seed)

    g, cg, ds = Graph(), ConjunctiveGraph(), Dataset(default_union=True)
    loop = asyncio.get_event_loop()
    loop.run_until_complete(asyncio.gather(seed_store()))
    b = datetime.datetime.now()
    print("seed time ->", b - a)

    async def f():
        for i in (g, cg, ds):
            await async_fill_graph(i, self.aiotest.statements())

    loop.run_until_complete(asyncio.gather(f()))
    for i in (g, cg, ds):
        print(len(i))
    # print("g", [i for i in g])
    # print("cg", [i for i in cg])
    # print("ds", [(i, g.identifier) for i in g for g in ds.graphs()])
    c = datetime.datetime.now()
    print("graph time ->", c - b)
    print("complete time ->", c - a)
def validateRDF(file):
    center, line = 66, 70
    print(
        F"\n{'':>16}{'-' * line:^{center}}\n{'|':>16}\t{F'VALIDATING RDF FILE {fileSize(file)}':^{center}}|\n{'':>16}{'-' * line:^{center}}\n"
    )
    start = time()
    from pathlib import Path
    size = Path(file).stat().st_size

    try:
        print("\n\t1. Checking the RDF file.")
        start = time()
        Dataset().parse(file, format="trig")
        print(
            F"\n\t\t>>> ✅ The converted file \n\t\t[{file}] \n\t\tis in a valid RDF format! "
            F"\n\n\t\t>>> We therefore can highly ascertain that the original file "
            F"\n\t\t[{file}]\n\t\tis in a valid RDF format.")
        print(
            "" if start is None else
            F"""\n\t2. {'Parsing time':.<50} {str(timedelta(seconds=time() - start))}"""
        )
    except Exception as err:
        print("\t\t\t>>> ❌ Invalid RDF")
        print(F"\t\t\t>>> [DETAIL ERROR FROM validate_RDF] {err}")
    finally:
        print(
            F"\n\t{'2. Done in':.<53} {str(timedelta(seconds=time() - start))}"
        )
def __init__(self, showObjs, showClasses, namespace):
    self.ds = Dataset()
    self.d = UmlPygraphVizDiagram()
    self.show_objs = showObjs
    self.show_classes = showClasses
    self.namespace = namespace
    self.add_namespaces(self.namespace)
def setUp(self):
    try:
        self.graph = Dataset(store=self.store)
    except ImportError:
        raise SkipTest(
            "Dependencies for store '%s' not available!" % self.store)
    if self.store == "SQLite":
        _, self.tmppath = mkstemp(
            prefix='test', dir='/tmp', suffix='.sqlite')
    elif self.store == "SPARQLUpdateStore":
        root = HOST + DB
        self.graph.open((root + "sparql", root + "update"))
    else:
        self.tmppath = mkdtemp()

    if self.store != "SPARQLUpdateStore":
        self.graph.open(self.tmppath, create=True)

    self.michel = URIRef(u'urn:michel')
    self.tarek = URIRef(u'urn:tarek')
    self.bob = URIRef(u'urn:bob')
    self.likes = URIRef(u'urn:likes')
    self.hates = URIRef(u'urn:hates')
    self.pizza = URIRef(u'urn:pizza')
    self.cheese = URIRef(u'urn:cheese')
    # Use regular URIs because SPARQL endpoints like Fuseki alter short names
    self.c1 = URIRef(u'urn:context-1')
    self.c2 = URIRef(u'urn:context-2')

    # delete the graph for each test!
    self.graph.remove((None, None, None))
    for c in self.graph.contexts():
        c.remove((None, None, None))
        assert len(c) == 0
        self.graph.remove_graph(c)
def main(csvfile, linkPredicate, destination): g = buildLinkset(csvfile=csvfile, linkPredicate=linkPredicate, identifier=create.term('id/linkset/rijksmuseum/')) dsG = rdflib.Dataset() dsG.add_graph(g) DATE = Literal(datetime.datetime.now().strftime('%Y-%m-%d'), datatype=XSD.datetime) rdfSubject.db = dsG ds = Linkset( create.term('id/linkset/rijksmuseum/'), name=[Literal("Rijksmuseum person linkset", lang='en')], description=[ Literal( "Dataset that links Rijksmuseum persons to Wikidata and Ecartico. Data harvested from Europeana and Ecartico.", lang='en') ], dateModified=DATE, dcdate=DATE, dcmodified=DATE, target=[ create.term('id/rijksmuseum/'), create.term('id/ecartico/'), URIRef("https://wikidata.org/") ], linkPredicate=[linkPredicate]) linksetDs = Dataset( create.term('id/linkset/'), name=[Literal("Linkset collection", lang='en')], description=["Collection of linksets stored in this triplestore."]) linksetDs.subset = [ds] linksetDs.hasPart = [ds] ds.isPartOf = linksetDs ds.inDataset = linksetDs dsG.bind('void', void) dsG.bind('dcterms', dcterms) dsG.bind('schema', schema) dsG.serialize(destination=destination, format='trig')
def __init__(self, address=config.BRAIN_URL_LOCAL): """ Interact with Triple store Parameters ---------- address: str IP address and port of the Triple store """ self.address = address self.namespaces = {} self.ontology_paths = {} self.format = 'trig' self.dataset = Dataset() self.query_prefixes = """ prefix gaf: <http://groundedannotationframework.org/gaf#> prefix grasp: <http://groundedannotationframework.org/grasp#> prefix leolaniInputs: <http://cltl.nl/leolani/inputs/> prefix leolaniFriends: <http://cltl.nl/leolani/friends/> prefix leolaniTalk: <http://cltl.nl/leolani/talk/> prefix leolaniTime: <http://cltl.nl/leolani/time/> prefix leolaniWorld: <http://cltl.nl/leolani/world/> prefix n2mu: <http://cltl.nl/leolani/n2mu/> prefix ns1: <urn:x-rdflib:> prefix owl: <http://www.w3.org/2002/07/owl#> prefix prov: <http://www.w3.org/ns/prov#> prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> prefix sem: <http://semanticweb.cs.vu.nl/2009/11/sem/> prefix skos: <http://www.w3.org/2004/02/skos/core#> prefix time: <http://www.w3.org/TR/owl-time/#> prefix xml: <http://www.w3.org/XML/1998/namespace> prefix xml1: <https://www.w3.org/TR/xmlschema-2/#> prefix xsd: <http://www.w3.org/2001/XMLSchema#> """ self._define_namespaces() self._get_ontology_path() self._bind_namespaces() self.my_uri = None self._log = logger.getChild(self.__class__.__name__) self._log.debug("Booted")
def dataset(self):
    #pdb.set_trace()
    if hasattr(self._connection, 'dataset'):
        return getattr(self._connection, 'dataset')
    if self.store == 'Sleepycat':
        dataset = Dataset(store=self.store, default_union=True)
        dataset.open(self.store_path, create=True)
    else:
        self.store = Virtuoso(self.connection)
        #dataset = Dataset(store=self.store, default_union=True)
        dataset = ConjunctiveGraph(store=self.store, identifier=CENDARI)
        self.store.connection  # force connection
    setattr(self._connection, 'dataset', dataset)
    nm = NamespaceManager(dataset)
    for (prefix, ns) in INIT_NS.iteritems():
        nm.bind(prefix, ns)
    dataset.namespace_manager = nm
    return dataset
def get_ds0():
    update_endpoint = 'http://localhost:8890/sparql-auth'
    # query_endpoint = 'http://localhost:8890/sparql'
    store = SPARQLUpdateStore(update_endpoint, update_endpoint, autocommit=True)
    store.setHTTPAuth(DIGEST)
    store.setCredentials(user='******', passwd='admin')
    return Dataset(store)
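
# Hedged usage sketch for get_ds0 (assumes the Virtuoso endpoint and credentials
# above are reachable; the graph and triple names are made up). With autocommit=True
# each add() is pushed to the endpoint as a SPARQL UPDATE.
from rdflib import Literal, URIRef

def add_example_quad():
    ds = get_ds0()
    g = ds.graph(URIRef('urn:example:graph'))
    g.add((URIRef('urn:example:s'), URIRef('urn:example:p'), Literal('o')))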
def test_ldp_access_with_ldp(self):
    with app.app_context():
        # todo: post collection to sparql, retrieve via LDP and compare
        c_obj = self.mock.collection()
        self.db.set_collection(c_obj)
        g = Dataset().parse(self.db.marmotta.ldp(encoder.encode(c_obj.id)),
                            format="n3")
        r_obj = self.db.RDA.graph_to_object(g).pop()
        self.assertDictEqual(c_obj.dict(), r_obj.dict())
def set_collection(self, c_obj, over_write=False):
    if isinstance(c_obj, Model):
        c_obj = [c_obj]
    elif not isinstance(c_obj, list):
        raise ParseError()
    # create LD collection and declare as ldp:BasicContainer
    ds = Dataset()
    ldp = ds.graph(identifier=LDP.ns)
    for c in c_obj:
        c_id = encoder.encode(c.id)
        collection = ds.graph(identifier=self.marmotta.ldp(c_id))
        collection += self.RDA.object_to_graph(collection.identifier, c)
        ldp += LDP.add_contains(self.marmotta.ldp(), collection.identifier)
        member = ds.graph(identifier=self.marmotta.ldp(c_id + '/member'))
        ldp += LDP.add_contains(collection.identifier, member.identifier)
    ins = self.sparql.insert(ds)
    if ins.status_code == 200:
        return c_obj
    else:
        raise DBError()
def createNanopubs(g): ds = Dataset() ds.namespace_manager.bind("ddi","http://purl.org/net/nlprepository/spl-ddi-annotation-poc#") ds.namespace_manager.bind("prov","http://www.w3.org/ns/prov#") ds.namespace_manager.bind("np", "http://www.nanopub.org/nschema#") bindings = g.query(interactSelect) for b in bindings: npURI = URIRef(b['inter'] + "-nanopub") headURI = URIRef(b['inter'] + "-head") aURI = URIRef(b['inter'] + "-assertion") pubInfoURI = URIRef(b['inter'] + "-pubInfo") provURI = URIRef(b['inter'] + "-provenance") head = ds.add_graph(headURI) head.add((npURI, RDF.type, np['Nanopublication'])) head.add((aURI, RDF.type, np['Assertion'])) head.add((provURI, RDF.type, np['Provenance'])) head.add((pubInfoURI, RDF.type, np['PublicationInfo'])) head.add((npURI, np['hasAssertion'], aURI)) head.add((npURI, np['hasProvenance'], provURI)) head.add((npURI, np['hasPublicationInfo'], pubInfoURI)) #print head.serialize() a = ds.add_graph(aURI) a.add((b['s'], URIRef('http://dbmi-icode-01.dbmi.pitt.edu/dikb/vocab/interactsWith'), b['o'])) a.add((b['s'], RDF.type, sio["SIO_010038"])) a.add((b['o'], RDF.type, sio["SIO_010038"])) prov = ds.add_graph(provURI) prov.add((aURI, w3prov['wasDerivedFrom'], b['inter'])) print ds.serialize(format='trig')
def __init__(self, graph_identifier, dataset, variables, headers): self._headers = headers self._variables = variables # TODO: Family is now superseded by a full dataset description in the form of QBer # if 'family' in config: # self._family = config['family'] # try: # family_def = getattr(mappings, config['family']) # self._nocode = family_def['nocode'] # self._integer = family_def['integer'] # self._mappings = family_def['mappings'] # except: # logger.warning('No family definition found') # self._nocode = [] # self._integer = [] # self._mappings = {} # else: # self._family = None # TODO: number_observations is now superseded by a full dataset description in the form of QBer # if 'number_observations' in config: # self._number_observations = config['number_observations'] # else: # self._number_observations = None # TODO: stop is now superseded by a full dataset description in the form of QBer # self._stop = config['stop'] # TODO: Now setting these as simple defaults self._family = None self._number_observations = True self._stop = None # TODO: Think of what to do here... if self._family is None: self._VOCAB_URI_PATTERN = "{0}{{}}/{{}}".format(self._VOCAB_BASE) self._RESOURCE_URI_PATTERN = "{0}{{}}/{{}}".format( self._RESOURCE_BASE) else: self._VOCAB_URI_PATTERN = "{0}{1}/{{}}/{{}}".format( self._VOCAB_BASE, self._family) self._RESOURCE_URI_PATTERN = "{0}{1}/{{}}/{{}}".format( self._RESOURCE_BASE, self._family) self.ds = apply_default_namespaces(Dataset()) self.g = self.ds.graph(URIRef(graph_identifier)) self._dataset_name = dataset['name'] self._dataset_uri = URIRef(dataset['uri'])
def test_hext_json_representation(): """Tests to see if every link in the ND-JSON Hextuple result is, in fact, JSON""" d = Dataset() trig_data = """ PREFIX ex: <http://example.com/> PREFIX owl: <http://www.w3.org/2002/07/owl#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> ex:g1 { ex:s1 ex:p1 ex:o1 , ex:o2 ; ex:p2 [ a owl:Thing ; rdf:value "thingy" ; ] ; ex:p3 "Object 3" , "Object 4 - English"@en ; ex:p4 "2021-12-03"^^xsd:date ; ex:p5 42 ; ex:p6 "42" ; . } ex:g2 { ex:s1 ex:p1 ex:o1 , ex:o2 ; . ex:s11 ex:p11 ex:o11 , ex:o12 . } # default graph triples ex:s1 ex:p1 ex:o1 , ex:o2 . ex:s21 ex:p21 ex:o21 , ex:o22 . """ d.parse(data=trig_data, format="trig") out = d.serialize(format="hext") for line in out.splitlines(): j = json.loads(line) assert isinstance(j, list)
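
# Optional round-trip sketch (an assumption worth checking against your rdflib
# version): recent rdflib releases also ship a Hextuples parser, so the serialized
# ND-JSON can be read back and the quads should survive the round trip.
def _hext_roundtrip(original: Dataset) -> Dataset:
    restored = Dataset()
    restored.parse(data=original.serialize(format="hext"), format="hext")
    assert len(list(restored.quads((None, None, None, None)))) == len(
        list(original.quads((None, None, None, None))))
    return restored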
def createNanopubs(g): ds = Dataset() ds.namespace_manager.bind("ddi","http://dbmi-icode-01.dbmi.pitt.edu/mp/") ds.namespace_manager.bind("np", "http://www.nanopub.org/nschema#") ds.namespace_manager.bind("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#") ds.namespace_manager.bind("rdfs", "http://www.w3.org/2000/01/rdf-schema#") ds.namespace_manager.bind("owl", "http://www.w3.org/2002/07/owl#") ds.namespace_manager.bind("obo", "http://purl.obolibrary.org/obo/") ds.namespace_manager.bind("oboInOwl", "http://www.geneontology.org/formats/oboInOwl#") ds.namespace_manager.bind("xsd", "http://www.w3.org/2001/XMLSchema#") ds.namespace_manager.bind("dc", "http://purl.org/dc/elements/1.1/") ds.namespace_manager.bind("mp", "http://purl.org/mp/") assertionCount = 1 enzymeCount = 1 pddiD = dict([line.split(',',1) for line in open('../../data/np-graphs/processed-dikb-ddis-for-nanopub.csv')]) cL = dict([line.split('\t') for line in open('../../data/chebi_mapping.txt')]) pL = dict([line.split('\t') for line in open('../../data/pro_mapping.txt')]) substrateD = {} inhibitorD = {} bindings = g.query(interactSelect) for b in bindings: if( pddiD.has_key(str(b['c'].decode('utf-8'))) ): tempClaim = pddiD[ str(b['c'].decode('utf-8')) ] claimInfo = tempClaim.split(',') claimSub = claimInfo[1] claimObj = claimInfo[2] predicateType = claimInfo[0].strip('\n') if(predicateType == "increases_auc"): aURI = URIRef("http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-assertion-%s") % assertionCount assertionCount += 1 bn1 = BNode('1') bn2 = BNode('2') bn3 = BNode('3') bn4 = BNode('4') bn5 = BNode('5') bn6 = BNode('6') bn7 = BNode('7') bn8 = BNode('8') bn9 = BNode('9') bn10 = BNode('10') assertionLabel = cL[claimSub.strip('\n')].strip('\n') + " - " + cL[claimObj.strip('\n')].strip('\n') + " potential drug-drug interaction" a = ds.add_graph((aURI)) a.add(( aURI, RDF.type, np.assertion)) a.add(( aURI, RDF.type, owl.Class)) a.add(( aURI, RDFS.label, (Literal(assertionLabel.lower())))) a.add(( aURI, RDFS.subClassOf, URIRef("http://purl.obolibrary.org/obo/DIDEO_00000000"))) a.add(( bn1, RDF.type, owl.Restriction)) a.add(( bn1, owl.onProperty, URIRef("http://purl.obolibrary.org/obo/IAO_0000136"))) a.add(( bn2, RDF.type, owl.Class)) a.add(( bn3, RDF.first, URIRef("http://purl.obolibrary.org/obo/DIDEO_00000012"))) a.add(( bn5, RDF.first, bn4)) a.add(( bn3, RDF.rest, bn5)) a.add(( bn4, RDF.type, owl.Restriction)) a.add(( bn4, owl.onProperty, URIRef("http://purl.obolibrary.org/obo/BFO_0000052"))) a.add(( bn4, owl.hasValue, URIRef(claimSub.strip('\n')))) a.add(( bn5, RDF.rest, RDF.nil)) a.add(( bn2, owl.intersectionOf, bn3)) a.add(( bn1, owl.someValuesFrom, bn2)) a.add(( aURI, RDFS.subClassOf, bn1)) a.add(( bn6, RDF.type, owl.Restriction)) a.add(( bn6, owl.onProperty, URIRef("http://purl.obolibrary.org/obo/IAO_0000136"))) a.add(( bn7, RDF.type, owl.Class)) a.add(( bn8, RDF.first, URIRef("http://purl.obolibrary.org/obo/DIDEO_00000013"))) a.add(( bn10, RDF.first, bn9)) a.add(( bn8, RDF.rest, bn10)) a.add(( bn9, RDF.type, owl.Restriction)) a.add(( bn9, owl.onProperty, URIRef("http://purl.obolibrary.org/obo/BFO_0000052"))) a.add(( bn9, owl.hasValue, URIRef(claimObj.strip('\n')))) a.add(( bn10, RDF.rest, RDF.nil)) a.add(( bn7, owl.intersectionOf, bn8)) a.add(( bn6, owl.someValuesFrom, bn7)) a.add(( aURI, RDFS.subClassOf, bn6)) ds.add(( aURI, mp.formalizes, b['c'])) ds.add(( b['c'], mp.formalizedAs, aURI)) elif(predicateType == "substrate_of"): aURI = 
URIRef("http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-assertion-%s") % assertionCount assertionCount += 1 dLabel = cL[claimSub.strip('\n')].strip('\n') eLabel = pL[claimObj.strip('\n')].strip('\n') assertionLabel = dLabel + " substrate of " + eLabel a = ds.add_graph((aURI)) ds.add(( aURI, RDF.type, np.assertion)) ds.add(( aURI, RDFS.label, Literal(assertionLabel.lower()))) ds.add(( aURI, mp.formalizes, b['c'])) ds.add(( b['c'], mp.formalizedAs, aURI)) a.add(( URIRef(claimObj.strip('\n')), RDF.type, URIRef("http://purl.obolibrary.org/obo/OBI_0000427"))) a.add(( URIRef(claimObj.strip('\n')), RDFS.label, Literal(eLabel.lower()))) a.add(( URIRef(claimObj.strip('\n')), URIRef("http://purl.obolibrary.org/obo/DIDEO_00000096"), URIRef(claimSub.strip('\n')))) a.add(( URIRef(claimSub.strip('\n')), RDF.type, URIRef("http://purl.obolibrary.org/obo/CHEBI_24431"))) a.add(( URIRef(claimSub.strip('\n')), RDFS.label, Literal(dLabel.lower()))) elif(predicateType == "inhibits"): aURI = URIRef("http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-assertion-%s") % assertionCount assertionCount += 1 dLabel = cL[claimSub.strip('\n')].strip('\n') eLabel = pL[claimObj.strip('\n')].strip('\n') assertionLabel = dLabel + " inhibits " + eLabel a = ds.add_graph((aURI)) ds.add(( aURI, RDF.type, np.assertion)) ds.add(( aURI, RDFS.label, Literal(assertionLabel.lower()))) ds.add(( aURI, mp.formalizes, b['c'])) ds.add(( b['c'], mp.formalizedAs, aURI)) a.add(( URIRef(claimSub.strip('\n')), RDF.type, URIRef("http://purl.obolibrary.org/obo/CHEBI_24431"))) a.add(( URIRef(claimSub.strip('\n')), RDFS.label, Literal(dLabel.lower()))) a.add(( URIRef(claimSub.strip('\n')), URIRef("http://purl.obolibrary.org/obo/RO_0002449"), URIRef(claimObj.strip('\n')))) print ds.serialize(format='trig')
from rdflib import Literal, BNode, Namespace, URIRef, Graph, Dataset, RDF, RDFS, XSD
from rdflib.namespace import FOAF
from rdflib.serializer import Serializer
import rdflib.resource
import uuid

# Create a default dataset graph.
ds = Dataset(default_union=True)

# JSON-LD serializer requires an explicit context.
# https://github.com/RDFLib/rdflib-jsonld
# context = {"@vocab": "http://purl.org/dc/terms/", "@language": "en"}
context = {"prov": "http://www.w3.org/ns/prov#",
           "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
           "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
           "xsd": "http://www.w3.org/2001/XMLSchema#",
           "dc": "http://purl.org/dc/terms"}

# Define some namespaces
PROV = Namespace("http://www.w3.org/ns/prov#")
ORE = Namespace("http://www.openarchives.org/ore/terms/")
OWL = Namespace("http://www.w3.org/2002/07/owl#")
DC = Namespace("http://purl.org/dc/terms/")
UUIDNS = Namespace("urn:uuid:")
DOCKER = Namespace("http://w3id.org/daspos/docker#")
# W3C namespace:
POSIX = Namespace("http://www.w3.org/ns/posix/stat#")
ACL = Namespace("http://www.w3.org/ns/auth/acl#")

# DASPOS namespaces
class DatasetTestCase(unittest.TestCase): store = 'default' slow = True tmppath = None def setUp(self): try: self.graph = Dataset(store=self.store) except ImportError: raise SkipTest( "Dependencies for store '%s' not available!" % self.store) if self.store == "SQLite": _, self.tmppath = mkstemp( prefix='test', dir='/tmp', suffix='.sqlite') else: self.tmppath = mkdtemp() self.graph.open(self.tmppath, create=True) self.michel = URIRef(u'michel') self.tarek = URIRef(u'tarek') self.bob = URIRef(u'bob') self.likes = URIRef(u'likes') self.hates = URIRef(u'hates') self.pizza = URIRef(u'pizza') self.cheese = URIRef(u'cheese') self.c1 = URIRef(u'context-1') self.c2 = URIRef(u'context-2') # delete the graph for each test! self.graph.remove((None, None, None)) def tearDown(self): self.graph.close() if os.path.isdir(self.tmppath): shutil.rmtree(self.tmppath) else: os.remove(self.tmppath) def testGraphAware(self): if not self.graph.store.graph_aware: return g = self.graph g1 = g.graph(self.c1) # added graph exists self.assertEquals(set(x.identifier for x in self.graph.contexts()), set([self.c1, DATASET_DEFAULT_GRAPH_ID])) # added graph is empty self.assertEquals(len(g1), 0) g1.add( (self.tarek, self.likes, self.pizza) ) # added graph still exists self.assertEquals(set(x.identifier for x in self.graph.contexts()), set([self.c1, DATASET_DEFAULT_GRAPH_ID])) # added graph contains one triple self.assertEquals(len(g1), 1) g1.remove( (self.tarek, self.likes, self.pizza) ) # added graph is empty self.assertEquals(len(g1), 0) # graph still exists, although empty self.assertEquals(set(x.identifier for x in self.graph.contexts()), set([self.c1, DATASET_DEFAULT_GRAPH_ID])) g.remove_graph(self.c1) # graph is gone self.assertEquals(set(x.identifier for x in self.graph.contexts()), set([DATASET_DEFAULT_GRAPH_ID])) def testDefaultGraph(self): self.graph.add(( self.tarek, self.likes, self.pizza)) self.assertEquals(len(self.graph), 1) # only default exists self.assertEquals(set(x.identifier for x in self.graph.contexts()), set([DATASET_DEFAULT_GRAPH_ID])) # removing default graph removes triples but not actual graph self.graph.remove_graph(DATASET_DEFAULT_GRAPH_ID) self.assertEquals(len(self.graph), 0) # default still exists self.assertEquals(set(x.identifier for x in self.graph.contexts()), set([DATASET_DEFAULT_GRAPH_ID])) def testNotUnion(self): g1 = self.graph.graph(self.c1) g1.add((self.tarek, self.likes, self.pizza)) self.assertEqual(list(self.graph.objects(self.tarek, None)), []) self.assertEqual(list(g1.objects(self.tarek, None)), [self.pizza])
def __init__(self, output_filename='output.png'):
    self.ds = Dataset()
    #self.d = UmlGraphVizDiagram(output_filename)
    self.d = UmlPygraphVizDiagram(output_filename)
class RDFtoUmlDiagram(): """ Transform a RDF dataset to an UML diagram """ def __init__(self, output_filename='output.png'): self.ds = Dataset() #self.d = UmlGraphVizDiagram(output_filename) self.d = UmlPygraphVizDiagram(output_filename) def load_rdf(self, filename, input_format=None): if input_format: rdf_format = input_format elif filename is not sys.stdin: format_list = {'.xml': 'xml', '.rdf': 'xml', '.owl': 'xml', '.n3': 'n3', '.ttl': 'turtle', '.nt': 'nt', '.trig': 'trig', '.nq': 'nquads', '': 'turtle'} extension = splitext(filename.name)[1] rdf_format = format_list[extension] else: rdf_format = 'turtle' temp = self.ds.graph("file://"+filename.name) temp.parse(filename.name, format=rdf_format) def add_namespaces(self, namespaces): if namespaces: for ns in namespaces: self.ds.namespace_manager.bind(ns[0],ns[1]) def start_subgraph(self, graph_name): self.d.start_subgraph(graph_name.strip('[<>]:_')) def close_subgraph(self): self.d.close_subgraph() def add_object_node(self, object_name, classes_name, attributes): self.d.add_object_node(self.ds.namespace_manager.qname(object_name), classes_name, attributes) def add_class_node(self, class_name, attributes): self.d.add_class_node(self.ds.namespace_manager.qname(class_name), attributes) def add_edge(self, src, dst, predicate): self.d.add_edge(self.ds.namespace_manager.qname(src), self.ds.namespace_manager.qname(dst), self.ds.namespace_manager.qname(predicate)) def add_subclass_edge(self, src, dst): self.d.add_subclass_edge(self.ds.namespace_manager.qname(src), self.ds.namespace_manager.qname(dst)) def create_namespace_box(self): # Create Namespace box self.d.add_label("Namespaces:\l") for ns in sorted(self.ds.namespaces()): self.d.add_label("%s:\t%s \l" % (ns[0], ns[1])) def output_dot(self): self.d.write_to_file() def close(self): self.create_namespace_box() self.d.close() def visualize(self): self.d.visualize()
class BurstConverter(object): """The actual converter, that processes the chunk of lines from the CSV file, and uses the instructions from the ``schema`` graph to produce RDF.""" def __init__(self, identifier, columns, schema, metadata_graph, encoding, output_format): self.ds = Dataset() # self.ds = apply_default_namespaces(Dataset()) self.g = self.ds.graph(URIRef(identifier)) self.columns = columns self.schema = schema self.metadata_graph = metadata_graph self.encoding = encoding self.output_format = output_format self.templates = {} self.aboutURLSchema = self.schema.csvw_aboutUrl def equal_to_null(self, nulls, row): """Determines whether a value in a cell matches a 'null' value as specified in the CSVW schema)""" for n in nulls: n = Item(self.metadata_graph, n) col = str(n.csvw_name) val = str(n.csvw_null) if row[col] == val: logger.debug("Value of column {} ('{}') is equal to specified 'null' value: '{}'".format(col, unicode(row[col]).encode('utf-8'), val)) # There is a match with null value return True # There is no match with null value return False def process(self, count, rows, chunksize): """Process the rows fed to the converter. Count and chunksize are used to determine the current row number (needed for default observation identifiers)""" obs_count = count * chunksize # logger.info("Row: {}".format(obs_count)) #removed for readability # We iterate row by row, and then column by column, as given by the CSVW mapping file. mult_proc_counter = 0 iter_error_counter= 0 for row in rows: # This fixes issue:10 if row is None: mult_proc_counter += 1 # logger.debug( #removed for readability # "Skipping empty row caused by multiprocessing (multiple of chunksize exceeds number of rows in file)...") continue # set the '_row' value in case we need to generate 'default' URIs for each observation () # logger.debug("row: {}".format(obs_count)) #removed for readability row[u'_row'] = obs_count count += 1 # The self.columns dictionary gives the mapping definition per column in the 'columns' # array of the CSVW tableSchema definition. for c in self.columns: c = Item(self.metadata_graph, c) # default about URL s = self.expandURL(self.aboutURLSchema, row) try: # Can also be used to prevent the triggering of virtual # columns! # Get the raw value from the cell in the CSV file value = row[unicode(c.csvw_name)] # This checks whether we should continue parsing this cell, or skip it. if self.isValueNull(value, c): continue # If the null values are specified in an array, we need to parse it as a collection (list) elif isinstance(c.csvw_null, Item): nulls = Collection(self.metadata_graph, BNode(c.csvw_null)) if self.equal_to_null(nulls, row): # Continue to next column specification in this row, if the value is equal to (one of) the null values. continue except: # No column name specified (virtual) because there clearly was no c.csvw_name key in the row. # logger.debug(traceback.format_exc()) #removed for readability iter_error_counter +=1 if isinstance(c.csvw_null, Item): nulls = Collection(self.metadata_graph, BNode(c.csvw_null)) if self.equal_to_null(nulls, row): # Continue to next column specification in this row, if the value is equal to (one of) the null values. continue try: # This overrides the subject resource 's' that has been created earlier based on the # schema wide aboutURLSchema specification. 
if unicode(c.csvw_virtual) == u'true' and c.csvw_aboutUrl is not None: s = self.expandURL(c.csvw_aboutUrl, row) if c.csvw_valueUrl is not None: # This is an object property, because the value needs to be cast to a URL p = self.expandURL(c.csvw_propertyUrl, row) o = self.expandURL(c.csvw_valueUrl, row) if self.isValueNull(os.path.basename(unicode(o)), c): logger.debug("skipping empty value") continue if unicode(c.csvw_virtual) == u'true' and c.csvw_datatype is not None and URIRef(c.csvw_datatype) == XSD.anyURI: # Special case: this is a virtual column with object values that are URIs # For now using a test special property value = row[unicode(c.csvw_name)].encode('utf-8') o = URIRef(iribaker.to_iri(value)) if unicode(c.csvw_virtual) == u'true' and c.csvw_datatype is not None and URIRef(c.csvw_datatype) == XSD.linkURI: about_url = str(c.csvw_aboutUrl) about_url = about_url[about_url.find("{"):about_url.find("}")+1] s = self.expandURL(about_url, row) # logger.debug("s: {}".format(s)) value_url = str(c.csvw_valueUrl) value_url = value_url[value_url.find("{"):value_url.find("}")+1] o = self.expandURL(value_url, row) # logger.debug("o: {}".format(o)) # For coded properties, the collectionUrl can be used to indicate that the # value URL is a concept and a member of a SKOS Collection with that URL. if c.csvw_collectionUrl is not None: collection = self.expandURL(c.csvw_collectionUrl, row) self.g.add((collection, RDF.type, SKOS['Collection'])) self.g.add((o, RDF.type, SKOS['Concept'])) self.g.add((collection, SKOS['member'], o)) # For coded properties, the schemeUrl can be used to indicate that the # value URL is a concept and a member of a SKOS Scheme with that URL. if c.csvw_schemeUrl is not None: scheme = self.expandURL(c.csvw_schemeUrl, row) self.g.add((scheme, RDF.type, SKOS['Scheme'])) self.g.add((o, RDF.type, SKOS['Concept'])) self.g.add((o, SKOS['inScheme'], scheme)) else: # This is a datatype property if c.csvw_value is not None: value = self.render_pattern(unicode(c.csvw_value), row) elif c.csvw_name is not None: # print s # print c.csvw_name, self.encoding # print row[unicode(c.csvw_name)], type(row[unicode(c.csvw_name)]) # print row[unicode(c.csvw_name)].encode('utf-8') # print '...' value = row[unicode(c.csvw_name)].encode('utf-8') else: raise Exception("No 'name' or 'csvw:value' attribute found for this column specification") # If propertyUrl is specified, use it, otherwise use # the column name if c.csvw_propertyUrl is not None: p = self.expandURL(c.csvw_propertyUrl, row) else: if "" in self.metadata_graph.namespaces(): propertyUrl = self.metadata_graph.namespaces()[""][ unicode(c.csvw_name)] else: propertyUrl = "{}{}".format(get_namespaces()['sdv'], unicode(c.csvw_name)) p = self.expandURL(propertyUrl, row) if c.csvw_datatype is not None: if URIRef(c.csvw_datatype) == XSD.anyURI: # The xsd:anyURI datatype will be cast to a proper IRI resource. o = URIRef(iribaker.to_iri(value)) elif URIRef(c.csvw_datatype) == XSD.string and c.csvw_lang is not None: # If it is a string datatype that has a language, we turn it into a # language tagged literal # We also render the lang value in case it is a # pattern. o = Literal(value, lang=self.render_pattern( c.csvw_lang, row)) else: o = Literal(value, datatype=c.csvw_datatype, normalize=False) else: # It's just a plain literal without datatype. 
o = Literal(value) # Add the triple to the assertion graph self.g.add((s, p, o)) # Add provenance relating the propertyUrl to the column id if '@id' in c: self.g.add((p, PROV['wasDerivedFrom'], URIRef(c['@id']))) except: # print row[0], value traceback.print_exc() # We increment the observation (row number) with one obs_count += 1 logger.debug( "{} row skips caused by multiprocessing (multiple of chunksize exceeds number of rows in file)...".format(mult_proc_counter)) logger.debug( "{} errors encountered while trying to iterate over a NoneType...".format(mult_proc_counter)) logger.info("... done") return self.ds.serialize(format=self.output_format) # def serialize(self): # trig_file_name = self.file_name + '.trig' # logger.info("Starting serialization to {}".format(trig_file_name)) # # with open(trig_file_name, 'w') as f: # self.np.serialize(f, format='trig') # logger.info("... done") def render_pattern(self, pattern, row): """Takes a Jinja or Python formatted string, and applies it to the row value""" # Significant speedup by not re-instantiating Jinja templates for every # row. if pattern in self.templates: template = self.templates[pattern] else: template = self.templates[pattern] = Template(pattern) # TODO This should take into account the special CSVW instructions such as {_row} # First we interpret the url_pattern as a Jinja2 template, and pass all # column/value pairs as arguments rendered_template = template.render(**row) try: # We then format the resulting string using the standard Python2 # expressions return rendered_template.format(**row) except: logger.warning( u"Could not apply python string formatting, probably due to mismatched curly brackets. IRI will be '{}'. ".format(rendered_template)) return rendered_template def expandURL(self, url_pattern, row, datatype=False): """Takes a Jinja or Python formatted string, applies it to the row values, and returns it as a URIRef""" url = self.render_pattern(unicode(url_pattern), row) # DEPRECATED # for ns, nsuri in namespaces.items(): # if url.startswith(ns): # url = url.replace(ns + ':', nsuri) # break try: iri = iribaker.to_iri(url) rfc3987.parse(iri, rule='IRI') except: raise Exception(u"Cannot convert `{}` to valid IRI".format(url)) # print "Baked: ", iri return URIRef(iri) def isValueNull(self, value, c): """This checks whether we should continue parsing this cell, or skip it because it is empty or a null value.""" try: if len(value) == 0 and unicode(c.csvw_parseOnEmpty) == u"true": print("Not skipping empty value") return False #because it should not be skipped elif len(value) == 0 or value == unicode(c.csvw_null) or value in [unicode(n) for n in c.csvw_null] or value == unicode(self.schema.csvw_null): # Skip value if length is zero and equal to (one of) the null value(s) logger.debug( "Length is 0 or value is equal to specified 'null' value") return True except: logger.debug("null does not exist or is not a list.") return False
class RDFtoUmlDiagram(): """ Transform a RDF dataset to an UML diagram """ def __init__(self, showObjs, showClasses, namespace): self.ds = Dataset() self.d = UmlPygraphVizDiagram() self.show_objs = showObjs self.show_classes = showClasses self.namespace = namespace self.add_namespaces(self.namespace) def load_rdf(self, filename, input_format=None): if input_format: rdf_format = input_format elif filename is not sys.stdin: format_list = {'.xml': 'xml', '.rdf': 'xml', '.owl': 'xml', '.n3': 'n3', '.ttl': 'turtle', '.nt': 'nt', '.trig': 'trig', '.nq': 'nquads', '': 'turtle'} extension = splitext(filename.name)[1] rdf_format = format_list[extension] else: rdf_format = 'turtle' print("using rdf format: " + rdf_format) temp = self.ds.graph("file://"+filename.name) temp.parse(filename.name, format=rdf_format) def add_namespaces(self, namespaces): if namespaces: for ns in namespaces: self.ds.namespace_manager.bind(ns[0],ns[1]) def start_subgraph(self, graph_name): self.d.start_subgraph(graph_name.strip('[<>]:_')) def add_object_node(self, object_name, classes_name, attributes): self.d.add_object_node(self.ds.namespace_manager.qname(object_name), classes_name, attributes) def add_class_node(self, class_name, attributes): self.d.add_class_node(self.ds.namespace_manager.qname(class_name), attributes) def add_edge(self, src, dst, predicate): self.d.add_edge(self.ds.namespace_manager.qname(src), self.ds.namespace_manager.qname(dst), self.ds.namespace_manager.qname(predicate)) def add_subclass_edge(self, src, dst): self.d.add_subclass_edge(self.ds.namespace_manager.qname(src), self.ds.namespace_manager.qname(dst)) def create_namespace_box(self): # Create Namespace box label = """< <table align="left" cellborder="0"> <tr><td align='center' colspan='2'><b>Namespaces</b></td></tr>""" for ns in sorted(self.ds.namespaces()): label += "<tr><td align='left'>%s:</td><td align='left'>%s</td></tr>" % (ns[0], ns[1] ) label += "</table> >" self.d.set_label(label) def output_dot(self, filename): self.d.write_to_file(filename) def visualize(self, filename): self.d.visualize(filename, self.ds.namespaces()) def create_diagram(self, object_nodes=True, class_nodes=False): # Iterate over all graphs for graph in self.ds.contexts(): graph_name = graph.n3() if graph_name == "[<urn:x-rdflib:default>]": break graph = graph.skolemize() if len(graph) > 0: self.start_subgraph(graph_name) if self.show_objs: self.create_object_nodes(graph) if self.show_classes: self.create_class_nodes(graph) self.d.add_undescribed_nodes() self.create_namespace_box() def create_object_nodes(self, graph): # object nodes query_nodes = """PREFIX owl: <http://www.w3.org/2002/07/owl#> SELECT DISTINCT ?node WHERE { ?node a ?class. FILTER (?class not IN (rdfs:Class, owl:Class, owl:Property, owl:ObjectProperty, owl:DatatypeProperty)) } ORDER BY ?node""" result_nodes = graph.query(query_nodes) for row_nodes in result_nodes: # adding the classes to the node (can be more than one) query_classes = """SELECT DISTINCT ?class WHERE { %s a ?class. } ORDER BY ?class""" % row_nodes['node'].n3() result_classes = graph.query(query_classes) classes = [] for row_classes in result_classes: if not self.show_classes: classes.append(self.ds.namespace_manager.qname(row_classes['class'])) else: self.add_edge(row_nodes['node'], row_classes['class'], "http://www.w3.org/1999/02/22-rdf-syntax-ns#type") # adding the attributes to the node query_attributes = """SELECT DISTINCT ?p ?o WHERE { %s ?p ?o. 
FILTER (isLiteral(?o)) } ORDER BY ?p ?o""" % row_nodes['node'].n3() result_attributes = graph.query(query_attributes) attributes = [] for row_attributes in result_attributes: attributes.append( self.ds.namespace_manager.qname(row_attributes['p']) + " = " + str(row_attributes['o'])) self.add_object_node(row_nodes['node'], ", ".join(classes), attributes) # object node connections query_connections = """SELECT DISTINCT ?c1 ?c2 ?p WHERE { ?c1 ?p ?c2. FILTER (!isLiteral(?c2)) FILTER (?p not IN (rdf:type, rdfs:domain, rdfs:range, rdfs:subClassOf)) } ORDER BY ?c1 ?p ?c2""" result_connections = graph.query(query_connections) for row_connections in result_connections: self.add_edge(row_connections['c1'], row_connections['c2'], row_connections['p']) def create_class_nodes(self, graph): # RDFS stuff query_classes = """PREFIX owl: <http://www.w3.org/2002/07/owl#> SELECT DISTINCT ?class WHERE { ?class a ?c . FILTER (?c in (rdfs:Class, owl:Class)) } ORDER BY ?class""" result_classes = graph.query(query_classes) for row_classes in result_classes: query_datatype_property = """ PREFIX owl: <http://www.w3.org/2002/07/owl#> PREFIX xsd: <http://www.w3.org/2001/XMLSchema#> SELECT DISTINCT ?property ?range WHERE { ?property rdfs:domain %s; a owl:DatatypeProperty. OPTIONAL{ ?property rdfs:range ?range. } } ORDER BY ?property""" % row_classes['class'].n3() result_datatype_property = graph.query(query_datatype_property) attributes = [] for r in result_datatype_property: text = self.ds.namespace_manager.qname(r['property']) if r['range']: text += " = " + self.ds.namespace_manager.qname(r['range']) attributes.append(text) self.add_class_node(row_classes['class'], attributes) query_object_property = """SELECT DISTINCT ?src ?dest ?property WHERE { ?property a <http://www.w3.org/2002/07/owl#ObjectProperty>; rdfs:domain ?src; rdfs:range ?dest. } ORDER BY ?src ?property ?dest""" result_object_property = graph.query(query_object_property) for row_object_property in result_object_property: self.add_edge(row_object_property['src'], row_object_property['dest'], row_object_property['property']) query_subclass = """SELECT DISTINCT ?src ?dest WHERE { ?src rdfs:subClassOf ?dest. } ORDER BY ?src ?dest""" result_subclass = graph.query(query_subclass) for row_subclass in result_subclass: self.add_subclass_edge(row_subclass['src'], row_subclass['dest'])
#!/usr/bin/python from rdflib import Literal, BNode, Namespace, URIRef, Graph, Dataset, RDF, RDFS, XSD import rdflib.resource from provmodified import Entity import provmodified as prov import json import subprocess, shlex import collections DOCKER = Namespace("http://www.example.org/ns/docker#") PROV = Namespace("http://www.w3.org/ns/prov#") ds = Dataset(default_union=True) ds.bind("docker", DOCKER) ds.bind("prov", PROV) default_graph = ds def bind_ns(prefix, namespace): ds.namespace_manager.bind(prefix, Namespace(namespace)) def parse_json_byfile(filename): with open(filename) as data_file: data = json.load(data_file) return data[0] def inspect_json(cmd): # print cmd p = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
from rdflib import Literal, BNode, Namespace, URIRef, Graph, Dataset, RDF, RDFS, XSD import rdflib.resource """ @newfield iri: IRI """ PROV = Namespace("http://www.w3.org/ns/prov#") ds = Dataset(default_union=True) ds.bind("prov", PROV) default_graph = ds #print type(default_graph) config = { "useInverseProperties": False } def set_use_inverse_properties(flag=False): config["useInverseProperties"] = flag def using_inverse_properties(): return config["useInverseProperties"] def clear_graph(bundle=default_graph): bundle.remove((None, None, None))
LOCAL_STORE = config.LOCAL_STORE LOCAL_FILE = config.LOCAL_FILE SPARQL_ENDPOINT_MAPPING = config.SPARQL_ENDPOINT_MAPPING SPARQL_ENDPOINT = config.SPARQL_ENDPOINT DEFAULT_BASE = config.DEFAULT_BASE QUERY_RESULTS_LIMIT = config.QUERY_RESULTS_LIMIT CUSTOM_PARAMETERS = config.CUSTOM_PARAMETERS labels = {} g = Dataset() if LOCAL_STORE: log.info("Loading local file: {}".format(LOCAL_FILE)) try: format = rdflib.util.guess_format(LOCAL_FILE) g.load(LOCAL_FILE, format=format) except: log.error(traceback.format_exc()) raise Exception("Cannot guess file format for {} or could not load file".format(LOCAL_FILE)) def visit(url, format='html'): log.debug("Starting query") if LOCAL_STORE:
print header with open(filename,'r') as csvfile: csv_contents = [{k: v for k, v in row.items()} for row in csv.DictReader(csvfile, skipinitialspace=True, quotechar='"', delimiter=',')] return csv_contents #//*************** csv parser ****************//# graph_uri_base = resource path = 'source_datasets/' filename_population = 'all_population_by_type.csv' filename_unemployment = 'unemployment_eu.csv' filename_inflow = 'inflow_dataset.csv' filename_asylum = 'asylum_seekers.csv' dataset = Dataset() dataset.bind('mpr', RESOURCE) dataset.bind('mpo', VOCAB) dataset.bind('geo', GEO) dataset.bind('geo_country_code', GCC) dataset.bind('dbo', DBO) dataset.bind('dbr', DBR) dataset.bind('sdmx', SDMX) dataset.default_context.parse(VOCAB_FILE, format='turtle') dataset, unemployment_eu_graph = convert_unemployment_csv(filename_unemployment,dataset,URIRef(graph_uri_base + 'unemployment_eu_graph')) dataset, population_eu_graph = convert_population_csv(filename_population,dataset,URIRef(graph_uri_base + 'population_eu_graph')) dataset, inflow_graph = convert_inflow_csv(filename_inflow,dataset,URIRef(graph_uri_base + 'inflow_graph'))
def data_structure_definition(profile, dataset_name, dataset_base_uri,
                              variables, source_path, source_hash):
    """Converts the dataset + variables to a set of rdflib Graphs (a nanopublication
    with provenance annotations) that contains the data structure definition
    (from the DataCube vocabulary) and the mappings to external datasets.

    Arguments:
    dataset_name     -- the name of the dataset
    dataset_base_uri -- the base URI under which the nanopublication URIs are minted
    variables        -- the list of dictionaries with the variables and their mappings to URIs
    profile          -- the Google signin profile
    source_path      -- the path to the dataset file that was annotated
    source_hash      -- the Git hash of the dataset file version of the dataset

    :returns: an RDF graph store containing a nanopublication
    """
    BASE = Namespace("{}/".format(dataset_base_uri))
    dataset_uri = URIRef(dataset_base_uri)

    # Initialize a conjunctive graph for the whole lot
    rdf_dataset = Dataset()
    rdf_dataset.bind("qbrv", QBRV)
    rdf_dataset.bind("qbr", QBR)
    rdf_dataset.bind("qb", QB)
    rdf_dataset.bind("skos", SKOS)
    rdf_dataset.bind("prov", PROV)
    rdf_dataset.bind("np", NP)
    rdf_dataset.bind("foaf", FOAF)

    # Initialize the graphs needed for the nanopublication
    timestamp = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M")

    # Shorten the source hash to 8 digits (similar to Github)
    source_hash = source_hash[:8]

    hash_part = source_hash + "/" + timestamp

    # The Nanopublication consists of three graphs
    assertion_graph_uri = BASE["assertion/" + hash_part]
    assertion_graph = rdf_dataset.graph(assertion_graph_uri)

    provenance_graph_uri = BASE["provenance/" + hash_part]
    provenance_graph = rdf_dataset.graph(provenance_graph_uri)

    pubinfo_graph_uri = BASE["pubinfo/" + hash_part]
    pubinfo_graph = rdf_dataset.graph(pubinfo_graph_uri)

    # A URI that represents the author
    author_uri = QBR["person/" + profile["email"]]

    rdf_dataset.add((author_uri, RDF.type, FOAF["Person"]))
    rdf_dataset.add((author_uri, FOAF["name"], Literal(profile["name"])))
    rdf_dataset.add((author_uri, FOAF["email"], Literal(profile["email"])))
    rdf_dataset.add((author_uri, QBRV["googleId"], Literal(profile["id"])))
    try:
        rdf_dataset.add((author_uri, FOAF["depiction"], URIRef(profile["image"])))
    except KeyError:
        pass

    # A URI that represents the version of the dataset source file
    dataset_version_uri = BASE[source_hash]

    # Some information about the source file used
    rdf_dataset.add((dataset_version_uri, QBRV["path"],
                     Literal(source_path, datatype=XSD.string)))
    rdf_dataset.add((dataset_version_uri, QBRV["sha1_hash"],
                     Literal(source_hash, datatype=XSD.string)))

    # ----
    # The nanopublication itself
    # ----
    nanopublication_uri = BASE["nanopublication/" + hash_part]

    rdf_dataset.add((nanopublication_uri, RDF.type, NP["Nanopublication"]))
    rdf_dataset.add((nanopublication_uri, NP["hasAssertion"], assertion_graph_uri))
    rdf_dataset.add((assertion_graph_uri, RDF.type, NP["Assertion"]))
    rdf_dataset.add((nanopublication_uri, NP["hasProvenance"], provenance_graph_uri))
    rdf_dataset.add((provenance_graph_uri, RDF.type, NP["Provenance"]))
    rdf_dataset.add((nanopublication_uri, NP["hasPublicationInfo"], pubinfo_graph_uri))
    rdf_dataset.add((pubinfo_graph_uri, RDF.type, NP["PublicationInfo"]))

    # ----
    # The provenance graph
    # ----

    # Provenance information for the assertion graph (the data structure definition itself)
    provenance_graph.add((assertion_graph_uri, PROV["wasDerivedFrom"], dataset_version_uri))
    provenance_graph.add((dataset_uri, PROV["wasDerivedFrom"], dataset_version_uri))
    provenance_graph.add((assertion_graph_uri, PROV["generatedAtTime"],
                          Literal(timestamp, datatype=XSD.datetime)))
    provenance_graph.add((assertion_graph_uri, PROV["wasAttributedTo"], author_uri))

    # ----
    # The publication info graph
    # ----

    # The URI of the latest version of QBer
    # TODO: should point to the actual latest commit of this QBer source file.
    # TODO: consider linking to this as the plan of some activity, rather than an activity itself.
    qber_uri = URIRef("https://github.com/CLARIAH/qber.git")

    pubinfo_graph.add((nanopublication_uri, PROV["wasGeneratedBy"], qber_uri))
    pubinfo_graph.add((nanopublication_uri, PROV["generatedAtTime"],
                       Literal(timestamp, datatype=XSD.datetime)))
    pubinfo_graph.add((nanopublication_uri, PROV["wasAttributedTo"], author_uri))

    # ----
    # The assertion graph
    # ----
    structure_uri = BASE["structure"]

    assertion_graph.add((dataset_uri, RDF.type, QB["DataSet"]))
    assertion_graph.add((dataset_uri, RDFS.label, Literal(dataset_name)))
    assertion_graph.add((structure_uri, RDF.type, QB["DataStructureDefinition"]))
    assertion_graph.add((dataset_uri, QB["structure"], structure_uri))

    for variable_id, variable in variables.items():
        variable_uri = URIRef(variable["original"]["uri"])
        variable_label = Literal(variable["original"]["label"])
        variable_type = URIRef(variable["type"])

        codelist_uri = URIRef(variable["codelist"]["original"]["uri"])
        codelist_label = Literal(variable["codelist"]["original"]["label"])

        # The variable as component of the definition
        component_uri = safe_url(BASE, "component/" + variable["original"]["label"])

        # Add link between the definition and the component
        assertion_graph.add((structure_uri, QB["component"], component_uri))

        # Add label to variable
        # TODO: We may need to do something with a changed label for the variable
        assertion_graph.add((variable_uri, RDFS.label, variable_label))

        if "description" in variable and variable["description"] != "":
            assertion_graph.add((variable_uri, RDFS.comment,
                                 Literal(variable["description"])))

        # If the variable URI is not the same as the original,
        # it is a specialization of a prior variable property.
        if variable["uri"] != str(variable_uri):
            assertion_graph.add((variable_uri, RDFS["subPropertyOf"],
                                 URIRef(variable["uri"])))

        if variable_type == QB["DimensionProperty"]:
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB["dimension"], variable_uri))

            # Coded variables are also of type coded property
            # (a subproperty of dimension property)
            if variable["category"] == "coded":
                assertion_graph.add((variable_uri, RDF.type, QB["CodedProperty"]))

        elif variable_type == QB["MeasureProperty"]:
            # The category 'other'
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB["measure"], variable_uri))

        elif variable_type == QB["AttributeProperty"]:
            # Actually never produced by QBer at this stage
            assertion_graph.add((variable_uri, RDF.type, variable_type))
            assertion_graph.add((component_uri, QB["attribute"], variable_uri))

        # If this variable is of category 'coded', we add codelist and URIs for
        # each variable (including mappings between value uris and etc....)
        if variable["category"] == "coded":
            assertion_graph.add((codelist_uri, RDF.type, SKOS["Collection"]))
            assertion_graph.add((codelist_uri, RDFS.label, Literal(codelist_label)))

            # The variable should point to the codelist
            assertion_graph.add((variable_uri, QB["codeList"], codelist_uri))

            # The variable is mapped onto an external code list.
            # If the codelist uri is not the same as the original one, we
            # have a derived codelist.
            if variable["codelist"]["uri"] != str(codelist_uri):
                assertion_graph.add((codelist_uri, PROV["wasDerivedFrom"],
                                     URIRef(variable["codelist"]["uri"])))

            # Generate a SKOS concept for each of the values and map it to the
            # assigned codelist
            for value in variable["values"]:
                value_uri = URIRef(value["original"]["uri"])
                value_label = Literal(value["original"]["label"])

                assertion_graph.add((value_uri, RDF.type, SKOS["Concept"]))
                assertion_graph.add((value_uri, SKOS["prefLabel"], Literal(value_label)))
                assertion_graph.add((codelist_uri, SKOS["member"], value_uri))

                # The value has been changed, and therefore there is a mapping
                if value["original"]["uri"] != value["uri"]:
                    assertion_graph.add((value_uri, SKOS["exactMatch"], URIRef(value["uri"])))
                    assertion_graph.add((value_uri, RDFS.label, Literal(value["label"])))

        elif variable["category"] == "identifier":
            # Generate a SKOS concept for each of the values
            for value in variable["values"]:
                value_uri = URIRef(value["original"]["uri"])
                value_label = Literal(value["original"]["label"])

                assertion_graph.add((value_uri, RDF.type, SKOS["Concept"]))
                assertion_graph.add((value_uri, SKOS["prefLabel"], value_label))

                # The value has been changed, and therefore there is a mapping
                if value["original"]["uri"] != value["uri"]:
                    assertion_graph.add((value_uri, SKOS["exactMatch"], URIRef(value["uri"])))
                    assertion_graph.add((value_uri, RDFS.label, Literal(value["label"])))

        elif variable["category"] == "other":
            # Generate a literal for each of the values when converting the dataset
            # (but not here)
            pass

    return rdf_dataset
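A minimal usage sketch for the function above; the profile dict, dataset name, paths, and hash are illustrative placeholders, not values from the original project:

    # Hypothetical inputs, for illustration only.
    profile = {"email": "author@example.org", "name": "A. Author", "id": "1234567890"}

    nanopub = data_structure_definition(
        profile,
        dataset_name="example-dataset",
        dataset_base_uri="http://example.org/example-dataset",
        variables={},  # no variables: only the skeleton nanopublication is produced
        source_path="data/example-dataset.csv",
        source_hash="0123456789abcdef")

    # The returned Dataset holds the assertion, provenance and pubinfo graphs;
    # TriG preserves the named-graph structure when serializing.
    print(nanopub.serialize(format='trig'))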
def query_test(t):
    uri, name, comment, data, graphdata, query, resfile, syntax = t

    # the query-eval tests refer to graphs to load by resolvable filenames
    rdflib_sparql_module.SPARQL_LOAD_GRAPHS = True

    if uri in skiptests:
        raise SkipTest()

    def skip(reason='(none)'):
        print "Skipping %s from now on." % uri
        f = open("skiptests.list", "a")
        f.write("%s\t%s\n" % (uri, reason))
        f.close()

    try:
        g = Dataset()
        if data:
            g.default_context.load(data, format=_fmt(data))

        if graphdata:
            for x in graphdata:
                g.load(x, format=_fmt(x))

        if not resfile:
            # no result - syntax test
            if syntax:
                translateQuery(parseQuery(
                    open(query[7:]).read()), base=urljoin(query, '.'))
            else:
                # negative syntax test
                try:
                    translateQuery(parseQuery(
                        open(query[7:]).read()), base=urljoin(query, '.'))

                    assert False, 'Query should not have parsed!'
                except:
                    pass  # it's fine - the query should not parse
            return

        # eval test - carry out query
        res2 = g.query(open(query[7:]).read(), base=urljoin(query, '.'))

        if resfile.endswith('ttl'):
            resg = Graph()
            resg.load(resfile, format='turtle', publicID=resfile)
            res = RDFResultParser().parse(resg)
        elif resfile.endswith('rdf'):
            resg = Graph()
            resg.load(resfile, publicID=resfile)
            res = RDFResultParser().parse(resg)
        elif resfile.endswith('srj'):
            res = Result.parse(open(resfile[7:]), format='json')
        elif resfile.endswith('tsv'):
            res = Result.parse(open(resfile[7:]), format='tsv')
        elif resfile.endswith('csv'):
            res = Result.parse(open(resfile[7:]), format='csv')

            # CSV is lossy, round-trip our own resultset to
            # lose the same info :)

            # write bytes, read strings...
            s = BytesIO()
            res2.serialize(s, format='csv')
            print s.getvalue()
            s = StringIO(s.getvalue().decode('utf-8'))  # hmm ?
            res2 = Result.parse(s, format='csv')
        else:
            res = Result.parse(open(resfile[7:]), format='xml')

        if not DETAILEDASSERT:
            eq(res.type, res2.type, 'Types do not match')
            if res.type == 'SELECT':
                eq(set(res.vars), set(res2.vars), 'Vars do not match')
                comp = bindingsCompatible(
                    set(res),
                    set(res2)
                )
                assert comp, 'Bindings do not match'
            elif res.type == 'ASK':
                eq(res.askAnswer, res2.askAnswer, 'Ask answer does not match')
            elif res.type in ('DESCRIBE', 'CONSTRUCT'):
                assert isomorphic(
                    res.graph, res2.graph), 'graphs are not isomorphic!'
            else:
                raise Exception('Unknown result type: %s' % res.type)
        else:
            eq(res.type, res2.type,
               'Types do not match: %r != %r' % (res.type, res2.type))
            if res.type == 'SELECT':
                eq(set(res.vars), set(res2.vars),
                   'Vars do not match: %r != %r' % (
                       set(res.vars), set(res2.vars)))
                assert bindingsCompatible(
                    set(res),
                    set(res2)
                ), 'Bindings do not match: \n%s\n!=\n%s' % (
                    res.serialize(format='txt',
                                  namespace_manager=g.namespace_manager),
                    res2.serialize(format='txt',
                                   namespace_manager=g.namespace_manager))
            elif res.type == 'ASK':
                eq(res.askAnswer, res2.askAnswer,
                   "Ask answer does not match: %r != %r" % (
                       res.askAnswer, res2.askAnswer))
            elif res.type in ('DESCRIBE', 'CONSTRUCT'):
                assert isomorphic(
                    res.graph, res2.graph), 'graphs are not isomorphic!'
            else:
                raise Exception('Unknown result type: %s' % res.type)

    except Exception, e:

        if isinstance(e, AssertionError):
            failed_tests.append(uri)
            fails[str(e)] += 1
        else:
            error_tests.append(uri)
            errors[str(e)] += 1

        if DEBUG_ERROR and not isinstance(e, AssertionError) or DEBUG_FAIL:
            print "======================================"
            print uri
            print name
            print comment

            if not resfile:
                if syntax:
                    print "Positive syntax test"
                else:
                    print "Negative syntax test"

            if data:
                print "----------------- DATA --------------------"
                print ">>>", data
                print open(data[7:]).read()
            if graphdata:
                print "----------------- GRAPHDATA --------------------"
                for x in graphdata:
                    print ">>>", x
                    print open(x[7:]).read()

            print "----------------- Query -------------------"
            print ">>>", query
            print open(query[7:]).read()
            if resfile:
                print "----------------- Res -------------------"
                print ">>>", resfile
                print open(resfile[7:]).read()

            try:
                pq = parseQuery(open(query[7:]).read())
                print "----------------- Parsed ------------------"
                pprintAlgebra(translateQuery(pq, base=urljoin(query, '.')))
            except:
                print "(parser error)"

            print decodeStringEscape(unicode(e))

            import pdb
            pdb.post_mortem(sys.exc_info()[2])
            # pdb.set_trace()
            # nose.tools.set_trace()
        raise
class DatasetTestCase(unittest.TestCase):
    store = 'default'
    slow = True
    tmppath = None

    def setUp(self):
        try:
            self.graph = Dataset(store=self.store)
        except ImportError:
            raise SkipTest(
                "Dependencies for store '%s' not available!" % self.store)
        if self.store == "SQLite":
            _, self.tmppath = mkstemp(
                prefix='test', dir='/tmp', suffix='.sqlite')
        elif self.store == "SPARQLUpdateStore":
            root = HOST + DB
            self.graph.open((root + "sparql", root + "update"))
        else:
            self.tmppath = mkdtemp()

        if self.store != "SPARQLUpdateStore":
            self.graph.open(self.tmppath, create=True)
        self.michel = URIRef(u'urn:michel')
        self.tarek = URIRef(u'urn:tarek')
        self.bob = URIRef(u'urn:bob')
        self.likes = URIRef(u'urn:likes')
        self.hates = URIRef(u'urn:hates')
        self.pizza = URIRef(u'urn:pizza')
        self.cheese = URIRef(u'urn:cheese')

        # Use regular URIs because SPARQL endpoints like Fuseki alter short names
        self.c1 = URIRef(u'urn:context-1')
        self.c2 = URIRef(u'urn:context-2')

        # delete the graph for each test!
        self.graph.remove((None, None, None))
        for c in self.graph.contexts():
            c.remove((None, None, None))
            assert len(c) == 0
            self.graph.remove_graph(c)

    def tearDown(self):
        self.graph.close()
        if self.store == "SPARQLUpdateStore":
            pass
        else:
            if os.path.isdir(self.tmppath):
                shutil.rmtree(self.tmppath)
            else:
                os.remove(self.tmppath)

    def testGraphAware(self):
        if not self.graph.store.graph_aware:
            return

        g = self.graph
        g1 = g.graph(self.c1)

        # Some SPARQL endpoint backends (e.g. TDB) do not consider
        # empty named graphs
        if self.store != "SPARQLUpdateStore":
            # added graph exists
            self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                             set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        # added graph is empty
        self.assertEqual(len(g1), 0)

        g1.add((self.tarek, self.likes, self.pizza))

        # added graph still exists
        self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                         set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        # added graph contains one triple
        self.assertEqual(len(g1), 1)

        g1.remove((self.tarek, self.likes, self.pizza))

        # added graph is empty
        self.assertEqual(len(g1), 0)

        # Some SPARQL endpoint backends (e.g. TDB) do not consider
        # empty named graphs
        if self.store != "SPARQLUpdateStore":
            # graph still exists, although empty
            self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                             set([self.c1, DATASET_DEFAULT_GRAPH_ID]))

        g.remove_graph(self.c1)

        # graph is gone
        self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                         set([DATASET_DEFAULT_GRAPH_ID]))

    def testDefaultGraph(self):
        # Sometimes the default graph is read-only (e.g. TDB in union mode)
        if self.store == "SPARQLUpdateStore":
            print("Please make sure updating the default graph "
                  "is supported by your SPARQL endpoint")

        self.graph.add((self.tarek, self.likes, self.pizza))
        self.assertEqual(len(self.graph), 1)
        # only default exists
        self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                         set([DATASET_DEFAULT_GRAPH_ID]))

        # removing default graph removes triples but not actual graph
        self.graph.remove_graph(DATASET_DEFAULT_GRAPH_ID)

        self.assertEqual(len(self.graph), 0)
        # default still exists
        self.assertEqual(set(x.identifier for x in self.graph.contexts()),
                         set([DATASET_DEFAULT_GRAPH_ID]))

    def testNotUnion(self):
        # Union depends on the SPARQL endpoint configuration
        if self.store == "SPARQLUpdateStore":
            print("Please make sure your SPARQL endpoint has not configured "
                  "its default graph as the union of the named graphs")

        g1 = self.graph.graph(self.c1)
        g1.add((self.tarek, self.likes, self.pizza))

        self.assertEqual(list(self.graph.objects(self.tarek, None)), [])
        self.assertEqual(list(g1.objects(self.tarek, None)), [self.pizza])
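As a rough sketch of how such a test case might be driven (the subclass name is hypothetical; the real suite generates one subclass per configured store):

    # Sketch: bind the test case above to the in-memory store and run it with unittest.
    import unittest


    class MemoryDatasetTestCase(DatasetTestCase):
        store = 'default'   # rdflib's in-memory store


    if __name__ == '__main__':
        unittest.main()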
from flask import Flask          # added: Flask is used below but was not imported in the fragment
from rdflib import Dataset       # added: Dataset is used below but was not imported in the fragment
import flask_rdf
from flask_rdf.flask import returns_rdf
from flask_restful import Resource, Api

app = Flask(__name__)
api = Api(app)

# set up a custom formatter that returns JSON-LD by default
custom_formatter = flask_rdf.FormatSelector()
custom_formatter.wildcard_mimetype = 'application/ld+json'
custom_formatter.add_format('application/ld+json', 'json-ld')
custom_decorator = flask_rdf.flask.Decorator(custom_formatter)

ds = Dataset(default_union=True)
with open('./dectectorfinalstate.owl', "r") as f:
    result = ds.parse(f, format="application/rdf+xml")


class HelloWorld(Resource):
    @custom_decorator
    def get(self):
        return ds


api.add_resource(HelloWorld, '/detectorfinalstate')


@app.route("/")
def main():
    # This is cached, so for development it is better
def visit_sparql(url, format='html', depth=1):
    sparqls = get_sparql_endpoints(url)
    predicates = get_predicates(sparqls, url)

    if format == 'html':
        limit_fraction = QUERY_RESULTS_LIMIT / 3
        if len(predicates) > 1:
            predicate_query_limit_fraction = (
                limit_fraction * 2) / len(predicates)
        else:
            predicate_query_limit_fraction = limit_fraction * 2

        results = []

        def predicate_specific_sparql(sparql, query):
            log.debug(query)

            sparql.setQuery(query)
            res = sparql.query().convert()
            results.extend(list(res["results"]["bindings"]))

        threads = []
        local_results = []
        for p in predicates:
            q = u"""SELECT DISTINCT ?s ?p ?o ?g WHERE {{
                {{
                    GRAPH ?g {{
                        {{
                            <{url}> <{predicate}> ?o .
                            BIND(<{url}> as ?s)
                            BIND(<{predicate}> as ?p)
                        }} UNION {{
                            ?s <{predicate}> <{url}>.
                            BIND(<{url}> as ?o)
                            BIND(<{predicate}> as ?p)
                        }}
                    }}
                }} UNION {{
                    {{
                        <{url}> <{predicate}> ?o .
                        BIND(<{url}> as ?s)
                        BIND(<{predicate}> as ?p)
                    }} UNION {{
                        ?s <{predicate}> <{url}>.
                        BIND(<{url}> as ?o)
                        BIND(<{predicate}> as ?p)
                    }}
                }}
            }} LIMIT {limit}""".format(url=url, predicate=p,
                                       limit=predicate_query_limit_fraction)

            for s in sparqls:
                # Start processes for each endpoint, for each predicate query
                process = Thread(target=predicate_specific_sparql, args=[s, q])
                process.start()
                threads.append(process)

        url_is_predicate_query = u"""SELECT DISTINCT ?s ?p ?o ?g WHERE {{
            {{
                GRAPH ?g {{
                    ?s <{url}> ?o.
                    BIND(<{url}> as ?p)
                }}
            }} UNION {{
                ?s <{url}> ?o.
                BIND(<{url}> as ?p)
            }}
        }} LIMIT {limit}""".format(url=url, limit=limit_fraction)

        for s in sparqls:
            process = Thread(target=predicate_specific_sparql,
                             args=[s, url_is_predicate_query])
            process.start()
            threads.append(process)

        # We now pause execution on the main thread by 'joining' all of our
        # started threads. This ensures that each has finished processing the urls.
        for process in threads:
            process.join()

        if LDF_STATEMENTS_URL is not None:
            retrieve_ldf_results(url)

        # We also add local results (result of dereferencing)
        local_results = list(visit_local(url, format))

        results.extend(local_results)

        # If a Druid statements URL is specified, we'll try to receive it as
        # well
        if DRUID_STATEMENTS_URL is not None:
            results.extend(visit_druid(url, format))

        if depth > 1:
            # If depth is larger than 1, we proceed to extend the results with
            # the results of visiting all object resources for every triple in
            # the resultset.
            newresults = []

            objects = set([r['o']['value'] for r in results
                           if r['o']['value'] != url and r['o']['type'] == 'uri'])
            for o in objects:
                newresults.extend(visit(o, format=format, depth=depth - 1))

            results.extend(newresults)

    else:
        q = u"""
        CONSTRUCT {{
            ?s ?p ?o .
        }} WHERE {{
            {{
                GRAPH ?g {{
                    {{
                        <{url}> ?p ?o .
                        BIND(<{url}> as ?s)
                    }} UNION {{
                        ?s ?p <{url}>.
                        BIND(<{url}> as ?o)
                    }} UNION {{
                        ?s <{url}> ?o.
                        BIND(<{url}> as ?p)
                    }}
                }}
            }} UNION {{
                {{
                    <{url}> ?p ?o .
                    BIND(<{url}> as ?s)
                }} UNION {{
                    ?s ?p <{url}>.
                    BIND(<{url}> as ?o)
                }} UNION {{
                    ?s <{url}> ?o.
                    BIND(<{url}> as ?p)
                }}
            }}
        }} LIMIT {limit}""".format(url=url, limit=QUERY_RESULTS_LIMIT)

        result_dataset = Dataset()

        for s in sparqls:
            s.setQuery(q)
            s.setReturnFormat(XML)

            result_dataset += s.query().convert()

        if format == 'jsonld':
            results = result_dataset.serialize(format='json-ld')
        elif format == 'rdfxml':
            s.setReturnFormat(XML)
            results = result_dataset.serialize(format='pretty-xml')
        elif format == 'turtle':
            s.setReturnFormat(XML)
            results = result_dataset.serialize(format='turtle')
        else:
            results = 'Nothing'

    log.debug("Received results")

    return results
def createNanopubs(g):
    ds = Dataset()
    ds.namespace_manager.bind("ddi", "http://dbmi-icode-01.dbmi.pitt.edu/mp/")
    ds.namespace_manager.bind("np", "http://www.nanopub.org/nschema#")
    ds.namespace_manager.bind("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#")
    ds.namespace_manager.bind("rdfs", "http://www.w3.org/2000/01/rdf-schema#")
    ds.namespace_manager.bind("owl", "http://www.w3.org/2002/07/owl#")
    ds.namespace_manager.bind("obo", "http://purl.obolibrary.org/obo/")
    ds.namespace_manager.bind("oboInOwl", "http://www.geneontology.org/formats/oboInOwl#")
    ds.namespace_manager.bind("xsd", "http://www.w3.org/2001/XMLSchema#")
    ds.namespace_manager.bind("dc", "http://purl.org/dc/elements/1.1/")
    ds.namespace_manager.bind("mp", "http://purl.org/mp/")
    ds.namespace_manager.bind("prov", "http://www.w3.org/ns/prov#")
    ds.namespace_manager.bind("dikbEvidence",
                              "http://dbmi-icode-01.dbmi.pitt.edu/dikb-evidence/DIKB_evidence_ontology_v1.3.owl#")

    bindings = g.query(interactSelect)
    for b in bindings:
        asIndex = b['a'].decode('utf-8').rfind('-')
        identifier = b['a'].decode('utf-8')[asIndex:]
        predicateType = b['t'].decode('utf-8')

        # format the identifier into the URI string before constructing the URIRef
        npURI = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-nanopub%s' % identifier)
        headURI = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-head%s' % identifier)
        pubInfoURI = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-pubInfo%s' % identifier)
        provURI = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-provenance%s' % identifier)
        aURI = URIRef('http://dbmi-icode-01.dbmi.pitt.edu/mp/ddi-spl-annotation-np-assertion%s' % identifier)

        ds.add((aURI, RDF.type, np.assertion))

        head = ds.add_graph(headURI)
        head.add((npURI, RDF.type, np['Nanopublication']))
        head.add((provURI, RDF.type, np['Provenance']))
        head.add((pubInfoURI, RDF.type, np['PublicationInfo']))
        head.add((npURI, np['hasAssertion'], aURI))
        head.add((npURI, np['hasProvenance'], provURI))
        head.add((npURI, np['hasPublicationInfo'], pubInfoURI))

        pub = ds.add_graph(pubInfoURI)
        pub.add((npURI, prov.wasAttributedTo, URIRef('http://orcid.org/0000-0002-2993-2085')))
        pub.add((npURI, prov.generatedAtTime, Literal(datetime.now())))

        if(predicateType == "http://purl.obolibrary.org/obo/DIDEO_00000000"):
            provenance = ds.add_graph(provURI)
            provenance.add((aURI, prov.wasAttributedTo, URIRef('http://orcid.org/0000-0002-2993-2085')))
            provenance.add((aURI, prov.generatedAtTime, Literal(datetime.now())))
            provenance.add((aURI, prov.wasDerivedFrom,
                            Literal("Derived from the DIKB's evidence base using the listed belief criteria")))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_RCT))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_NR))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_Par_Grps))

        elif(predicateType == "http://purl.obolibrary.org/obo/DIDEO_00000096"):
            provenance = ds.add_graph(provURI)
            provenance.add((aURI, prov.wasAttributedTo, URIRef('http://orcid.org/0000-0002-2993-2085')))
            provenance.add((aURI, prov.generatedAtTime, Literal(datetime.now())))
            provenance.add((aURI, prov.wasDerivedFrom,
                            Literal("Derived from the DIKB's evidence base using the listed belief criteria")))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_RCT))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_NR))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_Par_Grps))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_CT_PK_Genotype))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_CT_PK_Phenotype))

        elif(predicateType == "http://purl.obolibrary.org/obo/RO_0002449"):
            provenance = ds.add_graph(provURI)
            provenance.add((aURI, prov.wasAttributedTo, URIRef('http://orcid.org/0000-0002-2993-2085')))
            provenance.add((aURI, prov.generatedAtTime, Literal(datetime.now())))
            provenance.add((aURI, prov.wasDerivedFrom,
                            Literal("Derived from the DIKB's evidence base using the listed belief criteria")))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_RCT))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_NR))
            provenance.add((aURI, prov.hadMember, dikbEvidence.EV_PK_DDI_Par_Grps))

    print ds.serialize(format='trig')
'''This script initializes the data file that contains the triples'''
from rdflib import Graph, Dataset
from shutil import rmtree
from os.path import exists
from model import update_metagraph

DATAPATH = 'data'
DOMAIN = 'http://abstractnonsense.net/'
GRAPH_NAMESPACE = DOMAIN + 'graph' + '/'
DEFAULT_URI = DOMAIN + 'i'
DEFAULT_GRAPH = GRAPH_NAMESPACE + 'i'


def remove_data(datapath):
    '''SIDE EFFECTS'''
    if exists(datapath):
        rmtree(datapath)
    return None


ds = Dataset(store='Sleepycat')
remove_data(DATAPATH)
ds.open('data', create=True)
g = ds.get_context(identifier=DEFAULT_GRAPH)
g.parse('foaf.ttl', format='n3')
update_metagraph(DEFAULT_GRAPH, DEFAULT_URI, ds)
ds.close()
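A small follow-up sketch, assuming the constants defined in the script above, showing how the freshly initialized Sleepycat store could be re-opened and inspected:

    # Re-open the Sleepycat store created above and inspect the named graph (sketch).
    from rdflib import Dataset

    ds = Dataset(store='Sleepycat')
    ds.open(DATAPATH, create=False)
    g = ds.get_context(identifier=DEFAULT_GRAPH)
    print(len(g))   # number of triples parsed from foaf.ttl
    ds.close()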
#             line[str] = strip_tags(line[str])
#             line[str] = unicode(line[str], errors='replace')
#     #print line
    with open(filename, 'r') as csvfile:
        csv_contents = [{k: v for k, v in row.items()}
                        for row in csv.DictReader(csvfile,
                                                  skipinitialspace=True,
                                                  quotechar='"',
                                                  delimiter=',')]
    return csv_contents

#//*************** csv parser ****************//#


graph_uri_base = resource + 'movement_of_people/'

path = 'source_datasets/'
filename = 'Movement_of_people_across_borders_dataset.csv'

dataset = Dataset()
dataset.bind('trumpres', RESOURCE)
dataset.bind('trumpvoc', VOCAB)
dataset.bind('geo', GEO)
dataset.bind('dbo', DBO)
dataset.bind('dbr', DBR)
dataset.default_context.parse(VOCAB_FILE, format='turtle')

dataset, movement_graph = convert_csv(path + filename, dataset,
                                      URIRef(graph_uri_base + 'movement_graph'))

serialize_upload(OUTPUT_DIR + 'movement_of_people.trig', dataset)

### Generate VoID metadata
from rdflib.void import generateVoID
    response = requests.post(transaction_close_url)
    return str(response.status_code)


def serialize_upload(filename, dataset, upload=True):
    with open(filename, 'w') as f:
        dataset.serialize(f, format='trig')
    upload_to_stardog(dataset.serialize(format='trig'))


graph_uri_base = resource + 'findaslot/'

drop_stardog()

dataset = Dataset()
dataset.bind('fasdat', RESOURCE)
dataset.bind('fasont', VOCAB)
dataset.bind('geo', GEO)
dataset.bind('dbo', DBO)
dataset.bind('dbr', DBR)
dataset.default_context.parse(VOCAB_FILE, format='turtle')

# Upload vocabulary
with open(VOCAB_FILE, 'r') as f:
    upload_to_stardog(f.read())

dataset, t_graph = convert_dataset(
    SOURCE_DATA_DIR + 'Theater.json', dataset,
    URIRef(graph_uri_base + 'theaters'), museums=False)
serialize_upload(OUTPUT_DIR + 'theaters.trig', t_graph)
def update_test(t):

    # the update-eval tests refer to graphs on http://example.org
    rdflib_sparql_module.SPARQL_LOAD_GRAPHS = False

    uri, name, comment, data, graphdata, query, res, syntax = t

    if uri in skiptests:
        raise SkipTest()

    try:
        g = Dataset()

        if not res:
            if syntax:
                translateUpdate(parseUpdate(open(query[7:])))
            else:
                try:
                    translateUpdate(parseUpdate(open(query[7:])))
                    raise AssertionError("Query shouldn't have parsed!")
                except:
                    pass  # negative syntax test
            return

        resdata, resgraphdata = res

        # read input graphs
        if data:
            g.default_context.load(data, format=_fmt(data))

        if graphdata:
            for x, l in graphdata:
                g.load(x, publicID=URIRef(l), format=_fmt(x))

        req = translateUpdate(parseUpdate(open(query[7:])))
        evalUpdate(g, req)

        # read expected results
        resg = Dataset()
        if resdata:
            resg.default_context.load(resdata, format=_fmt(resdata))

        if resgraphdata:
            for x, l in resgraphdata:
                resg.load(x, publicID=URIRef(l), format=_fmt(x))

        eq(set(x.identifier for x in g.contexts() if x != g.default_context),
           set(x.identifier for x in resg.contexts()
               if x != resg.default_context),
           'named graphs in datasets do not match')
        assert isomorphic(g.default_context, resg.default_context), \
            'Default graphs are not isomorphic'

        for x in g.contexts():
            if x == g.default_context:
                continue
            assert isomorphic(x, resg.get_context(x.identifier)), \
                "Graphs with ID %s are not isomorphic" % x.identifier

    except Exception, e:

        if isinstance(e, AssertionError):
            failed_tests.append(uri)
            fails[str(e)] += 1
        else:
            error_tests.append(uri)
            errors[str(e)] += 1

        if DEBUG_ERROR and not isinstance(e, AssertionError) or DEBUG_FAIL:
            print "======================================"
            print uri
            print name
            print comment

            if not res:
                if syntax:
                    print "Positive syntax test"
                else:
                    print "Negative syntax test"

            if data:
                print "----------------- DATA --------------------"
                print ">>>", data
                print open(data[7:]).read()
            if graphdata:
                print "----------------- GRAPHDATA --------------------"
                for x, l in graphdata:
                    print ">>>", x, l
                    print open(x[7:]).read()

            print "----------------- Request -------------------"
            print ">>>", query
            print open(query[7:]).read()

            if res:
                if resdata:
                    print "----------------- RES DATA --------------------"
                    print ">>>", resdata
                    print open(resdata[7:]).read()
                if resgraphdata:
                    print "----------------- RES GRAPHDATA -------------------"
                    for x, l in resgraphdata:
                        print ">>>", x, l
                        print open(x[7:]).read()

            print "------------- MY RESULT ----------"
            print g.serialize(format='trig')

            try:
                pq = translateUpdate(parseUpdate(open(query[7:]).read()))
                print "----------------- Parsed ------------------"
                pprintAlgebra(pq)
                # print pq
            except:
                print "(parser error)"

            print decodeStringEscape(unicode(e))

            import pdb
            pdb.post_mortem(sys.exc_info()[2])
        raise
from rdflib import Graph, ConjunctiveGraph, Dataset, URIRef, Namespace, Literal
from posixpath import join
from uuid import uuid4
from datetime import datetime
from helper import quote, unquote, url_exists

DATAPATH = 'data'
HTTP = 'http://'
DOMAIN = 'abstractnonsense.net'
STORE = 'Sleepycat'
NAMESPACE = Namespace(join(HTTP, DOMAIN, ''))

ds = Dataset(store=STORE)
ds.open(DATAPATH, create=False)  # it stays open all the time, just commits are made
cg = ConjunctiveGraph(store=STORE)
cg.open(DATAPATH, create=False)
# cg.bind('foaf', 'http://xmlns.com/foaf/0.1/')  # FOAF namespace understood

# DBPedia workaround
from rdflib.plugin import register, Parser
register('text/rdf+n3', Parser, 'rdflib.plugins.parsers.notation3', 'N3Parser')


def start():
    '''This starts the background script.

    The background script uses a (currently) hardcoded pattern, according to
    which the script harvests data. It recursively gathers more and more data,
    but only to a finite depth.
from oldman import ClientResourceManager, parse_graph_safely, SPARQLDataStore
from oldman.rest.crud import HashLessCRUDer

logging.config.fileConfig(path.join(path.dirname(__file__), 'logging.ini'))

sesame_iri = "http://*****:*****@context": [
    {
    response = requests.post(transaction_close_url)
    return str(response.status_code)


def serialize_upload(filename, dataset, upload=True):
    with open(filename, 'w') as f:
        dataset.serialize(f, format='trig')
    upload_to_stardog(dataset.serialize(format='trig'))


graph_uri_base = resource + 'findaslot/'

drop_stardog()

dataset = Dataset()
dataset.bind('fasdat', RESOURCE)
dataset.bind('fasont', VOCAB)
dataset.bind('geo', GEO)
dataset.bind('dbo', DBO)
dataset.bind('dbr', DBR)

dataset, t_graph = convert_dataset(
    SOURCE_DATA_DIR + 'Theater.json', dataset,
    URIRef(graph_uri_base + 'theaters'))
serialize_upload(OUTPUT_DIR + 'theaters.trig', dataset)
dataset.remove_graph(t_graph)

dataset, mg_graph = convert_dataset(
    SOURCE_DATA_DIR + 'MuseaGalleries.json', dataset,
    URIRef(graph_uri_base + 'museums'))
serialize_upload(OUTPUT_DIR + 'museums.trig', dataset)
dataset.remove_graph(mg_graph)