def store(self, cur_g, base_dir, base_iri, context_path, tmp_dir=None,
          override=False, already_processed={}, store_now=True, remove_data=False):
    self.repok.new_article()
    self.reperr.new_article()

    if len(cur_g) > 0:
        cur_dir_path, cur_file_path = self.dir_and_file_paths(cur_g, base_dir, base_iri)

        try:
            if not os.path.exists(cur_dir_path):
                os.makedirs(cur_dir_path)

            final_g = ConjunctiveGraph()
            final_g.addN([item + (cur_g.identifier,) for item in list(cur_g)])

            # Remove the data
            if remove_data:
                stored_g = None
                if cur_file_path in already_processed:
                    stored_g = already_processed[cur_file_path]
                elif os.path.exists(cur_file_path):
                    stored_g = self.load(cur_file_path, cur_g, tmp_dir)
                if stored_g is not None:  # nothing stored yet means nothing to remove
                    for s, p, o, g in final_g.quads((None, None, None, None)):
                        stored_g.remove((s, p, o, g))
                    final_g = stored_g
            elif not override:
                # Merging the data
                if cur_file_path in already_processed:
                    stored_g = already_processed[cur_file_path]
                    stored_g.addN(final_g.quads((None, None, None, None)))
                    final_g = stored_g
                elif os.path.exists(cur_file_path):
                    # This is a conjunctive graph that contains all the triples (and graphs)
                    # the file is actually defining - they could be more than those using
                    # 'cur_subject' as subject.
                    final_g = self.load(cur_file_path, cur_g, tmp_dir)

            already_processed[cur_file_path] = final_g

            if store_now:
                self.__store_in_file(final_g, cur_file_path, context_path)

            return already_processed
        except Exception as e:
            self.reperr.add_sentence(
                "[5] It was impossible to store the RDF statements in %s. %s"
                % (cur_file_path, str(e)))

    return None
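# A minimal, self-contained sketch of the merge step used by store() above:
# copying every quad of one ConjunctiveGraph into another via addN(). The
# names and IRIs below are illustrative only.
from rdflib import ConjunctiveGraph, URIRef

new_data = ConjunctiveGraph(identifier=URIRef("urn:ex:graph"))
new_data.add((URIRef("urn:ex:s"), URIRef("urn:ex:p"), URIRef("urn:ex:o")))

stored = ConjunctiveGraph()
# Tag each plain triple with the source graph's identifier, as store() does
stored.addN([item + (new_data.identifier,) for item in list(new_data)])
# Merging a second batch keeps existing quads and adds the new ones
stored.addN(new_data.quads((None, None, None, None)))
assert len(stored) == 1  # quads behave like a set, so re-adding is idempotent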
def __store_graph(cur_g, rdf_iri_string, d_dir):
    try:
        res_dir, dest_file = find_paths(
            rdf_iri_string, args.base + os.sep, "https://w3id.org/oc/corpus/", 10000, 1000)

        dest_dir = res_dir.replace(args.base + os.sep, d_dir + os.sep)
        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)

        cur_file = dest_file.replace(res_dir, dest_dir)
        if os.path.exists(cur_file):
            c_graph = __load_graph(cur_file)
        else:
            c_graph = ConjunctiveGraph()

        c_graph.remove_context(c_graph.get_context(cur_g.identifier))
        c_graph.addN([item + (cur_g.identifier,) for item in list(cur_g)])

        with open(cur_file, "w") as f:
            cur_json_ld = json.loads(
                c_graph.serialize(format="json-ld", context=context_json))
            cur_json_ld["@context"] = context_path
            json.dump(cur_json_ld, f, indent=4)
        # repok.add_sentence("File '%s' added." % cur_file)

        return dest_file
    except Exception as e:
        reperr.add_sentence("[5] It was impossible to store the RDF statements in %s. %s"
                            % (dest_file, str(e)))
def __store_in_file(self, cur_g, cur_file_path, context_path):
    # Note: the following lines from here and until 'cur_json_ld' are a sort of hack
    # for including all the triples of the input graph into the final stored file.
    # Somehow, some of them are not written in such file otherwise - in particular
    # the provenance ones.
    new_g = ConjunctiveGraph()
    for s, p, o in cur_g.triples((None, None, None)):
        g_iri = None
        for g_context in cur_g.contexts((s, p, o)):
            g_iri = g_context.identifier
            break
        new_g.addN([(s, p, o, g_iri)])

    if not self.nt and not self.nq and context_path:
        cur_json_ld = json.loads(
            new_g.serialize(format="json-ld",
                            context=self.__get_context(context_path)).decode("utf-8"))
        if isinstance(cur_json_ld, dict):
            cur_json_ld["@context"] = context_path
        else:  # it is a list
            for item in cur_json_ld:
                item["@context"] = context_path

        with open(cur_file_path, "w") as f:
            json.dump(cur_json_ld, f, indent=4, ensure_ascii=False)
    elif self.nt:
        new_g.serialize(cur_file_path, format="nt11", encoding="utf-8")
    elif self.nq:
        new_g.serialize(cur_file_path, format="nquads", encoding="utf-8")

    self.repok.add_sentence("File '%s' added." % cur_file_path)
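# A minimal sketch of the JSON-LD branch above, assuming rdflib with JSON-LD
# support (built into rdflib >= 6; earlier versions need the rdflib-jsonld
# plugin and return bytes from serialize(), hence the .decode() above).
import json
from rdflib import ConjunctiveGraph, URIRef, Literal

g = ConjunctiveGraph()
g.add((URIRef("urn:ex:s"), URIRef("http://purl.org/dc/terms/title"), Literal("Example")))

serialized = g.serialize(format="json-ld",
                         context={"dcterms": "http://purl.org/dc/terms/"})
doc = json.loads(serialized)  # add .decode("utf-8") on rdflib < 6
# Replace the inline context with an external URL, mirroring __store_in_file()
if isinstance(doc, dict):
    doc["@context"] = "https://example.org/context.json"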
def test_quad_contexts():
    g = ConjunctiveGraph()
    a = URIRef("urn:a")
    b = URIRef("urn:b")
    g.get_context(a).add((a, a, a))
    g.addN([(b, b, b, b)])

    assert set(g) == set([(a, a, a), (b, b, b)])
    for q in g.quads():
        assert isinstance(q[3], Graph)
def get_where(graph, args):
    s, p, o, c = _spoc(args)

    result = ConjunctiveGraph()
    for subgraph in (x for x in graph.store.contexts((s, p, o))
                     if c is None or x.identifier == c):
        result.addN((s, p, o, subgraph.identifier)
                    for s, p, o in subgraph.triples((None, None, None)))
    return result
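# A self-contained sketch of the pattern used by get_where() above: rebuild a
# ConjunctiveGraph from selected contexts by streaming quads into addN().
# The IRIs are illustrative.
from rdflib import ConjunctiveGraph, URIRef

src = ConjunctiveGraph()
ctx_a, ctx_b = URIRef("urn:ctx:a"), URIRef("urn:ctx:b")
src.get_context(ctx_a).add((URIRef("urn:s1"), URIRef("urn:p"), URIRef("urn:o")))
src.get_context(ctx_b).add((URIRef("urn:s2"), URIRef("urn:p"), URIRef("urn:o")))

# Keep only the quads that live in ctx_a
result = ConjunctiveGraph()
for subgraph in (x for x in src.contexts() if x.identifier == ctx_a):
    result.addN((s, p, o, subgraph.identifier) for s, p, o in subgraph)
assert len(result) == 1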
def store_graphs_in_file(self, file_path, context_path):
    self.repok.new_article()
    self.reperr.new_article()
    self.repok.add_sentence("Store the graphs into a file: starting process")

    cg = ConjunctiveGraph()
    for g in self.g:
        cg.addN([item + (g.identifier,) for item in list(g)])

    self.__store_in_file(cg, file_path, context_path)
def _graph(self):
    """Lazy loading of the _graph attribute

    This property getter will be called only when the instance attribute
    self._graph has been deleted. In that case, it will load the graph
    from self.identifier.

    This is used by the `from_iri`:meth: class method, to ensure that
    graphs are only loaded when required...
    """
    if '_graph' in self.__dict__:
        return self.__dict__['_graph']

    headers = self.__dict__.pop('_headers')
    http = self.__dict__.pop('_http')
    base_iri = self._identifier.split('#', 1)[0]
    effective_headers = dict(DEFAULT_REQUEST_HEADERS)
    if headers:
        effective_headers.update(headers)
    http = http or DEFAULT_HTTP_CLIENT

    LOG.info('downloading <%s>', base_iri)
    response, content = http.request(base_iri, "GET", headers=effective_headers)
    LOG.debug('got %s %s %s',
              response.status, response['content-type'], response.fromcache)
    if response.status // 100 != 2:
        raise HttpLib2ErrorWithResponse(response.reason, response, content)

    source = StringInputSource(content)
    ctype = response['content-type'].split(';', 1)[0]
    g = ConjunctiveGraph(identifier=base_iri)
    g.addN(BACKGROUND_KNOWLEDGE.quads())
    g.parse(source, base_iri, ctype)
    _fix_default_graph(g)

    # if available, load API Documentation in a separate graph
    links = response.get('link')
    if links:
        if not isinstance(links, list):
            links = [links]
        for link in links:
            match = APIDOC_RE.match(link)
            if match:
                self._api_doc = apidoc_iri = URIRef(match.groups()[0])
                if apidoc_iri != self.identifier:
                    apidoc = ApiDocumentation.from_iri(apidoc_iri, headers, http)
                    g.addN(apidoc.graph.quads())
                break

    self.__dict__['_graph'] = g
    return g
def dump():
    query = '''SELECT ?x ?y ?z ?g WHERE { GRAPH ?g { ?x ?y ?z } }'''
    data = get_query(query)
    if data:
        g = ConjunctiveGraph()
        for q in data:
            # Note: blank nodes and typed/language-tagged literals are not
            # preserved here; every non-URI object becomes a plain literal.
            obj = (URIRef(q['z']['value']) if q['z']['type'] == 'uri'
                   else Literal(q['z']['value']))
            g.addN([(URIRef(q['x']['value']), URIRef(q['y']['value']),
                     obj, URIRef(q['g']['value']))])
        g.serialize("dump/dump.nq", format="nquads")
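# A hedged usage sketch: after dump() has written dump/dump.nq, the file can
# be parsed back into a ConjunctiveGraph to verify the quads survived the
# round trip.
from rdflib import ConjunctiveGraph

restored = ConjunctiveGraph()
restored.parse("dump/dump.nq", format="nquads")
print(len(restored), "triples across", len(list(restored.contexts())), "graphs")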
def uploadDocumentContext(annfile):
    docid = annfile.split('/').pop().split('.')[0]
    cg = ConjunctiveGraph(identifier=gid['tempUploadGraph'])
    cg.addN([(s, p, o, gid[docid]) for (s, p, o) in ann2rdf(annfile)])
    r = requests.post(
        AGVM_VC_REPO + "/statements",
        headers={'Content-Type': 'text/x-nquads'},
        data=cg.serialize(format='nquads'),
        auth=AG_AUTH,
        params={"commit": 1000}
    )
    return r.content
def graphWithoutMetadata(g: ConjunctiveGraph, ignorePredicates=()):
    """
    graph filter that removes any statements whose subjects are
    contexts in the graph and also any statements with the given
    predicates
    """
    ctxs = [ctx.identifier for ctx in g.contexts()]

    out = ConjunctiveGraph()
    for stmt in g.quads((None, None, None)):
        if stmt[0] not in ctxs and stmt[1] not in ignorePredicates:
            out.addN([stmt])
    return out
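# A usage sketch for graphWithoutMetadata (IRIs are illustrative): statements
# about the context IRIs themselves are dropped, everything else is kept.
from rdflib import ConjunctiveGraph, URIRef, Literal
from rdflib.namespace import DCTERMS

g = ConjunctiveGraph()
ctx = URIRef("urn:ctx:1")
g.get_context(ctx).add((URIRef("urn:s"), URIRef("urn:p"), URIRef("urn:o")))
# Metadata whose subject is the context IRI is filtered out
g.get_context(ctx).add((ctx, DCTERMS.creator, Literal("alice")))

filtered = graphWithoutMetadata(g, ignorePredicates=[DCTERMS.source])
assert (ctx, None, None) not in filtered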
def __init__(self, location, repository, inmemory=False):
    super(RDFLibStore, self).__init__(location, repository)
    self.inmemory = inmemory
    self.closed = False

    graphid = URIRef("file://" + self.repository)
    g = ConjunctiveGraph(store=self._storeid(), identifier=graphid)
    if os.path.exists(self.location):
        g.open(self.location, create=False)
    else:
        g.open(self.location, create=True)

    log = logging.getLogger(__name__)
    if inmemory:
        log.debug("Loading store into memory")
        ig = ConjunctiveGraph(identifier=graphid)
        ig.addN(g.quads())
        g.close()
        self.graph = ig
    else:
        log.debug("Using on-disk store")
        self.graph = g
def store(self, cur_g, base_dir, base_iri, context_path, tmp_dir=None,
          override=False, already_processed={}, store_now=True):
    self.repok.new_article()
    self.reperr.new_article()

    if len(cur_g) > 0:
        cur_subject = set(cur_g.subjects(None, None)).pop()
        cur_dir_path, cur_file_path = find_paths(
            str(cur_subject), base_dir, base_iri, self.dir_split, self.n_file_item)

        try:
            if not os.path.exists(cur_dir_path):
                os.makedirs(cur_dir_path)

            final_g = ConjunctiveGraph()
            final_g.addN([item + (cur_g.identifier,) for item in list(cur_g)])

            # Merging the data
            if not override:
                if cur_file_path in already_processed:
                    stored_g = already_processed[cur_file_path]
                    stored_g.addN(final_g.quads((None, None, None, None)))
                    final_g = stored_g
                elif os.path.exists(cur_file_path):
                    # This is a conjunctive graph that contains all the triples (and graphs)
                    # the file is actually defining - they could be more than those using
                    # 'cur_subject' as subject.
                    final_g = self.load(cur_file_path, cur_g, tmp_dir)

            already_processed[cur_file_path] = final_g

            if store_now:
                self.__store_in_file(final_g, cur_file_path, context_path)

            return already_processed
        except Exception as e:
            self.reperr.add_sentence(
                "[5] It was impossible to store the RDF statements in %s. %s"
                % (cur_file_path, str(e)))

    return None
    txy_list.append((t, x, y))
    accident_url_list.append(ident)
    yield from accident_coverage_triples(txy_list, accident_url_list)


parser = ArgumentParser()
parser.add_argument('--tweets', type=FileType('r'), default='data/tweets.json')
parser.add_argument('--streets', type=FileType('r'), default='data/streets.json')
parser.add_argument('--out', type=FileType('wb'), default='data/accidents.ttl')

if __name__ == '__main__':
    args = parser.parse_args()
    streets = load(args.streets)
    tweets = load(args.tweets)
    print('Loaded {} tweets and {} streets'.format(len(tweets), len(streets)))

    fuseki = ConjunctiveGraph(store='SPARQLUpdateStore')
    fuseki.open(('http://localhost:3030/accidents/query',
                 'http://localhost:3030/accidents/update'))
    # Make the target graph an explicit URIRef (rdflib would otherwise coerce
    # the plain string into one internally)
    default = URIRef('urn:x-arq:DefaultGraph')
    add_namespaces(fuseki)
    fuseki.addN((s, p, o, default) for s, p, o in build_graph(tweets))
    fuseki.close()
def ProjectRdf(data):
    g = ConjunctiveGraph()
    graph = URIRef(app.config["BASE_URI"] + "graph/" + data["Id"])
    proj = URIRef(app.config["BASE_URI"] + "project/" + data["Id"])
    DOAP = Namespace("http://usefulinc.com/ns/doap#")

    g.addN([(proj, RDF.type, FOAF.Project, graph)])
    g.addN([(proj, DCTERMS.title, Literal(data["Title"]), graph)])
    g.addN([(proj, DCTERMS.description, Literal(data["Description"]), graph)])
    g.addN([(proj, DOAP.GitRepository, URIRef(data["Repository"]), graph)])
    if data["Homepage"]:
        g.addN([(proj, FOAF.homepage, URIRef(data["Homepage"]), graph)])

    # authors
    for aut in data["Aut"]:
        aut_uri = URIRef(app.config["BASE_URI"] + "person/" + parse.quote(
            data["Aut"][aut]["Mail"].split("@")[0].replace(".", "_"), safe=""))
        g.addN([(proj, DCTERMS.creator, aut_uri, graph)])
        g.addN([(aut_uri, RDF.type, FOAF.Person, graph)])
        g.addN([(aut_uri, FOAF.givenName, Literal(data["Aut"][aut]["Name"]), graph)])
        g.addN([(aut_uri, FOAF.familyName, Literal(data["Aut"][aut]["Surname"]), graph)])
        g.addN([(aut_uri, FOAF.mbox, Literal(data["Aut"][aut]["Mail"]), graph)])

    # course
    # Strip punctuation character by character; str.replace(string.punctuation, "")
    # would only remove the whole punctuation string as one literal substring.
    course_name = "".join(
        ch for ch in data["Course"].replace(" ", "") if ch not in string.punctuation)
    course_id = parse.quote(course_name, safe="")[:30]
    course_uri = URIRef(app.config["BASE_URI"] + "course/" +
                        data["Year"].replace("-", "_") + "/" + course_id)
    g.addN([(proj, DCTERMS.subject, course_uri, graph)])
    g.addN([(course_uri, RDF.type, DCTERMS.MethodOfInstruction, graph)])
    g.addN([(course_uri, DCTERMS.title, Literal(data["Course"]), graph)])
    g.addN([(course_uri, FOAF.homepage, URIRef(data["Course_url"]), graph)])

    # year
    year_uri = URIRef(app.config["BASE_URI"] + "year/" + data["Year"].replace("-", "_"))
    g.addN([(course_uri, DCTERMS.coverage, year_uri, graph)])
    TIME = Namespace("http://www.w3.org/2006/time#")
    g.addN([(year_uri, RDF.type, TIME.TemporalEntity, graph)])
    g.addN([(year_uri, RDFS.label, Literal(data["Year"]), graph)])

    # graph metadata
    g.addN([(graph, DCTERMS.accessRights, Literal("SUSPENDED"), graph)])
    g.addN([(graph, DCTERMS.dateSubmitted,
             Literal(str(data["Date"]), datatype=XSD.date), graph)])
    pub_uri = URIRef(app.config["BASE_URI"] + "person/" +
                     data["Responsible"].split("@")[0].replace(".", "_"))
    g.addN([(graph, DCTERMS.publisher, pub_uri, graph)])

    return g
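# Design note, as a minimal standalone sketch: ProjectRdf() above calls addN()
# once per statement; the same statements can instead be collected into one
# list and added with a single addN() call. IRIs below are illustrative.
from rdflib import ConjunctiveGraph, URIRef, Literal, RDF
from rdflib.namespace import FOAF, DCTERMS

g = ConjunctiveGraph()
graph = URIRef("https://example.org/graph/1")
proj = URIRef("https://example.org/project/1")
g.addN([
    (proj, RDF.type, FOAF.Project, graph),
    (proj, DCTERMS.title, Literal("Example project"), graph),
])
assert len(g) == 2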
class MemoryStore:
    """A class that combines and synchronizes n-quad files and an in-memory quad store.

    This class contains information about all graphs, their corresponding URIs and
    paths in the file system. For every Graph (context of Quad-Store) exists a
    FileReference object (n-quad) that enables versioning (with git) and persistence.
    """

    def __init__(self):
        """Initialize a new MemoryStore instance."""
        self.logger = logging.getLogger('quit.core.MemoryStore')
        self.logger.debug('Create an instance of MemoryStore')
        self.store = ConjunctiveGraph(identifier='default')

    def getgraphuris(self):
        """Method to get all available named graphs.

        Returns:
            A list containing all graph uris found in store.
        """
        graphs = []
        for graph in self.store.contexts():
            if isinstance(graph, BNode) or str(graph.identifier) == 'default':
                pass
            else:
                graphs.append(graph.identifier)

        return graphs

    def getgraphcontent(self, graphuri):
        """Get the serialized content of a named graph.

        Args:
            graphuri: The URI of a named graph.
        Returns:
            content: A list of strings where each string is a quad.
        """
        data = []
        context = self.store.get_context(URIRef(graphuri))
        triplestring = context.serialize(format='nt').decode('UTF-8')

        # Since we have triples here, we transform them to quads by adding the graphuri
        # TODO This might cause problems if ' .\n' will be part of a literal.
        #   Maybe a regex would be a better solution
        triplestring = triplestring.replace(' .\n', ' <' + graphuri + '> .\n')

        data = triplestring.splitlines()
        data.remove('')

        return data

    def getstoreobject(self):
        """Get the conjunctive graph object.

        Returns:
            graph: The ConjunctiveGraph object of the store.
        """
        return self.store

    def graphexists(self, graphuri):
        """Ask if a named graph FileReference object exists for a named graph URI.

        Args:
            graphuri: A string containing the URI of a named graph

        Returns:
            True or False
        """
        if self.store.get_context(URIRef(graphuri)) is None:
            return False
        else:
            return True

    def addfile(self, filename, serialization):
        """Add a file to the store.

        Args:
            filename: A String for the path to the file.
            serialization: A String containing the RDF format
        Raises:
            ValueError if the given file can't be parsed as nquads.
        """
        try:
            self.store.parse(source=filename, format=serialization)
        except Exception as e:
            self.logger.debug(e)
            self.logger.debug(
                "Could not import file: {}. Make sure the file exists and "
                "contains data in {}".format(filename, serialization))

    def addquads(self, quads):
        """Add quads to the MemoryStore.

        Args:
            quads: Rdflib.quads that should be added to the MemoryStore.
        """
        self.store.addN(quads)
        self.store.commit()

    def query(self, querystring):
        """Execute a SPARQL select query.

        Args:
            querystring: A string containing a SPARQL ask or select query.
        Returns:
            The SPARQL result set
        """
        return self.store.query(querystring)

    def update(self, querystring, versioning=True):
        """Execute a SPARQL update query and update the store.

        This method executes a SPARQL update query and updates and commits
        all affected files.

        Args:
            querystring: A string containing a SPARQL update query.
        """
        # methods of rdflib ConjunctiveGraph
        if versioning:
            actions = evalUpdate(self.store, querystring)
            self.store.update(querystring)
            return actions
        else:
            self.store.update(querystring)
            return

    def removequads(self, quads):
        """Remove quads from the MemoryStore.

        Args:
            quads: Rdflib.quads that should be removed from the MemoryStore.
        """
        self.store.remove(quads)
        self.store.commit()

    def exit(self):
        """Execute actions on API shutdown."""
        return
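# A hedged usage sketch for MemoryStore above (IRIs are illustrative; assumes
# an rdflib version where Graph.serialize() returns bytes, as getgraphcontent()
# expects):
from rdflib import URIRef, Literal

store = MemoryStore()
ctx = URIRef("http://example.org/graph/1")
store.addquads([(URIRef("urn:s"), URIRef("urn:p"), Literal("o"), ctx)])
print(store.getgraphuris())  # [rdflib.term.URIRef('http://example.org/graph/1')]
for row in store.query("SELECT ?g ?s WHERE { GRAPH ?g { ?s ?p ?o } }"):
    print(row.g, row.s)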