def discussion_as_graph(self, discussion_id):
    """Collect the RDF data of one discussion into a ConjunctiveGraph.

    The graph is filled with the data of every subject that is
    ``assembl:in_conversation`` with the discussion, the discussion itself,
    the participants' agent profiles and one pseudo-account per participant.
    ``sioc:account_of`` links to those profiles and ``catalyst:expressesIdea``
    statements about the discussion's ideas are added as quads.

    :param discussion_id: identifier handed to ``Discussion.get`` and
        ``Discussion.uri_generic``.
    :return: the populated ``ConjunctiveGraph``.
    """
    from assembl.models import Discussion, AgentProfile

    base_uri = self.local_uri()
    discussion = Discussion.get(discussion_id)
    storage_name = self.discussion_storage_name()
    graph_iri = URIRef(self.discussion_graph_iri())
    store = get_virtuoso(self.session, storage_name)
    discussion_uri = URIRef(Discussion.uri_generic(discussion_id, base_uri))
    discussion_n3 = discussion_uri.n3()

    # Everything that declares itself part of this conversation.
    subjects = []
    for (subject,) in store.query(
            """SELECT DISTINCT ?s WHERE { ?s assembl:in_conversation %s }"""
            % discussion_n3):
        subjects.append(subject)
    subjects.append(discussion_uri)

    participant_ids = list(discussion.get_participants(True))
    profiles = set()
    for participant_id in participant_ids:
        profiles.add(
            URIRef(AgentProfile.uri_generic(participant_id, base_uri)))
    subjects.extend(profiles)
    # add pseudo-accounts
    subjects.extend(
        URIRef("%sAgentAccount/%d" % (base_uri, participant_id))
        for participant_id in participant_ids)

    result = ConjunctiveGraph(identifier=graph_iri)
    self.add_subject_data(store, result, subjects)

    # add relationships of non-pseudo accounts
    account_links = store.triples((None, SIOC.account_of, None))
    for (account, _predicate, profile), context in account_links:
        if profile not in profiles:
            continue
        result.add((account, SIOC.account_of, profile, context))
        # Tempting: simplify with this.
        # result.add((profile, FOAF.account, account, context))

    idea_links = store.query(
        '''SELECT ?s ?o ?g WHERE { GRAPH ?g {?s catalyst:expressesIdea ?o } . ?o assembl:in_conversation %s }'''
        % discussion_n3)
    for source, idea, context in idea_links:
        result.add((source, CATALYST.expressesIdea, idea, context))
    return result
def delete_individual_recursive(self, uri):
    """Delete an individual and, transitively, all of its children.

    Every triple that has a visited node as subject or object is removed
    from ``self.graph``; the matching ``knowledge`` rows that are not yet
    deleted are marked with the N3 form of the root ``uri``.

    :param uri: a plain ``str`` (converted to ``URIRef``) or an RDF term.
    :return: list of the visited nodes, as strings, in visiting order.
    """
    # Exact-type test on purpose: RDF terms are str subclasses and must not
    # be re-wrapped.
    if type(uri) is str:
        uri = URIRef(uri)
    deletion_marker = uri.n3()

    connection = get_db()
    cursor = connection.cursor()
    pending = [uri]
    removed = []
    while pending:
        node = pending.pop()
        removed.append(str(node))
        # Children are collected before the node's own links disappear.
        pending.extend(self.get_individual_children(node))
        node_n3 = node.n3()

        # remove links from parents
        self.graph.remove((None, None, node))
        cursor.execute(
            'UPDATE knowledge SET deleted = ? WHERE subgraph_id = ? AND object = ? AND deleted IS NULL',
            (deletion_marker, self.id, node_n3))

        # remove links to children
        self.graph.remove((node, None, None))
        cursor.execute(
            'UPDATE knowledge SET deleted = ? WHERE subgraph_id = ? AND subject = ? AND deleted IS NULL',
            (deletion_marker, self.id, node_n3))
    connection.commit()
    self.root = None  # forces a rebuild of the root entity
    return removed
class Context(GraphOperations):
    """A named set of statements belonging to a parent store wrapper.

    When ``parent.buffered`` is true the statements are staged in a local
    ``Graph`` and only sent to the parent's graph on :meth:`save`; otherwise
    the context of the parent's graph is used directly.
    """

    def __init__(self, parent, identifier=None, meta=None):
        """Bind the context to ``parent``; a missing identifier becomes a
        fresh blank-node id. Metadata is generated immediately."""
        self.parent = parent
        self.identifier = URIRef(BNode() if identifier is None else identifier)
        self.meta = MetaData(self, meta)
        self.meta.generate()

    @property
    def graph(self):
        """The graph statements are written to, created on first access."""
        if getattr(self, '_graph', None) is None:
            if self.parent.buffered:
                self._graph = Graph(identifier=self.identifier)
            else:
                self._graph = self.parent.graph.get_context(self.identifier)
        return self._graph

    def add(self, schema, data):
        """ Stage ``data`` as a set of statements, based on the given
        ``schema`` definition, and return the URI produced by ``triplify``.
        """
        uri, statements = triplify(self.get_binding(schema, data))
        for statement in statements:
            self.graph.add(statement)
        return uri

    def save(self):
        """ Transfer the statements in this context over to the main
        store. In unbuffered mode only the metadata is regenerated. """
        if not self.parent.buffered:
            self.meta.generate()
            return
        graph_name = self.identifier.n3()
        payload = self.graph.serialize(format='nt')
        template = """ INSERT DATA { GRAPH %s { %s } } """
        self.parent.graph.update(template % (graph_name, payload))
        self.flush()

    def delete(self):
        """ Delete all statements matching the current context
        identifier from the main store. """
        if not self.parent.buffered:
            self.graph.remove((None, None, None))
            return
        self.parent.graph.update(
            'CLEAR SILENT GRAPH %s ;' % self.identifier.n3())
        self.flush()

    def flush(self):
        """ Clear all the pending statements in the local context,
        without transferring them to the main store. """
        self._graph = None

    def __str__(self):
        return self.identifier

    def __repr__(self):
        return '<Context("%s")>' % self.identifier
def __new__(cls, uri_str):
    """Validate ``uri_str`` as a URI and hand it back unchanged.

    The value is wrapped in a ``URIRef`` and serialised with ``n3()``; any
    exception raised by that serialisation is reported as ``ValueError``.

    :param uri_str: candidate URI.
    :return: ``uri_str`` itself (not an instance of ``cls``).
    :raises ValueError: if ``n3()`` fails; the original error is attached
        as ``__cause__``.
    """
    candidate = URIRef(uri_str)
    try:
        candidate.n3()
    except Exception as err:
        # Chain explicitly so the underlying reason stays visible.
        raise ValueError(
            f'{uri_str} does not look like a valid URI') from err
    return uri_str
def discussion_as_graph(self, discussion_id):
    """Copy the statements about a discussion into a ConjunctiveGraph.

    For the discussion and every subject ``assembl:in_conversation`` with
    it, all predicate/object pairs found in the discussion graph are
    copied; ``catalyst:expressesIdea`` statements are added as quads.

    :param discussion_id: identifier passed to ``Discussion.uri_generic``.
    :return: the populated ``ConjunctiveGraph``.
    """
    self.ensure_discussion_storage(None)
    from assembl.models import Discussion

    storage_name = self.discussion_storage_name()
    graph_iri = URIRef(self.discussion_graph_iri())
    store = get_virtuoso(self.session, storage_name)
    discussion_uri = URIRef(
        Discussion.uri_generic(discussion_id, self.local_uri()))
    discussion_n3 = discussion_uri.n3()

    subject_rows = list(store.query(
        """SELECT DISTINCT ?s WHERE { ?s assembl:in_conversation %s }"""
        % discussion_n3))
    # One-element row so it unpacks like the query rows above.
    subject_rows.append([discussion_uri])

    result = ConjunctiveGraph(identifier=graph_iri)
    graph_n3 = graph_iri.n3()
    for (subject,) in subject_rows:
        # Absurdly slow. DISTINCT speeds up a lot, but I get numbers.
        pairs = store.query(
            'SELECT ?p ?o WHERE { graph %s { %s ?p ?o }}'
            % (graph_n3, subject.n3()))
        for predicate, obj in pairs:
            result.add((subject, predicate, obj))

    idea_links = store.query(
        '''SELECT ?s ?o ?g WHERE { GRAPH ?g {?s catalyst:expressesIdea ?o } . ?o assembl:in_conversation %s }'''
        % discussion_n3)
    for source, idea, context in idea_links:
        result.add((source, CATALYST.expressesIdea, idea, context))
    # TODO: Add roles
    return result
def _compare_pointers(self, synset: dict, pointer_name: str, pointer_uri: URIRef):
    """Compare one kind of pointer between a dump synset and the graph.

    (source, target) pairs are built from ``synset[pointer_name]`` and
    compared with the lexical-form pairs the graph returns for
    ``pointer_uri`` on the same synset.

    :param synset: dump record; must contain ``"doc_id"``.
    :param pointer_name: key of the pointer list inside ``synset``.
    :param pointer_uri: predicate linking source and target in the graph.
    :return: ``(equal, matched, dump_only, graph_only)`` where ``equal`` is
        true only when both leftover lists are empty.
    """
    matched = []
    graph_only = []
    dump_only = []

    # pairs declared by the dump for this synset
    if pointer_name in synset:
        for pointer in synset[pointer_name]:
            source = self._get_source_target(synset, pointer, "source_word")
            target_synset = self.docs[pointer["target_synset"]]
            target = self._get_source_target(
                target_synset, pointer, "target_word")
            if source and target:
                dump_only.append((source, target))

    # pairs stored in the graph
    synset_uri = SYNSETPT[synset["doc_id"]]
    template = ("SELECT ?ss ?sw ?swl ?ts ?tw ?twl WHERE{{"
                "?ss {pointer} ?ts ."
                "{synset} {hassens} ?ss ."
                "?ss {hasword} ?sw . ?sw {lexical} ?swl ."
                "?ts {hasword} ?tw . ?tw {lexical} ?twl . }}")
    sparql = template.format(
        synset=synset_uri.n3(),
        hassens=SCHEMA.containsWordSense.n3(),
        pointer=pointer_uri.n3(),
        hasword=SCHEMA.word.n3(),
        lexical=SCHEMA.lexicalForm.n3())

    for row in self.graph.query(sparql):
        # columns: ?ss ?sw ?swl ?ts ?tw ?twl -- only the lexical forms count
        _, _, source_label, _, _, target_label = row
        candidate = (source_label.toPython().strip(),
                     target_label.toPython().strip())
        if candidate in dump_only:
            matched.append(candidate)
            dump_only.remove(candidate)
        else:
            graph_only.append(candidate)

    equal = not dump_only and not graph_only
    return equal, matched, dump_only, graph_only
def instance_view_jsonld(request):
    """Return the triples whose subject is the requested instance, as JSON-LD.

    An ``HTTPUnauthorized`` response is returned when the user lacks READ
    permission on the instance; ``HTTPNotFound`` is raised when the context
    yields no discussion.
    """
    from assembl.semantic.virtuoso_mapping import AssemblQuadStorageManager
    from rdflib import URIRef, ConjunctiveGraph

    context = request.context
    user_id = authenticated_userid(request) or Everyone
    permissions = get_permissions(user_id, context.get_discussion_id())
    instance = context._instance
    if not instance.user_can(user_id, CrudPermissions.READ, permissions):
        return HTTPUnauthorized()
    discussion = context.get_instance_of_class(Discussion)
    if not discussion:
        raise HTTPNotFound()

    manager = AssemblQuadStorageManager()
    # The first six characters of the instance URI are dropped before the
    # local base is prepended (presumably a "local:" prefix -- confirm).
    subject = URIRef(manager.local_uri() + instance.uri()[6:])
    storage_name = manager.discussion_storage_name(discussion.id)
    store = get_virtuoso(instance.db, storage_name)
    graph = ConjunctiveGraph(store, storage_name)

    #result = v.query('select ?p ?o ?g where {graph ?g {<%s> ?p ?o}}' % uri)
    # Something is wrong here.
    statements = []
    for _subject, predicate, obj in graph.triples((subject, None, None)):
        if '_with_no_name_entry' in obj:
            continue
        statements.append(
            '%s %s %s.' % (subject.n3(), predicate.n3(), obj.n3()))
    return manager.quads_to_jsonld('\n'.join(statements))
def export_dataset(
        sparql_endpoint: SPARQLEndpoint = None,
        s3_endpoint: S3ObjectStore = None,
        data_source_code: str = None,
        graph_iri: rdflib.URIRef = None,
        mime=MIME_NTRIPLES
) -> bool:
    """Stream one named graph from the SPARQL endpoint into the S3 store.

    The graph is fetched with a streaming GET (``graph`` parameter, ``Accept``
    set to ``mime``) and forwarded chunk by chunk to the uploader obtained
    from ``s3_endpoint``.

    :param sparql_endpoint: source endpoint (URL, credentials, error check).
    :param s3_endpoint: destination; provides ``uploader_for``.
    :param data_source_code: dataset code used for logging and the S3 key.
    :param graph_iri: IRI of the named graph; required despite the default.
    :param mime: MIME type requested from the endpoint and stored on upload.
    :return: the result of ``uploader.complete()``.
    """
    log_rule(data_source_code)
    log_item("Exporting Dataset", data_source_code)
    log_item("Named Graph IRI", graph_iri.n3())
    r = requests.get(
        sparql_endpoint.endpoint_url(),
        auth=(sparql_endpoint.user_id(), sparql_endpoint.password()),
        params={'graph': graph_iri},
        headers={'Accept': mime},
        stream=True
    )
    # A streamed response holds its connection until it is fully consumed or
    # closed, so release it on every exit path.
    try:
        sparql_endpoint.handle_error(r)
        content_encoding = r.headers.get('Content-Encoding')
        s3_key = _s3_file_name(mime, data_source_code)
        log_item('Uploading as', s3_key)
        uploader = s3_endpoint.uploader_for(
            key=s3_key,
            mime=mime,
            content_encoding=content_encoding,
            dataset_code=data_source_code
        )
        chunk_size = 5 * 1024 * 1024  # 5Mb is minimum
        for chunk in iter_raw(r, chunk_size=chunk_size):
            uploader.part(chunk)
        return uploader.complete()
    finally:
        r.close()
def bulk_update(self, named_graph, graph, size, is_add=True):
    """
    Bulk adds or deletes. Triples are chunked into n size groups before
    sending to API. This prevents the API endpoint from timing out.

    :param named_graph: IRI of the target named graph.
    :param graph: sized collection of statements handed to ``nt_yielder``.
    :param size: chunk size handed to ``nt_yielder``.
    :param is_add: exactly ``True`` inserts; any other value deletes.
    :return: ``len(graph)``.
    """
    context = URIRef(named_graph)
    total = len(graph)
    if not total:
        return total

    inserting = is_add is True
    for set_size, nt in self.nt_yielder(graph, size):
        if inserting:
            logger.debug("Adding {} statements to <{}>.".format(set_size, named_graph))
            statement = u'INSERT DATA { GRAPH %s { %s } }' % (context.n3(), nt)
        else:
            logger.debug("Removing {} statements from <{}>.".format(set_size, named_graph))
            statement = u'DELETE DATA { GRAPH %s { %s } }' % (context.n3(), nt)
        self.update(statement)
    return total
def get_synthesis_contributors(self, id_only=True):
    """Return the authors of important extracts on this idea's subtree.

    Extract ids are looked up with SPARQL (annotations expressing this idea
    or any idea reached through ``idea:includes*``); authors are then
    ranked by their number of important extracts, highest first.

    :param id_only: when true return ``AgentProfile.uri_generic`` values,
        otherwise the ``AgentProfile`` objects in the same order.
    """
    # author of important extracts
    from .idea_content_link import Extract
    from .auth import AgentProfile
    from .post import Post
    from sqlalchemy.sql.functions import count

    local_uri = AssemblQuadStorageManager.local_uri()
    discussion_storage = \
        AssemblQuadStorageManager.discussion_storage_name()
    idea_uri = URIRef(self.uri(local_uri))
    clause = '''select distinct ?annotation where { %s idea:includes* ?ideaP . ?annotation assembl:resourceExpressesIdea ?ideaP }'''
    extract_ids = [x for (x,) in self.db.execute(
        SparqlClause(clause % (idea_uri.n3(),),
                     quad_storage=discussion_storage.n3()))]

    # The aggregate needs a GROUP BY to yield one count per author.
    # `== True` is deliberate: it builds a SQL expression.
    ranking = list(
        self.db.query(AgentProfile.id, count(Extract.id)).join(
            Post, Post.creator_id == AgentProfile.id).join(Extract).filter(
            Extract.important == True,  # noqa: E712
            Extract.id.in_(extract_ids)).group_by(AgentProfile.id))
    ranking.sort(key=lambda row: row[1], reverse=True)

    if id_only:
        return [AgentProfile.uri_generic(agent_id)
                for (agent_id, _count) in ranking]

    agent_ids = [agent_id for (agent_id, _count) in ranking]
    position = {agent_id: rank for (rank, agent_id) in enumerate(agent_ids)}
    agents = self.db.query(AgentProfile).filter(
        AgentProfile.id.in_(agent_ids)).all()
    agents.sort(key=lambda agent: position[agent.id])
    return agents
def view(rtype, rid):
    """Serve one resource as HTML, as RDF, or as the raw SPARQL query.

    A trailing ``.suffix`` on ``rid`` is split off and passed to
    ``_conneg_format``. The query template ``queries/<rtype>.rq`` is
    rendered for the resource URI; format ``rq`` returns the query text,
    otherwise the query is run and the result rendered or serialised.
    """
    stem, dot, extension = rid.rpartition('.')
    if dot:
        rid, suffix = stem, extension
    else:
        suffix = None

    path = rtype + '/' + rid
    uri = URIRef(app.config['RESOURCE_BASE'] + path)
    services = app.config['SERVICES']
    query_text = render_template(
        "queries/%s.rq" % rtype,
        prefixes=RQ_PREFIXES, this=uri.n3(), services=services)

    fmt = _conneg_format(suffix)
    if fmt == 'rq':
        return query_text, 200, {'Content-Type': 'text/plain'}

    response = run_query(app.config['ENDPOINT'], query_text)
    graph = to_graph(response.content)
    this = graph.resource(uri)

    if fmt in ('html', 'xhtml'):
        return render_template(
            rtype + '.html', path=path, this=this, curies=graph.qname)

    headers = {'Content-Type': MIMETYPES.get(fmt) or 'text/plain'}
    # Map file suffixes to serializer names; others pass through.
    serializer = {'rdf': 'xml', 'ttl': 'turtle'}.get(fmt) or fmt
    return graph.serialize(format=serializer), 200, headers
def instance_view_jsonld(request):
    """JSON-LD view of the statements about the instance in the request.

    Returns ``HTTPUnauthorized()`` if the user may not READ the instance and
    raises ``HTTPNotFound`` if the context has no discussion. Objects that
    contain ``'_with_no_name_entry'`` are left out.
    """
    from assembl.semantic.virtuoso_mapping import AssemblQuadStorageManager
    from rdflib import URIRef, ConjunctiveGraph

    ctx = request.context
    user_id = authenticated_userid(request) or Everyone
    permissions = get_permissions(user_id, ctx.get_discussion_id())
    target = ctx._instance
    if not target.user_can(user_id, CrudPermissions.READ, permissions):
        return HTTPUnauthorized()
    discussion = ctx.get_instance_of_class(Discussion)
    if not discussion:
        raise HTTPNotFound()

    storage = AssemblQuadStorageManager()
    # instance.uri()[6:] skips the first six characters
    # (NOTE(review): looks like a "local:" prefix -- confirm).
    uri = URIRef(storage.local_uri() + target.uri()[6:])
    storage_name = storage.discussion_storage_name(discussion.id)
    virtuoso = get_virtuoso(target.db, storage_name)
    matches = ConjunctiveGraph(virtuoso, storage_name).triples(
        (uri, None, None))

    #result = v.query('select ?p ?o ?g where {graph ?g {<%s> ?p ?o}}' % uri)
    # Something is wrong here.
    def as_line(predicate, obj):
        return '%s %s %s.' % (uri.n3(), predicate.n3(), obj.n3())

    lines = [as_line(predicate, obj)
             for (_s, predicate, obj) in matches
             if '_with_no_name_entry' not in obj]
    return storage.quads_to_jsonld('\n'.join(lines))
def match_sparql_ask(transition, event, token, fsa):
    """
    The 'sparql-ask' matcher.

    The transition's condition is used as the WHERE clause of a SPARQL ASK
    query run on ``fsa.source_obsels_graph``. ``?obs`` is bound to the
    considered obsel, ``?pred`` and ``?first`` to the last and first entries
    of the token's ``history_events`` (or to ``""`` when there is no
    history), and prefix ``m:`` to the source trace model URI.

    :return: the ``askAnswer`` of the query result.
    """
    model_ns = fsa.source.model_uri
    if model_ns[-1] not in ('/', '#'):
        model_ns += '#'

    history = token and token.get('history_events')
    if history:
        pred, first = URIRef(history[-1]), URIRef(history[0])
    else:
        pred = first = None

    user_condition = transition['condition']
    ## this would be the correct way to do it
    # initBindings = { "obs": URIRef(event), "pred": pred, "first": first }
    ## unfortunately, Virtuoso does not support VALUES clauses after the
    ## ASK clause, which is how SPARQLUpdateStore handles initBindings,
    ## so the bindings are written into the condition instead
    null = '""'  # simulating NULL
    bindings = (
        URIRef(event).n3(),
        pred.n3() if pred else null,
        first.n3() if first else null,
    )
    prologue = """ BIND (%s as ?obs) BIND (%s as ?pred) BIND (%s as ?first) """ % bindings
    condition = prologue + user_condition

    result = fsa.source_obsels_graph.query(
        "ASK { %s }" % condition,
        initNs={"": KTBS, "m": model_ns},
        # initBindings=initBindings, # not supported by Virtuoso :-(
    )
    return result.askAnswer
class ContentProvider(object):
    """Content provider that reads harvestable items through ``JenaHelper``."""
    #implements(IContentProvider)

    def __init__(self, config):
        """Read the harvester description and prepare the item query.

        ``originatingSource`` and ``groupDescription`` come from the
        harvester's DESCRIBE result, falling back to ``config``.
        """
        self.config = config
        self.jena = JenaHelper(config)
        self.harvester = URIRef(config['harvesteruri'])
        data = self.jena.graphquery("DESCRIBE %s" % self.harvester.n3())
        self.originatingSource = (
            data.value(self.harvester, ANDS.originatingSource)
            or Literal(config['originatingsource']))
        self.groupDescription = (
            data.value(self.harvester, ANDS.groupDescription)
            or Literal(config['groupdescription']))
        self.item_sparql_query = resource_string(__name__, "item_sparql.sparql")

    def set_logger(self, log):
        """Set the logger instance for this class
        """
        self.log = log

    def update(self, from_date=None):
        """Run the "items to harvest" query and remember its result.

        Returns the changed/added content ids; call this before
        ``get_content_ids``. ``from_date`` is accepted but not used here.
        """
        query = resource_string(__name__, "items_to_harvest.sparql")
        self._content = self.jena.selectquery(
            query % {'harvester': self.harvester})
        return self._content

    def count(self):
        """Returns number of content objects in the repository

        Returns None if the number is unknown, i.e. before ``update`` has
        been called.
        """
        content = getattr(self, '_content', None)
        if content is None:
            return None
        return len(content)

    def get_content_ids(self):
        """returns a list/generator of content_ids
        """
        return self._content

    def get_content_by_id(self, id):
        """Return content of a specific id

        The item query is run into a graph named after ``id``; the
        provider's originating source and group description are added.
        """
        # assume id is URIRef instance
        subject = URIRef(id)
        g = Graph(identifier=subject)
        data = self.jena.graphquery(
            self.item_sparql_query % {"subject": id,
                                      'harvester': self.harvester},
            g)
        # FIXME: make these conditional
        data.add((subject, ANDS.originatingSource, self.originatingSource))
        data.add((subject, ANDS.groupDescription, self.groupDescription))
        return data
def test_creation_with_unknown_ns(self):
    """An RDFPredicate built from a URI in an unregistered namespace is
    created and carries a label."""
    resource_uri = 'http://localhost:8000/resource/aggregation/ton-smits-huis/454'
    predicate = RDFPredicate(resource_uri)

    store = Graph()
    store.add((URIRef(resource_uri), FOAF.name, Literal("sjoerd")))
    _first_subject = list(store.subjects())[0]

    # NOTE(review): a plain rdflib URIRef.n3() normally yields the <...>
    # form; confirm that "ns1:454" is the behaviour expected here.
    assert URIRef(resource_uri).n3() == "ns1:454"
    assert predicate is not None
    assert predicate.label is not None
def match_sparql_ask(transition, event, token, fsa):
    """
    The 'sparql-ask' matcher.

    Interprets ``transition['condition']`` as the WHERE clause of a SPARQL
    ASK query on the source obsels graph. ``?obs`` is bound to ``event``;
    ``?pred`` and ``?first`` to the last and first ``history_events`` of
    ``token`` (``""`` without history). Prefix ``m:`` is the source model
    URI, suffixed with ``#`` unless it already ends in ``/`` or ``#``.

    :return: boolean answer of the ASK query.
    """
    def n3_or_null(node):
        # An empty-string literal simulates NULL for a missing binding.
        return node.n3() if node else '""'

    m_ns = fsa.source.model_uri
    last_char = m_ns[-1]
    if not (last_char == '/' or last_char == '#'):
        m_ns += '#'

    events = token and token.get('history_events')
    previous = URIRef(events[-1]) if events else None
    earliest = URIRef(events[0]) if events else None

    where_clause = transition['condition']
    ## this would be the correct way to do it
    # initBindings = { "obs": URIRef(event), "pred": pred, "first": first }
    ## unfortunately, Virtuoso does not support VALUES clauses after the
    ## ASK clause, which is how SPARQLUpdateStore handles initBindings,
    ## so we generate that clause in the condition instead
    where_clause = """ BIND (%s as ?obs) BIND (%s as ?pred) BIND (%s as ?first) """ % (
        URIRef(event).n3(),
        n3_or_null(previous),
        n3_or_null(earliest),
    ) + where_clause
    ## thank you for nothing Virtuoso :-(
    return fsa.source_obsels_graph.query(
        "ASK { %s }" % where_clause,
        initNs={"": KTBS, "m": m_ns},
        # initBindings=initBindings, # not supported by Virtuoso :-(
    ).askAnswer
def new_value(self, entity_uri, property_uri):
    """Add an empty-literal value for a property of an entity.

    The value is added to the graph, to ``self.properties`` at the first
    free index, and inserted into the ``knowledge`` table.

    :param entity_uri: subject, as ``str`` or RDF term.
    :param property_uri: predicate, as ``str`` or RDF term.
    :return: the index assigned to the new value.
    """
    # Exact-type tests: RDF terms are str subclasses and stay untouched.
    if type(entity_uri) is str:
        entity_uri = URIRef(entity_uri)
    if type(property_uri) is str:
        property_uri = URIRef(property_uri)

    slot = self.get_property_free_index(entity_uri, property_uri)
    empty = Literal('')
    self.graph.add((entity_uri, property_uri, empty))
    self.properties[(entity_uri, property_uri)][slot] = empty

    connection = get_db()
    row = (self.id, entity_uri.n3(), property_uri.n3(), empty.n3(), slot)
    connection.execute(
        'INSERT INTO knowledge (subgraph_id, subject, predicate, object, property_index) VALUES (?, ?, ?, ?, ?)',
        row)
    connection.commit()
    self.root = None  # forces a rebuild of the root entity
    return slot
def undo_delete_individual(self, uri):
    """Restore the statements that were marked deleted under ``uri``.

    Rows of this subgraph whose ``deleted`` column equals the N3 form of
    ``uri`` are re-added to the graph and their marker is reset to NULL.

    :param uri: deletion marker, as ``str`` or RDF term.
    """
    if type(uri) is str:
        uri = URIRef(uri)
    selector = (self.id, uri.n3())

    connection = get_db()
    deleted_rows = connection.execute(
        'SELECT subject, predicate, object FROM knowledge WHERE subgraph_id = ? AND deleted = ?',
        selector).fetchall()
    for record in deleted_rows:
        self.graph.add(row_to_rdf(record))
    connection.execute(
        'UPDATE knowledge SET deleted = NULL WHERE subgraph_id = ? AND deleted = ?',
        selector)
    connection.commit()
    self.root = None  # forces a rebuild of the root entity
def check_valid_uri(self, uri):
    """Check whether ``uri`` can be serialised as a URI.

    :param uri: candidate value; anything that is not a ``str`` is invalid.
    :return: ``True`` when ``URIRef(uri).n3()`` succeeds and yields a
        string, ``False`` otherwise.
    """
    if not isinstance(uri, str):
        return False
    try:
        serialized = URIRef(uri).n3()
    except Exception:
        # Any error means the value is invalid; a bare ``except`` would
        # also swallow KeyboardInterrupt and SystemExit.
        return False
    return isinstance(serialized, str)
def term(self, term, use_prefix=True):
    """Render an RDF term (or plain value) as query text.

    ``None`` becomes ``RDF.nil``; values without an ``n3`` method are
    wrapped in a ``Literal`` first; URIs go through ``self.uri`` when
    ``use_prefix`` is set; double/integer/float/boolean literals are
    written as lower-cased bare text; everything else uses ``n3()``.
    """
    if isinstance(term, Namespace):
        term = URIRef(term)

    if term is None:
        return RDF.nil
    if not hasattr(term, 'n3'):
        return self.term(Literal(term))
    if use_prefix and isinstance(term, URIRef):
        return self.uri(term)

    if isinstance(term, Literal):
        bare_types = (XSD.double, XSD.integer, XSD.float, XSD.boolean)
        if term.datatype in bare_types:
            return unicode(term).lower()
    elif isinstance(term, Namespace):
        # NOTE(review): looks unreachable, since Namespace values are
        # converted to URIRef at the top -- confirm.
        return unicode(term)
    return term.n3()
def get_idea_ids_showing_post(cls, post_id):
    "Given a post, give the ID of the ideas that show this message"
    # This works because of a virtuoso bug...
    # where DISTINCT gives IDs instead of URIs.
    from .generic import Content

    discussion_storage = \
        AssemblQuadStorageManager.discussion_storage_name()
    post_uri = URIRef(Content.uri_generic(
        post_id, AssemblQuadStorageManager.local_uri()))
    sparql = '''select distinct ?idea where { %s sioc:reply_of* ?post . ?post assembl:postLinkedToIdea ?ideaP . ?idea idea:includes* ?ideaP }''' % (post_uri.n3(),)
    rows = cls.default_db.execute(
        SparqlClause(sparql, quad_storage=discussion_storage.n3()))

    idea_ids = []
    for (idea_id,) in rows:
        idea_ids.append(int(idea_id))
    return idea_ids
def change_label(self, entity_uri, label):
    """Replace the ``rdfs:label`` of an entity in the graph and database.

    :param entity_uri: entity whose label changes; wrapped in ``URIRef``.
    :param label: new label text; wrapped in ``Literal``.
    """
    subject = URIRef(entity_uri)
    new_label = Literal(label)

    # Drop every existing label before adding the new one.
    self.graph.remove((subject, RDFS.label, None))
    self.graph.add((subject, RDFS.label, new_label))

    connection = get_db()
    values = (new_label.n3(), self.id, subject.n3(), RDFS.label.n3())
    connection.execute(
        'UPDATE knowledge SET object = ? '
        ' WHERE subgraph_id = ? AND subject = ? AND predicate = ?',
        values)
    connection.commit()
    self.root = None  # forces a rebuild of the root entity
def get_user_by_uri(self, *, user_uri: URIRef) -> FoodKgUser:
    """
    Query the User KG to retrieve a FoodKGUser object with the target URI.

    :param user_uri: the URI of the user to return
    :return: a FoodKGUser object with the target URI
    """
    uri_constraint = {
        "var_name": "?userUri",
        "var_values": [user_uri.n3()]
    }
    construct_query = J2QueryStrService.j2_query(
        file_name="construct_user_query",
        constraints=[uri_constraint],
    )
    # The returned graph is not used here; presumably the call matters for
    # its caching side effect -- confirm against get_cache_graph.
    self.get_cache_graph(sparql=construct_query)
    return self._graph_get_user_by_uri(user_uri=user_uri)
def canvas_and_images_graph(graph, canvas_uri):
    """Extract a canvas and its image annotations into a new graph.

    The result holds every triple whose subject is the canvas, plus every
    triple whose subject is an annotation targeting the canvas or that
    annotation's image body (types dcmitype:Image, dms:Image or
    dms:ImageChoice).

    :param graph: source graph.
    :param canvas_uri: canvas identifier; wrapped in ``URIRef``.
    :return: a new ``Graph`` with the selected triples.
    """
    canvas = URIRef(canvas_uri)
    extracted = Graph()
    extracted += graph.triples((canvas, None, None))

    image_rows = graph.query(
        """SELECT ?image_anno ?image WHERE { ?image_anno a oa:Annotation . ?image_anno oa:hasTarget %s . ?image_anno oa:hasBody ?image . ?image a ?type . FILTER(?type = dcmitype:Image || ?type = dms:Image || ?type = dms:ImageChoice) . }""" % (canvas.n3()),
        initNs=ns)
    for annotation, image in image_rows:
        wanted_subjects = [annotation, image]
        extracted += graph.triples_choices((wanted_subjects, None, None))
    return extracted
def cli(kb, input):
    # Reads ``input`` into two graph objects and prints the second one as
    # N-Quads, one decoded line at a time. ``kb`` is not used in this body.
    #
    # NOTE(review): this layout was reconstructed from whitespace-collapsed
    # source; confirm whether the two live print() calls belong after the
    # loop (as written here) or inside it.
    g, og = read(input)
    for spo, c in g.store.triples((None, None, None), None):
        s, p, o = spo
        s = fixup(s)
        p = fixup(p)
        o = fixup(o)
        # Context identifier resolved against the module-level ``bbbb``.
        cc = URIRef(c.identifier, base=bbbb)
        # ``spocc`` and ``r`` are built but never used below.
        spocc = (s, p, o, cc)
        r = "%s %s %s %s .\n" % (s.n3(), p.n3(), o.n3(), ('' if cc == bbbb else cc.n3()))
        # print()
        # print()
    print()
    print()
    # serialize() output is treated as bytes here: split, then decode.
    for l in (og.serialize(format='nquads')).splitlines():
        print(l.decode('utf8'))
def term(self, term, use_prefix=True):
    """Render an RDF term (or plain value) as query text.

    ``None`` becomes ``RDF.nil``; values without an ``n3`` method are
    wrapped in a ``Literal`` first; URIs go through ``self.uri`` when
    ``use_prefix`` is set. Double/integer/float/boolean literals are written
    as lower-cased bare text, other typed literals get an abbreviated
    datatype when ``use_prefix`` is set, and the rest uses ``n3()``.
    """
    if isinstance(term, (Namespace, ClosedNamespace)):
        term = URIRef(namespace_to_uri(term))

    if term is None:
        return RDF.nil
    if not hasattr(term, 'n3'):
        return self.term(Literal(term))

    is_uri = isinstance(term, URIRef)
    if use_prefix and is_uri:
        return self.uri(term)

    if isinstance(term, Literal):
        bare_types = (XSD.double, XSD.integer, XSD.float, XSD.boolean)
        if term.datatype in bare_types:
            return unicode(term).lower()
        if use_prefix and term.datatype:
            # Abbreviate datatype if possible
            return '"%s"^^%s' % (term, self.uri(term.datatype))
    elif isinstance(term, Namespace):
        # NOTE(review): looks unreachable, since Namespace values are
        # converted to URIRef at the top -- confirm.
        return unicode(term)
    return term.n3()
def getEntity(self, reference, makeIfDoesNotExist: bool = False):
    """Returns a proxy to an entity in the ontology.

    Entities already remembered by the API are returned as long as they
    still exist; a remembered entity that no longer exists is forgotten and
    ``None`` is returned. Unknown references yield ``None`` unless
    ``makeIfDoesNotExist`` is set and the entity is not in the database.

    Parameters
    ----------
    reference : Identifier or str
        Reference of the entity; strings are wrapped in ``URIRef``.
    makeIfDoesNotExist : bool
        If True, an entity with the specified reference is created via
        ``makeEntity`` when none exists within the database.

    Returns
    -------
    OntoEntity or None
        A proxy for the entity in the ontology, or None.
    """
    # make sure reference is a valid identifier
    if isinstance(reference, str):
        reference = URIRef(reference)
    key = reference.n3()

    remembered = self.__objects
    if key in remembered:
        entity = remembered[key]
        if entity.exists:
            return entity
        # no longer in the DB: forget it
        del remembered[key]
        return None

    if self.existEntity(reference):
        # TODO: make new object but don't write into DB (it's already there)
        return None
    if makeIfDoesNotExist:
        return self.makeEntity(reference)
    return None
def get_contributors(self):
    """Return the authors who posted on this idea or its sub-ideas.

    Authors are anyone who contributed to a post linked to the idea (or an
    idea reached through ``idea:includes*``) or to a reply to such a post.
    They are ordered by the two counts the query returns, highest first,
    and reported as ``local:AgentProfile/<id>`` strings.
    """
    # anyone who contributed to any of the idea's posts
    local_uri = AssemblQuadStorageManager.local_uri()
    discussion_storage = \
        AssemblQuadStorageManager.discussion_storage_name()
    idea_uri = URIRef(self.uri(local_uri))
    clause = '''select count(distinct ?postP), count(distinct ?post), ?author where { %s idea:includes* ?ideaP . ?postP assembl:postLinkedToIdea ?ideaP . ?post sioc:reply_of* ?postP . ?post sioc:has_creator ?author }'''
    rows = self.db.execute(
        SparqlClause(clause % (idea_uri.n3(),),
                     quad_storage=discussion_storage.n3()))

    ranked = []
    for linked_posts, posts, author in rows:
        # keep only the last path segment of the author URI
        author_id = author.rsplit('/', 1)[1]
        ranked.append(
            (int(linked_posts), int(posts), 'local:AgentProfile/' + author_id))
    ranked = sorted(ranked, reverse=True)
    return [profile for (_linked, _posts, profile) in ranked]
def handle_GET(self, request, context):
    """Render the citation network around the resource given by ``?uri=``.

    Raises ``Http404`` when the URI has no types. ``depth`` is clamped to
    0..3 (default 2 when missing or not an integer); an unknown or missing
    ``direction`` falls back to ``'both'``.
    """
    params = request.GET
    uri = URIRef(params.get('uri', ''))
    if not self.get_types(uri):
        raise Http404

    try:
        requested_depth = int(params.get('depth'))
    except (TypeError, ValueError):
        depth = 2
    else:
        depth = max(0, min(requested_depth, 3))

    try:
        direction_template = self._DIRECTIONS[params['direction']]
    except KeyError:
        direction_template = self._DIRECTIONS['both']
    direction = direction_template % {'depth': depth}

    graph = self.endpoint.query(
        self._QUERY % {'uri': uri.n3(), 'direction': direction})

    typed_subjects = set(graph.subjects(NS.rdf.type))
    subjects = [Resource(node, graph, self.endpoint)
                for node in typed_subjects]
    hexhashes = set(resource.hexhash for resource in subjects)

    context.update({
        'graph': graph,
        'queries': [graph.query],
        'subjects': subjects,
        'subject': Resource(uri, graph, self.endpoint),
        'hexhashes': hexhashes,
        'depth': depth,
        'layout': params.get('layout'),
        'direction': params.get('direction'),
        'minimal': params.get('minimal'),
    })
    return self.render(request, context, 'citation-network')
def change_value(self, entity_uri, property_uri, index, value):
    """Replace the value stored at ``index`` for a property of an entity.

    The previous value is removed from the graph and the new one checked
    with ``check_property_value``. If the check reports a problem the slot
    is dropped from ``self.properties``, the stored object is reset to an
    empty literal and the report is returned. Otherwise the value is
    written to the graph, ``self.properties`` and the database.

    :return: the validity report when the check fails, else ``None``.
    """
    if type(entity_uri) is str:
        entity_uri = URIRef(entity_uri)
    if type(property_uri) is str:
        property_uri = URIRef(property_uri)
    slot = (entity_uri, property_uri)

    db = get_db()
    select_sql = (
        'SELECT object FROM knowledge '
        ' WHERE subgraph_id = ? AND subject = ? AND predicate = ? AND property_index = ?')
    update_sql = (
        'UPDATE knowledge SET object = ? '
        ' WHERE subgraph_id = ? AND subject = ? AND predicate = ? AND property_index = ?')

    # delete previous value from graph
    previous = db.execute(
        select_sql,
        (self.id, entity_uri.n3(), property_uri.n3(), index)).fetchone()
    if previous:
        old_term = parse_n3_term(previous['object'])
        self.graph.remove((entity_uri, property_uri, old_term))

    # add new value to graph
    validity, value = self.check_property_value(property_uri, value)
    if validity:
        # the check returned an error message
        del self.properties[slot][index]
        db.execute(
            update_sql,
            (Literal('').n3(), self.id, entity_uri.n3(), property_uri.n3(), index))
        db.commit()
        self.root = None
        return validity

    self.graph.add((entity_uri, property_uri, value))
    self.properties[slot][index] = value
    db.execute(
        update_sql,
        (value.n3(), self.id, entity_uri.n3(), property_uri.n3(), index))
    db.commit()
    self.root = None  # forces a rebuild of the root entity
class ContentProvider(object):
    """Harvest content provider that queries its items via ``JenaHelper``."""
    #implements(IContentProvider)

    def __init__(self, config):
        """Load the harvester description and the item query template.

        The originating source and group description are taken from the
        DESCRIBE result of the harvester URI when present, otherwise from
        the ``originatingsource`` / ``groupdescription`` config entries.
        """
        self.config = config
        self.jena = JenaHelper(config)
        self.harvester = URIRef(config['harvesteruri'])
        description = self.jena.graphquery(
            "DESCRIBE %s" % self.harvester.n3())
        self.originatingSource = description.value(
            self.harvester, ANDS.originatingSource) or Literal(
                config['originatingsource'])
        self.groupDescription = description.value(
            self.harvester, ANDS.groupDescription) or Literal(
                config['groupdescription'])
        self.item_sparql_query = resource_string(__name__,
                                                 "item_sparql.sparql")

    def set_logger(self, log):
        """Set the logger instance for this class
        """
        self.log = log

    def update(self, from_date=None):
        """Harvest the ids of the items to process and cache them.

        Returns the changed/added content ids; call this before
        ``get_content_ids``. ``from_date`` is accepted but not used here.
        """
        query = resource_string(__name__, "items_to_harvest.sparql")
        self._content = self.jena.selectquery(
            query % {'harvester': self.harvester})
        return self._content

    def count(self):
        """Returns number of content objects in the repository

        Returns None if the number is unknown, i.e. when ``update`` has not
        stored a result yet.
        """
        try:
            content = self._content
        except AttributeError:
            return None
        return None if content is None else len(content)

    def get_content_ids(self):
        """returns a list/generator of content_ids
        """
        return self._content

    def get_content_by_id(self, id):
        """Return content of a specific id

        Runs the item query into a graph named after ``id`` and adds the
        provider's originating source and group description to it.
        """
        # assume id is URIRef instance
        item = URIRef(id)
        target = Graph(identifier=item)
        data = self.jena.graphquery(
            self.item_sparql_query % {
                "subject": id,
                'harvester': self.harvester
            }, target)
        # FIXME: make these conditional
        data.add((item, ANDS.originatingSource, self.originatingSource))
        data.add((item, ANDS.groupDescription, self.groupDescription))
        return data
def S(self, uri):
    """Serialise ``uri`` to N3 using this object's namespace manager.

    Strings are wrapped in ``URIRef`` first; anything else is expected to
    provide ``n3(namespace_manager)`` itself.
    """
    node = URIRef(uri) if isinstance(uri, str) else uri
    namespaces = self.g.g.namespace_manager
    return node.n3(namespaces)
# the Free Software Foundation; either version 3, or (at your option)
# any later version.

# This file is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with n3; see the file LICENSE.  If not, write to
# the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
# Boston, MA 02110-1301 USA,

# Serializing a single term to N3

from rdflib import Graph, URIRef, Literal, BNode
from rdflib.namespace import FOAF, NamespaceManager

# A URI: first without, then with a namespace manager.
person_uri = URIRef('http://xmlns.com/foaf/0.1/Person')
print(person_uri.n3())

graph = Graph()
# Prints the return value of bind() after registering the "foaf" prefix.
print(graph.bind("foaf", FOAF))
print(person_uri.n3(graph.namespace_manager))

# A literal: first without, then with the same namespace manager.
number = Literal(2)
print(number.n3())
print(number.n3(graph.namespace_manager))
def fix_socrata_graph(g, dataset_dict, portal_url):
    """Normalise, in place, the DCAT graph of a Socrata dataset.

    Three best-effort steps, each skipped on any error:

    1. When ``dataset_dict['view']`` is a dict, the dataset node is replaced
       by ``<portal_url>/dataset/<id>`` and owner / table-author details
       are added as publisher and contact point.
    2. Every ``dcat:distribution`` target is replaced by a blank node with
       a deterministic id, typed ``dcat:Distribution``.
    3. ODS ``created`` / ``last_modified`` become ``dct:issued`` /
       ``dct:modified``.

    :param g: graph to modify.
    :param dataset_dict: portal metadata for the dataset.
    :param portal_url: base URL of the portal.
    :return: the dataset reference, or the first node typed
        ``dcat:Dataset`` when no reference could be built.
    """
    dataset_ref = None

    # add additional info
    if 'view' in dataset_dict and isinstance(dataset_dict['view'], dict):
        data = dataset_dict['view']
        try:
            identifier = data['id']
            uri = '{0}/dataset/{1}'.format(portal_url.rstrip('/'), identifier)
            dataset_ref = URIRef(uri)

            # replace blank node by dataset reference
            dataset_node = g.value(predicate=DCT.identifier,
                                   object=Literal(identifier))
            if dataset_node:
                # Snapshot first: the loop edits the triples it walks.
                for s, p, o in list(g.triples((dataset_node, None, None))):
                    g.remove((s, p, o))
                    g.add((dataset_ref, p, o))

            if (dataset_ref, RDF.type, DCAT.Dataset) not in g:
                g.add((dataset_ref, RDF.type, DCAT.Dataset))

            # owner
            if 'owner' in data and isinstance(data['owner'], dict) \
                    and 'displayName' in data['owner']:
                owner = data['owner']['displayName']
                # add owner as publisher
                # BNode: dataset_ref + DCT.publisher + owner
                # (sha1 needs bytes, so the text is encoded explicitly)
                id_string = dataset_ref.n3() + DCT.publisher.n3() + owner
                bnode_hash = hashlib.sha1(id_string.encode('utf-8'))
                publisher_details = BNode(bnode_hash.hexdigest())
                g.add((publisher_details, RDF.type, FOAF.Organization))
                g.add((dataset_ref, DCT.publisher, publisher_details))
                g.add((publisher_details, FOAF.name, Literal(owner)))

            # author
            if 'tableAuthor' in data \
                    and isinstance(data['tableAuthor'], dict) \
                    and 'displayName' in data['tableAuthor']:
                author = data['tableAuthor']['displayName']
                # BNode: dataset_ref + VCARD.fn + author
                id_string = dataset_ref.n3() + VCARD.fn.n3() + author
                bnode_hash = hashlib.sha1(id_string.encode('utf-8'))
                contact_details = BNode(bnode_hash.hexdigest())
                g.add((contact_details, RDF.type, VCARD.Organization))
                g.add((dataset_ref, DCAT.contactPoint, contact_details))
                g.add((contact_details, VCARD.fn, Literal(author)))
            #title
            #if 'name' in data:
            #    g.add((dataset_ref, DCT.title, Literal('name')))
        except Exception:
            # best effort: keep whatever was converted so far
            pass

    try:
        # redesign distribution, format
        # Snapshot: new dcat:distribution triples are added inside the loop.
        for ds, has_distr, dcat_download in list(
                g.triples((None, DCAT.distribution, None))):
            # create new distr
            # BNode: dataset_ref + dcat_download
            id_string = dataset_ref.n3() + dcat_download
            bnode_hash = hashlib.sha1(id_string.encode('utf-8'))
            distribution = BNode(bnode_hash.hexdigest())

            # rewrite format
            for s, p, format_bnode in list(
                    g.triples((dcat_download, DCT['format'], None))):
                format_label = g.value(format_bnode, RDFS.label)
                mime_type = g.value(format_bnode, RDF.value)
                # keep the blank node and add type MediaTypeOrExtent
                # NOTE(review): the type is added to the label value, not
                # to format_bnode -- confirm which was intended.
                g.add((format_label, RDF.type, DCT.MediaTypeOrExtent))
                # additionally add media type to distribution
                g.add((distribution, DCAT.mediaType, mime_type))
                g.remove((s, p, format_bnode))

            # add new distr
            g.add((ds, DCAT.distribution, distribution))
            g.add((distribution, RDF.type, DCAT.Distribution))
            # remove old dcat:Download
            g.remove((ds, has_distr, dcat_download))
            g.remove((dcat_download, RDF.type, None))
            # add links from old distribution
            for s, p, o in list(g.triples((dcat_download, None, None))):
                g.remove((s, p, o))
                g.add((distribution, p, o))
    except Exception:
        # best effort: also reached when dataset_ref is still None
        pass

    try:
        # created, modified keys
        ODS_created = URIRef('http://open-data-standards.github.com/2012/01/open-data-standards#created')
        ODS_modified = URIRef('http://open-data-standards.github.com/2012/01/open-data-standards#last_modified')
        for s, p, o in list(g.triples((None, ODS_created, None))):
            g.remove((s, p, o))
            g.add((s, DCT.issued, o))
        for s, p, o in list(g.triples((None, ODS_modified, None))):
            g.remove((s, p, o))
            g.add((s, DCT.modified, o))
    except Exception:
        pass

    if not dataset_ref:
        dataset_ref = g.value(predicate=RDF.type, object=DCAT.Dataset)
    return dataset_ref
def graph_from_data_gouv_fr(g, dataset_dict, portal_url):
    """Add a DCAT description of a data.gouv.fr dataset to ``g``.

    The dataset node is the URI found in ``dataset_dict['page']``. Title,
    description, landing page, frequency, tags, dates, publisher, license and
    one ``dcat:Distribution`` per entry of ``dataset_dict['resources']`` are
    added. Blank nodes get deterministic ids (SHA-1 of the identifying
    strings) so repeated conversions produce the same node ids.

    :param g: rdflib graph, modified in place.
    :param dataset_dict: dataset metadata; ``id`` and ``page`` are required,
        and every resource needs an ``id``.
    :param portal_url: unused here; kept for a signature parallel to the
        other converters.
    :returns: the dataset ``URIRef``.
    :raises KeyError: if ``id``/``page`` or a resource ``id`` is missing.
    """
    def _hashed_bnode(*parts):
        # Deterministic blank node id: SHA-1 over the UTF-8 bytes of the
        # concatenated parts (hashlib needs bytes, not unicode text).
        return BNode(hashlib.sha1(u''.join(parts).encode('utf-8')).hexdigest())

    identifier = dataset_dict['id']
    uri = dataset_dict['page']

    # dataset subject
    dataset_ref = URIRef(uri)
    for prefix, namespace in namespaces.iteritems():
        g.bind(prefix, namespace)

    g.add((dataset_ref, RDF.type, DCAT.Dataset))
    g.add((dataset_ref, DCT.identifier, Literal(identifier)))

    # Basic fields
    items = [
        ('title', DCT.title, None),
        ('description', DCT.description, None),
        ('page', DCAT.landingPage, None),
        ('frequency', DCT.accrualPeriodicity, None),
    ]
    _add_triples_from_dict(g, dataset_dict, dataset_ref, items)

    # Tags (``or []`` tolerates an explicit null in the source data)
    for tag in dataset_dict.get('tags') or []:
        if isinstance(tag, basestring):
            g.add((dataset_ref, DCAT.keyword, Literal(tag)))

    # Dates
    items = [
        ('created_at', DCT.issued, None),
        ('last_modified', DCT.modified, ['last_update']),
    ]
    _add_date_triples_from_dict(g, dataset_dict, dataset_ref, items)

    # Publisher
    publisher = dataset_dict.get('organization')
    if publisher and isinstance(publisher, dict):
        publisher_id = publisher.get('id')
        publisher_name = publisher.get('name')
        publisher_page = publisher.get('page')

        publisher_details = None
        if publisher_page:
            publisher_details = URIRef(publisher_page)
            g.add((publisher_details, FOAF.homepage, URIRef(publisher_page)))
        else:
            # BNode: dataset_ref + DCT.publisher + publisher id. The original
            # concatenated the id unconditionally and raised TypeError when it
            # was missing; fall back to the name, and skip the publisher when
            # nothing identifies it.
            publisher_key = publisher_id or publisher_name
            if publisher_key:
                publisher_details = _hashed_bnode(
                    dataset_ref.n3(), DCT.publisher.n3(), publisher_key)

        if publisher_details is not None:
            g.add((publisher_details, RDF.type, FOAF.Organization))
            if publisher_id is not None:
                # Avoid emitting a meaningless Literal(None) identifier.
                g.add((publisher_details, DCT.identifier,
                       Literal(publisher_id)))
            g.add((dataset_ref, DCT.publisher, publisher_details))
            if publisher_name:
                g.add((publisher_details, FOAF.name, Literal(publisher_name)))

    # License: one shared node, attached to every distribution below.
    license_node = None
    license_id = dataset_dict.get('license')
    if license_id:
        license_node = _hashed_bnode(
            dataset_ref.n3(), DCT.license.n3(), license_id)
        g.add((license_node, RDF.type, DCT.LicenseDocument))
        g.add((license_node, DCT.identifier, Literal(license_id)))

    # Resources
    for resource_dict in dataset_dict.get('resources') or []:
        distribution = URIRef(resource_dict['id'])
        g.add((dataset_ref, DCAT.distribution, distribution))
        g.add((distribution, RDF.type, DCAT.Distribution))

        if license_node is not None:
            g.add((distribution, DCT.license, license_node))

        # Simple values
        items = [
            ('title', DCT.title, None),
            ('description', DCT.description, None),
            ('created_at', DCT.issued, None),
            ('last_modified', DCT.modified, None),
        ]
        _add_triples_from_dict(g, resource_dict, distribution, items)

        resource_format = resource_dict.get('format')
        resource_mime = resource_dict.get('mime')
        if resource_format:
            format_node = _hashed_bnode(
                dataset_ref.n3(), DCT['format'].n3(), resource_format)
            g.add((format_node, RDF.type, DCT.MediaTypeOrExtent))
            g.add((format_node, RDFS.label, Literal(resource_format)))
            g.add((distribution, DCT['format'], format_node))
            if resource_mime:
                g.add((format_node, RDF.value, Literal(resource_mime)))
        if resource_mime:
            g.add((distribution, DCAT.mediaType, Literal(resource_mime)))

        download_url = resource_dict.get('url')
        if download_url:
            download_url = download_url.strip()
            # Invalid URIs are kept as plain literals rather than dropped.
            if is_valid_uri(download_url):
                g.add((distribution, DCAT.downloadURL, URIRef(download_url)))
            else:
                g.add((distribution, DCAT.downloadURL, Literal(download_url)))

        filesize = resource_dict.get('filesize')
        if filesize:
            try:
                g.add((distribution, DCAT.byteSize,
                       Literal(float(filesize), datatype=XSD.decimal)))
            except (ValueError, TypeError):
                # Not numeric: keep the raw value untyped.
                g.add((distribution, DCAT.byteSize, Literal(filesize)))

    return dataset_ref
def graph_from_opendatasoft(g, dataset_dict, portal_url):
    """Add a DCAT description of an OpenDataSoft dataset to ``g``.

    The dataset node is ``<portal_url>/explore/dataset/<datasetid>``.
    Descriptive fields are read from ``dataset_dict['metas']``. When the
    dataset has records, one distribution per export format (csv, json, xls,
    plus geojson/kml for datasets with the ``geo`` feature) is generated;
    each attachment becomes a further distribution.

    Available in the source: title, description, language, theme, keyword,
    license, publisher, references.

    :param g: rdflib graph, modified in place.
    :param dataset_dict: dataset record; ``datasetid`` and ``metas`` are
        required.
    :param portal_url: portal base URL (a trailing slash is ignored).
    :returns: the dataset ``URIRef``.
    :raises KeyError: if ``datasetid`` or ``metas`` is missing.
    """
    def _hashed_bnode(*parts):
        # Deterministic blank node id: SHA-1 over the UTF-8 bytes of the
        # concatenated parts (hashlib needs bytes, not unicode text).
        return BNode(hashlib.sha1(u''.join(parts).encode('utf-8')).hexdigest())

    identifier = dataset_dict['datasetid']
    base_url = portal_url.rstrip('/')
    uri = '{0}/explore/dataset/{1}'.format(base_url, identifier)

    # dataset subject
    dataset_ref = URIRef(uri)
    for prefix, namespace in namespaces.iteritems():
        g.bind(prefix, namespace)

    g.add((dataset_ref, RDF.type, DCAT.Dataset))
    g.add((dataset_ref, DCT.identifier, Literal(identifier)))

    data = dataset_dict['metas']

    # Basic fields
    items = [
        ('title', DCT.title, None),
        ('description', DCT.description, None),
    ]
    _add_triples_from_dict(g, data, dataset_ref, items)

    # Lists
    items = [
        ('language', DCT.language, None),
        ('theme', DCAT.theme, None),
        ('keyword', DCAT.keyword, None),
    ]
    _add_list_triples_from_dict(g, data, dataset_ref, items)

    # Publisher
    publisher_name = data.get('publisher')
    if publisher_name:
        # BNode: dataset_ref + DCT.publisher + publisher_name. Encoded before
        # hashing, so non-ASCII publisher names no longer raise.
        publisher_details = _hashed_bnode(
            dataset_ref.n3(), DCT.publisher.n3(), publisher_name)
        g.add((publisher_details, RDF.type, FOAF.Organization))
        g.add((dataset_ref, DCT.publisher, publisher_details))
        g.add((publisher_details, FOAF.name, Literal(publisher_name)))
        # TODO any additional publisher information available? look for fields

    # Dates
    items = [
        ('modified', DCT.modified, ['metadata_processed', 'metadata_modified']),
    ]
    _add_date_triples_from_dict(g, data, dataset_ref, items)

    # References: only values that parse with a network location are kept.
    references = data.get('references')
    if (references and isinstance(references, basestring)
            and bool(urlparse.urlparse(references).netloc)):
        references = references.strip()
        if is_valid_uri(references):
            g.add((dataset_ref, RDFS.seeAlso, URIRef(references)))
        else:
            g.add((dataset_ref, RDFS.seeAlso, Literal(references)))

    # The license is stored per distribution, not on the dataset.
    license_label = data.get('license')

    def _attach_license(distribution):
        # BNode: distribution + license label.
        if not license_label:
            return
        license_node = _hashed_bnode(distribution.n3(), license_label)
        g.add((distribution, DCT.license, license_node))
        g.add((license_node, RDF.type, DCT.LicenseDocument))
        g.add((license_node, RDFS.label, Literal(license_label)))

    # Distributions generated from the export API
    if dataset_dict.get('has_records'):
        exports = [
            ('csv', 'text/csv'),
            ('json', 'application/json'),
            ('xls', 'application/vnd.ms-excel'),
        ]
        if 'geo' in (dataset_dict.get('features') or []):
            exports.append(('geojson', 'application/vnd.geo+json'))
            exports.append(('kml', 'application/vnd.google-earth.kml+xml'))
            # TODO shape files?
            # exports.append(('shp', 'application/octet-stream'))

        for export_format, mimetype in exports:
            url = (base_url + '/api/records/1.0/download?dataset='
                   + identifier + '&format=' + export_format)

            # BNode: dataset_ref + url
            distribution = _hashed_bnode(dataset_ref.n3(), url)
            g.add((dataset_ref, DCAT.distribution, distribution))
            g.add((distribution, RDF.type, DCAT.Distribution))
            if is_valid_uri(url):
                g.add((distribution, DCAT.accessURL, URIRef(url)))
            else:
                g.add((distribution, DCAT.accessURL, Literal(url)))

            _attach_license(distribution)

            # Format. BNode: distribution + format + mimetype
            format_node = _hashed_bnode(
                distribution.n3(), export_format, mimetype)
            g.add((distribution, DCT['format'], format_node))
            g.add((format_node, RDF.type, DCT.MediaTypeOrExtent))
            g.add((format_node, RDFS.label, Literal(export_format)))
            g.add((format_node, RDF.value, Literal(mimetype)))
            g.add((distribution, DCAT.mediaType, Literal(mimetype)))

            # Dates
            items = [
                ('data_processed', DCT.modified, None),
            ]
            _add_date_triples_from_dict(g, data, distribution, items)

    # Attachments
    attachments = dataset_dict.get('attachments') or []
    for index, attachment in enumerate(attachments):
        if not isinstance(attachment, dict):
            # The code below reads attachment fields by key.
            continue

        attachment_id = attachment.get('id')
        url = None
        if attachment_id:
            url = (base_url + '/api/datasets/1.0/' + identifier
                   + '/attachments/' + attachment_id)

        # BNode: dataset_ref + url. The original concatenated the attachment
        # dict itself, which raised TypeError for every attachment. Without
        # an id the list position keeps the node id deterministic.
        distribution = _hashed_bnode(
            dataset_ref.n3(), url or (u'attachment-%d' % index))
        g.add((dataset_ref, DCAT.distribution, distribution))
        g.add((distribution, RDF.type, DCAT.Distribution))

        _attach_license(distribution)

        # Simple values. mediaType and accessURL are DCAT terms (Dublin Core
        # defines neither); this matches the export branch above.
        items = [
            ('title', DCT.title, None),
            ('mimetype', DCAT.mediaType, None),
            ('format', DCT['format'], None),
        ]
        _add_triples_from_dict(g, attachment, distribution, items)

        if url:
            g.add((distribution, DCAT.accessURL, Literal(url)))

    return dataset_ref
def getShortUri(graph, uri):
    """Return the N3 form of ``uri`` rendered with ``graph``'s namespace manager.

    ``uri`` is used as-is when it is exactly a ``URIRef``; any other value is
    wrapped in ``URIRef`` first.
    """
    ref = uri if type(uri) is URIRef else URIRef(uri)
    return ref.n3(graph.namespace_manager)
def get_idea_ids_showing_post(cls, post_id, direct=False, indirect=True):
    """Given a post, give the ID of the ideas that show this message.

    ``post_id`` is turned into a post URI with ``Content.uri_generic`` and
    substituted into one of three SPARQL clauses, selected by the flags:

    * ``indirect`` only: ideas that include (``idea:includes*``) an idea
      linked to the post or to a post it transitively replies to.
    * ``direct`` only: ideas linked to the post or to such an ancestor post.
    * both: rows of (post, linked idea, idea, extract), flagged in the code
      as "not used anymore".

    At least one of ``direct`` / ``indirect`` must be true (asserted).

    In the combined mode the result is a list of tuples
    ``(Idea.uri_generic(idea), Idea.uri_generic(linked_idea),
    Content.uri_generic(post), Extract.uri_generic(extract) or None)``,
    sorted with ``comp`` and with consecutive duplicate rows removed.

    NOTE(review): the source this was reconstructed from had lost its
    indentation. As laid out here, only the combined mode runs a query and
    returns a value; the clauses built for the two single-flag modes are
    never executed and the function falls through returning ``None`` --
    presumably a trailing branch is missing. Confirm against the upstream
    file.

    NOTE(review): this is Python 2 only code (tuple parameters in ``comp``
    and ``list.sort(cmp=...)``).
    """
    # This works because of a virtuoso bug...
    # where DISTINCT gives IDs instead of URIs.
    from .generic import Content
    from .idea_content_link import Extract
    # At least one lookup mode has to be requested.
    assert direct or indirect
    discussion_storage = \
        AssemblQuadStorageManager.discussion_storage_name()

    # URI of the post, substituted for the %s placeholder of each clause.
    post_uri = URIRef(Content.uri_generic(
        post_id, AssemblQuadStorageManager.local_uri()))
    if indirect and not direct:
        clause = '''select distinct ?idea where { %s sioc:reply_of* ?post . ?post assembl:postLinkedToIdea ?ideaP . ?idea idea:includes* ?ideaP }'''
    elif direct and not indirect:
        clause = '''select distinct ?idea where { %s sioc:reply_of* ?post . ?post assembl:postLinkedToIdea ?idea }'''
    if direct and indirect:
        # Not used anymore, to be cleaned.
        clause = '''select distinct ?postP, ?ideaP, ?idea, ?ex where { %s sioc:reply_of* ?postP . ?postP assembl:postLinkedToIdea ?ideaP . ?idea idea:includes* ?ideaP . optional { ?ex oa:hasSource ?postP ; assembl:resourceExpressesIdea ?ideaP . } }'''
        r = list(cls.default_db.execute(
            SparqlClause(clause % (
                post_uri.n3(),),
                quad_storage=discussion_storage.n3())))
        # Rows are (post, linked idea, idea, extract); the extract column is
        # optional in the query, so an empty value becomes None.
        r = [(int(x), int(y), int(z), int(e) if e else None)
             for (x, y, z, e) in r]

        def comp((pp1, ip1, i1, e1), (pp2, ip2, i2, e2)):
            # cmp-style ordering of two rows: rows whose idea is the linked
            # idea itself come first, then rows whose post is the requested
            # post; remaining ties are broken by descending post, linked
            # idea, idea and extract ids.
            direct_idea1 = ip1 == i1
            direct_idea2 = ip2 == i2
            direct_post1 = pp1 == post_id
            direct_post2 = pp2 == post_id
            if direct_idea1 != direct_idea2:
                return -1 if direct_idea1 else 1
            if direct_post1 != direct_post2:
                return -1 if direct_post1 else 1
            if pp1 != pp2:
                # assume hry is congruent with post order.
                return pp2 - pp1
            if ip1 != ip2:
                # TODO: Real hry order. Should be rare.
                return ip2 - ip1
            if i1 != i2:
                # TODO: Real hry order.
                return i2 - i1
            if e1 != e2:
                # NOTE(review): e1 or e2 can be None (see the conversion
                # above); the subtraction would then raise TypeError.
                return e2 - e1
            return 0
        r.sort(cmp=comp)
        # can't trust virtuoso's uniqueness.
        # After sorting, equal rows are adjacent, so groupby drops duplicates.
        r = [e for e, _ in groupby(r)]
        return [(
            Idea.uri_generic(i),
            Idea.uri_generic(ip),
            Content.uri_generic(pp),
            Extract.uri_generic(ex) if ex else None
        ) for (pp, ip, i, ex) in r]
def extract_task_data_each_query(rdf_graph, task: URIRef) -> sparql.Result:
    """Run the per-task data query for ``task`` and return its result.

    The N3 form of ``task`` is substituted for the ``task`` placeholder of
    ``task_data_each_query_template`` and the resulting text is executed on
    ``rdf_graph``. The result is asserted to contain at least one row.
    """
    query_text = Template(task_data_each_query_template).substitute(
        task=task.n3())
    rows: sparql.Result = rdf_graph.query(query_text)
    assert len(rows) > 0
    return rows
def extract_task_meta_data(rdf_graph: Graph, task: URIRef) -> sparql.ResultRow:
    """Run the task metadata query for ``task`` and return its single row.

    The N3 form of ``task`` is substituted for the ``task`` placeholder of
    ``task_meta_data_template`` and the resulting text is executed on
    ``rdf_graph``. The result is asserted to contain exactly one row, and
    the first row is returned.
    """
    query_text = Template(task_meta_data_template).substitute(task=task.n3())
    rows: sparql.Result = rdf_graph.query(query_text)
    assert len(rows) == 1
    row_iterator = iter(rows)
    return next(row_iterator)