def __get_by_subquery(self, params):
    context = params.get("context", None)

    inner_query = select("?s")
    inner_params = params.copy()
    if "order" in params:
        # "order" needs to stay in subquery,
        # but doesn't do anything useful in main query
        del params["order"]

    self.__apply_limit_offset_order_get_by_filter(inner_params, inner_query)

    query = select("?s", "?p", "?v", "?c").distinct()
    query.group(('?s', '?p', '?v'), optional_group(('?v', a, '?c')))
    query.where(inner_query)
    if not (context is None):
        query.from_(context)

    # Need ordering in outer query
    if "order" in params:
        if params["order"] == True:
            # Order by subject URI
            query.order_by("?s")
        else:
            # Match another variable, order by it
            query.optional_group(("?s", params["order"], "?order"))
            query.order_by("?order")

    table = self._to_table(self._execute(query))
    subjects = {}
    results = []
    for match in table:
        # Make sure subject and predicate are URIs (they have to be!),
        # this works around bug in Virtuoso -- it sometimes returns
        # URIs as Literals.
        subject = URIRef(match["s"])
        predicate = URIRef(match["p"])
        value = match["v"]

        # Add subject to result list if it's not there
        if not subject in subjects:
            instance_data = {"direct" : {}}
            subjects[subject] = instance_data
            results.append((subject, instance_data))

        # Add predicate to subject's direct predicates if it's not there
        direct_attributes = subjects[subject]["direct"]
        if not predicate in direct_attributes:
            direct_attributes[predicate] = {}

        # Add value to subject->predicate if ...
        predicate_values = direct_attributes[predicate]
        if not value in predicate_values:
            predicate_values[value] = []

        # Add RDF type of the value to subject->predicate->value list
        if "c" in match:
            predicate_values[value].append(match["c"])

    return results

def get_schema_label(uri, prefix, format):
    #store = surf.Store(reader='rdflib',writer='rdflib',rdflib_store='IOMemory')
    store = surf.Store(**{"reader": "librdf", "writer": "librdf"})
    session = surf.Session(store)
    # Initialize label up front so the OWL branch cannot hit a NameError
    # when no title property is found.
    label = None
    try:
        store.load_triples(source='file://' + settings.PROJECT_PATH + '/d2rq/vocab/' + prefix + '.' + format)
        print "Loading from file : " + prefix + "." + format
    except:
        store.load_triples(source=uri)
        print "Loading from Internet : " + uri
    #store.enable_logging(True)
    ontology = session.get_class(ns.OWL.Ontology)
    if ontology.all().first():
        # The graph contains an Ontology node
        print "The schema is an OWL ontology"
        vocab_type = 'owl'
        titles = ['rdfs_comment', 'rdfs_label', 'dc_title', 'dcterms_title']
        #import pdb; pdb.set_trace()
        for o in ontology.all():
            for title in titles:
                if len(o.__getattr__(title)) > 0:
                    lookup = literal_lang_select(o.__getattr__(title))
                    if lookup is not None:
                        label = lookup
        if not label:
            label = unicode(uri)
    else:
        # This is the real method; the branch above is only for OWL
        # vocabularies whose URL differs from the subject URI, so it is not
        # necessarily worth keeping either.
        try:
            print "The schema is an RDFS vocabulary"
            vocab_type = 'rdfs'
            label = get_label(rdflib.term.URIRef(uri), uri, store)
        except:
            raise

    lookup_args = {
        'owl': (ns.OWL.Class, ns.OWL.ObjectProperty),
        'rdfs': (ns.RDFS.Class, ns.RDF.Property),
    }
    vocab_classes = list(store.reader._to_table(store.reader._execute(
        select("?s").where(("?s", a, lookup_args[vocab_type][0])).filter('(regex(str(?s),"' + uri + '","i"))')  # vocab own classes only
    )))
    vocab_proprietes = list(store.reader._to_table(store.reader._execute(
        select("?s").where(("?s", a, lookup_args[vocab_type][1])).filter('(regex(str(?s),"' + uri + '","i"))')
    )))

    cp = {'classes': [], 'proprietes': []}
    for items in ((vocab_classes, 'classes'), (vocab_proprietes, 'proprietes')):
        for triple in items[0]:
            if isinstance(triple['s'], rdflib.term.URIRef) and bool(re.compile(uri).match(unicode(triple['s']))):  # ugly patch waiting for the query to work
                clabel = get_label(triple['s'], uri, store)  # pass the subject so it can be identified
                cp[items[1]].append((unicode(triple['s']), clabel))
                print items[1] + ':' + clabel

    store.clear()
    store.close()
    session.close()
    return (label, vocab_type, cp)

def __get_by_n_queries(self, params):
    context = params.get("context", None)

    query = select("?s")
    if not (context is None):
        query.from_(context)

    self.__apply_limit_offset_order_get_by_filter(params, query)

    # Load details, for now the simplest approach with N queries.
    # Use _to_table instead of convert to preserve order.
    results = []
    for match in self._to_table(self._execute(query)):
        subject = match["s"]
        instance_data = {}

        result = self._execute(query_S(subject, True, context))
        result = self.convert(result, 'p', 'v', 'c')
        instance_data["direct"] = result

        if not params.get("only_direct"):
            result = self._execute(query_S(subject, False, context))
            result = self.convert(result, 'p', 'v', 'c')
            instance_data["inverse"] = result

        results.append((subject, instance_data))

    return results

def process(self, input, output):
    print 'processing ' + input.subject

    # prefix void: <http://rdfs.org/ns/void#>
    #
    # select ?from ?to ?overlap
    # where {
    #    ?from void:subset [
    #       a void:Linkset;
    #       void:target ?from,
    #                   ?to;
    #       void:triples ?overlap;
    #    ] .
    #    filter( ?from != ?to )
    # }
    query = select('?to ?overlap').where((input.subject, ns.VOID['subset'], '?linkset'),
                                         ('?linkset', a, ns.VOID['Linkset']),
                                         ('?linkset', ns.VOID['target'], input.subject),
                                         ('?linkset', ns.VOID['target'], '?to'),
                                         ('?linkset', ns.VOID['triples'], '?overlap')).filter('(?to != <' + input.subject + '>)')
    results = input.session.default_store.execute(query)

    for binding in results:
        target = binding[0]
        overlap = binding[1]
        #print target + ' ' + overlap
        if overlap >= 50:
            # "We arbitrarily require at least 50 links."
            # -http://richard.cyganiak.de/2007/10/lod/
            output.rdf_type.append(ns.DATAFAQS['Satisfactory'])

    if ns.DATAFAQS['Satisfactory'] not in output.rdf_type:
        output.rdf_type.append(ns.DATAFAQS['Unsatisfactory'])

    output.save()

def test_same_as_inference_works(self):
    """ Test owl:sameAs inferencing. """

    store, session = self._get_store_session()
    self._create_persons(session)

    # Let's say Jonathan is the same Person as John
    Person = session.get_class(surf.ns.FOAF["Person"])
    john = session.get_resource("http://John", Person)
    john.load()
    jonathan = session.get_resource("http://Jonathan", Person)
    jonathan.foaf_homepage = 'http://example.com'
    john[surf.ns.OWL['sameAs']] = jonathan
    session.commit()

    store.reader.define = 'input:same-as "yes"'
    query = select("?s").from_(self.CONTEXT)\
        .where((jonathan.subject, surf.ns.FOAF['name'], '?s'))
    r = store.execute_sparql(unicode(query))

    self.assertEquals(set(entry['s']['value'] for entry in r["results"]["bindings"]),
                      set([john.foaf_name[0]]))

def _classification_search(query, start, limit, **kw):
    crdf = CignoRDF()
    rootNode = kw.get('node', '').split('|')[-1]
    rootResource = crdf.Collections(rootNode)
    where = crdf.get_where_tree(rootResource)
    query_obj = select("?s").where(*where).distinct()
    resources = crdf.session.default_store.execute_sparql("%s" % query_obj)

    results = []
    for resource in resources['results']['bindings']:
        res = crdf.CignoResources(resource['s']['value'])
        result = {}
        result['title'] = res.rdfs_label.first.format()
        result['uuid'] = res.cigno_uuid.first.format()
        result['detail'] = res.subject.format()
        results.append(result)

    result = {'rows': results, 'total': len(results)}

    result['query_info'] = {
        'start': start,
        'limit': limit,
        'q': query
    }

    if start > 0:
        prev = max(start - limit, 0)
        params = urlencode({'q': query, 'start': prev, 'limit': limit})
        result['prev'] = reverse('geonode.maps.views.metadata_search') + '?' + params

    next = 3
    if next > 0:
        params = urlencode({'q': query, 'start': next - 1, 'limit': limit})
        result['next'] = reverse('geonode.maps.views.metadata_search') + '?' + params

    return result

def query_p_s(c, p, direct, context):
    """
    Construct :class:`surf.query.Query` with `?s` and `?c` as unknowns.

    :param c: the `class`
    :param p: the `predicate`
    :param bool direct: whether the predicate is direct or inverse
    :param context: the context
    :return: the query
    :rtype: :class:`surf.query.Query`
    """
    query = select('?s', '?c').distinct()
    if context:
        query.from_(context)

    for i in range(len(p)):
        s, v = ('?s', '?v{0:d}'.format(i)) if direct else ('?v{0:d}'.format(i), '?s')
        if type(p[i]) is URIRef:
            query.where((s, p[i], v))

    query.optional_group(('?s', a, '?c'))
    return query

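# A minimal usage sketch for query_p_s (added for illustration, not part of
# the original module). The FOAF predicate is only an example value, and the
# first argument `c` is accepted but unused by the function body.
from rdflib import URIRef

predicates = [URIRef("http://xmlns.com/foaf/0.1/knows")]
query = query_p_s(None, predicates, True, None)
# unicode(query) should render roughly:
#   SELECT DISTINCT ?s ?c
#   WHERE { ?s <http://xmlns.com/foaf/0.1/knows> ?v0 . OPTIONAL { ?s a ?c } }
print unicode(query)
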
def _get_by_n_queries(self, params):
    context = params.get("context", None)

    query = select("?s")
    if not (context is None):
        query.from_(context)

    _apply_solution_modifiers(params, query)

    # Load details, for now the simplest approach with N queries.
    # Use _to_table instead of convert to preserve order.
    results = []
    for match in self._to_table(self._execute(query)):
        subject = match["s"]
        instance_data = {}

        result = self._execute(query_s(subject, True, context))
        result = self.convert(result, 'p', 'v', 'c')
        instance_data["direct"] = result

        if not params.get("direct_only"):
            result = self._execute(query_s(subject, False, context))
            result = self.convert(result, 'p', 'v', 'c')
            instance_data["inverse"] = result

        results.append((subject, instance_data))

    return results

def __get_by_n_queries(self, params):
    contexts = params.get("contexts", None)

    query = select("?s")
    if contexts:
        query.from_(*contexts)
        query.from_named(*contexts)

    self.__apply_limit_offset_order_get_by_filter(params, query)

    # Load details, for now the simplest approach with N queries.
    # Use _to_table instead of convert to preserve order.
    results = []
    for match in self._to_table(self._execute(query)):
        subject = match["s"]
        instance_data = {}

        result = self._execute(query_S(subject, True, contexts))
        result = self.convert(result, 'p', 'v', 'g', 'c')
        instance_data["direct"] = result

        if not params.get("only_direct"):
            result = self._execute(query_S(subject, False, contexts))
            result = self.convert(result, 'p', 'v', 'g', 'c')
            instance_data["inverse"] = result

        results.append((subject, instance_data))

    return results

def test_from_none():
    """ Check that .from_(None) raises. """
    query = select("?s")
    with pytest.raises(ValueError):
        query.from_(None)

def test_subquery(self):
    """ Try to produce query that contains subquery in WHERE clause. """

    expected = canonical(u"""
        SELECT ?s ?p ?o
        WHERE {
            ?s ?p ?o.
            {
                SELECT ?s WHERE { ?s ?a ?b } LIMIT 3
            }
        }
    """)

    subquery = select("?s").where(("?s", "?a", "?b")).limit(3)
    query = select("?s", "?p", "?o").where(("?s", "?p", "?o"), subquery)
    result = canonical(SparqlTranslator(query).translate())
    self.assertEqual(expected, result)

def process(self, input, output):
    print 'processing ' + input.subject

    # Fails:
    #
    #PREFIX owl: <http://www.w3.org/2002/07/owl#>
    #PREFIX dcterms: <http://purl.org/dc/terms/>
    #PREFIX conversion: <http://purl.org/twc/vocab/conversion/>
    #SELECT count(distinct ?o) as ?count
    #WHERE {
    #  GRAPH <http://logd.tw.rpi.edu/source/nci-nih-gov/dataset/tobacco-law-coverage/version/2010-Aug-25/conversion/enhancement/1/subset/sample> {
    #    ?s ?p ?o .
    #    filter(regex(str(?o),'http://logd.tw.rpi.edu/id/us.*'))
    #  }
    #}

    # Passes:
    #
    #PREFIX owl: <http://www.w3.org/2002/07/owl#>
    #PREFIX dcterms: <http://purl.org/dc/terms/>
    #PREFIX conversion: <http://purl.org/twc/vocab/conversion/>
    #SELECT count(distinct ?o) as ?count
    #WHERE {
    #  GRAPH <http://logd.tw.rpi.edu/source/data-gov/dataset/1000/version/2010-Aug-30/conversion/enhancement/1/subset/sample> {
    #    ?s ?p ?o .
    #    filter(regex(str(?o),'http://logd.tw.rpi.edu/id/us.*'))
    #  }
    #}

    ####
    # Query a SPARQL endpoint
    #store = Store(reader = 'sparql_protocol', endpoint = 'http://dbpedia.org/sparql')
    #session = Session(store)
    #session.enable_logging = False
    #result = session.default_store.execute_sparql('select distinct ?type where {[] a ?type} limit 2')
    #if result:
    #    for binding in result['results']['bindings']:
    #        type = binding['type']['value']
    #        print type
    ####

    store = Store(reader = 'sparql_protocol', endpoint = 'http://logd.tw.rpi.edu/sparql')
    session = Session(store)
    session.enable_logging = False

    query = select("?o").named_group(input.subject, ('?s', '?p', '?o')).filter('regex(str(?o),"http://logd.tw.rpi.edu/id/us.*")')
    print unicode(query)

    result = session.default_store.execute(query)
    if result:
        for binding in result['results']['bindings']:
            print binding['o']['value']
        output.rdf_type.append(ns.DATAFAQS['Satisfactory'])

    if ns.DATAFAQS['Satisfactory'] not in output.rdf_type:
        output.rdf_type.append(ns.DATAFAQS['Unsatisfactory'])

    output.save()

def query_SP(s, p, direct, context):
    """ Construct :class:`surf.query.Query` with `?v` and `?c` as unknowns. """

    s, v = direct and (s, '?v') or ('?v', s)
    query = select('?v', '?c').distinct()
    query.where((s, p, v)).optional_group(('?v', a, '?c'))
    if context:
        query.from_(context)

    return query

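# An illustrative call to query_SP (not from the original source); the subject
# and predicate URIs below are placeholders.
from rdflib import URIRef

s = URIRef("http://John")
p = URIRef("http://xmlns.com/foaf/0.1/knows")
inverse_query = query_SP(s, p, False, None)
# With direct=False the subject moves into the object position, roughly:
#   SELECT DISTINCT ?v ?c
#   WHERE { ?v <http://xmlns.com/foaf/0.1/knows> <http://John> . OPTIONAL { ?v a ?c } }
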
def query_concept(s):
    """
    Construct :class:`surf.query.Query` with `?c` as the unknown.

    :param s: the `subject`
    :return: the query
    :rtype: :class:`surf.query.Query`
    """
    return select('?c').distinct().where((s, a, '?c'))

def test_simple(self):
    """ Try to produce a simple "SELECT ... WHERE ..." query. """

    expected = canonical(u"SELECT ?s ?p ?o WHERE { ?s ?p ?o }")
    query = select("?s", "?p", "?o").where(("?s", "?p", "?o"))
    result = SparqlTranslator(query).translate()

    # Translated query should be unicode object.
    self.assertTrue(isinstance(result, unicode))

    result = canonical(result)
    self.assertEqual(expected, result)

def get_label(subject, baseuri, store):
    label = None
    ns_titles = [ns.RDFS["comment"], ns.RDFS["label"], ns.DC["title"], ns.DCTERMS["title"]]
    for title in ns_titles:
        query = select("?o").where((subject, title, "?o"))
        found_title = list(store.reader._to_table(store.reader._execute(query)))
        #print 'Number of ' + title + ' labels: ' + str(len(found_title))
        if len(found_title) > 0:
            label = literal_lang_select([found_title[x]['o'] for x in range(len(found_title))])
    if label is None:
        # nothing was found at all
        label = unicode(subject).replace(baseuri, '')
    return label

def test_simple():
    """ Try to produce a simple "SELECT ... WHERE ..." query. """
    expected = canonical(u"SELECT ?s ?p ?o WHERE { ?s ?p ?o }")
    query = select("?s", "?p", "?o").where(("?s", "?p", "?o"))
    result = SparqlTranslator(query).translate()

    # Translated query should be unicode object.
    assert isinstance(result, basestring)

    result = canonical(result)
    assert expected == result

def test_union(self):
    """ Try to produce query containing union. """

    expected = canonical(u"""
        SELECT ?s
        WHERE {
            { ?s ?v1 ?v2 } UNION { ?s ?v3 ?v4 }
        }
    """)

    query = select("?s").union(("?s", "?v1", "?v2"), ("?s", "?v3", "?v4"))
    result = canonical(SparqlTranslator(query).translate())
    self.assertEqual(expected, result)

def query_P_S(c, p, direct, context):
    """ Construct :class:`surf.query.Query` with `?s` and `?c` as unknowns. """

    query = select('?s', '?c').distinct()
    if context:
        query.from_(context)

    for i in range(len(p)):
        s, v = direct and ('?s', '?v%d' % i) or ('?v%d' % i, '?s')
        if type(p[i]) is URIRef:
            query.where((s, p[i], v))

    query.optional_group(('?s', a, '?c'))
    return query

def test_str(self):
    """ Try str(query). """

    expected = canonical(u"""
        SELECT ?s ?p ?o
        WHERE {
            ?s ?p ?o
        }
    """)

    query = select("?s", "?p", "?o").where(("?s", "?p", "?o"))

    # test str()
    self.assertEqual(expected, canonical(unicode(str(query))))
    # test unicode()
    self.assertEqual(expected, canonical(unicode(query)))

def query_s(s, direct, context):
    """
    Construct :class:`surf.query.Query` with `?p`, `?v` and `?c` as unknowns.

    :param s: the `subject`
    :param bool direct: whether the predicate is direct or inverse
    :param context: the context
    :return: the query
    :rtype: :class:`surf.query.Query`
    """
    s, v = (s, '?v') if direct else ('?v', s)
    query = select('?p', '?v', '?c').distinct()
    query.where((s, '?p', v)).optional_group(('?v', a, '?c'))
    if context:
        query.from_(context)

    return query

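# A short usage sketch for query_s (illustrative only; the subject URI is a
# placeholder). Flipping `direct` swaps the subject between the subject and
# object positions of the ?p pattern.
from rdflib import URIRef

subject = URIRef("http://John")
direct_query = query_s(subject, True, None)
# Roughly: SELECT DISTINCT ?p ?v ?c WHERE { <http://John> ?p ?v . OPTIONAL { ?v a ?c } }
inverse_query = query_s(subject, False, None)
# Roughly: SELECT DISTINCT ?p ?v ?c WHERE { ?v ?p <http://John> . OPTIONAL { ?v a ?c } }
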
def test_from(self):
    """ Try to produce query that contains FROM clauses. """

    expected = canonical(u"""
        SELECT ?s ?p ?o
        FROM <http://uri1>
        FROM <http://uri2>
        WHERE {
            ?s ?p ?o
        }
    """)

    query = select("?s", "?p", "?o").where(("?s", "?p", "?o"))
    query.from_("http://uri1", URIRef("http://uri2"))
    result = canonical(SparqlTranslator(query).translate())
    self.assertEqual(expected, result)

def query_S(s, direct, contexts):
    """ Construct :class:`surf.query.Query` with `?p`, `?v` and `?g`, `?c` as unknowns. """

    s, v = direct and (s, '?v') or ('?v', s)
    query = select('?p', '?v', '?c', '?g').distinct()
    # Get predicate, objects and optionally rdf:type & named graph of
    # subject rdf:type and object rdf:type
    # TODO fails under Virtuoso as V. doesn't allow ?g to be bound to two
    # optional matches
    query.where((s, '?p', v)).optional_group(('?v', a, '?c'))\
         .optional_group(named_group('?g', (s, a, v)))\
         .optional_group(named_group('?g', ('?v', a, '?c')))
    if contexts:
        query.from_(*contexts)
        query.from_named(*contexts)

    return query

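# A hedged sketch of how query_S is used with explicit contexts (added for
# illustration; the graph URIs are placeholders). Both FROM and FROM NAMED
# clauses are emitted so the named-graph variable ?g can be bound.
from rdflib import URIRef

contexts = [URIRef("http://example.com/g1"), URIRef("http://example.com/g2")]
query = query_S(URIRef("http://John"), True, contexts)
# unicode(query) should produce, roughly:
#   SELECT DISTINCT ?p ?v ?c ?g
#   FROM <http://example.com/g1> FROM <http://example.com/g2>
#   FROM NAMED <http://example.com/g1> FROM NAMED <http://example.com/g2>
#   WHERE { <http://John> ?p ?v .
#           OPTIONAL { ?v a ?c } .
#           OPTIONAL { GRAPH ?g { <http://John> a ?v } } .
#           OPTIONAL { GRAPH ?g { ?v a ?c } } }
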
def test_exceptions(self): """ Test that exceptions are raised on invalid queries. """ store = surf.Store(reader="sparql_protocol", writer="sparql_protocol", endpoint="invalid") def try_query(): store.execute(query) query = select("?a") self.assertRaises(SparqlReaderException, try_query) def try_add_triple(): store.add_triple("?s", "?p", "?o") self.assertRaises(SparqlWriterException, try_add_triple)
def test_exceptions(self): """ Test that exceptions are raised on invalid queries. """ store = surf.Store(reader = "sparql_protocol", writer = "sparql_protocol", endpoint = "invalid") def try_query(): store.execute(query) query = select("?a") self.assertRaises(SparqlReaderException, try_query) def try_add_triple(): store.add_triple("?s", "?p", "?o") self.assertRaises(SparqlWriterException, try_add_triple)
def process(self, input, output):
    store = surf.Store(reader = 'rdflib', writer = 'rdflib', rdflib_store = 'IOMemory')
    session = surf.Session(store)
    store.load_triples(source = input.subject)
    output.datafaqs_resolved_triples = store.size()

    query = select('?triples').where((input.subject, ns.VOID['triples'], '?triples'))
    for count in store.execute(query):
        output.void_triples.append(count)
        output.rdf_type.append(ns.DATAFAQS['Satisfactory'])
        print str(store.size()) + ' dereferenced RDF triples asserted that ' + input.subject + ' has ' + str(count) + ' triples.'

    if ns.DATAFAQS['Satisfactory'] not in output.rdf_type:
        output.rdf_type.append(ns.DATAFAQS['Unsatisfactory'])
        print str(store.size()) + ' dereferenced RDF triples, but no void:triples asserted for ' + input.subject

    output.save()

def _get_by(self, params): # Decide which loading strategy to use if "full" in params: if self.use_subqueries: return self.__get_by_subquery(params) else: return self.__get_by_n_queries(params) # No details, just subjects and classes query = select("?s", "?c", "?g") self.__apply_limit_offset_order_get_by_filter(params, query) query.optional_group(("?s", a, "?c")) # Query for the same tuple to get the named graph if obtainable query.optional_group(named_group("?g", ("?s", a, "?c"))) contexts = params.get("contexts", None) if contexts: query.from_(*contexts) query.from_named(*contexts) # Load just subjects and their types table = self._to_table(self._execute(query)) # Create response structure, preserve order, don't include # duplicate subjects if some subject has multiple types subjects = {} results = [] for match in table: subject = match["s"] if not subject in subjects: instance_data = {"direct" : {a : {}}} subjects[subject] = instance_data results.append((subject, instance_data)) # "context" comes from an optional group and is missing if the # triple is stored in the unamed graph context = match.get("g") if "c" in match: concept = match["c"] subjects[subject]["direct"][a][concept] = {context: []} return results
def test_from_named():
    """ Try to produce query that contains FROM & FROM NAMED clauses. """
    expected = canonical(u"""
        SELECT ?s ?p ?o
        FROM <http://uri1>
        FROM NAMED <http://uri1>
        FROM NAMED <http://uri2>
        WHERE {
            ?s ?p ?o
        }
    """)

    query = select("?s", "?p", "?o").where(("?s", "?p", "?o"))
    query.from_("http://uri1")
    query.from_named("http://uri1", URIRef("http://uri2"))
    result = canonical(SparqlTranslator(query).translate())
    assert expected == result

def test_json_datatypes(self):
    """ Test that proper datatypes are returned. """
    # Tests for a bug wrt datatype uri with AllegroGraph
    store, session = self._get_store_session(use_default_context=False)
    Person = session.get_class(surf.ns.FOAF + "Person")

    # Store datatype
    jake = session.get_resource("http://Jake", Person)
    jake.foaf_name = "Jake"
    jake.foaf_age = 62
    jake.save()

    # Get age
    query = select('?age').where(('?s', a, Person.uri),
                                 ('?s', ns.FOAF.age, '?age'))
    result = store.execute_sparql(unicode(query))
    assert len(result['results']['bindings']) == 1
    entry = result['results']['bindings'][0]

    # Test that rdflib type is properly constructed
    age = json_to_rdflib(entry['age'])
    self.assertEquals(age.toPython(), 62)

def test_same_as_inference_works(self):
    """ Test owl:sameAs inferencing. """

    store, session = self._get_store_session()

    # Let's say Jonathan is the same Person as John
    Person = session.get_class(surf.ns.FOAF["Person"])
    john = session.get_resource("http://John", Person)
    john.load()
    jonathan = session.get_resource("http://Jonathan", Person)
    jonathan.foaf_homepage = 'http://example.com'
    john[surf.ns.OWL['sameAs']] = jonathan
    session.commit()

    store.reader.define = 'input:same-as "yes"'
    query = select("?s").from_(self.CONTEXT)\
        .where((jonathan.subject, surf.ns.FOAF['name'], '?s'))
    r = store.execute_sparql(unicode(query))

    self.assertEquals(
        set(entry['s'] for entry in r["results"]["bindings"]),
        set([john.foaf_name[0]]))

def _get_by(self, params): # Decide which loading strategy to use if "full" in params: if self.use_subqueries: return self.__get_by_subquery(params) else: return self.__get_by_n_queries(params) # No details, just subjects and classes query = select("?s", "?c") self.__apply_limit_offset_order_get_by_filter(params, query) query.optional_group(("?s", a, "?c")) context = params.get("context", None) if not (context is None): query.from_(context) # Load just subjects and their types table = self._to_table(self._execute(query)) # Create response structure, preserve order, don't include # duplicate subjects if some subject has multiple types subjects = {} results = [] for match in table: subject = match["s"] if not subject in subjects: instance_data = {"direct" : {a : {}}} subjects[subject] = instance_data results.append((subject, instance_data)) if "c" in match: concept = match["c"] subjects[subject]["direct"][a][concept] = [] return results
def _classification_search(query, start, limit, **kw):
    crdf = CignoRDF()
    rootNode = kw.get('node', '').split('|')[-1]
    rootResource = crdf.Collections(rootNode)
    where = crdf.get_where_tree(rootResource)
    query_obj = select("?s").where(*where).distinct()
    querys = sanitize_sparql("%s" % query_obj)
    resources = crdf.session.default_store.execute_sparql(querys)

    results = []
    for resource in resources['results']['bindings']:
        res = crdf.CignoResources(resource['s']['value'])
        result = {}
        result['title'] = res.rdfs_label.first.format()
        result['uuid'] = res.cigno_uuid.first.format()
        result['detail'] = res.subject.format()
        results.append(result)

    result = {'rows': results, 'total': len(results)}

    result['query_info'] = {
        'start': start,
        'limit': limit,
        'q': query
    }

    if start > 0:
        prev = max(start - limit, 0)
        params = urlencode({'q': query, 'start': prev, 'limit': limit})
        result['prev'] = reverse('geonode.maps.views.metadata_search') + '?' + params

    next = 3
    if next > 0:
        params = urlencode({'q': query, 'start': next - 1, 'limit': limit})
        result['next'] = reverse('geonode.maps.views.metadata_search') + '?' + params

    return result

def _get_by(self, params): # Decide which loading strategy to use if "full" in params: if self.use_subqueries: return self._get_by_subquery(params) else: return self._get_by_n_queries(params) # No details, just subjects and classes query = select("?s", "?c") _apply_solution_modifiers(params, query) query.optional_group(("?s", a, "?c")) context = params.get("context", None) if not (context is None): query.from_(context) # Load just subjects and their types table = self._to_table(self._execute(query)) # Create response structure, preserve order, don't include # duplicate subjects if some subject has multiple types subjects = {} results = [] for match in table: subject = match["s"] if subject not in subjects: instance_data = {"direct": {a: {}}} subjects[subject] = instance_data results.append((subject, instance_data)) if "c" in match: concept = match["c"] subjects[subject]["direct"][a][concept] = [] return results
def __get_by_subquery(self, params):
    contexts = params.get("contexts", None)

    inner_query = select("?s")
    inner_params = params.copy()
    if "order" in params:
        # "order" needs to stay in subquery,
        # but doesn't do anything useful in main query
        del params["order"]

    self.__apply_limit_offset_order_get_by_filter(inner_params, inner_query)

    query = select("?s", "?p", "?v", "?c", "?g").distinct()
    # Get values with object type & context
    # TODO we need to query both contexts, from ?s -> rdf_type &
    # ?v -> rdf_type but Virtuoso does not bind ?g twice. Bug or feature?
    query.group(('?s', '?p', '?v'),
                optional_group(('?v', a, '?c')),
                optional_group(named_group("?g", ("?s", a, "?v"))))
                #optional_group(named_group("?g", ("?v", a, "?c"))))
    query.where(inner_query)
    if contexts:
        query.from_(*contexts)
        query.from_named(*contexts)

    # Need ordering in outer query
    if "order" in params:
        if params["order"] == True:
            # Order by subject URI
            query.order_by("?s")
        else:
            # Match another variable, order by it
            query.optional_group(("?s", params["order"], "?order"))
            query.order_by("?order")

    table = self._to_table(self._execute(query))
    subjects = {}
    results = []
    for match in table:
        # Make sure subject and predicate are URIs (they have to be!),
        # this works around bug in Virtuoso -- it sometimes returns
        # URIs as Literals.
        subject = URIRef(match["s"])
        predicate = URIRef(match["p"])
        value = match["v"]

        # Add subject to result list if it's not there
        if not subject in subjects:
            instance_data = {"direct" : {}}
            subjects[subject] = instance_data
            results.append((subject, instance_data))

        # Add predicate to subject's direct predicates if it's not there
        direct_attributes = subjects[subject]["direct"]
        if not predicate in direct_attributes:
            direct_attributes[predicate] = {}

        # "context" comes from an optional group and is missing if the
        # triple is stored in the unnamed graph
        context = match.get("g")

        # Add value to subject->predicate if ...
        predicate_values = direct_attributes[predicate]
        if not value in predicate_values:
            predicate_values[value] = {context: []}

        # Add RDF type of the value to subject->predicate->value list
        if "c" in match:
            predicate_values[value][context].append(match["c"])

    return results

def _get_by_subquery(self, params):
    context = params.get("context", None)

    inner_query = select("?s")
    inner_params = params.copy()
    if "order" in params:
        # "order" needs to stay in subquery,
        # but doesn't do anything useful in main query
        del params["order"]

    _apply_solution_modifiers(inner_params, inner_query)

    if params.get('direct_only'):
        query = select("?s", "?p", "?v", "?c").distinct()
        query.group(('?s', '?p', '?v'), optional_group(('?v', a, '?c')))
    else:
        direct_query = select("?s", "?p", "?v", "?c", '("0" AS ?i)')
        direct_query.distinct()
        direct_query.group(('?s', '?p', '?v'), optional_group(('?v', a, '?c')))

        indirect_query = select("?s", "?p", "?v", "?c", '("1" AS ?i)')
        indirect_query.distinct()
        indirect_query.group(('?v', '?p', '?s'), optional_group(('?v', a, '?c')))

        query = select("?s", "?p", "?v", "?c", "?i")
        query.union(direct_query, indirect_query)

    query.where(inner_query)
    if not (context is None):
        query.from_(context)

    # Need ordering in outer query
    if "order" in params:
        if params["order"] == True:
            # Order by subject URI
            query.order_by("?s")
        else:
            # Match another variable, order by it
            query.optional_group(("?s", params["order"], "?order"))
            query.order_by("?order")

    table = self._to_table(self._execute(query))
    subjects = {}
    results = []
    for match in table:
        # Make sure subject and predicate are URIs (they have to be!),
        # this works around bug in Virtuoso -- it sometimes returns
        # URIs as Literals.
        subject = URIRef(match["s"])
        predicate = URIRef(match["p"])
        value = match["v"]
        # Inverse given if direct_only is False
        inverse = match.get("i") == "1"

        # Add subject to result list if it's not there
        if subject not in subjects:
            instance_data = {"direct": {}, "inverse": {}}
            subjects[subject] = instance_data
            results.append((subject, instance_data))

        if inverse:
            attributes = subjects[subject]["inverse"]
        else:
            attributes = subjects[subject]["direct"]

        # Add predicate to subject's predicates if it's not there
        if predicate not in attributes:
            attributes[predicate] = {}

        # Add value to subject->predicate if ...
        predicate_values = attributes[predicate]
        if value not in predicate_values:
            predicate_values[value] = []

        # Add RDF type of the value to subject->predicate->value list
        if "c" in match:
            predicate_values[value].append(match["c"])

    return results

def get_not_empty_members(self, res):
    where = self.get_where_tree(res)
    query = select("?c").where(*where).distinct()
    querys = sanitize_sparql("%s" % query)
    result = self.session.default_store.execute_sparql(querys)
    return result['results']['bindings']

def process(self, input, output):
    print 'processing ' + input.subject

    endpoint = False
    # TODO: add dcat:distribution [ a sd:NamedGraph; sd:name ; prov:hadLocation
    # like https://github.com/timrdf/DataFAQs/blob/master/services/sadi/ckan/add-metadata-materials/sample-inputs/arrayexpress-e-afmx-1.ttl#L49
    if len(input.void_sparqlEndpoint) > 0:
        # <http://datahub.io/dataset/dbpedia>
        #    void:sparqlEndpoint <http://dbpedia.org/sparql> .
        endpoint = self.surfSubject(input.void_sparqlEndpoint.first)
        print 'void:sparqlEndpoint: ' + endpoint
    else:
        # <http://datahub.io/dataset/dbpedia>
        #    dcat:distribution [
        #       dct:format [
        #          a dct:IMT;
        #          rdf:value  "api/sparql";
        #          rdfs:label "api/sparql"
        #       ];
        #       a dcat:Distribution ;
        #       dcat:accessURL <http://dbpedia.org/sparql>
        #    ]; .
        query = select("?url").where((input.subject, ns.DCAT['distribution'], "?distro"),
                                     ("?distro", ns.DCTERMS['format'], "?format"),
                                     ("?format", ns.RDF['value'], rdflib.Literal('api/sparql')),
                                     ("?distro", ns.DCAT['accessURL'], "?url"))
        for bindings in input.session.default_store.execute(query):
            #print 'creator: ' + bindings[0] + ' ' + bindings[1]
            endpoint = self.surfSubject(bindings[0])
            print 'dcat:distribution dcat:accessURL: ' + endpoint

    if endpoint is False:
        print 'WARNING: could not find SPARQL endpoint to query; skipping ' + input.subject
        output.rdf_type.append(ns.DATAFAQS['Unsatisfactory'])
        output.save()
        return

    ng = 'http://purl.org/twc/health/source/healthdata-tw-rpi-edu/dataset/cr-full-dump/version/latest'

    Class = output.session.get_class(ns.RDFS['Class'])
    Predicate = output.session.get_class(ns.RDF['Property'])

    ####
    # Query a SPARQL endpoint
    store = Store(reader = 'sparql_protocol', endpoint = endpoint)
    session = Session(store)
    session.enable_logging = False

    for predicate in self.predicates:
        results = session.default_store.execute_sparql(
            # TODO: handle optional named graph.
            ''' select (count(*) as ?count) where { [] <''' + predicate + '''> [] }'''
        )  #graph <'''+ng+'''> {
        count = False
        if isinstance(results, xml.dom.minidom.Document):
            for result in results.getElementsByTagName('result'):
                for binding in result.getElementsByTagName('binding'):
                    for value in result.getElementsByTagName('literal'):
                        count = int(value.firstChild.data)
                        predR = output.session.get_resource(predicate, Predicate)
                        predR.sio_count = count
                        predR.save()
                        output.rdf_type.append(ns.DATAFAQS['Satisfactory'])
            print str(count) + ' ' + predicate
        elif results:
            print results
            for binding in results['results']['bindings']:
                count = binding['count']['value']
                print count

    for classU in self.classes:
        results = session.default_store.execute_sparql(
            # TODO: handle optional named graph.
            ''' select (count(*) as ?count) where { [] a <''' + classU + '''> }'''
        )  #graph <'''+ng+'''> {
        count = False
        if isinstance(results, xml.dom.minidom.Document):
            for result in results.getElementsByTagName('result'):
                for binding in result.getElementsByTagName('binding'):
                    for value in result.getElementsByTagName('literal'):
                        count = int(value.firstChild.data)
                        classR = output.session.get_resource(classU, Class)
                        classR.sio_count = count
                        classR.save()
                        output.rdf_type.append(ns.DATAFAQS['Satisfactory'])
            print str(count) + ' ' + classU
        elif results:
            print results
            for binding in results['results']['bindings']:
                count = binding['count']['value']
                print count

    ####
    # Query the RDF graph POSTed: input.session.default_store.execute

    # Walk through all Things in the input graph (using SuRF):
    # Thing = input.session.get_class(ns.OWL['Thing'])
    # for person in Thing.all():

    # Create a class in the output graph:
    # Document = output.session.get_class(ns.FOAF['Document'])

    if ns.DATAFAQS['Satisfactory'] not in output.rdf_type:
        output.rdf_type.append(ns.DATAFAQS['Unsatisfactory'])
    else:
        Ontology = output.session.get_class(ns.OWL['Ontology'])
        output.void_vocabulary.append(Ontology('http://www.w3.org/ns/prov#'))

    output.save()

def test_from_none(self):
    """ Check that .from_(None) raises. """
    query = select("?s")
    self.assertRaises(ValueError, lambda: query.from_(None))

def query_Concept(subject):
    """ Construct :class:`surf.query.Query` with `?c` as the unknown. """
    return select('?c').distinct().where((subject, a, '?c'))