def contact_sparql_endpoint():
    # Contact the SPARQL endpoint for a given dataset (dataset name = /data).
    # SPARQL service = SPARQL endpoint = the triplestore that stores the annotations;
    # results are requested in JSON format.
    # The project's local SPARQL endpoint is reachable at: http://localhost:3030/data/query
    # sparql_endpoint = SPARQLWrapper("http://localhost:3030/data/query", returnFormat="json")
    # The project's official SPARQL endpoint is reachable at: http://tweb2015.cs.unibo.it:8080/data
    # Each group has a graph on this same endpoint; its IRI is:
    # http://vitali.web.cs.unibo.it/raschietto/graph/ltw1537
    # Authentication is required with user=ltw1537, password=
    # sparql_endpoint = SPARQLWrapper("http://tweb2015.cs.unibo.it:8080/data", returnFormat="json")
    # Example SPARQL endpoint (public DBpedia).
    sparql_endpoint = SPARQLWrapper("http://dbpedia.org/sparql", returnFormat="json")
    # Example query: all labels of the dbpedia resource "Asturias".
    query = """
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    SELECT ?label
    WHERE { <http://dbpedia.org/resource/Asturias> rdfs:label ?label }
    """
    # Set the SPARQL query on the wrapper.
    sparql_endpoint.setQuery(query)
    # Execute the query and convert the response to Python objects.
    results = sparql_endpoint.query().convert()
    # Print the results; note each `result["label"]` is a binding dict
    # (with "value"/"xml:lang" keys), printed as-is for demo purposes.
    for result in results["results"]["bindings"]:
        print(result["label"])
def Inizialier(endpoint):
    """Build the per-endpoint helpers: returns (endpoint, query generator, wrapper).

    `endpoint` is a mapping that must contain a 'url' key; the wrapper's
    network timeout is fixed at 300 seconds.
    """
    # mongo.startTest(endpoint)
    # add Lock
    wrapper = SPARQLWrapper(endpoint['url'])
    wrapper.setTimeout(300)
    generator = queryGenerator.QueryGenerator()
    return endpoint, generator, wrapper
class SPARQLKB(KB):
    '''SPARQL endpoint knowledge base: a read-only KB backed by a remote
    SPARQL endpoint. Only ask() is functional; all mutation hooks raise.'''

    def __init__(self, sentence=None, endpoint=None):
        '''Constructor method
        Usage:
        sentence - RDF triple to be added to KB (not implemented; default:None)
        endpoint - URL of endpoint to query'''
        self.sparql = SPARQLWrapper(endpoint)
        if sentence:
            # tell() unconditionally raises NotImplementedError, so passing a
            # sentence here makes construction fail by design.
            self.tell(sentence)

    def tell(self, sentence):
        '''Adding triples to RDF store - not implemented'''
        raise NotImplementedError('Adding sentences to RDF knowledge bases is not implemented')

    def ask(self, query):
        '''Queries the endpoint and returns a list of dicts, one per result
        row, mapping variable name -> bound value (as plain strings).'''
        self.sparql.setQuery(query)
        self.sparql.setReturnFormat(XML)
        # The XML result is a DOM document; collect every <result> element.
        res = self.sparql.query().convert().getElementsByTagName('result')
        # For each result row, map each <binding name="..."> to the text of
        # its first grandchild (the value node's text content).
        res = [dict([(bin.attributes['name'].nodeValue, bin.firstChild.firstChild.nodeValue)
                     for bin in node.getElementsByTagName('binding')])
               for node in res]
        return res

    def retract(self, sentence):
        '''Removing triples from RDF store - not implemented'''
        raise NotImplementedError('Removing sentences to RDF knowledge bases is not implemented')

    def _encode( self, key, value ):
        '''Encoding a value in the triple store - not implemented'''
        raise NotImplementedError('Encoding values in RDF knowledge bases is not implemented')

    def _decode( self, key ):
        '''Decoding a value from the triple store - not implemented'''
        raise NotImplementedError('Decoding values from RDF knowledge bases is not implemented')
def sparql():
    """Proxy a SPARQL query (with reasoning flag) to the Stardog store.

    Reads `query` and `inferencing` from the request arguments; if either is
    missing, or the query fails, responds with {"result": "Error"}.
    """
    query = request.args.get("query", None)
    inferencing = request.args.get("inferencing")
    # Guard clause: both parameters are required.
    if not (query and inferencing):
        return jsonify({"result": "Error"})
    wrapper = SPARQLWrapper(TRIPLE_STORE + "/query")
    wrapper.setQuery(query)
    wrapper.setReturnFormat(JSON)
    wrapper.addParameter("Accept", "application/sparql-results+json")
    wrapper.addParameter("reasoning", inferencing)
    try:
        return jsonify(wrapper.query().convert())
    except Exception:
        return jsonify({"result": "Error"})
def describe(request, type, path, format='rdf'):
    """Serve a symmetric concise bounded description (SCBD) of a
    doc.metalex.eu URI in turtle ('ttl'), n3 ('n3') or RDF/XML ('rdf');
    any other format yields an HTML error page."""
    uri = '<http://doc.metalex.eu/{0}/{1}>'.format(type, path)
    # Get a symmetric concise bounded description (SCBD): triples where the
    # URI is subject plus triples where it is object.
    q = ("CONSTRUCT {" + uri + " ?p ?o . ?s ?p2 " + uri + " .} "
         "WHERE { {" + uri + " ?p ?o .} UNION {?s ?p2 " + uri + " .} }")
    sparql = SPARQLWrapper(SPARQL_ENDPOINT)
    sparql.setQuery(q)
    cg = setNamespaces(sparql.queryAndConvert())
    if format == 'ttl':
        response = HttpResponse(cg.serialize(format='turtle'))
        response['Content-Type'] = 'application/x-turtle'
    elif format == 'n3':
        response = HttpResponse(cg.serialize(format='n3'))
        response['Content-Type'] = 'text/rdf+n3'
    elif format == 'rdf':
        # RDF/XML is served straight from the endpoint response.
        response = HttpResponse(sparql.query())
        response['Content-Type'] = 'application/rdf+xml'
    else:
        t = get_template('message.html')
        html = t.render(RequestContext(request, {
            'title': 'Oops',
            'text': 'We do not serve content of this type for this URI'}))
        return HttpResponse(html)
    return response
class QueryManager:
    """Wraps a SPARQL read endpoint (via SPARQLWrapper) and a raw HTTP
    update endpoint (via urllib2) for one named graph."""

    def __init__(self, endpoint=settings.ENDPOINT, updateEndpoint=settings.UPDATE,
                 graph=settings.GRAPH, format=JSON):
        self.endpoint = SPARQLWrapper(endpoint)
        self.endpoint.setReturnFormat(format)
        # not using SPARQLwrapper because update endpoints don't seem to work
        self.updateEndpoint = updateEndpoint
        self.graph = graph

    def query(self, query):
        """Run a read query and return the converted result."""
        self.endpoint.setQuery(query)
        return self.endpoint.query().convert()

    def update(self, query):
        """POST an update query; returns False if the response mentions 'error'."""
        # not using SPARQLwrapper because update endpoints don't seem to work.
        # Fix: the query must be form-encoded — raw concatenation broke any
        # update containing '&', '+', '=' or '#' characters.
        import urllib
        data = 'update=' + urllib.quote_plus(query)
        response = urllib2.urlopen(self.updateEndpoint, data=data).read()
        return 'error' not in response

    def insert(self, query):
        """Insert data into this manager's graph.

        Fix: the update's success/failure is now propagated to the caller
        (previously the result was silently dropped and None returned)."""
        return self.update('INSERT DATA { GRAPH <' + self.graph + '> ' + query + '}')

    def ask(self, uri):
        """True iff any triple with subject `uri` exists in the graph."""
        q = "ASK { GRAPH <" + self.graph + "> { <" + uri + "> ?p ?o . } }"
        return self.query(q)['boolean']

    def describe(self, uri):
        """DESCRIBE `uri` if present in the graph, else return False."""
        if self.ask(uri):
            q = "DESCRIBE <" + uri + "> FROM <" + self.graph + ">"
            return self.query(q)[str(uri)]
        else:
            return False
def sparqlOneEndpoint(endpoint, query, apikey=None):
    """Run `query` against one SPARQL endpoint and return the JSON result
    dict, or None on any failure.

    endpoint - endpoint URL
    query    - SPARQL query string
    apikey   - optional API key sent as the custom 'apikey' parameter
    """
    out = None
    try:
        sparql = SPARQLWrapper(endpoint)
        if apikey:
            sparql.addCustomParameter("apikey", apikey)
        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        sparql.setTimeout(30)
        #print "\n", "# " * 7, "\n", query, "\n", "# " * 7, "\n"
        out = sparql.query().convert()
    except Exception:
        # Fix: the bare `except:` also swallowed KeyboardInterrupt/SystemExit;
        # only genuine errors should be reported and suppressed.
        print("Could not process formulated query on indicated endpoint.")
    return out
class sparqlquerier:
    """Looks up YAGO 'wikicat_' rdf:type values for named resources on the
    LinkedData SPARQL endpoint."""

    def __init__(self):
        self.sparql = SPARQLWrapper("https://linkeddata1.calcul.u-psud.fr/sparql")
        self.sparql.setReturnFormat(JSON)
        self.baseq = '''
        select * where {
        <http://yago-knowledge.org/resource/%s> ?property ?valueOrObject .
        FILTER regex(str(?property ), "^http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
        FILTER regex(str(?valueOrObject ), "^http://yago-knowledge.org/resource/wikicat_")
        }
        LIMIT 100
        '''

    def query(self, name):
        """Query the endpoint for `name` (whitespace folded to underscores)."""
        self.sparql.setQuery(self.baseq % ("_".join(name.split()),))
        return self.sparql.query().convert()

    def wikicat(self, socialtags):
        """Map each wikicat URI to the list of social tags that produced it.

        Failed queries are reported and skipped."""
        wcdict = {}
        for tag in socialtags:
            try:
                bindings = self.query(tag)["results"]["bindings"]
            except Exception as e:
                print("query failed %s" % e)
                continue
            for binding in bindings:
                # each wikicat maps a list of socialTag
                wcdict.setdefault(binding["valueOrObject"]["value"], []).append(tag)
        return wcdict
class SPARQLEntityLinker(Linker):
    """Links query tokens (unigrams and bigrams) to DBpedia Place/Person/Agent
    URIs by exact English-label match."""

    def __init__(self, url="http://dbpedia.org/sparql"):
        self.url = url
        self.sparql = SPARQLWrapper(url)
        self.sparql.setReturnFormat(JSON)
        self.query_processor = Query()
        self.query = '''select distinct ?uri where {
        ?uri rdfs:label "%s"@en .
        {?uri rdf:type <http://dbpedia.org/ontology/Place>}
        UNION
        {?uri rdf:type <http://dbpedia.org/ontology/Person>}
        UNION
        {?uri rdf:type <http://dbpedia.org/ontology/Agent>}
        }'''
        self.category = {
            "GPE": "http://dbpedia.org/ontology/Country",
            "PERSON": "http://dbpedia.org/ontology/Person",
        }

    def linking(self, query):
        """Return candidate entity URIs for every unigram and adjacent bigram
        of `query` (title-cased before label matching)."""
        tokens = self.query_processor.tokenization(query)
        # Candidate surface forms: all single tokens, then adjacent pairs.
        candidates = list(tokens)
        candidates += [' '.join(pair) for pair in zip(tokens, tokens[1:])]
        links = []
        for cand in candidates:
            self.sparql.setQuery(self.query % cand.title())
            bindings = self.sparql.query().convert()["results"]["bindings"]
            links += [b["uri"]["value"] for b in bindings]
        return links
def construct():
    """Flask view: optionally insert an equivalent-class axiom built from the
    symptom/anatomy id arrays, then query for a matching disease."""
    app.logger.debug('You arrived at ' + url_for('construct'))
    app.logger.debug('I received the following arguments' + str(request.args))
    endpoint = request.args.get('endpoint', None)
    symptomArray = request.args.getlist('symptomArray[]')
    atonomyArray = request.args.getlist('atonomyArray[]')
    b = bool(int(request.args.get('b', 1)))
    # Early exit: with b set, skip the INSERT and just query the database.
    if b:
        return queryDatabase(endpoint, array=[symptomArray, atonomyArray])
    # Symptoms are flagged True, anatomy entries False.
    id_tuple = [(x, True) for x in symptomArray] + [(x, False) for x in atonomyArray]
    triple = create_equivelant_class(id_tuple)
    prefix = '\n'.join(['prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>',
                        'prefix owl: <http://www.w3.org/2002/07/owl#>',
                        'prefix xsd: <http://www.w3.org/2001/XMLSchema#>',
                        'prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>'])
    query = '%s \n INSERT DATA { %s }' % (prefix, triple)
    wrapper = SPARQLWrapper(endpoint)
    wrapper.setQuery(query)
    wrapper.query().convert()
    yourDisease = queryYourDisease(endpoint)
    if yourDisease != None:
        return yourDisease
    return queryDatabase(endpoint, array=[symptomArray, atonomyArray])
class SparqlEndpoint(object): def __init__(self, endpoint, prefixes={}): self.sparql = SPARQLWrapper(endpoint) self.prefixes = { "dbpedia-owl": "http://dbpedia.org/ontology/", "owl": "http://www.w3.org/2002/07/owl#", "xsd": "http://www.w3.org/2001/XMLSchema#", "rdfs": "http://www.w3.org/2000/01/rdf-schema#", "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", "foaf": "http://xmlns.com/foaf/0.1/", "dc": "http://purl.org/dc/elements/1.1/", "dbpedia2": "http://dbpedia.org/property/", "dbpedia": "http://dbpedia.org/", "skos": "http://www.w3.org/2004/02/skos/core#", "foaf": "http://xmlns.com/foaf/0.1/", } self.prefixes.update(prefixes) self.sparql.setReturnFormat(JSON) def query(self, q): lines = ["PREFIX %s: <%s>" % (k, r) for k, r in self.prefixes.iteritems()] lines.extend(q.split("\n")) query = "\n".join(lines) print query self.sparql.setQuery(query) results = self.sparql.query().convert() return results["results"]["bindings"]
class DbpediaReader:
    # Streams persons, roles, relations, redirects and types from the public
    # DBpedia SPARQL endpoint and hands each batch to the db layer for storage.
    def __init__(self, db):
        self.db = db
        self.sparql = SPARQLWrapper("http://dbpedia.org/sparql")

    @staticmethod
    def __print_query_results(title, offset):
        # Progress trace: which query resource is running, at which offset.
        print(title + " " + str(offset))
        pass

    def __read_results_from_query_resource(self, resource_name, *args):
        # Load the SPARQL template named `resource_name`, interpolate the
        # positional args with str.format, run it and return the raw bindings.
        query = get_resource(resource_name).format(*args)
        results = self.__exec_query(query)
        return results['results']['bindings']

    def __save_results_from_query_resource_batched(self, save_method, resource_name, *args):
        # Page through results 10000 rows at a time. The offset is appended as
        # the LAST format argument of the template, so every batched template
        # must place its OFFSET placeholder after the other parameters.
        offset = 0
        while True:
            batch = self.__read_results_from_query_resource(resource_name, *args, offset)
            save_method(batch)
            DbpediaReader.__print_query_results(resource_name, offset)
            if len(batch) < 10000:
                # A short page means we reached the final batch.
                break
            offset += 10000

    def __exec_query(self, query):
        # Run one query with JSON results.
        self.sparql.setQuery(query)
        self.sparql.setReturnFormat(JSON)
        return self.sparql.query().convert()

    def save_raw_persons(self):
        # Batched import of persons into the db layer.
        return self.__save_results_from_query_resource_batched(self.db.insert_raw_persons, 'person_query.txt')

    def save_raw_roles(self):
        # Batched import of roles into the db layer.
        return self.__save_results_from_query_resource_batched(self.db.insert_raw_roles, 'role_query.txt')

    def save_raw_relations(self):
        # One batched import per Relation enum member.
        for relation in Relation:
            self.__save_raw_relations_for_type(relation)

    def __save_raw_relations_for_type(self, relation):
        # For each predicate name of this relation type, run the relation
        # query and tag the stored batch with the relation's enum name.
        names = relation.get_relations_names()
        for name in names:
            self.__save_results_from_query_resource_batched(
                lambda data: self.db.insert_raw_relations(DbpediaReader.__create_relation_dict(relation.name, data)),
                'relation_query.txt', name)

    def save_raw_redirects(self):
        # Wiki redirects are stored as relations of type OTHER.
        self.__save_results_from_query_resource_batched(
            lambda data: self.db.insert_raw_relations(DbpediaReader.__create_relation_dict(Relation.OTHER.name, data)),
            'wiki_redirect_query.txt')

    def save_raw_types(self):
        # Batched import of rdf:type rows.
        self.__save_results_from_query_resource_batched(self.db.save_raw_types, 'type_query.txt')

    @staticmethod
    def __create_relation_dict(name, relations):
        # Shape expected by db.insert_raw_relations: {'type': ..., 'relations': ...}.
        # print(relations)
        return dict(type=name, relations=relations)
def get_places_within(upper, lower):
    """Return scr:Place bindings whose (lat, lon) lie strictly inside the
    bounding box given by `upper` (max lat, max lon) and `lower` (min lat,
    min lon); an empty list when nothing matches."""
    sparql = SPARQLWrapper(app.config['endpoint'])
    sparql.setReturnFormat(JSON)
    sparql.addParameter('Accept', 'application/sparql-results+json')
    sparql.addParameter('reasoning', 'true')
    prefixes = '''
    prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    prefix owl: <http://www.w3.org/2002/07/owl#>
    prefix xsd: <http://www.w3.org/2001/XMLSchema#>
    prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    '''
    query = '''
    select ?place ?lat ?lon where {
    ?place a scr:Place .
    ?place geo:lat ?lat .
    ?place geo:long ?lon .
    FILTER(xsd:float(?lat) < %f && xsd:float(?lon) < %f && xsd:float(?lat) > %f && xsd:float(?lon) > %f )
    }
    '''
    bbox = (upper[0], upper[1], lower[0], lower[1])
    sparql.setQuery(prefixes + query % bbox)  # noqa
    bindings = sparql.query().convert()['results']['bindings']
    return bindings if bindings else []
class Sparql():
    # Fetches a DBpedia resource's properties and keeps only those whose
    # predicate matches one of PROPERTIES (mother/father/spouse/abstract).
    def __init__(self, resource):
        # resource: the trailing path segment of a dbpedia.org/resource/ URI.
        self.PROPERTIES = ['mother', 'father', 'spouse', 'abstract']
        self.wrapper = SPARQLWrapper("http://dbpedia.org/sparql")
        # result: {short property name: (cleaned value, binding type)}
        self.result = self.queryResource(resource)

    def queryResource(self, resource):
        '''Query all (?property ?value) pairs of the resource and keep the
        ones whose predicate URL contains a name from PROPERTIES.'''
        rs = {}
        resource = "<http://dbpedia.org/resource/" + resource + ">"
        self.wrapper.setQuery("""
            PREFIX db: <http://dbpedia.org/resource/>
            select ?property ?value where {
            { """ + resource + """ ?property ?value. }
            }
        """)
        self.wrapper.setReturnFormat(JSON)
        results = self.wrapper.query().convert()
        for result in results['results']['bindings']:
            if any(prop in result['property']['value'] for prop in self.PROPERTIES):
                # NOTE(review): nesting reconstructed as — language-tagged
                # values are kept only when tagged 'en'; untagged values are
                # always kept. Confirm against the original indentation.
                if 'xml:lang' in result['value']:
                    if result['value']['xml:lang'] == 'en':
                        rs[self.cleanProperty(result['property']['value'])] = (
                            self.cleanProperty(result['value']['value']),
                            result['value']['type'])
                else:
                    rs[self.cleanProperty(result['property']['value'])] = (
                        self.cleanProperty(result['value']['value']),
                        result['value']['type'])
        return rs

    def cleanProperty(self, prop):
        # Keep only the last path segment of a URI (or the value unchanged
        # when it contains no '/').
        return str(prop.split("/")[-1])
def describe(endpoint, query):
    """Run `query` on `endpoint` and return the converted result.

    Returns None when a RuntimeWarning is raised; any other exception
    propagates to the caller.
    """
    wrapper = SPARQLWrapper(endpoint)
    wrapper.setQuery(query)
    try:
        return wrapper.query().convert()
    except RuntimeWarning:
        return None
class ReaderPlugin(RDFQueryReader):
    # SuRF-style reader plugin backed by a SPARQLWrapper endpoint.
    # NOTE(review): Python 2 only — uses `except X, _:` and three-argument
    # raise syntax.
    def __init__(self, *args, **kwargs):
        RDFQueryReader.__init__(self, *args, **kwargs)
        self.__endpoint = kwargs['endpoint'] if 'endpoint' in kwargs else None
        self.__results_format = JSON
        self.__sparql_wrapper = SPARQLWrapper(self.__endpoint, self.__results_format)
        # Opt-in HTTP keep-alive, guarded because older SPARQLWrapper
        # releases lack setUseKeepAlive.
        if kwargs.get("use_keepalive", "").lower().strip() == "true":
            if hasattr(SPARQLWrapper, "setUseKeepAlive"):
                self.__sparql_wrapper.setUseKeepAlive()
        # Try to use cjson for faster JSON decoding; fall back silently.
        try:
            import cjson
            jsonlayer.use("cjson")
            self.log.info("using cjson")
        except:
            self.log.warning("cjson not available, falling back on slower simplejson")

    # Read-only accessors for the configured endpoint and result format.
    endpoint = property(lambda self: self.__endpoint)
    results_format = property(lambda self: self.__results_format)

    def _to_table(self, result):
        # Convert a SPARQL JSON result into a list of {var: rdflib term}
        # rows; non-dict or non-SELECT results pass through unchanged.
        if not isinstance(result, dict):
            return result
        if not "results" in result:
            return result
        converted = []
        for binding in result["results"]["bindings"]:
            rdf_item = {}
            for key, obj in binding.items():
                try:
                    rdf_item[key] = toRdflib(obj)
                except ValueError:
                    # Bindings that cannot be converted are dropped.
                    continue
            converted.append(rdf_item)
        return converted

    def _ask(self, result):
        ''' returns the boolean value of a ASK query '''
        return result.get("boolean")

    def execute_sparql(self, q_string, format = 'JSON'):
        # Run a raw query string; wraps endpoint/parse failures in
        # SparqlReaderException while preserving the original traceback
        # (py2 three-argument raise).
        try:
            self.log.debug(q_string)
            self.__sparql_wrapper.setQuery(q_string)
            return self.__sparql_wrapper.query().convert()
        except EndPointNotFound, _:
            raise SparqlReaderException("Endpoint not found"), None, sys.exc_info()[2]
        except QueryBadFormed, _:
            raise SparqlReaderException("Bad query: %s" % q_string), None, sys.exc_info()[2]
def query_SPARQL_Endpoint(self, endpoint_URI, query_str):
    """Execute `query_str` against `endpoint_URI` and return the response
    info (headers); returns None and optionally logs on failure."""
    try:
        wrapper = SPARQLWrapper(endpoint_URI)
        wrapper.setQuery(query_str)
        return wrapper.query().info()
    except Exception as e:
        if LinkedDataProfiler.DEBUG:
            print('I was not able to execute the SPARQL query against %s\nReason: %s' %(endpoint_URI,e))
def __init__(self, url, query_head, query_foot):
    '''Initialise the underlying SPARQLWrapper at `url` with JSON results,
    store the fixed head/foot of the batch query, and compute the space
    left for the batched body within MAX_QUERY_SIZE.'''
    SPARQLWrapper.__init__(self, url)
    self.setReturnFormat(JSON)
    self.query_head = query_head
    self.query_foot = query_foot
    # Whatever MAX_QUERY_SIZE leaves after the constant head and foot is
    # the budget for batch parameters.
    self.max_body_size = self.MAX_QUERY_SIZE - len(query_head) - len(query_foot)
class halo:
    # Builds a "halo" (a neighbourhood of related URIs with counts and
    # labels) for each entity URI via two configured DBpedia queries, and
    # caches results in the MongoDB 'halo.halos' collection.
    def __init__(self):
        # config.json supplies the query templates keyed by name.
        self.config = json.load(open("../config/config.json"))
        self.sparql = SPARQLWrapper("http://dbpedia.org/sparql")
        self.termDB = MongoClient()["semantified"]["terms"]
        self.halodb = MongoClient()["halo"]["halos"]

    def run(self, query):
        # Execute `query`, strip non-ASCII bytes from the raw response, and
        # decode it with jsonlayer. NOTE(review): on ANY failure this sleeps
        # 60s and retries via unbounded recursion — a persistent error will
        # eventually exhaust the recursion limit.
        try:
            self.sparql.setQuery(query)
            self.sparql.setReturnFormat(JSON)
            result = self.sparql.query()
            #jsonlayer.use('cjson')
            body = result.response.read().encode('ascii','ignore')
            fixed_body = body.decode("ascii")
            result = jsonlayer.decode(fixed_body)
            return result["results"]["bindings"]
        except :
            print(query)
            time.sleep(60)
            return self.run(query)

    def makeQuery(self, uri, querykey):
        # Interpolate `uri` into the configured template named `querykey`.
        return self.config[querykey] % (uri)

    def insert(self, obj):
        # Upsert the halo document by its _id.
        self.halodb.update({"_id": obj["_id"]}, obj, True)

    def isprocessed(self, uri):
        # True when a halo document for `uri` already exists.
        return len(list(self.halodb.find({"_id" : uri}))) > 0

    def getHalo(self, uri):
        # Fetch and store the halo for `uri` unless already cached.
        if not self.isprocessed(uri):
            query = self.makeQuery(uri, "queryone")
            result = self.run(query)
            query = self.makeQuery(uri, "querytwo")
            result.extend(self.run(query))
            halo = {}
            halo["_id"] = uri
            halo["uri"] = uri
            halo["halo"] = {}
            for each in result:
                halouri = each["aura"]["value"]
                # '.' is illegal in Mongo keys, hence the '$' substitution.
                halo["halo"][halouri.replace(".", "$")] = {}
                obj = {}
                obj["halouri"] = halouri
                obj["count"] = each["auraCount"]["value"]
                obj["label"] = each["label"]["value"]
                halo["halo"][halouri.replace(".", "$")] = obj
            self.insert(halo)
            print("processed halo for : " + uri)
        else :
            print("previously processed uri : " + uri )

    def getdatadb(self):
        # Cursor over all semantified terms (no server-side timeout).
        return self.termDB.find(timeout=False)

    def processhalofromdb(self):
        # Process halos for every URI attached to every stored term.
        data = self.getdatadb()
        for each in data :
            alluri = each["allURI"]
            # NOTE(review): under Python 3 `map` is lazy, so getHalo would
            # never run here — confirm this module targets Python 2.
            map(self.getHalo, alluri)
def get_triples(self):
    """DESCRIBE this resource in its graph and return the resulting rdflib
    graph with the configured namespace prefixes bound."""
    wrapper = SPARQLWrapper(self.endpoint)
    wrapper.setQuery(self.queries["describe"] % (self.uri, self.graph))
    graph = wrapper.query().convert()
    logging.debug("Returning %d triples describing resource <%s>" % (len(graph), self.uri))
    #FIXME: enrich with metadata
    for prefix, namespace in self.conf.data.namespaces():
        graph.bind(prefix, namespace)
    return graph
def sendSparqlQuery(query, endpoint, reasoning='false'):
    """Run `query` on `endpoint` (JSON results, optional reasoning flag) and
    return the converted result."""
    wrapper = SPARQLWrapper(endpoint)
    wrapper.setReturnFormat(JSON)
    wrapper.addParameter('Accept','application/sparql-results+json')
    wrapper.addParameter('reasoning', reasoning)
    wrapper.setQuery(query)
    return wrapper.query().convert()
def query_tunnel(request):
    """Tunnel the 'query' GET parameter to the configured endpoint (with
    basic auth when credentials are configured) and return JSON."""
    sparql = SPARQLWrapper(endpoint)
    if bg_user and bg_pw:
        sparql.setHTTPAuth(BASIC)
        sparql.setCredentials(bg_user, bg_pw)
    sparql.setQuery(request.GET.get('query'))
    sparql.setReturnFormat(JSON)
    return JsonResponse(sparql.query().convert(), safe=False)
def insert(self, insQuery):
    """Insert the given triples into the local Virtuoso smob graph.

    Fix: the original ignored `insQuery` entirely and always inserted the
    placeholder triple `<s> <p> <q>`; the supplied triples are now
    interpolated into the INSERT DATA block, matching the sibling insert()
    implementations elsewhere in this codebase.
    """
    print(insQuery)
    sparql = SPARQLWrapper("http://localhost:8890/sparql")
    sparql.setQuery("""
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    INSERT DATA INTO <http://localhost:8890/DAV/home/smob>
    { """ + insQuery + """ }
    """)
    results = sparql.query().convert()
def get_proxy(self, uri):
    ''' Returns the description of a proxy entity '''
    logger.info('Get proxy data about {}'.format(uri))
    # The stored query template uses a literal __URI__ placeholder.
    wrapper = SPARQLWrapper(self.sparql)
    wrapper.setQuery(self._queries['get_proxy.rq'].replace("__URI__", uri))
    return wrapper.query().convert()
class ReactomeDataSource(object):
    """Pulls Reactome pathway reactions (via the EBI BioPAX SPARQL endpoint)
    and converts them into reaction rules on an ecell-style model."""

    def __init__(self):
        self.sparql = SPARQLWrapper("https://www.ebi.ac.uk/rdf/services/reactome/sparql")

    def create_reactions(self, taxon, model):
        """For every pathway of organism `taxon`, group its reactions'
        left/right participants and add one ReactionRule per reaction.

        Participants whose identifiers cannot be turned into Species are
        skipped."""
        self.sparql.setQuery("""
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        PREFIX owl: <http://www.w3.org/2002/07/owl#>
        PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
        PREFIX dc: <http://purl.org/dc/elements/1.1/>
        PREFIX dcterms: <http://purl.org/dc/terms/>
        PREFIX foaf: <http://xmlns.com/foaf/0.1/>
        PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
        PREFIX biopax3: <http://www.biopax.org/release/biopax-level3.owl#>
        SELECT DISTINCT ?pathway ?pathwayname ?rea ?lid ?rid
        WHERE
        {
        ?pathway rdf:type biopax3:Pathway .
        ?pathway biopax3:displayName ?pathwayname .
        ?pathway biopax3:organism <http://identifiers.org/taxonomy/""" + taxon + """> .
        ?pathway biopax3:pathwayComponent ?rea .
        ?rea biopax3:left ?l .
        ?l biopax3:entityReference ?lid .
        ?rea biopax3:right ?r .
        ?r biopax3:entityReference ?rid .
        }
        """)
        self.sparql.setReturnFormat(JSON)
        results = self.sparql.query().convert()
        # Group participant ids per reaction URI.
        lefts = defaultdict(list)
        rights = defaultdict(list)
        for row in results['results']['bindings']:
            lefts[row['rea']['value']].append(row['lid']['value'])
            rights[row['rea']['value']].append(row['rid']['value'])
        for rea in lefts.keys():
            # Fix: the original reused `r` for the outer reaction key AND the
            # inner right-hand participant loop, shadowing the reaction id.
            rr = ReactionRule()
            for lid in set(lefts[rea]):
                # Species name = last path segment of the identifier URI.
                try:
                    rr.add_reactant(Species(lid.split("/")[-1]))
                except Exception:
                    # Fix: narrowed from bare except (which also trapped
                    # KeyboardInterrupt/SystemExit); invalid species skipped.
                    pass
            for rid in set(rights[rea]):
                try:
                    rr.add_product(Species(rid.split("/")[-1]))
                except Exception:
                    pass
            model.add_reaction_rule(rr)
def query(endpoint='', query=''):
    """Run `query` on `endpoint` (JSON results) and return the result
    bindings; returns [] after printing a message on failure."""
    sparql = SPARQLWrapper(endpoint, returnFormat="json")
    sparql.setQuery(query)
    try:
        ret = sparql.queryAndConvert()
    except Exception:
        # Fix: the bare `except:` also swallowed KeyboardInterrupt/SystemExit.
        print('Query Error')
        return []
    return ret['results']['bindings']
class VirtuosoBackend(Backend):
    """Backend that talks to a Virtuoso server's /sparql/ endpoint."""

    def __init__(self, address):
        # `address` is the server base URL, without the /sparql/ suffix.
        self.endpoint = SPARQLWrapper(address + "/sparql/")

    def query(self, query):
        """Run `query` and return the JSON-converted results."""
        self.endpoint.setReturnFormat(JSON)
        self.endpoint.setQuery(query)
        return self.endpoint.query().convert()
def insert(self, insQuery):
    """This function takes in a insert statment and returns whether it was executed fine or not"""
    # NOTE(review): despite the docstring, no value is returned — callers
    # always receive None; confirm intended contract.
    print("Triples: " + insQuery)
    store = SPARQLWrapper("http://localhost:8890/sparql")
    store.setQuery("""
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    INSERT DATA INTO <http://localhost:8890/DAV/home/test>
    { """ + insQuery + """}
    """)
    results = store.query().convert()
def insert(self, insQuery):
    """This function takes in a insert statment and returns whether it was executed fine or not"""
    # NOTE(review): despite the docstring, no value is returned — callers
    # always receive None; confirm intended contract.
    print("Triples: " + insQuery)
    store = SPARQLWrapper("http://knoesis-twit.cs.wright.edu:8890/sparql")
    store.setQuery("""
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    INSERT DATA INTO <http://knoesis-twit.cs.wright.edu/dav/dbpedia/categories>
    { """ + insQuery + """}
    """)
    results = store.query().convert()
class SPARQLEndpoint(object):
    """Minimal JSON-returning wrapper around a single SPARQL endpoint."""

    # init with endpoint URL
    def __init__(self, endpoint):
        self.sparql = SPARQLWrapper(endpoint)
        self.sparql.setReturnFormat(JSON)

    # delegate SPARQL query to endpoint
    def query(self, q):
        """Run `q` and return the converted (JSON) result."""
        self.sparql.setQuery(q)
        return self.sparql.query().convert()
def findParents(URI):
    # Returns a pathList which includes all parents per hop in tuples [(child,parent),(child,parent)]
    # Recursively climbs rdfs:subClassOf one hop at a time, appending each
    # hop's (child, parent) tuples to URI, until a hop yields no parents.
    # Relies on module-level state: `iup` is the current recursion depth
    # (reset to 0 on completion), `endpoint` the SPARQL endpoint URL, and
    # `pathList` receives the final accumulated structure.
    global iup, pathList, endpoint
    list_out = []
    iup += 1
    if iup == 1:
        # First hop: URI[0] is expected to hold the seed as URI[0][0].
        sparql = SPARQLWrapper(endpoint)
        sparql.addCustomParameter("infer", "false")  # raw hierarchy, no inference
        sparql.setReturnFormat(JSON)
        querystring = 'PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> SELECT DISTINCT ?super WHERE { <' + URI[iup - 1][0] + '> rdfs:subClassOf ?super . FILTER isURI(?super) }'
        sparql.setQuery(querystring)
        results = sparql.query().convert()
        for x in results["results"]["bindings"]:
            list_out.append((URI[iup - 1][0], x["super"]["value"]))
    else:
        # Later hops: query the parent of every (child, parent) tuple found
        # in the previous hop (index [i][1] = that tuple's parent).
        for i in range(len(URI[iup - 1])):
            sparql = SPARQLWrapper(endpoint)
            sparql.addCustomParameter("infer", "false")
            sparql.setReturnFormat(JSON)
            querystring = 'PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> SELECT DISTINCT ?super WHERE { <' + URI[iup - 1][i][1] + '> rdfs:subClassOf ?super . FILTER isURI(?super) }'
            sparql.setQuery(querystring)
            results = sparql.query().convert()
            for x in results["results"]["bindings"]:
                list_out.append((URI[iup - 1][i][1], x["super"]["value"]))
    if len(list_out) > 0:
        # Found parents at this hop: record them and recurse for the next hop.
        URI.append(list_out)
        findParents(URI)
    else:
        # No more parents: reset the depth counter and publish the result.
        iup = 0
        pathList = URI
    return pathList
try: query = query + ' LIMIT %s' % int(getattr(context, LIMIT)) except (ValueError, TypeError, AttributeError): pass try: query = query + ' OFFSET %s' % int(getattr(context, OFFSET)) except (ValueError, TypeError, AttributeError): pass self.resetQuery() if self._is_contextual(context): self.addParameter("default-graph-uri", context.identifier) self.timeout = self._timeout self.setQuery(query) doc = ElementTree.parse(SPARQLWrapper.query(self).response) # ElementTree.dump(doc) for rt, vars in _traverse_sparql_result_dom( doc, as_dictionary=True, node_from_result=self.node_from_result): yield (rt.get(s, s), rt.get(p, p), rt.get(o, o)), None def triples_choices(self, (subject, predicate, object_), context=None): """ A variant of triples that can take a list of terms instead of a single term in any slot. Stores can implement this to optimize the response time from the import default 'fallback' implementation, which will iterate over each term in the list and dispatch to triples. """ raise NotImplementedError('Triples choices currently not supported')
def get_types_d(resource):
    '''
    given a single resource return every predicate in a common dict
    with the predicate name as key and the corresponding url and ns
    e.g: Jacques_Tati

    Returned shape: {type_name: {"urls": [...], "ns": [...], "resource": resource}}
    Raises Exception when DBpedia returns no rdf:type bindings.
    '''
    # Defaults; `ns` is recomputed per result below and `dtype` is unused.
    ns = "http://dbpedia.org"
    dtype = "resource"
    #prefix db-owl: <http://dbpedia.org/ontology/>
    q = '''
    prefix db-owl: <http://dbpedia.org/ontology/>
    SELECT ?type WHERE { <http://dbpedia.org/resource/%s> rdf:type ?type . }
    ''' %(resource)
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    sparql.setQuery(q)
    sparql.setReturnFormat(JSON)
    is_type_of = {}
    results = sparql.query().convert()
    nb_results = len(results["results"]["bindings"])
    if nb_results == 0:
        raise Exception("No results found for %s" %resource)
    type_urls = [r["type"]["value"] for r in results["results"]["bindings"]]
    for val in type_urls:
        # for r in results["results"]["bindings"]:
        if "#" in val:
            # Fragment-style URI: namespace before '#', name after it;
            # a trailing 's' is stripped (naive de-pluralisation).
            ns = val.split("#")[0]
            type_v = val.split("#")[-1]
            type_v = re.sub('s$', '', type_v)
        else:
            # Path-style URI: namespace is everything but the last segment.
            ns = "/".join(val.split("/")[:-1])
            if "entity" in val:
                #wikidata IDS
                # Resolve the wikidata entity and use its English label.
                r = requests.get(val)
                r_json =r.json()
                type_v = [e["labels"]["en"]["value"] for e in r_json["entities"].values()][0].lower()
            else:
                # Strip a trailing numeric id (e.g. Yago category ids).
                m = re.match('(?P<name>.*?)(?P<id>\d+)$', val)
                if m is not None:
                    type_v = m.group("name").split("/")[-1]
                else:
                    if "Yago" in val or "Wikicat" in val:
                        # Keep whatever follows the Yago/Wikicat marker.
                        type_v = re.split("/(Yago|Wikicat)", val)[-1]
                    else:
                        type_v = val.split("/")[-1]
        #finally mapping
        if type_v in is_type_of.keys():
            is_type_of[type_v]["urls"].append(val)
            is_type_of[type_v]["ns"].append(ns)
        else:
            is_type_of[type_v] = {"urls": [val], "ns":[ns]}
            is_type_of[type_v]["resource"] = resource
    return is_type_of
class SEARCH:
    # Gene/GO-term lookup against a SPARQL endpoint with a pickle-file cache;
    # results come back as pandas DataFrames. Query templates are read from
    # the queries/ directory and %-interpolated.
    def __init__(self, url_pbg):
        #define cache directory
        self.cache = "cache/"
        #define url
        self.url_pbg = url_pbg
        #define sparql
        self.sparql_pbg = SPARQLWrapper(self.url_pbg)
        self.sparql_pbg.setReturnFormat(JSON)

    def cache_name(self, method, parameters):
        # Cache key: method name + md5 of the pickled parameters.
        key = method + "_" + hashlib.md5(pickle.dumps(parameters)).hexdigest()
        return (key)

    def get_location(self, id):
        # Genomic location of gene `id` as a DataFrame indexed by gene_id;
        # empty DataFrame when the endpoint returns nothing.
        filename = self.cache + self.cache_name("get_location", id)
        try:
            # Cache hit: unpickle and return.
            infile = open(filename, "rb")
            new_object = pickle.load(infile)
            infile.close()
            return (new_object)
        except FileNotFoundError:
            file = open("queries/gene_location.sparql", "r")
            query = file.read()
            file.close()
            self.sparql_pbg.setQuery(query % id)
            # JSON example
            response = self.sparql_pbg.query().convert()
            result = []
            if response["results"]["bindings"]:
                for item in response["results"]["bindings"]:
                    result.append([
                        item["gene_id"]["value"], item["chromosome"]["value"],
                        item["begin_ref"]["value"], item["begin_pos"]["value"],
                        item["end_ref"]["value"], item["end_pos"]["value"]
                    ])
                df = pd.DataFrame(result)
                df.columns = [
                    "gene_id", "chromosome", "begin_ref", "begin_pos",
                    "end_ref", "end_pos"
                ]
                df = df.set_index("gene_id")
                df["begin_pos"] = pd.to_numeric(df["begin_pos"])
                df["end_pos"] = pd.to_numeric(df["end_pos"])
                #cache
                outfile = open(filename, "wb")
                pickle.dump(df, outfile)
                outfile.close()
                return df
            else:
                return pd.DataFrame()

    def compute_interval(self, g1, g2):
        # Interval spanning from the end of gene g1 to the start of gene g2
        # (orientation-aware). Prints a warning and returns None on bad input.
        locations = pd.concat([self.get_location(g1), self.get_location(g2)])
        # NOTE(review): get_location() does not emit a "location" column —
        # this display call looks inconsistent with the schema; verify.
        display(locations[["location"]])
        if (len(locations.index) != 2):
            print("unexpected number of rows in locations:", len(locations.index))
        elif (locations.iloc[0]['end_pos'] > locations.iloc[1]['begin_pos']) & (g1 != g2):
            print("unexpected order", locations.index[0], "and", locations.index[1])
        else:
            result = []
            # Pick the downstream-most coordinate of g1 as the interval begin.
            if locations.iloc[0]["end_pos"] > locations.iloc[0]["begin_pos"]:
                result.append([
                    "begin", locations.iloc[0]["end_ref"],
                    locations.iloc[0]["end_pos"]
                ])
            else:
                result.append([
                    "begin", locations.iloc[0]["begin_ref"],
                    locations.iloc[0]["begin_pos"]
                ])
            # Pick the upstream-most coordinate of g2 as the interval end.
            if locations.iloc[1]["begin_pos"] < locations.iloc[1]["end_pos"]:
                result.append([
                    "end", locations.iloc[1]["begin_ref"],
                    locations.iloc[1]["begin_pos"]
                ])
            else:
                result.append([
                    "end", locations.iloc[1]["end_ref"],
                    locations.iloc[1]["end_pos"]
                ])
            df = pd.DataFrame(result)
            df.columns = ["type", "ref", "pos"]
            df = df.set_index("type")
            return df

    def make_interval(self, ref, start, end):
        # Build an interval DataFrame directly from explicit coordinates.
        result = []
        result.append(["begin", ref, start])
        result.append(["end", ref, end])
        df = pd.DataFrame(result)
        df.columns = ["type", "ref", "pos"]
        df = df.set_index("type")
        return df

    def interval_genes(self, interval):
        # All genes falling within `interval` (cached); empty DataFrame when
        # the endpoint returns nothing.
        filename = self.cache + self.cache_name("interval_genes", interval)
        try:
            infile = open(filename, "rb")
            new_object = pickle.load(infile)
            infile.close()
            return (new_object)
        except FileNotFoundError:
            file = open("queries/interval_genes.sparql", "r")
            query = file.read()
            file.close()
            self.sparql_pbg.setQuery(
                query % {
                    "beginRef": interval.loc["begin"]["ref"],
                    "beginPos": interval.loc["begin"]["pos"],
                    "endRef": interval.loc["end"]["ref"],
                    "endPos": interval.loc["end"]["pos"]
                })
            # JSON example
            response = self.sparql_pbg.query().convert()
            result = []
            if response["results"]["bindings"]:
                for item in response["results"]["bindings"]:
                    row = []
                    row.append(item["gene_id"]["value"])
                    row.append(item["chromosome"]["value"])
                    row.append(item["begin_pos"]["value"])
                    row.append(item["end_pos"]["value"])
                    result.append(row)
                df = pd.DataFrame(result)
                df.columns = ["gene_id", "chromosome", "start", "end"]
                #cache
                outfile = open(filename, "wb")
                pickle.dump(df, outfile)
                outfile.close()
                return df
            else:
                return pd.DataFrame()

    def go_genes(self, graphEnsembl, graphUniprot, go):
        # Counts of genes overall vs genes annotated with GO term `go`, for
        # the given Ensembl/Uniprot graph pair (cached).
        filename = self.cache + self.cache_name(
            "go_genes", [graphEnsembl, graphUniprot, go])
        try:
            infile = open(filename, "rb")
            new_object = pickle.load(infile)
            infile.close()
            return (new_object)
        except FileNotFoundError:
            file = open("queries/go_genes.sparql", "r")
            query = file.read()
            file.close()
            self.sparql_pbg.setQuery(
                query % {
                    "graphEnsembl": graphEnsembl,
                    "graphUniprot": graphUniprot,
                    "go": go
                })
            # JSON example
            response = self.sparql_pbg.query().convert()
            result = []
            if response["results"]["bindings"]:
                for item in response["results"]["bindings"]:
                    row = []
                    row.append(item["gene_count"]["value"])
                    row.append(item["gene_with_go_count"]["value"])
                    result.append(row)
                df = pd.DataFrame(result)
                df.columns = ["gene_count", "gene_with_go_count"]
                #cache
                outfile = open(filename, "wb")
                pickle.dump(df, outfile)
                outfile.close()
                return df
            else:
                return pd.DataFrame()

    def gene_goterms(self, id):
        # GO annotations of gene `id` (cached); includes the source graphs so
        # later counts can be computed per graph pair.
        filename = self.cache + self.cache_name("gene_goterms", id)
        try:
            infile = open(filename, "rb")
            new_object = pickle.load(infile)
            infile.close()
            return (new_object)
        except FileNotFoundError:
            file = open("queries/gene_goterm.sparql", "r")
            query = file.read()
            file.close()
            self.sparql_pbg.setQuery(query % id)
            # JSON example
            response = self.sparql_pbg.query().convert()
            result = []
            if response["results"]["bindings"]:
                for item in response["results"]["bindings"]:
                    row = []
                    row.append(item["gene_id"]["value"])
                    row.append(item["go_id"]["value"])
                    row.append(item["go_term"]["value"])
                    row.append(item["go_cat"]["value"])
                    row.append(item["graph_ensembl"]["value"])
                    row.append(item["graph_uniprot"]["value"])
                    result.append(row)
                df = pd.DataFrame(result)
                df.columns = [
                    "gene_id", "go_id", "go_term", "go_cat", "graph_ensembl",
                    "graph_uniprot"
                ]
                #cache
                outfile = open(filename, "wb")
                pickle.dump(df, outfile)
                outfile.close()
                return df
            else:
                return pd.DataFrame()

    def genes_goterms(self, ids):
        # Concatenated GO annotations for a list of gene ids.
        list = []
        for id in ids:
            list.append(self.gene_goterms(id))
        return pd.concat(list).reset_index(drop=True)

    def get_go_numbers(self, goterms, genes):
        #construct the number of genes with/without goterm
        # Builds a per-GO-term 2x2 contingency table (inside/outside the
        # interval x annotated/not annotated), runs one-sided Fisher exact
        # tests both ways, and BH-adjusts the 'greater' p-values.
        graphs = list(
            goterms.groupby(["graph_ensembl", "graph_uniprot"]).indices.keys())
        golist = goterms["go_id"].unique()
        #construct df
        df = pd.DataFrame(goterms.groupby("go_id").size(),
                          columns=["interval_genes_annotated"])
        #add gene numbers
        df["interval_genes_not_annotated"] = len(
            genes.index) - df["interval_genes_annotated"]
        df["outside_genes_annotated"] = 0
        df["outside_genes_not_annotated"] = 0
        df["total_genes"] = 0
        for go in golist:
            # Sum annotated/total gene counts over every source graph pair.
            for graph in graphs:
                result = self.go_genes(graph[0], graph[1], go)
                df.loc[go, "outside_genes_annotated"] = df.loc[
                    go, "outside_genes_annotated"] + int(
                        result.loc[0, "gene_with_go_count"])
                df.loc[go, "total_genes"] = df.loc[go, "total_genes"] + int(
                    result.loc[0, "gene_count"])
            # Subtract the interval's own genes to get the outside counts.
            df.loc[go, "outside_genes_annotated"] = df.loc[
                go, "outside_genes_annotated"] - df.loc[go,
                                                        "interval_genes_annotated"]
            df.loc[go, "outside_genes_not_annotated"] = df.loc[
                go, "total_genes"] - df.loc[
                    go, "outside_genes_annotated"] - df.loc[
                        go, "interval_genes_annotated"] - df.loc[
                            go, "interval_genes_not_annotated"]
        #do fisher tests
        for go in golist:
            m = [[
                df.loc[go, "interval_genes_annotated"],
                df.loc[go, "outside_genes_annotated"]
            ],
                 [
                     df.loc[go, "interval_genes_not_annotated"],
                     df.loc[go, "outside_genes_not_annotated"]
                 ]]
            df.loc[go, "p_less"] = stats.fisher_exact(m, alternative="less")[1]
            df.loc[go,
                   "p_greater"] = stats.fisher_exact(m,
                                                     alternative="greater")[1]
        df["p_adjusted"] = p_adjust(df["p_greater"], method="BH")
        return df
class WDSparqlQueries(object):
    """Extendable wrapper for SPARQL queries against Wikidata (WDQS).

    params: optional depending on type of query
    (for qid provide prop and string, for label provide qid).
    """

    def __init__(self, qid=None, prop=None, string=None):
        self.qid = qid
        self.prop = prop
        self.string = string
        # Legacy WDQS path; it is an alias of https://query.wikidata.org/sparql.
        self.endpoint = SPARQLWrapper(
            "https://query.wikidata.org/bigdata/namespace/wdq/sparql")
        self.wd = 'PREFIX wd: <http://www.wikidata.org/entity/>'
        self.wdt = 'PREFIX wdt: <http://www.wikidata.org/prop/direct/>'

    def execute_query(self, query):
        """Run *query* on the endpoint and return the parsed JSON result dict."""
        self.endpoint.setQuery(query)
        self.endpoint.setReturnFormat(JSON)
        return self.endpoint.query().convert()

    def wd_prop2qid(self):
        """Resolve (self.prop, self.string) to a QID.

        :param prop: 'P351' Entrez gene id (ex. print( SPARQL_for_qidbyprop('P351','899959')))
        :param string: '899959' String value
        :return: QID Q21514037, or the string 'None' when nothing matched
        """
        arguments = '?gene wdt:{} "{}"'.format(self.prop, self.string)
        select_where = 'SELECT * WHERE {{{}}}'.format(arguments)
        query = self.wdt + " " + select_where
        results = self.execute_query(query)
        # Fix: only an empty/malformed binding should yield 'None'. The old
        # bare `except Exception` also swallowed real endpoint errors raised
        # while inspecting the payload; those now propagate.
        try:
            rawqid = results['results']['bindings'][0]['gene']['value']
        except (IndexError, KeyError):
            return 'None'
        # The value is a full entity URI; the QID is its last path segment.
        return rawqid.split('/')[-1]

    def wd_qid2label(self):
        """Return the English rdfs:label of self.qid, or 'None' if absent.

        :param string: 'Q2458943' String value
        :return: QID 'Label'
        """
        arguments = ' wd:{} rdfs:label ?label. Filter (LANG(?label) = "en") .'.format(
            self.qid)
        select_where = 'SELECT ?label WHERE {{{}}}'.format(arguments)
        # rdfs: is not prefixed here; WDQS predefines it server-side.
        query = self.wd + " " + select_where
        results = self.execute_query(query)
        try:
            return results['results']['bindings'][0]['label']['value']
        except (IndexError, KeyError):
            return 'None'

    def wd_qid2property(self):
        """Return the value of property self.prop on item self.qid.

        :param string: 'Q2458943' String value
        :return: "property value'
        """
        arguments = ' wd:{} wdt:{} ?prop.'.format(self.qid, self.prop)
        select_where = 'SELECT ?prop WHERE {{{}}}'.format(arguments)
        query = self.wd + " " + select_where
        results = self.execute_query(query)
        try:
            rawprop = results['results']['bindings'][0]['prop']['value']
        except (IndexError, KeyError):
            # 'None'.split('/')[-1] == 'None' in the original, so this is
            # behavior-identical for the miss case.
            return 'None'
        return rawprop.split("/")[-1]
#!/usr/bin/python # -*- coding: utf-8 -*- import rdflib import os import shutil from json import JSONEncoder from SPARQLWrapper import SPARQLWrapper, JSON tps_graph = "http://vitali.web.cs.unibo.it/raschietto/graph/ltw1543" query = """SELECT ?s ?p ?o { GRAPH <%s> {?s ?p ?o .} }""" % (tps_graph) # NB: Usare 'DELETE' al posto di 'INSERT' per rimuovere # i dati dal triplestore sparql = SPARQLWrapper("http://tweb2015.cs.unibo.it:8080/data/query", returnFormat="json") sparql.setQuery(query) sparql.setMethod('POST') q = sparql.query() print JSONEncoder().encode(q.convert())
def generate_hcls_from_sparql(sparql_endpoint, rdf_distribution_uri, g=None):
    """Query the provided SPARQL endpoint to compute HCLS metadata.

    For every named graph in the endpoint, run each packaged HCLS query
    template against it and accumulate the returned Turtle into *g*.
    Successful and failing queries are logged to two Markdown report files
    next to the package root (both are truncated on every call).

    :param sparql_endpoint: URL of the endpoint to profile
    :param rdf_distribution_uri: URI substituted for ?_graph_uri when no
        graph is set (the templates' default-graph mode)
    :param g: rdflib Graph to extend; a fresh Graph is created when omitted
    :return: the populated rdflib Graph
    """
    # Fix: the original used the mutable default `g=Graph()`, so every
    # no-argument call accumulated into the SAME shared graph.
    if g is None:
        g = Graph()
    sparql = SPARQLWrapper(sparql_endpoint)
    root = pathlib.Path(__file__).parent.resolve()
    # Truncate both report files so each run starts clean.
    with open(root / '../FAIRMETADATA_FAILED_QUERIES.md', 'w') as f:
        f.write('# Failing HCLS SPARQL queries\n\n\n')
    with open(root / '../FAIRMETADATA_SUCCESS_QUERIES.md', 'w') as f:
        f.write('# Generated HCLS metadata\n\n\n')
    query_prefixes = """PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX dqv: <http://www.w3.org/ns/dqv#>
PREFIX hcls: <http://www.w3.org/hcls#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX dctypes: <http://purl.org/dc/dcmitype/>
PREFIX dcat: <http://www.w3.org/ns/dcat#>
PREFIX void: <http://rdfs.org/ns/void#>
PREFIX void-ext: <http://ldf.fi/void-ext#>\n"""
    query_select_all_graphs = 'SELECT DISTINCT ?graph WHERE { GRAPH ?graph {?s ?p ?o} }'
    sparql.setQuery(query_select_all_graphs)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    # print('Get all graphs query Results:')
    # print(results)
    select_all_graphs_results = results["results"]["bindings"]

    # Compute HCLS metadata per graph
    for graph_row in select_all_graphs_results:
        graph = graph_row['graph']['value']
        print('Computing HCLS metadata for graph ' + graph)
        for filename in os.listdir(
                pkg_resources.resource_filename('fair_metadata', 'queries')):
            with open(
                    pkg_resources.resource_filename('fair_metadata',
                                                    'queries/' + filename),
                    'r') as f:
                # Substitute the graph placeholders in the query template.
                if (graph):
                    sparql_query = f.read().replace('?_graph_uri', graph)
                    sparql_query = sparql_query.replace(
                        '<?_graph_start>', 'GRAPH <' + graph + '> {')
                    sparql_query = sparql_query.replace('<?_graph_end>', '}')
                else:
                    sparql_query = f.read().replace('?_graph_uri',
                                                    rdf_distribution_uri)
                    sparql_query = sparql_query.replace('<?_graph_start>', '')
                    sparql_query = sparql_query.replace('<?_graph_end>', '')
                complete_query = query_prefixes + sparql_query
                # print(complete_query)
                try:
                    sparql.setQuery(complete_query)
                    sparql.setReturnFormat(TURTLE)
                    # sparql.setReturnFormat(JSONLD)
                    results = sparql.query().convert()
                    # g.parse(data=results, format="turtle")
                    # g.parse(data=results, format="json-ld")
                    # Parse into a scratch graph first so a parse failure
                    # does not leave g partially updated.
                    hcls_graph = Graph()
                    hcls_graph.parse(data=results, format="turtle")
                    g += hcls_graph
                    with open(root / '../FAIRMETADATA_SUCCESS_QUERIES.md',
                              'a') as f:
                        f.write('## Returned RDF \n\n```turtle\n' +
                                results.decode('utf-8') + "\n```\n\n" +
                                'Query: \n\n```sparql\n' + complete_query +
                                "\n```\n\n" + 'In SPARQL endpoint: ' +
                                sparql_endpoint + "\n\n---\n")
                except Exception as e:
                    # Best-effort: record the failure and keep going with the
                    # remaining query templates.
                    print('SPARQL query failed:')
                    print(complete_query)
                    print(e)
                    with open(root / '../FAIRMETADATA_FAILED_QUERIES.md',
                              'a') as f:
                        f.write('## Query failed \n\n```sparql\n' +
                                complete_query + "\n```\n\n" +
                                'In SPARQL endpoint: ' + sparql_endpoint +
                                "\n> " + str(e) + "\n\n---\n")
    # print(g.serialize(format='json-ld', indent=4))
    # print(g.serialize(format='turtle', indent=4))
    return g


# { #     "@context": "/contexts/GraphMap", #     "@id": "/graph_maps", #     "@type": "hydra:Collection", #     "hydra:member": [ #         { #             "@id": "/graph_maps/3", #             "@type": "http://example.org/GraphMap", #             "subjectType": "http://www.w3.org/2000/01/rdf-schema#Resource", #             "predicate": "http://semanticscience.org/resource/has-participant", #             "objectType": "http://www.w3.org/2000/01/rdf-schema#Resource", #             "dataset": "/datasets/3", #             "id": 3 #         }, #         { #             "@id": "/graph_maps/4", #             "@type": "http://example.org/GraphMap", #             "subjectType": "http://www.w3.org/2000/01/rdf-schema#Resource", #             "predicate": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", #             "objectType": "http://www.ebi.ac.uk/efo/EFO_0001067", #             "dataset": "/datasets/3", #             "id": 4 #         } #     ], #     "hydra:totalItems": 2 # }
# { #     "@context": "/contexts/Dataset", #     "@id": "/datasets", #     "@type": "hydra:Collection", #     "hydra:member": [ #         { #             "@id": "/datasets/3", #             "@type": "http://www.w3.org/ns/dcat#Dataset", #             "identifier": "mw1", #             "title": "Infections", #             "description": "A dataset of infections", #             "publisher": "http://fairdata.systems", #             "license": "http://fairdata.systems/dataset/infections/license", #             "publicationDate": "2020-11-12T11:25:00+00:00", #             "publisher_name": "Mark Wilkinson", #             "graphmaps": [ #                 "/graph_maps/3", #                 "/graph_maps/4" #             ], #             "dataservices": [ #                 "/data_services/1" #             ], #             "id": 3 #         } #     ], #     "hydra:totalItems": 1 # }
# { #     "@context": "/contexts/DataService", #     "@id": "/data_services", #     "@type": "hydra:Collection", #     "hydra:member": [ #         { #             "@id": "/data_services/1", #             "@type": "http://www.w3.org/ns/dcat#DataService", #             "name": "Infections endpoint", #             "description": "A SPARQL endpoint with infection data", #             "url": "http://fairdata.systems:8990/sparql", #             "serviceType": "SPARQL", #             "conformsTo": "https://www.w3.org/TR/sparql11-overview/", #             "publisher": "http://fairdata.systems", #             "dataset": "/datasets/3", #             "id": 1 #         } #     ], #     "hydra:totalItems": 1 # }
class WriterPlugin(RDFWriter):
    """RDFWriter plugin that persists resources through a SPARQL/Update
    endpoint via SPARQLWrapper (POST, JSON results)."""

    def __init__(self, reader, *args, **kwargs):
        super(WriterPlugin, self).__init__(reader, *args, **kwargs)
        # Reuse the reader's endpoint when it is the matching ReaderPlugin;
        # otherwise the endpoint must be supplied explicitly.
        if isinstance(self.reader, ReaderPlugin):
            self._endpoint = self.reader.endpoint
        else:
            self._endpoint = kwargs.get("endpoint")
        self._combine_queries = kwargs.get("combine_queries")
        self._results_format = JSON
        self._sparql_wrapper = SPARQLWrapper(self._endpoint,
                                             returnFormat=self._results_format)
        user = kwargs.get('user', None)
        password = kwargs.get('password', None)
        if user is not None and password is not None:
            self._sparql_wrapper.setCredentials(user, password)
        self._sparql_wrapper.setMethod("POST")
        default_graph = kwargs.get('default_graph', None)
        if default_graph:
            self._sparql_wrapper.addDefaultGraph(default_graph)

    @property
    def endpoint(self):
        return self._endpoint

    def _save(self, *resources):
        for context, items in _group_by_context(resources).items():
            # Deletes all triples with matching subjects.
            remove_query = _prepare_delete_many_query(items, context)
            insert_query = _prepare_add_many_query(items, context)
            self._execute(remove_query, insert_query)

    def _update(self, *resources):
        for context, items in _group_by_context(resources).items():
            # Explicitly enumerates triples for deletion.
            remove_query = _prepare_selective_delete_query(items, context)
            insert_query = _prepare_add_many_query(items, context)
            self._execute(remove_query, insert_query)

    def _remove(self, *resources, **kwargs):
        for context, items in _group_by_context(resources).items():
            # Deletes all triples with matching subjects.
            inverse = kwargs.get("inverse")
            query = _prepare_delete_many_query(items, context, inverse)
            self._execute(query)

    def _size(self):
        """
        Return total count of triples, not implemented.
        """
        raise NotImplementedError

    def _add_triple(self, s=None, p=None, o=None, context=None):
        self._add(s, p, o, context)

    def _set_triple(self, s=None, p=None, o=None, context=None):
        # Replace semantics: drop every (s, p, *) first, then add.
        self._remove_from_endpoint(s, p, context=context)
        self._add(s, p, o, context)

    def _remove_triple(self, s=None, p=None, o=None, context=None):
        self._remove_from_endpoint(s, p, o, context)

    def _execute(self, *queries):
        """
        Execute several queries, optionally combined into one request.
        Raises SparqlWriterException on any endpoint failure.
        """
        translated = [str(query) for query in queries]
        if self._combine_queries:
            translated = ["\n".join(translated)]
        try:
            for query_str in translated:
                debug(query_str)
                self._sparql_wrapper.setQuery(query_str)
                self._sparql_wrapper.query()
            return True
        except EndPointNotFound as _:
            raise_(SparqlWriterException, "Endpoint not found",
                   sys.exc_info()[2])
        except QueryBadFormed as _:
            raise_(SparqlWriterException, "Bad query: %s" % query_str,
                   sys.exc_info()[2])
        except Exception as e:
            msg = "Exception: %s (query: %s)" % (e, query_str)
            raise_(SparqlWriterException, msg, sys.exc_info()[2])

    def _add_many(self, triples, context=None):
        """Insert an iterable of (s, p, o) triples in one INSERT query."""
        debug("ADD several triples")
        query = insert()
        if context:
            query.into(context)
        for s, p, o in triples:
            query.template((s, p, o))
        query_str = str(query)
        try:
            debug(query_str)
            self._sparql_wrapper.setQuery(query_str)
            self._sparql_wrapper.query().convert()
            return True
        except EndPointNotFound as _:
            raise_(SparqlWriterException, "Endpoint not found",
                   sys.exc_info()[2])
        except QueryBadFormed as _:
            raise_(SparqlWriterException, "Bad query: %s" % query_str,
                   sys.exc_info()[2])
        except Exception as e:
            raise_(SparqlWriterException, "Exception: %s" % e,
                   sys.exc_info()[2])

    def _add(self, s, p, o, context=None):
        return self._add_many([(s, p, o)], context)

    def _remove_from_endpoint(self, s=None, p=None, o=None, context=None):
        """Delete triples matching the given (possibly partial) pattern.

        Returns True on success, None after logging an endpoint error.
        """
        debug('REM : %s, %s, %s, %s' % (s, p, o, context))
        query = delete()
        try:
            if s is None and p is None and o is None and context:
                # Whole-graph wipe is a CLEAR, not a pattern DELETE.
                query = clear().graph(context)
            else:
                if context:
                    query = delete().from_(context)
                query.template(("?s", "?p", "?o"))
                if context:
                    where_group = NamedGroup(context)
                else:
                    where_group = Group()
                where_group.append(("?s", "?p", "?o"))
                filters = self.__build_filter(s, p, o)
                if filters:
                    where_group.append(Filter("({0})".format(filters)))
                query.where(where_group)
            query_str = str(query)
            debug(query_str)
            self._sparql_wrapper.setQuery(query_str)
            self._sparql_wrapper.query().convert()
            return True
        except EndPointNotFound as _:
            error("SPARQL endpoint not found")
        except QueryBadFormed as _:
            error("Bad-formed SPARQL query")
        except SPARQLWrapperException as _:
            error("SPARQLWrapper exception")
        return None

    def __build_filter(self, s, p, o):
        """Build '?s = <...> and ?p = <...>' equality filters for the
        non-None components of the pattern."""
        vars = [(s, '?s'), (p, '?p'), (o, '?o')]
        parts = []
        for var in vars:
            if var[0] is not None:
                parts.append("%s = %s" % (var[1], self._term(var[0])))
        return " and ".join(parts)

    def index_triples(self, **kwargs):
        """
        performs index of the triples if such functionality is present,
        returns True if operation successful
        """
        # SPARQL/Update does not support indexing operation
        return False

    def load_triples(self, source=None, context=None):
        """
        Load resources on the web into the triple-store.

        :param str source: path to the sources of triples to load
        :param context: the given context
        :return: True if successful
        :rtype: bool
        """
        if source:
            query = load()
            query.load(remote_uri=source)
            if context:
                query.into(context)
            query_str = str(query)
            debug(query_str)
            self._sparql_wrapper.setQuery(query_str)
            self._sparql_wrapper.query().convert()
            return True
        return False

    def _clear(self, context=None):
        """
        Clear the triple-store.
        """
        self._remove_from_endpoint(None, None, None, context=context)

    def _term(self, term):
        """Serialize *term* into its SPARQL textual form.

        Fix: three branches returned the *bound method* ``u'{0:s}'.format``
        instead of calling it, so filters were built from a repr of a method
        object. URIRef/BNode now serialize via n3() (angle-bracketed URI /
        blank-node label), matching the Literal branch's use of n3().
        """
        if isinstance(term, (URIRef, BNode)):
            return term.n3()
        elif isinstance(term, str):  # was isinstance(term, (str, str)) — duplicate
            if term.startswith('?'):
                return u'{0:s}'.format(term)
            elif is_uri(term):
                return u'<{0:s}>'.format(term)
            else:
                return u'"{0:s}"'.format(term)
        elif type(term) is Literal:
            return term.n3()
        elif isinstance(term, (list, tuple)):
            return '"{0:s}"@{1:s}'.format(term[0], term[1])
        elif type(term) is type and hasattr(term, 'uri'):
            # NOTE(review): class objects carrying a .uri — serialized as a
            # bracketed URI; confirm against surf's session classes.
            return u'<{0:s}>'.format(term.uri)
        elif hasattr(term, 'subject'):
            # NOTE(review): resource instances — serialized by their subject
            # URI; confirm against surf's Resource API.
            return u'<{0:s}>'.format(term.subject)
        return term.__str__()
def run_query(self, query): all_results = [] tries = [] app.logger.debug(self.endpoints) get_tries = [(endpoint, 'GET') for endpoint in self.endpoints] post_tries = [(endpoint, 'POST') for endpoint in self.endpoints] app.logger.debug(get_tries) app.logger.debug(post_tries) tries.extend(post_tries) tries.extend(get_tries) app.logger.debug(tries) for (endpoint, method) in tries: try: sw = SPARQLWrapper(endpoint) sw.setMethod(method) sw.setReturnFormat(JSON) sw.setQuery(query) app.logger.debug("Calling endpoint {}".format(endpoint)) # Will give problems if e.g. the GET URI is too long, or the endpoint does not respond within reasonable time. results = sw.queryAndConvert() app.logger.debug("Done") # Will give problems if the return type is not what we expected (e.g. XML instead of JSON) if "results" in results: all_results.extend(results["results"]["bindings"]) app.logger.debug("Found {} results".format(len(results))) except: app.logger.warning( "Endpoint at {} did not work as expected. Maybe it's down?" .format(endpoint)) exc_type, exc_value, exc_traceback = sys.exc_info() traceback.print_exception(exc_type, exc_value, exc_traceback, limit=2, file=sys.stdout) app.logger.debug("Continuing with next endpoint...") continue else: if not self.all: app.logger.debug( "Finally, after calling {}".format(endpoint)) app.logger.debug(all_results) break else: app.logger.debug( "Continuing with next endpoint... (calling all)") continue app.logger.debug("Returning results from run_query") app.logger.debug(all_results) return all_results
from typing import List, Tuple from SPARQLWrapper import SPARQLWrapper, JSON import json import urllib from pathlib import Path from tqdm import tqdm sparql = SPARQLWrapper("http://cse-cnc238814s.cse.ohio-state.edu:3093/sparql") sparql.setReturnFormat(JSON) path = str(Path(__file__).parent.absolute()) with open(path + '/../ontology/fb_roles', 'r') as f: contents = f.readlines() roles = set() for line in contents: fields = line.split() roles.add(fields[1]) def execute_query(query: str) -> List[str]: sparql.setQuery(query) try: results = sparql.query().convert() except urllib.error.URLError: print(query) exit(0) rtn = [] for result in results['results']['bindings']: assert len(result) == 1 # only select one variable
class QueryMaker:
    """Incremental SPARQL SELECT builder bound to a local Virtuoso-style
    endpoint (http://localhost:9000/sparql).

    Queries are accumulated as strings via addSelect/addParam/addFilter/
    addOrder and executed with executeQuery(); appQuery() is a canned query
    over air-quality measurements. State is reset with cleanQuery().
    """

    # Init function
    def __init__(self):
        self.query = ""      # SPARQL text built so far
        self.order = ""      # ORDER BY clause, if any
        self.paramsList = [] # triple patterns / FILTER tuples, in order
        # Graphs are created only once: cleanQuery() re-calls __init__, and
        # this guard keeps the already-loaded graphs alive across resets.
        if not (hasattr(self, "graph")):
            self.normalGraph = Graph()
            self.appGraph = Graph()
            self.sparql = SPARQLWrapper("http://localhost:9000/sparql")
            self.graph = self.normalGraph
        # END IF
    # END FUNCTION

    # addSelect(*string) -> ()
    # Allows user to choose which parameters will be retrieved
    # example: addSelect("?Measure", "?Station")
    def addSelect(self, *paramsToSelect: str):
        self.query = "SELECT DISTINCT\n\t"
        for param in paramsToSelect:
            self.query = self.query + param + " "
        # END FOR
        # Drop the trailing space, then open the WHERE block.
        self.query = self.query[0:len(self.query) - 1]
        self.query = self.query + "\nWHERE {\n\t"
    # END FUNCTION

    # addParam(string, string, string) -> ()
    # Inserts into the query the triplet subject, predicate, object
    # example: addParam("?Measure", "rdf:type", "ns:Measurement")
    def addParam(self, s: str, p: str, o: str):
        self.paramsList.append((s, p, o))
    # END FUNCTION

    # addFilter(string) -> ()
    # Inserts into the query a filtering sentence
    # example: addFilter("REGEX (?StLabel, \"Moratalaz\")")
    def addFilter(self, filter: str):
        # Date filters get zero-padded first; the "\tFILTER" sentinel tells
        # executeQuery not to append a closing "." to this entry.
        filter = self.fixDate(filter)
        self.paramsList.append(("\tFILTER", filter))
    # END FUNCTION

    # addOrder(string) -> ()
    # Orders the result of the query with the ordering sentence passed
    # example: addOrder("xsd:integer(?Code)")
    def addOrder(self, order: str):
        self.order = self.order + "ORDER BY " + order
    # END FUNCTION

    # executeQuery () -> List<Dictionary>
    # Queries the graph and returns a list with the dictionary for each row
    # example: executeQuery() -> [{"Measure":"http:/...", "Station":"http:/..."},
    #                             "Measure":"http:/...", "Station":"http:/..."}
    #                            ]
    def executeQuery(self):
        # Serialize the accumulated triple patterns / filters into the query.
        for param in self.paramsList:
            for item in param:
                self.query = self.query + item + " "
            # END FOR
            if "\tFILTER" in param:
                self.query = self.query[0:len(self.query) - 1]
            else:
                self.query = self.query + "."
            # END IF-ELSE
            self.query = self.query + "\n\t"
        # END FOR
        self.query = self.query[0:len(self.query) - 1]
        self.query = self.query + "}"
        if not (self.order == ""):
            self.query = self.query + "\n" + self.order
        # END IF
        # Prepend only the PREFIX declarations actually referenced.
        self.query = self.getNamespaces() + self.query
        self.sparql.setQuery(self.query)
        self.sparql.setReturnFormat(JSON)
        results = self.sparql.query().convert()
        listResult = []
        for row in results["results"]["bindings"]:
            rowDict = {}
            for key in row.keys():
                rowDict[key] = row[key]["value"]
                if (key == "Date"):
                    # Strip the last 10 characters of the datetime string
                    # (presumably the time/zone suffix — confirm data format).
                    rowDict[key] = row[key]["value"][0:len(row[key]["value"]) -
                                                     10]
                # END IF
            # END FOR
            listResult.append(rowDict)
        # END FOR
        return listResult
    # END FUNCTION

    ## appQuery() -> List<Dictionary>
    # Queries the measurements with given filters (or not)
    # example: appQuery([False, False, False], []) -> List of all measurements
    # example: appQuery([True, False, False], [{"Place":"District","ID":"#districtID"}]) -> List of measurements in given district
    # example: appQuery([True, False, False], [{"Place":"Street","ID":"#streetID"}]) -> List of measurements in given street
    # example: appQuery([True, False, False], [{"Place":"Station","ID":"#stationCode"}]) -> List of measurements in given station
    # example: appQuery([False, True, False], ["2014"]) -> List of measurements in 2014
    # example: appQuery([False, True, False], ["2014-04"]) -> List of measurements in April 2014
    # example: appQuery([False, True, False], ["2014-04-26"]) -> List of measurements in 26th April 2014
    # example: appQuery([False, False, True], ["#magnitudeID"]) -> List of measurements of given magnitude
    def appQuery(self, paramsUsed: list, paramsList: list):
        # paramsUsed flags: [place?, date?, magnitude?]; paramsList holds the
        # corresponding values and is consumed back-to-front via pop().
        paramsList.reverse()
        self.addSelect("?Measure", "?StationLb", "?Date", "?MagnitudeLbEs",
                       "?MagnitudeLbEn", "?MagnitudeCode", "?Value")
        self.addParam("?Measure", "rdf:type", "ns:Measurement")
        self.addParam("?Measure", "ns:measuredAt", "?Station")
        self.addParam("?Station", "rdfs:label", "?StationLb")
        self.addParam("?Measure", "ns:dateOfMeasure", "?Date")
        self.addParam("?Magnitude", "rdf:type", "ns:Magnitude")
        self.addParam("?Magnitude", "rdfs:label",
                      "?MagnitudeLbEs , ?MagnitudeLbEn")
        self.addFilter(
            "(LANG(?MagnitudeLbEn) = \'en\' && LANG(?MagnitudeLbEs) = \'es\')")
        self.addParam("?Measure", "ns:measuredMagnitude", "?Magnitude")
        self.addParam("?Measure", "ns:measureValue", "?Value")
        self.addParam("?Magnitude", "ns:measureCode", "?MagnitudeCode")
        if paramsUsed[0] == True:
            # Place filter: district, street, or station.
            dictionary = paramsList.pop()
            placeType = dictionary["Place"]
            identifier = dictionary["ID"]
            if placeType == "District":
                self.addParam("?District", "rdf:type", "ns:District")
                self.addParam(
                    "?District", "ns:districtID",
                    "\"{}\"^^<http://www.w3.org/2001/XMLSchema#integer>".
                    format(identifier))
                self.addParam("?Station", "ns:inDistrict", "?District")
            elif placeType == "Street":
                self.addParam("?Street", "rdf:type", "ns:Street")
                self.addParam(
                    "?Street", "ns:streetID",
                    "\"{}\"^^<http://www.w3.org/2001/XMLSchema#integer>".
                    format(identifier))
                self.addParam("?Station", "ns:inStreet", "?Street")
            elif placeType == "Station":
                self.addParam("?Station", "rdf:type", "ns:Station")
                self.addParam(
                    "?Station", "ns:stationCode",
                    "\"{}\"^^<http://www.w3.org/2001/XMLSchema#string>".format(
                        identifier))
            else:
                print("Place " + placeType + " not identified")
                exit()
            # END IF
        # END IF
        if paramsUsed[1] == True:
            # Date filter: year, year-month, or full date, matched by prefix.
            date = paramsList.pop()
            splitted = date.split("-")
            if len(splitted) == 1:
                self.addFilter("REGEX (STR(?Date), \"^{}\", \"i\")".format(
                    splitted[0]))
            elif len(splitted) == 2:
                self.addFilter("REGEX (STR(?Date), \"^{}-{}\", \"i\")".format(
                    splitted[0], splitted[1]))
            elif len(splitted) == 3:
                self.addFilter(
                    "REGEX (STR(?Date), \"^{}-{}-{}\", \"i\")".format(
                        splitted[0], splitted[1], splitted[2]))
            else:
                print("Date " + date + " wrong formatted (use YYYY-MM-DD)")
                exit()
            # END IF
        # END IF
        if paramsUsed[2] == True:
            # Magnitude filter by its measure code.
            magnitude = paramsList.pop()
            self.addParam(
                "?Magnitude", "ns:measureCode",
                "\"{}\"^^<http://www.w3.org/2001/XMLSchema#string>".format(
                    magnitude))
        # END IF
        self.addOrder("asc(?Date)")
        listResult = self.executeQuery()
        return listResult
    # END FUNCTION

    # cleanQuery () -> ()
    # Flushes the current query and params in order to prepare a new one
    def cleanQuery(self):
        # Re-running __init__ resets query state; the hasattr guard there
        # preserves the graphs and SPARQL client.
        self.__init__()
    # END FUNCTION

    # [private function] getNamespaces() -> Dictionary
    # Returns the PREFIX declarations (as one string) for the namespaces
    # referenced in the current query
    # example: getNamespaces() -> {"ns":ns, "rdfs":RDFS, "rdf":RDF}
    def getNamespaces(self):
        initNs = ""
        if (self.query.find("ns:") > 0):
            initNs = initNs + "PREFIX ns: <http://www.semanticweb.org/group16/ontologies/air-quality#>\n" ""
        if (self.query.find("wiki:") > 0):
            initNs = initNs + "PREFIX wiki: <http://www.wikidata.org/entity/>\n" ""
        if (self.query.find("rdf:") > 0):
            initNs = initNs + "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n" ""
        if (self.query.find("rdfs:") > 0):
            initNs = initNs + "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n" ""
        if (self.query.find("owl:") > 0):
            initNs = initNs + "PREFIX owl: <http://www.w3.org/2002/07/owl#>\n" ""
        if (self.query.find("sc:") > 0):
            initNs = initNs + "PREFIX sc: <https://schema.org/>\n" ""
        return initNs
    # END FUNCTION

    # [private function] fixDate(string) -> string
    # Returns a fixed fitler for date queries
    # example: fixDate("REGEX (STR(?Date), "^2012-1-30", "i")" -> "REGEX (STR(?Date), "^2012-01-30", "i")"
    def fixDate(self, filter: str):
        # Zero-pad single-digit month/day inside a date filter; non-date
        # filters (no "^") are returned unchanged.
        idx = filter.find("^")
        if idx > 0:
            substr = filter[idx + 1:]
            splitted = substr.split("\"")
            date = splitted[0]
            splitted = date.split("-")
            if len(splitted) > 1 and splitted[1].find("0") < 0 and len(
                    splitted[1]) == 1:
                splitted[1] = "0" + splitted[1]
            if len(splitted) > 2 and splitted[2].find("0") < 0 and len(
                    splitted[2]) == 1:
                splitted[2] = "0" + splitted[2]
            if len(splitted) == 1:
                date = splitted[0]
            elif len(splitted) == 2:
                date = splitted[0] + "-" + splitted[1]
            else:
                date = splitted[0] + "-" + splitted[1] + "-" + splitted[2]
            filter = "REGEX (STR(?Date), \"^{}\", \"i\")".format(date)
        return filter
def update_query(self, repository_name, query): sparql = SPARQLWrapper(self.repository_url_update_tmplt % repository_name) sparql.setQuery(query.strip()) sparql.setMethod('POST') sparql.query()
def __init__(self, endpoint="https://query.wikidata.org/sparql", simplified: bool = True): self.sparql = SPARQLWrapper(endpoint, agent=UserAgent().random) self.sparql.setReturnFormat(JSON) self.simplified = simplified
def fetch_dbpedia_triples(entity_labels, ignored_properties=None):
    """
    Fetch all DBpedia triples for the entities with the given labels (in English), while individually caching triples
    per entity and ensuring only a single HTTP request is done for the whole list of entities.

    Returns a list of ((s_uri, s_label), (p_uri, p_label), (o_uri, o_label))
    triples, or a set when everything was served from the Mongo cache.
    """
    # Fix: `yaml.load(open('config.yaml'))` leaked the file handle; close it
    # deterministically. NOTE(review): yaml.load without an explicit Loader is
    # unsafe on untrusted input (and deprecated in PyYAML >= 5.1); config.yaml
    # is a local file, but consider yaml.safe_load.
    with open('config.yaml') as config_file:
        config = yaml.load(config_file)
    db_config = config.get('defaults', {}).get('db', {})
    if db_config.get('type', 'mongo') != 'mongo':
        db_config = {}

    host = db_config.get('location', 'localhost')
    db_name = db_config.get('name', 'army_ant')

    mongo = MongoClient(host)
    cache = mongo[db_name]['entity_triples']
    cache.create_index('label')

    if ignored_properties is None:
        ignored_properties = ['http://dbpedia.org/ontology/wikiPageWikiLink']

    triples = set()
    entity_uris = set()

    # First pass: serve what we can from the cache; collect the rest as
    # DBpedia resource URIs for one batched SPARQL request.
    cached_count = 0
    for entity_label in entity_labels:
        cached_entity = cache.find_one({'label': entity_label})
        if cached_entity:
            if 'triples' in cached_entity and len(cached_entity['triples']) > 0:
                s = (cached_entity['uri'], cached_entity['label'])
                for triple in cached_entity['triples']:
                    p = (triple['predicate']['uri'], triple['predicate']['label'])
                    o = (triple['object']['uri'], triple['object']['label'])
                    triples.add((s, p, o))
            cached_count += 1
        else:
            entity_uris.add(
                '<http://dbpedia.org/resource/%s>' %
                urllib.parse.quote_plus(entity_label.replace(' ', '_')))

    logger.debug("%d out of %d entities with cached triples" %
                 (cached_count, len(entity_labels)))

    if len(entity_uris) == 0:
        return triples

    sparql = SPARQLWrapper(dbpedia_sparql_url)

    # Query in chunks of 50 URIs to keep the VALUES clause manageable.
    for entity_uris_chunk in chunks(list(entity_uris), 50):
        query = '''
            SELECT ?s ?sLabel ?p ?pLabel ?o ?oLabel
            WHERE {
            VALUES ?s { %s }
            ?s ?p ?o .
            ?s rdfs:label ?sLabel .
            ?p rdfs:label ?pLabel .
            ?o rdfs:label ?oLabel .
            FILTER (langMatches(lang(?sLabel), 'en')
                && langMatches(lang(?pLabel), 'en')
                && langMatches(lang(?oLabel), 'en'))
            }
        ''' % ' '.join(entity_uris_chunk)

        # print(query)

        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        result = sparql.query()

        data = result.response.read()
        # print(data.decode('utf-8'))
        data = json.loads(data.decode('utf-8'))

        # Group fetched triples per subject so they can be cached per entity.
        cache_data = {}

        for binding in data['results']['bindings']:
            if ignored_properties and binding['p']['value'] in ignored_properties:
                continue

            s = (binding['s']['value'], binding['sLabel']['value'])
            p = (binding['p']['value'], binding['pLabel']['value'])
            o = (binding['o']['value'], binding['oLabel']['value'])

            if s not in cache_data:
                cache_data[s] = []
            cache_data[s].append({
                'predicate': {'uri': p[0], 'label': p[1]},
                'object': {'uri': o[0], 'label': o[1]}
            })

            triples.add((s, p, o))

        for k, v in cache_data.items():
            cache.insert({
                'uri': k[0],
                'label': k[1],
                'triples': v
            })

        # Negative-cache labels that returned nothing, so we don't re-query.
        for entity_label in set(entity_labels).difference(cache_data.keys()):
            cache.insert({
                'label': entity_label,
                'triples': []
            })

    return list(triples)
def get_results(endpoint_url, query): sparql = SPARQLWrapper(endpoint_url) sparql.setQuery(query) sparql.setReturnFormat(JSON) return sparql.query().convert()
# https://rdflib.github.io/sparqlwrapper/ from SPARQLWrapper import SPARQLWrapper, JSON import json import pprint # wikidata query for all the lakes in the US sparql = SPARQLWrapper("https://query.wikidata.org/sparql") sparql.setQuery("""#List of all the lakes in US PREFIX schema: <http://schema.org/> SELECT ?lake ?lakeLabel ?article ?coordinate_location ?lake_inflows ?lake_outflow ?elevation_above_sea_level ?area ?length ?width ?volume_as_quantity ?watershed_area ?perimeter ?residence_time_of_water ?vertical_depth ?GNIS_ID ?GeoNames_ID WHERE { ?lake (wdt:P31/wdt:P279*) wd:Q23397. ?lake wdt:P17 wd:Q30. SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". } OPTIONAL { ?article schema:about ?lake. ?article schema:inLanguage "en". ?article schema:isPartOf <https://en.wikipedia.org/>. } OPTIONAL { ?lake wdt:P625 ?coordinate_location. } OPTIONAL { ?lake wdt:P200 ?lake_inflows. } OPTIONAL { ?lake wdt:P201 ?lake_outflow. } OPTIONAL { ?lake wdt:P2044 ?elevation_above_sea_level. } OPTIONAL { ?lake wdt:P2046 ?area. } OPTIONAL { ?lake wdt:P2043 ?length. } OPTIONAL { ?lake wdt:P2049 ?width. } OPTIONAL { ?lake wdt:P2234 ?volume_as_quantity. } OPTIONAL { ?lake wdt:P2053 ?watershed_area. } OPTIONAL { ?lake wdt:P2547 ?perimeter. } OPTIONAL { ?lake wdt:P3020 ?residence_time_of_water. } OPTIONAL { ?lake wdt:P4511 ?vertical_depth. } OPTIONAL { ?lake wdt:P590 ?GNIS_ID. }
#RecupAbstractVillesOcDBpedia.py # créé le: 10/02/2018 # par Eve Séguier # The aim of this program is to get in dbpedia, label and comment of french towns where the word 'occitan' is found in the comment and to putb the result in the file from SPARQLWrapper import SPARQLWrapper, JSON import json import csv endpoint = "http://fr.dbpedia.org/sparql" with open('abstractvillesOcDBpedia.csv', 'w', newline='', encoding='utf-8') as csvfile: spamwriter = csv.writer(csvfile, delimiter=',', quotechar='"') sparql = SPARQLWrapper(endpoint) querystring = """ PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> PREFIX db-owl: <http://dbpedia.org/ontology/> SELECT ?ville ?label ?comment ?code WHERE { ?ville db-owl:country <http://fr.dbpedia.org/resource/France> ; db-owl:inseeCode ?code; rdf:type db-owl:Settlement ; rdfs:comment ?comment ; rdfs:label ?label FILTER regex(?comment,".*occitan.*") FILTER langmatches(lang(?label),"fr") FILTER langmatches(lang(?comment),"fr")
def getNodes(URI):
    # Empties context, returns context
    #
    # Python 2 function. Collects the semantic neighbourhood of *URI* as
    # [subject, relation, object] triples ("is a" via rdfs:subClassOf in both
    # directions, plus part_of restrictions via owl:someValuesFrom), caching
    # results in the sqlite table `nci` keyed by URI.
    global context
    context = []
    c = conn2.cursor()
    c.execute('SELECT * FROM nci WHERE URI=?', (URI, ))
    result = c.fetchall()
    c.close()
    if len(result) > 0:
        # Cache hit: the stored column is a Python-repr string of the list.
        # NOTE(review): eval() on DB contents executes arbitrary code if the
        # database is ever written by an untrusted party — consider
        # json/ast.literal_eval instead.
        context = eval(result[0][1])
        c.close()  # NOTE(review): second close of an already-closed cursor
        return context
    else:
        sparql = SPARQLWrapper(endpoint)
        sparql.setReturnFormat(JSON)
        print URI.rsplit('/')[-1], "has",
        # URI is_a X
        querystring = """
        PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
        SELECT DISTINCT ?s WHERE { <""" + str(
            URI) + """> rdfs:subClassOf ?s . FILTER ( isURI(?s )) . }"""
        sparql.setQuery(querystring)
        results = sparql.query().convert()
        for x in results["results"]["bindings"]:
            context.append([URI, "is a", x["s"]["value"]])
        # X is a URI
        querystring = """
        PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
        SELECT DISTINCT ?o WHERE { ?o rdfs:subClassOf <""" + str(
            URI) + """> . FILTER (isURI(?o )) . }"""
        sparql.setQuery(querystring)
        results = sparql.query().convert()
        for x in results["results"]["bindings"]:
            context.append([x["o"]["value"], 'is a', URI])
        # URI part_of X (existential restriction through a blank node)
        querystring = """
        PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
        PREFIX owl:<http://www.w3.org/2002/07/owl#>
        SELECT DISTINCT ?s ?p WHERE { <""" + str(URI) + """> rdfs:subClassOf ?b1 .
        FILTER ( isBLANK(?b1)) . ?b1 owl:someValuesFrom ?s . ?b1 owl:onProperty ?p . }"""
        sparql.setQuery(querystring)
        results = sparql.query().convert()
        for x in results["results"]["bindings"]:
            # Keep only properties whose IRI mentions part_of.
            if "part_of" in x["p"]["value"].lower():
                context.append([URI, x["p"]["value"], x["s"]["value"]])
        # X part_of URI
        querystring = """
        PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
        PREFIX owl:<http://www.w3.org/2002/07/owl#>
        SELECT DISTINCT ?o ?p WHERE { ?blank owl:someValuesFrom <""" + str(
            URI) + """> . FILTER ( isBLANK(?blank)) . ?blank owl:onProperty ?p .
        ?o rdfs:subClassOf ?blank . FILTER ( isURI(?o )) . }"""
        sparql.setQuery(querystring)
        results = sparql.query().convert()
        for x in results["results"]["bindings"]:
            if "part_of" in x["p"]["value"].lower():
                context.append([x["o"]["value"], x["p"]["value"], URI])
        print len(context), "neighbours (to db)"
        # Store the neighbourhood in the sqlite cache for next time.
        c = conn2.cursor()
        t = (URI, str(context))
        c.execute('insert into nci values (?,?)', t)
        conn2.commit()
        c.close()
        return context
def get_paths(Book1, Book2, id1, id2, out): sparql = SPARQLWrapper("http://eculture2.cs.vu.nl:6543/sparql/") paths = [] Book1 = re.sub("'", "%27", Book1) Book2 = re.sub("'", "%27", Book2) query_paths_L1 = "SELECT DISTINCT ?prop WHERE {<" + Book1 + "> ?prop <" + Book2 + "> .}" query_paths_L2_1 = "SELECT DISTINCT ?prop1 ?t2 ?prop2 ?v1 WHERE {<" + Book1 + "> ?prop1 ?v1 . <" + Book2 + "> ?prop2 ?v1. ?v1 rdf:type ?t2 .}" query_paths_L2_2 = "SELECT DISTINCT ?prop1 ?t2 ?prop2 ?v1 WHERE {<" + Book1 + "> ?prop1 ?v1 . ?v1 ?prop2 <" + Book2 + ">. ?v1 rdf:type ?t2 .}" query_paths_L3_1 = "SELECT distinct ?prop1 ?t2 ?prop2 ?t3 ?prop3WHERE {<" + Book1 + "> ?prop1 ?v1 . ?v1 ?prop2 ?v2 . ?v2 ?prop3 <" + Book2 + "> .?v1 rdf:type ?t2 . ?v2 rdf:type ?t3 .}" #print 'L1' #print query_paths_L1 sparql.setQuery(query_paths_L1) sparql.setReturnFormat(JSON) L1 = sparql.query().convert() for row in L1["results"]["bindings"]: res1 = '"' + id1 + '","' + id2 + '",' res = '"' + row["prop"]["value"] + '"\n' #if id1 not in book_paths.keys(): # book_paths[id1]={} #if id2 not in book_paths[id1].keys(): # book_paths[id1][id2]=[] #book_paths[id1][id2].append(res) paths.append(res1 + res) #out.write(res) try: sparql.setQuery(query_paths_L2_1) sparql.setReturnFormat(JSON) L2 = sparql.query().convert() for row in L2["results"]["bindings"]: res1 = '"' + id1 + '","' + id2 + '",' res = '"' + row["prop1"]["value"] + ',' res += row["t2"]["value"] + ',' res += row["prop2"]["value"] + '"\n' #out.write(res) #if id1 not in book_paths.keys(): # book_paths[id1]={} #if id2 not in book_paths[id1].keys(): # book_paths[id1][id2]=[] #book_paths[id1][id2].append(res) paths.append(res1 + res) except: print 'L2_1 wrong' sparql.setQuery(query_paths_L2_2) sparql.setReturnFormat(JSON) L2 = sparql.query().convert() for row in L2["results"]["bindings"]: res1 = '"' + id1 + '","' + id2 + '",' res = '"' + row["prop1"]["value"] + ',' res += row["t2"]["value"] + ',' res += row["prop2"]["value"] + '"\n' #out.write(res) #if id1 not in 
book_paths.keys(): # book_paths[id1]={} #if id2 not in book_paths[id1].keys(): # book_paths[id1][id2]=[] # book_paths[id1][id2].append(res) paths.append(res1 + res) return paths
def getRecetteList(): parameters = request.args # filter of the SPARQL query filter_clause = "" # filter on multiple ingredients filter_ingredients = "" # filter on multiple ingredients filter_keywords = "" # filter on note # Add the filter only if the note is provided note = parameters.get('note') if note is not None: if filter_clause == "": filter_clause = "FILTER( xsd:float(?ratingValue)>" + note + " " else: filter_clause += "&& xsd:float(?ratingValue)>" + note + " " # filter on tempDePrep # Add the filter only if the tempDePrep is provided tempDePrep = parameters.get('tempDePrep') if tempDePrep is not None: if filter_clause == "": filter_clause = 'FILTER( "' + tempDePrep + '"^^xsd:duration > xsd:duration(?totalTime) ' else: filter_clause += '&& "' + tempDePrep + '"^^xsd:duration > xsd:duration(?totalTime) ' # filter on typeCuisine # Add the filter only if the typeCuisine is provided typeCuisine = parameters.get('typeCuisine') if typeCuisine is not None: if filter_clause == "": filter_clause = "FILTER( CONTAINS(str(?cuisine),'" + typeCuisine + "' ) " else: filter_clause += "&& CONTAINS(str(?cuisine), '" + typeCuisine + "' ) " # Close the parenthesis at the end of the clause if filter_clause != "": filter_clause += ")." # filter on ingredient # Add the filter only if the ingredient is provided ingredientsList = parameters.get('ingredients') if ingredientsList is not None: ingredients = ingredientsList.split(',') for ingredient in ingredients: if filter_ingredients == "": filter_ingredients = "FILTER( CONTAINS(str(?ingredients), '" + ingredient + "' ) " else: filter_ingredients += "&& CONTAINS(str(?ingredients), '" + ingredient + "' ) " # Close the parenthesis at the end of the clause if filter_ingredients != "": filter_ingredients += ")." 
# filter on keyword # Add the filter only if the keyword is provided keywordsList = parameters.get('keywords') if (keywordsList is not None) and (keywordsList != ''): keywords = keywordsList.split(' ') for keyword in keywords: if filter_keywords == "": filter_keywords = "FILTER( CONTAINS(LCASE(str(?keywords)), '" + keyword + "' ) " else: filter_keywords += "|| CONTAINS(LCASE(str(?keywords)), '" + keyword + "' ) " # Close the parenthesis at the end of the clause if filter_keywords != "": filter_keywords += ")." query = """SELECT DISTINCT ?name ?desc ?img ?totalTime ?ratingValue ?source WHERE { { SELECT ?desc ?name ?img ?totalTime ?ratingValue Min(?source) AS ?source (group_concat(DISTINCT ?ingredients;separator = ";") as ?ingredients) (group_concat(DISTINCT ?keywords;separator = ";") as ?keywords) WHERE { SELECT DISTINCT ?desc ?name ?img ?ingredients ?totalTime ?ratingValue ?source ?keywords WHERE { ?recipe a schema:Recipe; schema:description ?desc; schema:name ?name; schema:image ?img; schema:recipeCuisine ?cuisine; schema:ingredients ?ingredients; schema:keywords ?keywords; schema:ratingValue ?ratingValue; schema:totalTime ?totalTime; wdrs:describedby ?source. """ + filter_clause + """ } } GROUP BY ?desc ?name ?img ?totalTime ?ratingValue } """ + filter_ingredients + """ """ + filter_keywords + """ } """ # get the result of the query in json sparql = SPARQLWrapper("http://linkeddata.uriburner.com/sparql") sparql.setQuery(query) sparql.setReturnFormat(JSON) results = sparql.query().convert() # get summary for each recette results = mappingSmallSummary(results) resp = make_response(results) resp.headers.set('Access-Control-Allow-Origin', '*') return resp
def _select_datasets_in_triplestore_base(self, query, datastore_name): """ Create a new dataset in the triplestore :param query: query of the sparql request :param datastore_name: name of the dtastore """ if not datastore_name: LOGGER.debug(u'No datastore name is given! Skipping...') return sparql_wrapper = SPARQLWrapper( self._get_query_endpoint(datastore_name)) sparql_wrapper.setQuery(query) sparql_wrapper.setMethod(POST) sparql_wrapper.setTimeout(10) sparql_wrapper.setReturnFormat(JSON) return sparql_wrapper.query()
((int(rowdf['Value 1']) - (localTimestamp & 0xFFFF)) & 0xFFFF)) - localTimeDif rowTime = startDate + timedelta(seconds=tempTime) rowDate = str(rowTime).split(" ", 1)[0] rowTime = str(rowTime).split(" ", 1)[1] rowValue = rowdf['Value 2'] if rowValue != 0: the_file.write( '<http://example.org/#measurement' + rowDate + "T" + rowTime + 'FR235> <http://example.org/hasType> <http://example.org/Type/heartRateMeasurement> ;\n' ) the_file.write(' <http://example.org/hasValue> ' + str(int(rowValue)) + ' ;\n') the_file.write( ' <http://example.org/device> <http://example.org/#GarminFR235> ;\n' ) the_file.write(' <http://example.org/measuredOn> "' + rowDate + "T" + rowTime + '"^^xsd:dateTime .\n') the_file.write('}\n\n') the_file.close() # read in turtle file and insert into triple store with open(fileStem + ".ttl", 'r') as file: data = file.read() file.close() sparql = SPARQLWrapper("http://localhost:3030/TDB/update") sparql.setQuery(data) sparql.method = 'POST' sparql.setReturnFormat(JSON) results = sparql.query()
import sys import re import os import requests import wget from w3lib.html import replace_entities import json import csv import shutil from SPARQLWrapper import SPARQLWrapper, JSON import time from rdflib import Graph, Namespace, BNode, URIRef, Literal #query wikidata sparql = SPARQLWrapper( "https://query.wikidata.org/sparql", agent='LexBib-Bibliodata-enrichment-script (lexbib.org)') sparql.setQuery( """SELECT ?isocode ?lang ?langName (lang(?langName) as ?langNamelang) WHERE { ?lang wdt:P220 ?isocode . ?lang rdfs:label ?langName . filter regex(str(lang(?langName)) , "^en|^de|^es|^eu|^ca|^gl|^sl|^fr|^nl|^hr|^cs|^da|^et|^fi|^el|^hu|^ga|^it|^lv|^lb|^mt|^nb|^nn|^pl|^pt|^sk|^sv") } """) sparql.setReturnFormat(JSON) #wdquerycount = wdquerycount + 1 time.sleep(1.5)
import sys from KafNafParserPy import * import redis from SPARQLWrapper import SPARQLWrapper, JSON from collections import defaultdict, Counter import ast import subprocess from rdflib import Graph, URIRef import jsonrpclib from simplejson import loads server = jsonrpclib.Server("http://localhost:3456/") def tokensToOffsets(words, startToken, endToken): return words[startToken][1]['CharacterOffsetBegin'], words[endToken][1]['CharacterOffsetEnd'] sparql = SPARQLWrapper("http://dbpedia.org/sparql") nones=["none", "nil", "--nme--"] def getCorefChains(g): documentText=getNIFString(g) # if not documentText.startswith("BADMINTON - WORLD GRAND PRIX RESULTS. BALI 1996-12-06 Results") and not documentText.startswith("CRICKET - 1997 ASHES INTINERARY. LONDON 1996-08-30 Australia"): if False: result = loads(server.parse(documentText)) chains=[] sentences=result['sentences'] if 'coref' in result: coref=result['coref'] for chain in coref: offsetChain=set() for pair in chain: for phrase in pair:
class QuestionSolver:
    """Answer simple factoid questions against Wikidata.

    Pipeline: spaCy parse -> rule-based question classification (Matcher)
    -> property/entity extraction -> Wikidata API search for ids ->
    SPARQL lookup of the answer.
    """

    def __init__(self):
        self.sparql = SPARQLWrapper('https://query.wikidata.org/sparql')
        self.wiki_api_url = 'https://www.wikidata.org/w/api.php'
        self.nlp = spacy.load('en_core_web_md')
        self.matcher = self.init_matcher()
        self.stop_words = {'a', 'by', 'of', 'the', '\'s', '"'}
        # simple translation dictionary to convert some phrasings into query keywords
        self.trans_dict = {
            'direct': 'director',
            'write': 'author',
            'compose': 'composer',
            'invent': 'inventor',
            'bear': 'birth',
            'die': 'death',
        }

    def init_matcher(self):
        """Build the spaCy Matcher with the three supported question shapes."""
        matcher = Matcher(self.nlp.vocab)
        # "When/Where did/was ..." questions
        matcher.add('WHEN_WHERE', None, [{
            'LOWER': {
                'IN': ['when', 'where']
            }
        }, {
            'DEP': {
                'IN': ['ROOT', 'aux', 'auxpass']
            }
        }])
        # "Who/What is/are the X of Y" questions
        matcher.add('X_OF_Y', None, [{
            'DEP': 'attr',
            'LOWER': {
                'IN': ['who', 'what']
            }
        }, {
            'LOWER': {
                'IN': ['is', 'are', 'was', 'were']
            }
        }])
        # "Who <verb>ed X" questions
        matcher.add('WHO_DID_X', None, [{
            'DEP': 'nsubj',
            'LOWER': 'who'
        }, {
            'DEP': 'ROOT'
        }])
        return matcher

    def answer_question(self, question):
        """Parse *question*, query Wikidata and print the answer(s).

        Date-valued answers are reformatted as MM/DD/YYYY; a NoAnswerError
        from any stage is caught and printed.
        """
        try:
            parsed_question = self.parse_question(question.strip().strip(' ?'))
            for answer in self.query_answer(parsed_question[0],
                                            parsed_question[1]):
                answer = answer['answerLabel']['value']
                try:
                    # Wikidata timestamps come back in ISO form; pretty-print them.
                    date = datetime.strptime(answer, '%Y-%m-%dT%H:%M:%SZ')
                    print(date.strftime('%m/%d/%Y'))
                except ValueError:
                    print(answer)
        except NoAnswerError as err:
            print(err)

    def parse_question(self, question):
        """Extract (property words, entity string) from a question.

        Raises NoAnswerError when none of the matcher patterns fire.
        """
        result = self.nlp(question)
        results = self.matcher(result)
        try:
            match_id, start, end = results[0]
        except IndexError:
            raise NoAnswerError(
                'Question is ill-formed, cannot answer this question')
        if result.vocab.strings[match_id] == 'WHEN_WHERE':
            # Entity is the subject's subtree; property from first/last lemmas.
            entity = [
                w.text for w in next(w for w in result
                                     if w.dep_ in ['nsubj', 'nsubjpass']).subtree
            ]
            prop_one = result[0].lemma_
            prop_two = result[-1].lemma_
            prop = [prop_one, prop_two]
        elif result.vocab.strings[match_id] == 'X_OF_Y':
            # "the X of Y": property around the head of the prepositional object.
            prop_ent = next(w for w in result if w.dep_ == 'pobj')
            prop = [w.text for w in prop_ent.head.head.lefts
                    ] + [prop_ent.head.head.text]
            entity = [w.text for w in prop_ent.subtree]
        elif result.vocab.strings[match_id] == 'WHO_DID_X':
            prop = ['who', next(w for w in result if w.dep_ == 'ROOT').lemma_]
            entity = [w.text for w in result[end:]]
        prop = self.translate_query(prop)
        entity = ' '.join(w for w in entity if w not in self.stop_words)
        return prop, entity

    def translate_query(self, query):
        """Map extracted question words onto Wikidata property labels."""
        query = [w for w in query if w not in self.stop_words]
        new_query = ' '.join(query)  # default is to simply join the words
        # in some cases, the words in questions must be "translated"
        if 'members' in query:
            return 'has part'
        if len(query) < 2:
            return new_query
        if query[1] in ['direct', 'write', 'compose', 'invent']:
            if query[0] == 'who':
                new_query = self.trans_dict[query[1]]
            if query[0] == 'when':
                new_query = 'inception'
        elif query[1] in ['bear', 'die']:
            if query[0] == 'when':
                new_query = 'date of ' + self.trans_dict[query[1]]
            elif query[0] == 'where':
                new_query = 'place of ' + self.trans_dict[query[1]]
        # BUG FIX: was `query[1] == ['publish', 'release']` -- comparing a
        # string against a list is always False, so the publisher /
        # publication-date branch was unreachable.
        elif query[1] in ['publish', 'release']:
            if query[0] == 'who':
                new_query = 'publisher'
            elif query[0] == 'when':
                new_query = 'publication date'
        return new_query

    def query_wikidata_api(self, string, prop_search=False):
        """Search the Wikidata API for *string*.

        Returns up to 5 entity ids (or property ids when prop_search=True,
        with the 'Property:' namespace prefix stripped), or None on no hit.
        """
        params = {
            'action': 'query',
            'format': 'json',
            'list': 'search',
            'srsearch': unidecode(string),
            'srnamespace': 120 if prop_search else 0,
            'srlimit': 5,
            'srprop': '',
        }
        results = get(self.wiki_api_url, params).json()['query']['search']
        if results:
            return [
                res['title'][9:] if prop_search else res['title']
                for res in results
            ]
        return None

    def query_answer(self, prop, entity):
        """Try every (entity, property) id pair until a SPARQL query yields
        bindings; raise NoAnswerError when nothing matches."""
        wikidata_props = self.query_wikidata_api(prop, True)
        wikidata_entities = self.query_wikidata_api(entity)
        if wikidata_props is None or wikidata_entities is None:
            raise NoAnswerError
        for wikidata_entity in wikidata_entities:
            for wikidata_prop in wikidata_props:
                query_string = ('SELECT ?answerLabel WHERE {{ '
                                ' wd:{} wdt:{} ?answer . '
                                ' SERVICE wikibase:label {{ '
                                ' bd:serviceParam wikibase:language "en" .'
                                ' }}'
                                '}}'.format(wikidata_entity, wikidata_prop))
                self.sparql.setQuery(query_string)
                self.sparql.setReturnFormat(JSON)
                results = self.sparql.query().convert()['results']['bindings']
                if not results:
                    continue
                return results
        raise NoAnswerError
def login(self, user, password): if '/sparql' not in self._server: self._server += '/sparql' p = self._server.find('/sparql') resource = self._server[:p] login_endpoint = SPARQLWrapper(resource + '/login') login_endpoint.setMethod(POST) login_endpoint.addCustomHttpHeader( 'Content-Type', 'application/x-www-form-urlencoded') login_endpoint.addCustomHttpHeader('Accept', 'text/plain') login_endpoint.addCustomHttpHeader('charset', 'utf-8"') login_endpoint.addParameter('email', user) login_endpoint.addParameter('password', password) self.user = user self.authentication_key = login_endpoint.query().response.read( ).decode("utf-8")
# --- imports --- from pprint import pprint import re from SPARQLWrapper import SPARQLWrapper, JSON sparql = SPARQLWrapper("http://dbpedia.org/sparql") sparql.query() sparql.addDefaultGraph("http://dbpedia.org") # --- constants --- VERBOSE = False # ontology SPARQ_AUTHOR_NAME = """ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX dbp: <http://dbpedia.org/ontology/> SELECT ?person WHERE {{ ?person a dbp:Person . ?person foaf:name "{}"@en . }} LIMIT 100 """ SPARQ_MOVEMENTS = """ PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> PREFIX dbp: <http://dbpedia.org/ontology/> PREFIX dct: <http://purl.org/dc/terms/> SELECT ?genre ?genre_name WHERE {{ <{}> dbp:genre ?genre . ?genre dct:subject dbc:Literary_movements . ?genre rdfs:label ?genre_name .
def retrieve_from_wikidata_second_round(seen_player_id_dict): sparql = SPARQLWrapper("https://query.wikidata.org/sparql") query = """ SELECT ?player ?playerLabel WHERE { ?player wdt:P106/(wdt:P279|wdt:P31)* wd:Q3665646 . ?player wdt:P21 wd:Q6581097 . ?player wdt:P27 wd:Q30 . OPTIONAL { ?player wdt:P1532 wd:Q30 } . SERVICE wikibase:label { bd:serviceParam wikibase:language "en" } . } """ sparql.setQuery(query) sparql.setReturnFormat(JSON) wikidata_qualifier_rs = sparql.query().convert() qualife_result_df = pd.json_normalize( wikidata_qualifier_rs["results"]["bindings"]) resultf_df_ls = [] wikidata_qualifier_ls = qualife_result_df["player.value"] wikidate_label_ls = qualife_result_df["playerLabel.value"] new_wikidata_id_ls = [] test = 0 for i, wikidata_qualifier in enumerate(wikidata_qualifier_ls): wikidata_qualifier = get_wikidata(wikidata_qualifier) wikidate_label = wikidate_label_ls[i] wikidate_label = wikidate_label.strip() new_wikidata_id_ls.append( dict(wikidata_id=wikidata_qualifier, player_name=wikidate_label)) # 查询对应信息 # sparql.setQuery(query_each_person) # sparql.setReturnFormat(JSON) # wikidata_rs = sparql.query().convert() # wiki_df = pd.json_normalize(wikidata_rs["results"]["bindings"]) # # resultf_df_ls.append(wiki_df) # if len(resultf_df_ls) == 10000: # break conn = None cursor = None try: sql = """ INSERT IGNORE INTO WIKIDATA_INDEX (wikidata_id, player_name) VALUE (%(wikidata_id)s, %(player_name)s) """ print(datetime.datetime.now()) conn = get_mysql_conn() cursor = conn.cursor(buffered=True) cursor.executemany(sql, new_wikidata_id_ls) conn.commit() print("committed new wikidata id to database") except Exception: traceback.print_exc() if conn: conn.rollback() finally: if cursor: cursor.close() if conn: conn.close()
def is_endpoint(links: list, first_crawl=True):
    """Probe each URL in *links* with a SPARQL ASK query and record the result.

    Working endpoints are inserted into the endpoints collection; on the
    first crawl, failures that look recoverable (cert errors, endpoint
    errors on a live site) queue the domain for a second crawl; on the
    second crawl most failures are simply skipped or routed to the
    Sparql.general_control_* fallbacks.
    """
    for link in links:
        print(f"\nCurrent Website : {link}")
        # Site to be checked & Query & Timeout configuration
        sparql = SPARQLWrapper(f"{link}", returnFormat=JSON)
        sparql.setQuery("ASK WHERE { ?s ?p ?o. }")
        sparql.setTimeout(30)
        sparql.setOnlyConneg(True)
        link_domain = urlparse(link).netloc
        try:
            # Execute query and convert results to the returnFormat which is JSON.
            query_result = sparql.queryAndConvert()
            if query_result[
                    "boolean"] and not Database.in_the_endpoints_collection(
                        link_domain):
                Database.insert_to_endpoints_collection(link, link_domain)
                print("Endpoint written on DB.")
            else:
                if Database.in_the_endpoints_collection(link_domain):
                    print("Endpoint already exist in DB.")
                else:
                    print("This site isn't a SPARQL endpoint.")
        except (EndPointNotFound, EndPointInternalError, QueryBadFormed) as e:
            # Endpoint-level failures: on the first crawl, a live site may be
            # retried later; on the second crawl, give up on this link.
            if first_crawl:  # first crawl
                if is_alive(link) and not Database.in_the_endpoints_collection(link_domain) \
                        and not Database.in_the_second_crawl_domains_collection(link_domain):
                    Database.insert_to_second_crawl_domains_collection(
                        link_domain)
                    print(
                        f"This site's domain is added for second crawl. site : {link_domain}"
                    )
                elif Database.in_the_endpoints_collection(link_domain) \
                        or Database.in_the_second_crawl_domains_collection(link_domain):
                    print("This domain already exist in DB.")
                else:
                    print("This site is not alive.")
                continue
            else:  # second crawl
                continue
        except (HTTPError, URLError) as UrllibError:
            # Transport-level failures; TLS problems get a second chance.
            if first_crawl:  # first crawl
                if "503" in str(UrllibError):
                    print("This site is not alive.")
                elif "certificate verify failed" in str(UrllibError) \
                        and not Database.in_the_endpoints_collection(link_domain) \
                        and not Database.in_the_second_crawl_domains_collection(link_domain):
                    Database.insert_to_second_crawl_domains_collection(
                        link_domain)
                    print(
                        f"This site's domain is added for second crawl. site : {link_domain}"
                    )
                else:
                    Sparql.general_control_for_missed_endpoint(
                        link, link_domain)
                    print("Urllib Error.")
            else:  # second crawl
                Sparql.general_control_for_missed_endpoint_in_second_crawl(
                    link, link_domain)
                continue
        except (SPARQLWrapperException, URITooLong,
                Unauthorized) as WrapperException:
            print("Error while wrapping endpoint: ", WrapperException)
            print("WrapperException")
        except TypeError:
            if first_crawl:  # first crawl
                Sparql.general_control_for_missed_endpoint(link, link_domain)
                print("Type Error")
            else:  # second crawl
                Sparql.general_control_for_missed_endpoint_in_second_crawl(
                    link, link_domain)
                continue
        except Exception:
            # NOTE(review): broad catch-all prints a bare label without the
            # exception itself -- consider logging the exception object.
            if first_crawl:  # first crawl
                Sparql.general_control_for_missed_endpoint(link, link_domain)
                print('Exception: ')
            else:  # second crawl
                Sparql.general_control_for_missed_endpoint_in_second_crawl(
                    link, link_domain)
                continue
# -*- coding: UTF-8 -*- ''' Created on 20181025 @author: Hansen ''' from SPARQLWrapper import SPARQLWrapper, JSON sparql = SPARQLWrapper("http://localhost:3030/movies/sparql") sparql.setQuery(""" PREFIX : <http://www.neohope.com/hansen/ontologies/2018/movies#> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> SELECT ?title WHERE { ?aPerson rdf:type :Person. ?aPerson :personName '巩俐'. ?aPerson :hasActedIn ?aMovie. ?aMovie :movieTitle ?title. ?aMovie :movieRating ?rating. FILTER (?rating>=7) } LIMIT 10 """) sparql.setReturnFormat(JSON) results = sparql.query().convert() for result in results["results"]["bindings"]: print(result["title"]["value"])