def clear():
    # Configurations
    config = ConfigParser()
    config.read('config.ini')
    endpoint_uri = config['Mandatory']['endpointURI']
    graph_uri = config['Mandatory']['graphURI']
    clean_graph_query = "CLEAR GRAPH <" + graph_uri + ">"

    # Set up endpoint and access to triple store
    sparql = SPARQLWrapper(endpoint_uri)
    sparql.setReturnFormat(JSON)
    sparql.setMethod(POST)
    store = SPARQLUpdateStore(endpoint_uri, endpoint_uri)

    # Specify the (named) graph we're working with
    sparql.addDefaultGraph(graph_uri)

    # Create an in-memory graph
    g = Graph(store, identifier=graph_uri)

    # Clean up the existing triples
    sparql.setQuery(clean_graph_query)
    sparql.query().convert()

    # Clean up the graph instance
    g.close()
def run_query(endpoint_URL, query, timeout):
    # Create the wrapper for the endpoint selected by the user
    sparql = SPARQLWrapper(endpoint_URL)

    # Set the default graph (tested only for DBpedia; a general solution for other endpoints is still needed)
    if endpoint_URL == "http://dbpedia.org/sparql":
        sparql.addDefaultGraph(endpoint_URL.replace('/sparql', ''))

    # Apply the requested timeout to the query
    if timeout is not None and timeout != 0:
        sparql.setTimeout(timeout + 10)
        # Timeout passed as a request parameter, in milliseconds
        sparql.addParameter("timeout", str(timeout * 1000))

    # Set the query
    sparql.setQuery(query)

    # Convert the query result to JSON
    sparql.setReturnFormat(JSON)

    # Run the query and return the result dictionary if a response is received
    response_data = None
    #! DEBUG: print the request
    # print(vars(sparql._createRequest()))
    # a = sparql.query()
    # print(a)
    try:
        response_data = sparql.query().convert()
        # print(response_data)
    except socket.timeout:
        response_data = "ERROR: timeout exceeded"
    except Exception:
        response_data = "ERROR: generic error"
    finally:
        return response_data if type(response_data) is not str and response_data['results']['bindings'] != [] else "No results"
def get_triples(endpoint_uri, graph_uri, qs, triple_format):
    sparql = SPARQLWrapper(endpoint_uri)
    sparql.addDefaultGraph(graph_uri)
    sparql.setQuery(qs)
    if triple_format == "N3":
        sparql.setReturnFormat(N3)
        return sparql.query().convert()
    elif triple_format == "JSON":
        sparql.setReturnFormat(JSON)
        return sparql.query().convert()
    elif triple_format == "XML":
        sparql.setReturnFormat(XML)
        return sparql.query().convert().toxml()
    elif triple_format == "RDFXML":
        sparql.setReturnFormat(RDFXML)
        return sparql.query().convert().serialize()
    elif triple_format == "CSV":
        sparql.setReturnFormat(CSV)
        return sparql.query().convert()
    elif triple_format == "TSV":
        sparql.setReturnFormat(TSV)
        return sparql.query().convert()
    else:
        return "bad format!"
def _execute_query(query, endpoint, return_format=JSON, default_graph=None):
    dbpedia_endpoint = SPARQLWrapper(endpoint)
    dbpedia_endpoint.setReturnFormat(return_format)
    if default_graph is not None:
        dbpedia_endpoint.addDefaultGraph(default_graph)
    dbpedia_endpoint.setQuery(query)
    results = dbpedia_endpoint.query()
    return results
def getSparqlObject(self, graphName=None, query=None):
    sparql = SPARQLWrapper(SparqlStore.SPARQL_ENDPOINT)
    sparql.addDefaultGraph(self.getGraphURI(graphName))
    sparql.setQuery(query)
    sparql.setMethod(POST)
    sparql.queryType = SELECT
    sparql.setReturnFormat(JSON)
    sparql.setTimeout(0.1)
    return sparql
def _wrapper(self):
    sparql_wrapper = SPARQLWrapper(self.url)
    sparql_wrapper.user = self.user
    sparql_wrapper.passwd = self.passwd
    if self.default_graph:
        sparql_wrapper.addDefaultGraph(self.default_graph)
    if self.http_auth:
        sparql_wrapper.setHTTPAuth(self.http_auth)
    return sparql_wrapper
class Search(object):
    """
    Python class which searches DBpedia for the entities found in the previous step.
    If an entity is not found, it is added to a list of new entities.

    Attributes:
        new: a list of new entities.
        existing: a list of existing entities.
        sparql: a wrapper around a SPARQL service. It helps in creating the query URI
            and, possibly, converting the result into a more manageable format.
    """

    def __init__(self):
        self.sparql = SPARQLWrapper("http://dbpedia.org/sparql")
        self.sparql.addDefaultGraph("http://dbpedia.org")
        self.new = []
        self.existing = []

    def query(self, entities):
        """
        Searches DBpedia and checks whether the entities exist in the knowledge base.

        Args:
            entities: a list of tuples; words, and their labels.

        Returns:
            A list of new entities (words not found in DBpedia) and a list of contextual
            words (uni-gram words) that do exist in DBpedia. Entities in both lists are
            considered as nodes in the Heterogeneous Textual Graph.
        """
        # Search DBpedia to see if the words exist
        for i in entities:
            if re.match("^[a-zA-Z_]*$", i[0]):
                self.sparql.setQuery("""
                    SELECT DISTINCT ?item ?label
                    WHERE {
                        ?item rdfs:label ?label .
                        FILTER (lang(?label) = 'en') .
                        ?label bif:contains '%s' .
                        ?item dct:subject ?sub
                    }
                """ % i[0])
                try:
                    self.sparql.setReturnFormat(CSV)
                    results = self.sparql.query()
                    triples = results.convert()
                except Exception:
                    triples = '\n'
                    print("query failed")
                # If the word exists, add it to the existing list
                if len(triples) > 15:
                    self.existing.append(i)
                # If the word doesn't exist, add it to the new list
                else:
                    self.new.append(i)
        return self.new, self.existing
def query_dbpedia_relation(entities):
    # global count
    # count += 1
    # print(count / total)
    e1, e2 = entities[0], entities[1]
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    sparql.setQuery("""
        SELECT DISTINCT ?p WHERE { <%s> ?p ?o }
    """ % e1)
    sparql.setReturnFormat(JSON)
    sparql.addDefaultGraph("http://dbpedia.org")
    results = sparql.query().convert()
    e1_res, e2_res = [], []
    if len(results["results"]["bindings"]):
        e1_res = list(set([
            x['p']['value'] for x in results["results"]["bindings"]
            if ('property' in x['p']['value'] or 'ontology' in x['p']['value'])
        ]))
        sparql.setQuery("""
            SELECT DISTINCT ?p WHERE { ?s ?p <%s> }
        """ % e2)
        results = sparql.query().convert()
        if len(results["results"]["bindings"]):
            e2_res = list(set([
                x['p']['value'] for x in results["results"]["bindings"]
                if ('property' in x['p']['value'] or 'ontology' in x['p']['value'])
            ]))
        if e1_res and e2_res:
            for predicate in intersection(e1_res, e2_res):
                if 'wiki' not in predicate:
                    with open('dataset.txt', 'a+', encoding='utf8') as dest:
                        dest.write("{}\t{}\t{}\n".format(e1, predicate, e2))
def execute_sparql_query(query):
    # First try the cache
    result = cache.get(query)
    if result is not None:
        return result
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    sparql.addDefaultGraph("http://dbpedia.org")
    sparql.setReturnFormat(JSON)
    sparql.setQuery(query)
    result = sparql.query().convert()["results"]["bindings"]
    cache.put(query, result)
    return result
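The `cache` object used above is not defined in the snippet. A minimal in-memory stand-in with the same `get`/`put` interface might look like the sketch below (hypothetical helper, assuming no eviction or persistence is required):

class QueryCache:
    """Minimal in-memory cache keyed by the query string (illustrative stand-in)."""

    def __init__(self):
        self._store = {}

    def get(self, query):
        # Return the cached bindings, or None on a miss
        return self._store.get(query)

    def put(self, query, result):
        self._store[query] = result

cache = QueryCache()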
def _graph_sparql(self, named_graph, query):
    """Execute SPARQL query on the Graph Store."""
    store_api = "{0}/query".format(self.request_address)
    try:
        sparql = SPARQLWrapper(store_api)
        # Add a default graph, though that can also be in the query string
        sparql.addDefaultGraph(named_graph)
        sparql.setQuery(query)
        data = sparql.query().convert()
    except Exception as error:
        app_logger.error('Something is wrong: {0}'.format(error))
        raise
    else:
        app_logger.info('Executed SPARQL query on named graph: {0}.'.format(named_graph))
        return data.toxml()
def execute_sparql_query(query, cached=True):
    # First try the cache
    if cached:
        result = cache.get(query)
        if result is not None:
            return result
    # If there is no result in the cache, query the endpoint
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    # sparql = SPARQLWrapper("http://35.196.96.177:8890/sparql")
    sparql.addDefaultGraph("http://dbpedia.org")
    sparql.setReturnFormat(JSON)
    sparql.setQuery(query)
    result = sparql.query().convert()["results"]["bindings"]
    cache.put(query, result)
    return result if result else None
def _sparql_super_class(type: str):
    super = []
    sparql = SPARQLWrapper(sparql_help["dbpedia"]["endpoint"])
    sparql.addDefaultGraph(sparql_help["dbpedia"]["default_graph"])
    ressource_uri = "<" + type + ">"
    query = (
        "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> select ?super where {"
        + ressource_uri + " rdfs:subClassOf ?super}")
    sparql.setQuery(query)
    try:
        sparql.setReturnFormat(JSON)
        results = sparql.query()
        triples = results.convert()
        for t in triples["results"]["bindings"]:
            super.append(t["super"]["value"])
    except Exception:
        print("query failed")
        return None
    return super
def run_query():
    # pprint.pprint(request)
    # sys.exit()
    _graphURI = request.form['inputGraphURI']
    _queryText = request.form['inputQuery']
    pprint.pprint(_graphURI)
    pprint.pprint(_queryText)
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    sparql.addDefaultGraph(_graphURI)
    sparql.setQuery(_queryText)
    sparql.setReturnFormat(JSON)
    results = sparql.query()
    results = results.convert()
    return json.dumps(results)
def returnTypeDBpedia():
    _types = []
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    sparql.addDefaultGraph("http://dbpedia.org")
    query = "select ?type {"
    query += " ?type a owl:Class ."
    query += "}"
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    for result in results["results"]["bindings"]:
        type_ = str(result["type"]["value"])
        data = type_.split('/')
        _types.append(data[4])
    return _types
def execute_query(query_path, dataset):
    sparql = SPARQLWrapper("http://127.0.0.1:8890/sparql")
    with open(query_path, "r") as f:
        query = f.read()
    sparql.addDefaultGraph("http://" + dataset.lower() + ".lingbm.morphgraphql.oeg-upm.net/")
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    # print(query)
    s = time.time()
    d = sparql.query()
    delta = time.time() - s
    # print(d.convert())
    return delta
def get_types_from_endpoint(query_info: dict, ressource_uri: str):
    types = []
    sparql = SPARQLWrapper(query_info["endpoint"])
    sparql.addDefaultGraph(query_info["default_graph"])
    additional_prefix = (query_info["additional_prefix"]
                         if "additional_prefix" in query_info else "")
    ressource_uri = (query_info["resource_prefix"] + ressource_uri
                     if "resource_prefix" in query_info
                     else "<" + ressource_uri + ">")
    query = (
        additional_prefix +
        "\nPREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> select ?type where {" +
        ressource_uri + " rdf:type ?type } LIMIT 50")
    sparql.setQuery(query)
    try:
        sparql.setReturnFormat(JSON)
        results = sparql.query()
        triples = results.convert()
        for t in triples["results"]["bindings"]:
            types.append(t["type"]["value"])
    except Exception:
        print("query failed")
        print(query)
    return types
def main():
    # Track executing time
    # start_time = time.time()

    headers = {'content-type': 'application/json'}  # HTTP header content type

    # Configurations
    config = ConfigParser()
    config.read('config.ini')
    endpoint_uri = config['Mandatory']['endpointURI']
    graph_uri = config['Mandatory']['graphURI']
    pool_uri = (config['Mandatory']['poolURI']).split(',')
    type_uri = (config['Mandatory']['typeURI']).split(',')

    # Set up endpoint and access to triple store
    sparql = SPARQLWrapper(endpoint_uri)
    sparql.setReturnFormat(JSON)
    sparql.setMethod(POST)
    store = SPARQLUpdateStore(endpoint_uri, endpoint_uri)

    # Specify the (named) graph we're working with
    sparql.addDefaultGraph(graph_uri)

    # Create an in-memory graph
    g = Graph(store, identifier=graph_uri)

    # Build the RDF from the JSON source data.
    # This function is called for each URL in the pool to harvest, in case the source
    # is in JSON with the Estonian mapping.
    def rdf(urlrdf, f):
        input = Graph()
        input.open("store2", create=True)
        input.parse(urlrdf, format=f)
        for s, p, o in input:
            g.add((s, p, o))
        input.close()

    # Set counter
    c = 0

    # Loop over all URIs in the pool
    while c < len(pool_uri):
        print(pool_uri[c], type_uri[c])
        if type_uri[c] == 'jsonEstonia':
            try:
                # Fetch the JSON data
                response = requests.get(pool_uri[c], headers=headers).json()
                # Process the response
                configJSON = ConfigParser()
                configJSON.read('mapping_estonia.ini')
                json_to_rdf(pool_uri[c], response, g, configJSON)
            except ValueError as e:
                print(e)
        if type_uri[c] == 'xml' or type_uri[c] == 'turtle' or type_uri[c] == 'nt':
            rdf(pool_uri[c], type_uri[c])
        # Counter update
        c += 1

    # Report the number of triples added to the store
    print('\r\nNumber of triples added: %d' % len(g))

    # Clean up the graph instance
    g.close()
config.read('config_3.ini')
URI = sys.argv[1]
classType = sys.argv[2]
endpoint_uri = config['Mandatory']['endpointURI']
graph_uri = config['Mandatory']['graphURI']

# Set up endpoint and access to triple store
sparql = SPARQLWrapper(endpoint_uri)
sparql.setReturnFormat(JSON)
sparql.setMethod(POST)
store = SPARQLUpdateStore(endpoint_uri, endpoint_uri)

# Specify the (named) graph we're working with
sparql.addDefaultGraph(graph_uri)

# Create an in-memory graph
g = Graph(store, identifier=graph_uri)

query = "select ?p ?o where {<" + URI + "> ?p ?o}"
properties = g.query(query)

# Configuration mappings
mapping = ConfigParser()
mapping.read('mapping_fields.ini')

propURI = ""
props = ""
for row in properties:
    propURI = str(row[0])
class BrickSparql(object): def __init__( self, sparql_url, brick_version, graph, base_ns, username='******', password='******', update_url=None, httpauth_type=DIGEST, ): self.BRICK_VERSION = brick_version self.sparql_url = sparql_url self.update_url = update_url self.BASE = Namespace(base_ns) self.base_graph = graph self.BRICK = Namespace( 'https://brickschema.org/schema/{0}/Brick#'\ .format(self.BRICK_VERSION)) self.BRICK_USE = Namespace( 'https://brickschema.org/schema/{0}/BrickUse#'\ .format(self.BRICK_VERSION)) self.BF = Namespace( 'https://brickschema.org/schema/{0}/BrickFrame#'\ .format(self.BRICK_VERSION)) self.BRICK_TAG = Namespace( 'https://brickschema.org/schema/{0}/BrickTag#'\ .format(self.BRICK_VERSION)) PROV = Namespace('http://www.w3.org/ns/prov#') self.namespaces = { '': self.BASE, 'base': self.BASE, 'brick':self.BRICK, 'bf': self.BF, 'brick_tag': self.BRICK_TAG, 'brick_use': self.BRICK_USE, 'rdfs': RDFS, 'rdf': RDF, 'owl': OWL, 'foaf': FOAF, 'prov': PROV, } self.init_q_prefix() self.init_sparql(self.sparql_url, username, password, httpauth_type) def init_sparql(self, sparql_url, username, password, httpauth_type): if not self.update_url: update_url = sparql_url + '-auth' else: update_url = self.update_url self.sparql = SPARQLWrapper(endpoint=sparql_url, updateEndpoint=update_url) if self.base_graph: self.sparql.addDefaultGraph(self.base_graph) self.sparql.queryType = SELECT self.sparql.setCredentials(username, password) self.sparql.setHTTPAuth(httpauth_type) def init_q_prefix(self): self.q_prefix = '' for prefix, ns in self.namespaces.items(): if 'uri' in dir(ns): ns_n3 = ns.uri.n3() else: ns_n3 = ns[''].n3() self.q_prefix += 'prefix {0}: {1}\n'.format(prefix, ns_n3) self.q_prefix += '\n' def _get_sparql(self): # If need to optimize accessing sparql object. return self.sparql def _format_select_res(self, raw_res): var_names = raw_res['head']['vars'] tuples = [[row[var_name]['value'] for var_name in var_names] for row in raw_res['results']['bindings']] #TODO: Below line is a hack. var_names = ['?'+var_name for var_name in var_names] return { 'var_names': var_names, 'tuples': tuples } def parse_result(self, res): raw_res = res common_res = res return common_res, raw_res def add_graphs_to_select_qstr(self, qstr, graphs=[]): if not graphs: return qstr [prefix, body] = re.split(re.compile('where', re.IGNORECASE), qstr) graph_body = '\n' for graph in graphs: graph_body += 'FROM <{0}>\n'.format(graph) return prefix + graph_body + 'where ' + body def add_graphs_to_insert_qstr(self, qstr, graphs=[]): if graphs: graph = graphs[0] else: graph = self.base_graph qstr = 'WITH <{0}>\n'.format(graph) + qstr return qstr def add_graphs_to_insert_qstr_dep(self, qstr, graphs=[]): assert len(graphs) <= 1, 'Cannot insert into multiple graphs. 
Choose a graph or no graph' #if not graphs: # return qstr if graphs: graph = graphs[0] else: graph = self.base_graph [graph_prefix, body] = re.split(re.compile('insert{', re.IGNORECASE), qstr) graph_prefix += 'INSERT{\n' graph_prefix += 'GRAPH <{0}> {{'.format(graph) splitted_body = re.split('}', body) insert_body = splitted_body[0] augmented_qstr = graph_prefix + insert_body + ' }\n}' + '}'.join(splitted_body[1:]) return augmented_qstr def update(self, qstr, graphs=[]): sparql = self._get_sparql() sparql.setMethod(POST) sparql.setReturnFormat(JSON) query_type = qstr[:6] if query_type.upper() == 'INSERT': qstr = self.add_graphs_to_insert_qstr(qstr, graphs) elif graphs: raise Exception('not implemented yet') qstr = self.q_prefix + qstr sparql.setQuery(qstr) raw_res = sparql.query().convert() if sparql.queryType == SELECT: res = self._format_select_res(raw_res) elif sparql.queryType in [INSERT, LOAD, DELETE]: res = raw_res # TODO: Error handling here return res def raw_query(self, qstr): return self.query(qstr) # TODO: How to handle different graphs? def query(self, qstr, graphs=[], is_update=False): sparql = self._get_sparql() sparql.setMethod(POST) sparql.setReturnFormat(JSON) qstr = self.q_prefix + qstr if not is_update: # TODO: Implement this for update as well. qstr = self.add_graphs_to_select_qstr(qstr, graphs) sparql.setQuery(qstr) raw_res = sparql.query().convert() if sparql.queryType == SELECT: res = self._format_select_res(raw_res) elif sparql.queryType in [INSERT, LOAD, DELETE]: res = raw_res # TODO: Error handling here return res def _create_insert_query(self, triples, graph=None): if not graph: graph = self.base_graph q = 'INSERT DATA {\n' if graph: q += ' GRAPH <{0}> {{'.format(graph) for triple in triples: triple_str = ' '.join([term.n3() for term in triple]) + ' .\n' q += triple_str q += '}\n' if graph: q += '}' return q def _create_delete_query(self, triples, graph=None): if not graph: graph = self.base_graph q = """ DELETE DATA {{ GRAPH <{0}> {{ """.format(graph) for triple in triples: triple_str = ' '.join([term.n3() for term in triple]) + ' .\n' q += triple_str q += """} } """ return q def _is_bool(self, s): s = s.lower() if s == 'true' or s == 'false': return True else: return False def _str2bool(self, s): s = s.lower() if s == 'true': return True elif s == 'false': return False else: raise Exception('{0} is not convertible to boolean'.format(s)) def _is_float(self, s): try: float(s) return True except ValueError: return False def _parse_term(self, term): if isinstance(term, rdflib.term.Identifier): return term elif isinstance(term, str): if 'http' == term[0:4]: node = URIRef(term) elif ':' in term: #TODO: This condition is dangerous. 
[ns, id_] = term.split(':') ns = self.namespaces[ns] node = ns[id_] else: if term.isdigit(): term = int(term) elif self._is_float(term): term = float(term) if self._is_bool(term): term = _str2bool(term) node = Literal(term) else: node = Literal(term) return node def add_ns_prefix(self, ns, prefix): ns = Namespace(ns) self.namespaces[prefix] = ns self.init_q_prefix() def make_triple(self, pseudo_s, pseudo_p, pseudo_o, graph=None): if not graph: graph = self.base_graph s = self._parse_term(pseudo_s) p = self._parse_term(pseudo_p) o = self._parse_term(pseudo_o) return (s, p, o) def add_triple(self, pseudo_s, pseudo_p, pseudo_o, graph=None): self.add_triples([(pseudo_s, pseudo_p, pseudo_o)], graph) def add_triples(self, pseudo_triples, graph=None): if not graph: graph = self.base_graph triples = [self.make_triple(*pseudo_triple) for pseudo_triple in pseudo_triples] q = self._create_insert_query(triples, graph) res = self.query(q, is_update=True) def delete_triple(self, pseudo_s, pseudo_p, pseudo_o, graph=None): self.delete_triples([(pseudo_s, pseudo_p, pseudo_o)], graph) def delete_triples(self, pseudo_triples, graph=None): if not graph: graph = self.base_graph triples = [self.make_triple(*pseudo_triple) for pseudo_triple in pseudo_triples] q = self._create_delete_query(triples, graph) res = self.query(q, is_update=True) def load_schema(self): if semver_compare(self.BRICK_VERSION, '1.1.0') < 0: schema_ns = [self.BRICK, self.BRICK_USE, self.BF, self.BRICK_TAG] else: schema_ns = [self.BRICK] schema_urls = [str(ns)[:-1] + '.ttl' for ns in schema_ns] load_query_template = 'LOAD <{schema_url}>' if self.base_graph: load_query_template += ' into <{0}>'.format(self.base_graph) for schema_url in schema_urls: qstr = load_query_template.format(schema_url=schema_url.replace('https', 'http')) res = self.query(qstr) def load_rdffile(self, f, graph=None): if not graph: graph = self.base_graph if (isinstance(f, str) and os.path.isfile(f)) or isinstance(f, StringIO): # TODO: Optimize this with using Virtuoso API directly new_g = rdflib.Graph() new_g.parse(f, format='turtle') res = [row for row in new_g.query('select ?s ?p ?o where {?s ?p ?o.}')] for rows in striding_windows(res, 500): self.add_triples(rows, graph=graph) elif isinstance(f, str) and validators.url(f): raise Exception('Load ttl not implemented for {0}'.format('url')) else: raise Exception('Load ttl not implemented for {0}'.format(type(f))) def add_brick_instance(self, entity_id, tagset, ns_prefix=None, graph=None): if not isinstance(entity_id, URIRef): if ns_prefix: ns = self.namespaces[ns_prefix] entity = ns[entity_id] else: entity = URIRef(entity_id) else: entity = entity_id tagset = self.BRICK[tagset] triples = [(entity, RDF.type, tagset)] self.add_triples(triples, graph) return entity
class Spar(object):
    '''
    class to query DBpedia for URI identity;
    URIs are identical when connected by page redirects
    '''

    def __init__(self):
        ''' constructor '''
        self.sparql = SPARQLWrapper("http://dbpedia.org/sparql")
        self.sparql.addDefaultGraph("http://dbpedia.org")

    def isEqualTest(self, uri1, uri2):
        ''' check for identity of URIs (test) '''
        spar = ("ASK {<" + uri1 + "> "
                "(<http://dbpedia.org/ontology/wikiPageRedirects>|"
                "^<http://dbpedia.org/ontology/wikiPageRedirects>)* "
                "<" + uri2 + ">}")
        # print(spar)
        return self.evalQuery(spar)

    def isEqual(self, uri1, uri2):
        ''' check for identity of URIs '''
        spar = ("ASK {<" + uri1 + "> "
                "(<http://dbpedia.org/ontology/wikiPageRedirects>|"
                "^<http://dbpedia.org/ontology/wikiPageRedirects>)* "
                "<" + uri2 + ">}")
        # print(self.evalRes(self.evalQuery(spar)))
        return True and self.evalRes(self.evalQuery(spar))

    def evalQuery(self, query):
        ''' evaluate query remotely '''
        time.sleep(1)  # run one query per second
        try:
            self.sparql.setReturnFormat(JSON)
            self.sparql.setQuery(query)
            print("running", query)
            return self.sparql.query().convert()
        except Exception:
            return False

    def evalRes(self, results):
        ''' check for value of ask query '''
        if results == False:
            return results
        else:
            return results['boolean']

    def printRes(self, results):
        ''' print raw JSON results (tests) '''
        print(results)

    def test(self):
        ''' test if it works '''
        uri1 = "http://dbpedia.org/resource/NaN"
        uri2 = "NaN"
        for i in range(20):
            print("query", i, self.isEqual(uri1, uri2))

# # execute
# if __name__ == '__main__':
#     sp = Spar()
#     sp.test()
from SPARQLWrapper import SPARQLWrapper, JSON, XML, POST, DIGEST
import json

sparql = SPARQLWrapper("http://localhost:8890/sparql")
sparql.addDefaultGraph("http://www.Gokdepartments.org")


def selDeptList():
    sparql.setQuery("""
        SELECT * {?Org <http://www.w3.org/ns/org#Name> ?name.
                  ?Org <http://www.w3.org/ns/org#DepartmentID> ?ID.}
    """)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    dept = []
    for result in results["results"]["bindings"]:
        dept.append({
            'name': result["name"]["value"],
            'id': result["ID"]["value"]
        })
    return (json.dumps(dept))


def selDeptDetail(deptId):
    sparql.setQuery("""
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        PREFIX org: <http://www.w3.org/ns/org#>
        SELECT ?name ?ID ?altname ?location ?url ?phone ?mail ?pincode ?addresse
        where {
            ?d rdf:type <http://www.w3.org/ns/org#FormalOrganization>.
            ?d org:Name ?name.
class WriterPlugin(RDFWriter): def __init__(self, reader, *args, **kwargs): super(WriterPlugin, self).__init__(reader, *args, **kwargs) if isinstance(self.reader, ReaderPlugin): self._endpoint = self.reader.endpoint else: self._endpoint = kwargs.get("endpoint") self._combine_queries = kwargs.get("combine_queries") self._results_format = JSON self._sparql_wrapper = SPARQLWrapper(self._endpoint, returnFormat=self._results_format) user = kwargs.get('user', None) password = kwargs.get('password', None) if user is not None and password is not None: self._sparql_wrapper.setCredentials(user, password) self._sparql_wrapper.setMethod("POST") default_graph = kwargs.get('default_graph', None) if default_graph: self._sparql_wrapper.addDefaultGraph(default_graph) @property def endpoint(self): return self._endpoint def _save(self, *resources): for context, items in _group_by_context(resources).items(): # Deletes all triples with matching subjects. remove_query = _prepare_delete_many_query(items, context) insert_query = _prepare_add_many_query(items, context) self._execute(remove_query, insert_query) def _update(self, *resources): for context, items in _group_by_context(resources).items(): # Explicitly enumerates triples for deletion. remove_query = _prepare_selective_delete_query(items, context) insert_query = _prepare_add_many_query(items, context) self._execute(remove_query, insert_query) def _remove(self, *resources, **kwargs): for context, items in _group_by_context(resources).items(): # Deletes all triples with matching subjects. inverse = kwargs.get("inverse") query = _prepare_delete_many_query(items, context, inverse) self._execute(query) def _size(self): """ Return total count of triples, not implemented. """ raise NotImplementedError def _add_triple(self, s=None, p=None, o=None, context=None): self._add(s, p, o, context) def _set_triple(self, s=None, p=None, o=None, context=None): self._remove_from_endpoint(s, p, context=context) self._add(s, p, o, context) def _remove_triple(self, s=None, p=None, o=None, context=None): self._remove_from_endpoint(s, p, o, context) def _execute(self, *queries): """ Execute several queries. 
""" translated = [str(query) for query in queries] if self._combine_queries: translated = ["\n".join(translated)] try: for query_str in translated: debug(query_str) self._sparql_wrapper.setQuery(query_str) self._sparql_wrapper.query() return True except EndPointNotFound as _: raise_(SparqlWriterException, "Endpoint not found", sys.exc_info()[2]) except QueryBadFormed as _: raise_(SparqlWriterException, "Bad query: %s" % query_str, sys.exc_info()[2]) except Exception as e: msg = "Exception: %s (query: %s)" % (e, query_str) raise_(SparqlWriterException, msg, sys.exc_info()[2]) def _add_many(self, triples, context=None): debug("ADD several triples") query = insert() if context: query.into(context) for s, p, o in triples: query.template((s, p, o)) query_str = str(query) try: debug(query_str) self._sparql_wrapper.setQuery(query_str) self._sparql_wrapper.query().convert() return True except EndPointNotFound as _: raise_(SparqlWriterException, "Endpoint not found", sys.exc_info()[2]) except QueryBadFormed as _: raise_(SparqlWriterException, "Bad query: %s" % query_str, sys.exc_info()[2]) except Exception as e: raise_(SparqlWriterException, "Exception: %s" % e, sys.exc_info()[2]) def _add(self, s, p, o, context=None): return self._add_many([(s, p, o)], context) def _remove_from_endpoint(self, s=None, p=None, o=None, context=None): debug('REM : %s, %s, %s, %s' % (s, p, o, context)) query = delete() try: if s is None and p is None and o is None and context: query = clear().graph(context) else: if context: query = delete().from_(context) query.template(("?s", "?p", "?o")) if context: where_group = NamedGroup(context) else: where_group = Group() where_group.append(("?s", "?p", "?o")) filter = Filter("({0})".format(self.__build_filter(s, p, o))) where_group.append(filter) query.where(where_group) query_str = str(query) debug(query_str) self._sparql_wrapper.setQuery(query_str) self._sparql_wrapper.query().convert() return True except EndPointNotFound as _: error("SPARQL endpoint not found") except QueryBadFormed as _: error("Bad-formed SPARQL query") except SPARQLWrapperException as _: error("SPARQLWrapper exception") return None def __build_filter(self, s, p, o): vars = [(s, '?s'), (p, '?p'), (o, '?o')] parts = [] for var in vars: if var[0] is not None: parts.append("%s = %s" % (var[1], self._term(var[0]))) return " and ".join(parts) def index_triples(self, **kwargs): """ performs index of the triples if such functionality is present, returns True if operation successful """ # SPARQL/Update does not support indexing operation return False def load_triples(self, source=None, context=None): """ Load resources on the web into the triple-store. :param str source: path to the sources of triples to load :param context: the given context :return: True if successful :rtype: bool """ if source: query = load() query.load(remote_uri=source) if context: query.into(context) query_str = str(query) debug(query_str) self._sparql_wrapper.setQuery(query_str) self._sparql_wrapper.query().convert() return True return False def _clear(self, context=None): """ Clear the triple-store. 
""" self._remove_from_endpoint(None, None, None, context=context) def _term(self, term): if isinstance(term, (URIRef, BNode)): return u'{0:s}'.format elif isinstance(term, (str, str)): if term.startswith('?'): return u'{0:s}'.format(term) elif is_uri(term): return u'<{0:s}>'.format(term) else: return u'"{0:s}"'.format(term) elif type(term) is Literal: return term.n3() elif isinstance(term, (list, tuple)): return '"{0:s}"@{1:s}'.format(term[0], term[1]) elif type(term) is type and hasattr(term, 'uri'): return u'{0:s}'.format elif hasattr(term, 'subject'): return u'{0:s}'.format return term.__str__()
class BrickEndpoint(object): def __init__(self, sparql_url, brick_version, base_ns='', load_schema=True): self.BRICK_VERSION = brick_version self.sparql_url = sparql_url self.sparql = SPARQLWrapper(endpoint=self.sparql_url, updateEndpoint=self.sparql_url + '-auth') self.sparql.queryType = SELECT self.sparql.setCredentials('dba', 'dba') self.sparql.setHTTPAuth(DIGEST) if not base_ns: base_ns = 'http://example.com/' self.BASE = Namespace(base_ns) self.base_graph = base_ns.strip('/') self.sparql.addDefaultGraph(self.base_graph) self.sparql.addParameter("default-graph-uri", self.base_graph) self.BRICK = Namespace( 'https://brickschema.org/schema/{0}/Brick#'\ .format(self.BRICK_VERSION)) self.BRICK_USE = Namespace( 'https://brickschema.org/schema/{0}/BrickUse#'\ .format(self.BRICK_VERSION)) self.BF = Namespace( 'https://brickschema.org/schema/{0}/BrickFrame#'\ .format(self.BRICK_VERSION)) self.BRICK_TAG = Namespace( 'https://brickschema.org/schema/{0}/BrickTag#'\ .format(self.BRICK_VERSION)) self.namespaces = { '': self.BASE, 'base': self.BASE, 'brick': self.BRICK, 'bf': self.BF, 'brick_tag': self.BRICK_TAG, 'brick_use': self.BRICK_USE, 'rdfs': RDFS, 'rdf': RDF, 'owl': OWL, 'foaf': FOAF } self.q_prefix = '' #self.q_prefix = 'DEFINE input:inference <adxrules>\n' for prefix, ns in self.namespaces.items(): if 'uri' in dir(ns): ns_n3 = ns.uri.n3() else: ns_n3 = ns[''].n3() self.q_prefix += 'prefix {0}: {1}\n'.format(prefix, ns_n3) self.q_prefix += '\n' self._init_brick_constants() if load_schema: self.load_schema() self.init_topclasses() def add_namespace(self, prefix, ns): ns = Namespace(ns) self.namespaces[prefix] = ns if 'uri' in dir(ns): ns_n3 = ns.uri.n3() else: ns_n3 = ns[''].n3() self.q_prefix += 'prefix {0}: {1}\n'.format(prefix, ns_n3) def _init_brick_constants(self): self.HAS_LOC = URIRef(self.BF + 'hasLocation') def _get_sparql(self): # If need to optimize accessing sparql object. return self.sparql def update(self, qstr): return self.query(qstr, is_update=True) def _format_select_res(self, raw_res): var_names = raw_res['head']['vars'] values = [[ row[var_name]['value'] if var_name in row else None for var_name in var_names ] for row in raw_res['results']['bindings']] var_names = ['?' 
+ var_name for var_name in var_names] return [var_names, values] def parse_result(self, res): raw_res = res common_res = res return common_res, raw_res def raw_query(self, qstr): return self.query(qstr) def query(self, qstr, is_update=False): sparql = self._get_sparql() if is_update: sparql.setMethod(POST) else: sparql.setMethod(GET) sparql.setReturnFormat(JSON) qstr = self.q_prefix + qstr sparql.setHTTPAuth sparql.setQuery(qstr) raw_res = sparql.query().convert() if sparql.queryType == SELECT: res = self._format_select_res(raw_res) elif sparql.queryType == INSERT: res = raw_res # TODO: Error handling here elif sparql.queryType == 'LOAD': res = raw_res # TODO: Error handling here return res def _create_insert_query(self, triples): q = """ INSERT DATA {{ GRAPH <{0}> {{ """.format(self.base_graph) for triple in triples: triple_str = ' '.join([term.n3() for term in triple]) + ' .\n' q += triple_str q += """} } """ return q def _is_bool(self, s): s = s.lower() if s == 'true' or s == 'false': return True else: return False def _str2bool(self, s): s = s.lower() if s == 'true': return True elif s == 'false': return False else: raise Exception('{0} is not convertible to boolean'.format(s)) def _is_float(self, s): try: float(s) return True except: return False def _parse_term(self, term): if isinstance(term, URIRef) or isinstance(term, Literal): return term elif isinstance(term, str): if 'http' == term[0:4]: node = URIRef(term) elif ':' in term: #TODO: This condition is dangerous. [ns, id_] = term.split(':') ns = self.namespaces[ns] node = ns[id_] else: if term.isdigit(): term = int(term) elif self._is_float(term): term = float(term) elif self._is_bool(term): term = self._str2bool(term) else: # Otherwise, just str pass node = Literal(term) else: node = Literal(term) return node def add_triple(self, pseudo_s, pseudo_p, pseudo_o): triple = self.make_triple(pseudo_s, pseudo_p, pseudo_o) return self._add_triples([triple]) def make_triple(self, pseudo_s, pseudo_p, pseudo_o): s = self._parse_term(pseudo_s) p = self._parse_term(pseudo_p) o = self._parse_term(pseudo_o) return (s, p, o) def add_triples(self, pseudo_triples): if not pseudo_triples: # TODO: Define the right format same ass _add_triples. return True triples = [ self.make_triple(*pseudo_triple) for pseudo_triple in pseudo_triples ] return self._add_triples(triples) def _add_triples(self, triples): q = self._create_insert_query(triples) res = self.update(q) return res def add_brick_instance(self, entity_name, tagset): entity = URIRef(self.BASE + entity_name) tagset = URIRef(self.BRICK + tagset) triples = [(entity, RDF.type, tagset)] self._add_triples(triples) return str(entity) def load_ttlfile(self, filepath): q = """ load <file://{0}> into <{1}> """.format(filepath, self.base_graph) res = self.update(q) def load_schema(self): schema_urls = [ str(ns)[:-1] + '.ttl' for ns in [self.BRICK, self.BRICK_USE, self.BF, self.BRICK_TAG] ] load_query_template = 'LOAD <{0}> into <{1}>' for schema_url in schema_urls: qstr = load_query_template.format( schema_url.replace('https', 'http'), self.base_graph) res = self.update(qstr) def init_topclasses(self, force=False): topclasses_file = 'Brick/topclasses.json' if os.path.isfile(topclasses_file) and not force: with open(topclasses_file, 'r') as fp: self.topclasses = json.load(fp) else: self.topclasses = {} target_topclasses = ['Point', 'Equipment', 'Location'] qstr_template = """ select ?tagset where {{ ?tagset rdfs:subClassOf* brick:{0}. 
}} """ for topclass in target_topclasses: qstr = qstr_template.format(topclass) res = self.query(qstr) for [tagset] in res[1]: self.topclasses[tagset.split('#') [-1].lower()] = topclass.lower() with open(topclasses_file, 'w') as fp: json.dump(self.topclasses, fp, indent=2) def sparqlres2df(self, res): column_names = res[0] data = res[1] return pd.DataFrame(data=data, columns=column_names) def sparqlres2csv(self, res, filename): self.sparqlres2df(res).to_csv(filename) def get_top_class(self, tagset): pass def get_tagset_type(self, tagset): pure_tagset = tagset.split('-')[0] postfix = pure_tagset.split('_')[-1].lower() if postfix in [ 'server', 'networkadapter', ]: return 'networkadapter' else: topclass = self.topclasses.get(pure_tagset, None) if topclass: return topclass.lower() else: if postfix in [ 'sensor', 'setpoint', 'status', 'alarm', 'command', ]: return 'point' else: return 'unidentified' def normalize2uri(self, s): return '_'.join(re.findall('[a-zA-Z0-9]+', s)) def serialize_graph(self, filename, nobrick=True): g = rdflib.Graph() qstr = """ select ?s ?p ?o where { ?s ?p ?o. } """ res = self.query(qstr) for row in res[1]: g.add((URIRef(row[0]), URIRef(row[1]), URIRef(row[2]))) g.serialize(filename, format='turtle') def check_tag_in_tagset(self, tag, tagset): TODO
class Sparql(object): def __init__(self, entities, config_file, dataset, endpoint, default_graph, entity_class): self.entities = entities # file containing a list of entities self.dataset = dataset self.wrapper = SPARQLWrapper(endpoint) self.wrapper.setReturnFormat(JSON) if default_graph: self.default_graph = default_graph self.wrapper.addDefaultGraph(self.default_graph) self.entity_class = entity_class self.query_prop = "SELECT ?s ?o WHERE {?s %s ?o. }" self.query_prop_uri = "SELECT ?s ?o WHERE {?s %s ?o. FILTER (?s = %s)}" self._define_properties(config_file) def _define_properties(self, config_file): with codecs.open(config_file, 'r', encoding='utf-8') as config_read: property_file = json.loads(config_read.read()) try: self.properties = [i for i in property_file[self.dataset]] print(self.properties) except KeyError: print("No set of properties provided in the dataset") if not self.entity_class: query_all_prop = "SELECT distinct ?p " \ "WHERE {?s ?p ?o. FILTER(!isLiteral(?o) && regex(STR(?p),\"dbpedia.org/ontology\"))}" self._get_properties(query_all_prop) else: query_category_prop = "select distinct ?p " \ "where { ?s a dbo:Band. ?s ?p ?o. " \ "FILTER(!isLiteral(?o) && regex(STR(?p),\"dbpedia.org/ontology\"))} " self._get_properties(query_category_prop) def _get_properties( self, query ): # get all the properties from sparql endpoint if a list is not provided in config file self.properties = [] self.wrapper.setQuery(query) self.wrapper.setReturnFormat(JSON) for results in self.wrapper.query().convert()['results']['bindings']: self.properties.append(results['p']['value']) self.properties.append("dct:subject") self.properties.append("rdf:type") def get_property_graphs(self): properties = self.properties if 'feedback' in properties: properties.remove( 'feedback') # don't query for the feedback property for prop in properties: # iterate on the properties prop_short = prop prop_namespace = prop if '/' in prop: # avoid creating file with a '/' in the name prop_short = prop.split('/')[-1] # if it is actually a URI, surround by "<>" if prop.startswith("http"): prop_namespace = '<' + prop + '>' try: mkdir('datasets/%s/' % self.dataset) mkdir('datasets/%s/graphs' % self.dataset) except: pass with codecs.open('datasets/%s/graphs/%s.edgelist' % (self.dataset, prop_short), 'w', encoding='utf-8' ) as prop_graph: #open a property file graph if self.entities == "all": self.wrapper.setQuery(self.query_prop % prop_namespace) for result in self.wrapper.query().convert( )['results']['bindings']: subj = result['s']['value'] obj = result['o']['value'] print((subj, obj)) prop_graph.write('%s %s\n' % (subj, obj)) else: with codecs.open( '%s' % self.entities, 'r', encoding='utf-8' ) as f: # open entity file, select only those entities for uri in f: # for each entity uri = uri.strip('\n') uri = '<' + uri + '>' self.wrapper.setQuery(self.query_prop_uri % (prop_namespace, uri)) for result in self.wrapper.query().convert( )['results']['bindings']: subj = result['s']['value'] obj = result['o']['value'] print((subj, obj)) prop_graph.write('%s %s\n' % (subj, obj)) f.seek(0) # reinitialize iterator @staticmethod def get_uri_from_wiki_id(wiki_id): sparql = SPARQLWrapper("http://dbpedia.org/sparql") sparql.setQuery( """select ?s where {?s <http://dbpedia.org/ontology/wikiPageID> %d }""" % int(wiki_id)) sparql.setReturnFormat(JSON) try: uri = sparql.query().convert( )['results']['bindings'][0]['s']['value'] except: uri = None return uri
class Graph_Linker(): def __init__(self, *args, **kwargs): self.sparql = SPARQLWrapper(os.getenv('SPARQL_ENDPOINT')) self.sparql.addDefaultGraph(os.getenv('SPARQL_GRAPH')) return super().__init__(*args, **kwargs) def link_authors(self): print('Linking authors...') self.sparql.setQuery(""" PREFIX dblp: <https://dblp.org/ontologies/> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> INSERT { ?a rdf:type dblp:Author } WHERE { SELECT DISTINCT ?a WHERE { ?a dblp:write ?p . FILTER NOT EXISTS { ?a rdf:type dblp:Author } } } """) self.sparql.query() print('Authors linked to https://dblp.org/ontologies/Author.') def link_papers(self): print('Linking papers...') self.sparql.setQuery(""" PREFIX dblp: <https://dblp.org/ontologies/> PREFIX bibo: <http://purl.org/ontology/bibo/> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> INSERT { ?p rdf:type bibo:AcademicArticle } WHERE { SELECT DISTINCT ?p WHERE { ?a dblp:write ?p . FILTER NOT EXISTS { ?p rdf:type bibo:AcademicArticle } } } """) self.sparql.query() print( 'Papers linked to http://purl.org/ontology/bibo/AcademicArticle.') def link_reviewers(self): print('Linking reviewers...') self.sparql.setQuery(""" PREFIX dblp: <https://dblp.org/ontologies/> PREFIX bibo: <http://purl.org/ontology/bibo/> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> INSERT { ?s rdf:type dblp:Reviewer } WHERE { SELECT DISTINCT ?s WHERE { ?s dblp:writeReview ?r . ?r dblp:about ?p . ?p rdf:type bibo:AcademicArticle . FILTER NOT EXISTS { ?s rdf:type dblp:Reviewer } } } """) self.sparql.query() print('Reviewers linked to https://dblp.org/ontologies/Reviewer.') def link_schools(self): print('Linking schools...') self.sparql.setQuery(""" PREFIX dblp: <https://dblp.org/ontologies/> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> INSERT { ?s rdf:type dblp:School } WHERE { SELECT DISTINCT ?s WHERE { ?a dblp:affiliatedWith ?s . FILTER ( regex(str(?s), "/schools/" )) . FILTER NOT EXISTS { ?s rdf:type dblp:School } } } """) self.sparql.query() print('Schools linked to https://dblp.org/ontologies/School.') def link_journals(self): print('Linking journals...') self.sparql.setQuery(""" PREFIX dblp: <https://dblp.org/ontologies/> PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> INSERT { ?j rdf:type dbo:AcademicJournal } WHERE { SELECT DISTINCT ?j WHERE { ?p dblp:publishedIn ?j . FILTER ( regex(str(?s), "/journals/" )) . FILTER NOT EXISTS { ?j rdf:type dbo:AcademicJournal } } } """) self.sparql.query() print( 'Journals linked to http://dbpedia.org/ontology/AcademicJournal.') def link_conferences(self): print('Linking conferences...') self.sparql.setQuery(""" PREFIX dblp: <https://dblp.org/ontologies/> PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> INSERT { ?c rdf:type dbo:AcademicConference } WHERE { SELECT DISTINCT ?c WHERE { ?p dblp:publishedIn ?c . FILTER ( regex(str(?c), "/conf/" )) . FILTER NOT EXISTS { ?c rdf:type dbo:AcademicConference } } } """) self.sparql.query() print( 'Conferences linked to http://dbpedia.org/ontology/AcademicConference.' ) def link_random_open_access_journals(self): print('Generating and linking random open access journals...') self.sparql.setQuery(""" PREFIX dblp: <https://dblp.org/ontologies/> PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> INSERT { ?j rdf:type dblp:OpenAccessJournal } WHERE { SELECT DISTINCT ?j WHERE { ?j rdf:type dbo:AcademicJournal . 
FILTER NOT EXISTS { ?j rdf:type dblp:OpenAccessJournal . ?j rdf:type dblp:CloseAccessJournal } } ORDER BY RAND() LIMIT 200 } """) self.sparql.query() print( 'Open access journals generated and linked to https://dblp.org/ontologies/OpenAccessJournal.' ) def link_random_close_access_journals(self): print('Generating and linking random close access journals...') self.sparql.setQuery(""" PREFIX dblp: <https://dblp.org/ontologies/> PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> INSERT { ?j rdf:type dblp:CloseAccessJournal } WHERE { SELECT DISTINCT ?j WHERE { ?j rdf:type dbo:AcademicJournal . FILTER NOT EXISTS { ?j rdf:type dblp:OpenAccessJournal . ?j rdf:type dblp:CloseAccessJournal } } ORDER BY RAND() LIMIT 200 } """) self.sparql.query() print( 'Close access journals generated and linked to https://dblp.org/ontologies/CloseAccessJournal.' ) def link_algorithm_conferences(self): print('Linking algorithm conferences...') self.sparql.setQuery(""" PREFIX dblp: <https://dblp.org/ontologies/> PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> INSERT { ?c rdf:type dblp:AlgorithmConference } WHERE { SELECT DISTINCT ?c WHERE { ?c rdf:type dbo:AcademicConference . ?p dblp:publishedIn ?c . ?p dblp:keyword ?k . FILTER(str(?k) IN ('algorithm')) . FILTER NOT EXISTS { ?c rdf:type dblp:AlgorithmConference } } } """) self.sparql.query() print( 'Algorithm conferences linked to https://dblp.org/ontologies/AlgorithmConference.' ) def link_network_conferences(self): print('Linking network conferences...') self.sparql.setQuery(""" PREFIX dblp: <https://dblp.org/ontologies/> PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> INSERT { ?c rdf:type dblp:NetworkConference } WHERE { SELECT DISTINCT ?c WHERE { ?c rdf:type dbo:AcademicConference . ?p dblp:publishedIn ?c . ?p dblp:keyword ?k . FILTER(str(?k) IN ('network', 'networks', 'cloud', 'internet', 'wlans')) . FILTER NOT EXISTS { ?c rdf:type dblp:NetworkConference } } } """) self.sparql.query() print( 'Network conferences linked to https://dblp.org/ontologies/NetworkConference.' ) def link_database_conferences(self): print('Linking database conferences...') self.sparql.setQuery(""" PREFIX dblp: <https://dblp.org/ontologies/> PREFIX dbo: <http://dbpedia.org/ontology/> PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> INSERT { ?c rdf:type dblp:DatabaseConference } WHERE { SELECT DISTINCT ?c WHERE { ?c rdf:type dbo:AcademicConference . ?p dblp:publishedIn ?c . ?p dblp:keyword ?k . FILTER(str(?k) IN ('data', 'database', 'databases')) . FILTER NOT EXISTS { ?c rdf:type dblp:DatabaseConference } } } """) self.sparql.query() print( 'Database conferences linked to https://dblp.org/ontologies/DatabaseConference.' )
class BrickEndpoint(object): def __init__(self, sparql_url, brick_version, base_ns='', load_schema=True): BRICK_VERSION = brick_version self.sparql_url = sparql_url self.sparql = SPARQLWrapper(endpoint=self.sparql_url, updateEndpoint=self.sparql_url + '-auth') self.sparql.queryType = SELECT self.sparql.setCredentials('dba', 'dba') self.sparql.setHTTPAuth(DIGEST) if not base_ns: base_ns = 'http://example.com/' self.base_graph = 'urn:' + str(gen_uuid()) self.sparql.addDefaultGraph(self.base_graph) self.namespaces = { '': BASE, 'brick': BRICK, 'bf': BF, 'rdfs': RDFS, 'rdf': RDF, 'owl': OWL, 'foaf': FOAF } sparql_prefix = '' #for prefix, ns in self.namespaces.items(): # ns_n3 = ns.uri.n3() # sparql_prefix += 'prefix {0}: {1}\n'.format(prefix, ns_n3) #sparql_prefix += '\n' self._init_brick_constants() if load_schema: self.load_schema() def _init_brick_constants(self): self.HAS_LOC = URIRef(BF + 'hasLocation') def _get_sparql(self): # If need to optimize accessing sparql object. return self.sparql def update(self, qstr): return self.query(qstr, is_update=True) def _format_select_res(self, raw_res): var_names = raw_res['head']['vars'] values = [{ var_name: row[var_name]['value'] if var_name in row else None for var_name in var_names } for row in raw_res['results']['bindings']] #var_names = [var_name for var_name in var_names] #return [var_names, values] return values def parse_result(self, res): raw_res = res common_res = res return common_res, raw_res def raw_query(self, qstr): return self.query(qstr) def query(self, qstr, is_update=False): sparql = self._get_sparql() if is_update: sparql.setMethod(POST) else: sparql.setMethod(GET) sparql.setReturnFormat(JSON) qstr = sparql_prefix + qstr sparql.setHTTPAuth sparql.setQuery(qstr) raw_res = sparql.query().convert() if sparql.queryType == SELECT: res = self._format_select_res(raw_res) elif sparql.queryType == INSERT: res = raw_res # TODO: Error handling here elif sparql.queryType == 'LOAD': res = raw_res # TODO: Error handling here else: res = raw_res return res def _create_insert_query(self, triples): q = """ INSERT DATA {{ GRAPH <{0}> {{ """.format(self.base_graph) for triple in triples: # triple_str = ' '.join([term.n3() for term in triple]) + ' .\n' triple_str = ' '.join( ['<{0}>'.format(str(term)) for term in triple]) + ' .\n' q += triple_str q += """} } """ return q def _is_bool(self, s): s = s.lower() if s == 'true' or s == 'false': return True else: return False def _str2bool(self, s): s = s.lower() if s == 'true': return True elif s == 'false': return False else: raise Exception('{0} is not convertible to boolean'.format(s)) def _is_float(self, s): try: float(s) return True except: return False def _parse_term(self, term): if isinstance(term, URIRef) or isinstance(term, Literal): return term elif isinstance(term, str): if 'http' == term[0:4]: node = URIRef(term) elif ':' in term: #TODO: This condition is dangerous. 
[ns, id_] = term.split(':') ns = self.namespaces[ns] node = ns[id_] else: if term.isdigit(): term = int(term) elif self._is_float(term): term = float(term) if self._is_bool(term): term = _str2bool(term) node = Literal(term) else: node = Literal(term) return node def add_triple(self, pseudo_s, pseudo_p, pseudo_o): triple = self.make_triple(pseudo_s, pseudo_p, pseudo_o) self.add_triples([triple]) def add(self, triple): self.add_triples([triple]) def make_triple(self, pseudo_s, pseudo_p, pseudo_o): s = self._parse_term(pseudo_s) p = self._parse_term(pseudo_p) o = self._parse_term(pseudo_o) return (s, p, o) def add_triples(self, pseudo_triples): triples = [ self.make_triple(*pseudo_triple) for pseudo_triple in pseudo_triples ] self._add_triples(triples) def _add_triples(self, triples): q = self._create_insert_query(triples) res = self.update(q) def add_brick_instance(self, entity_name, tagset): entity = URIRef(BASE + entity_name) tagset = URIRef(BRICK + tagset) triples = [(entity, RDF.type, tagset)] self._add_triples(triples) return str(entity) def load_ttlfile(self, filepath): q = """ load <file://{0}> into <{1}> """.format(filepath, self.base_graph) res = self.update(q) def load_schema(self): self.load_ttlfile(BRICK_FILE) self.load_ttlfile(BF_FILE) def parse(self, filepath, format=None): self.load_ttlfile(filepath) def serialize(self): qstr = """ select ?s ?p ?o where{ ?s ?p ?o . FILTER(STRSTARTS(STR(?s), "%s")) } """ % (BASE) res = self.raw_query(qstr) return res def __add__(self, other): assert isinstance(other, BrickEndpoint) qstr = """ select ?s ?p ?o where{ ?s ?p ?o . FILTER(STRSTARTS(STR(?s), "%s")) } """ % (BASE) res = other.raw_query(qstr) triples = [(URIRef(row['s']), URIRef(row['p']), URIRef(row['o'])) for row in res] triple_chunks = chunks(triples, 300) for chunk in triple_chunks: self._add_triples(chunk) return self
class WriterPlugin(RDFWriter): def __init__(self, reader, *args, **kwargs): super(WriterPlugin, self).__init__(reader, *args, **kwargs) if isinstance(self.reader, ReaderPlugin): self._endpoint = self.reader.endpoint else: self._endpoint = kwargs.get("endpoint") self._combine_queries = kwargs.get("combine_queries") self._results_format = JSON self._sparql_wrapper = SPARQLWrapper(self._endpoint, returnFormat=self._results_format) user = kwargs.get('user', None) password = kwargs.get('password', None) if user is not None and password is not None: self._sparql_wrapper.setCredentials(user, password) self._sparql_wrapper.setMethod("POST") default_graph = kwargs.get('default_graph',None) if default_graph: self._sparql_wrapper.addDefaultGraph(default_graph) @property def endpoint(self): return self._endpoint def _save(self, *resources): for context, items in _group_by_context(resources).iteritems(): # Deletes all triples with matching subjects. remove_query = _prepare_delete_many_query(items, context) insert_query = _prepare_add_many_query(items, context) self._execute(remove_query, insert_query) def _update(self, *resources): for context, items in _group_by_context(resources).iteritems(): # Explicitly enumerates triples for deletion. remove_query = _prepare_selective_delete_query(items, context) insert_query = _prepare_add_many_query(items, context) self._execute(remove_query, insert_query) def _remove(self, *resources, **kwargs): for context, items in _group_by_context(resources).iteritems(): # Deletes all triples with matching subjects. inverse = kwargs.get("inverse") query = _prepare_delete_many_query(items, context, inverse) self._execute(query) def _size(self): """ Return total count of triples, not implemented. """ raise NotImplementedError def _add_triple(self, s=None, p=None, o=None, context=None): self._add(s, p, o, context) def _set_triple(self, s=None, p=None, o=None, context=None): self._remove_from_endpoint(s, p, context=context) self._add(s, p, o, context) def _remove_triple(self, s=None, p=None, o=None, context=None): self._remove_from_endpoint(s, p, o, context) def _execute(self, *queries): """ Execute several queries. """ translated = [unicode(query) for query in queries] if self._combine_queries: translated = ["\n".join(translated)] try: for query_str in translated: debug(query_str) self._sparql_wrapper.setQuery(query_str) self._sparql_wrapper.query() return True except EndPointNotFound, _: raise SparqlWriterException("Endpoint not found"), None, sys.exc_info()[2] except QueryBadFormed, _: raise SparqlWriterException("Bad query: %s" % query_str), None, sys.exc_info()[2]
async def prenten(request):
    sparql = SPARQLWrapper(SPARQL_URI, returnFormat=JSON)
    sparql.addDefaultGraph("http://lod.kb.nl/gvn/ubl01/")
    sparql.setQuery(SPARQL_QUERY)
    res = sparql.query()
    return web.json_response(data=res.convert())
class Sparql(object):
    """SPARQL queries to define property list and get property-specific subgraphs"""

    def __init__(self, entities, config_file, dataset, endpoint, default_graph):
        self.entities = entities  # file containing a list of entities
        self.dataset = dataset
        self.wrapper = SPARQLWrapper(endpoint)
        self.wrapper.setReturnFormat(JSON)
        if default_graph:
            self.default_graph = default_graph
            self.wrapper.addDefaultGraph(self.default_graph)
        self.query_prop = "SELECT ?s ?o WHERE {?s %s ?o. }"
        self.query_prop_uri = "SELECT ?s ?o WHERE {?s %s ?o. FILTER (?s = %s)}"
        self._define_properties(config_file)

    def _define_properties(self, config_file):
        self.properties = []
        with codecs.open(config_file, 'r', encoding='utf-8') as config_read:
            property_file = json.loads(config_read.read())
        for property_name in property_file[self.dataset]['content']:
            if 'feedback_' in property_name:
                property_name = property_name.replace('feedback_', '')
            self.properties.append(property_name)

    def get_property_graphs(self):
        properties = self.properties
        if 'feedback' in properties:
            properties.remove('feedback')  # don't query for the feedback property

        for prop in properties:  # iterate on the properties
            prop_short = prop
            prop_namespace = prop
            if '/' in prop:  # avoid creating a file with a '/' in the name
                prop_short = prop.split('/')[-1]
            if prop.startswith("http"):  # if it is actually a URI, surround it with "<>"
                prop_namespace = '<' + prop + '>'
            try:
                mkdir('datasets/%s/' % self.dataset)
                mkdir('datasets/%s/graphs' % self.dataset)
            except OSError:
                pass  # directories already exist
            # open a property graph file
            with codecs.open('datasets/%s/graphs/%s.edgelist' % (self.dataset, prop_short),
                             'w', encoding='utf-8') as prop_graph:
                for uri in self.entities:
                    uri = '<' + uri + '>'
                    self.wrapper.setQuery(self.query_prop_uri % (prop_namespace, uri))
                    for result in self.wrapper.query().convert()['results']['bindings']:
                        subj = result['s']['value']
                        obj = result['o']['value']
                        print((subj, obj))
                        prop_graph.write('%s %s\n' % (subj, obj))

    @staticmethod
    def get_uri_from_wiki_id(wiki_id):
        sparql = SPARQLWrapper("http://dbpedia.org/sparql")
        sparql.setQuery("""select ?s where {?s <http://dbpedia.org/ontology/wikiPageID> %d }""" % int(wiki_id))
        sparql.setReturnFormat(JSON)
        try:
            uri = sparql.query().convert()['results']['bindings'][0]['s']['value']
        except Exception:
            uri = None
        return uri

    @staticmethod
    def get_item_metadata(uri, item_type, thumbnail_exists):
        sparql = SPARQLWrapper("http://dbpedia.org/sparql")
        sparql.setQuery("""select ?labelo ?labelp ?labels ?description ?abstract ?homepage
                                  ?authorlabo ?authorlabp ?authorlabs
        where {
            OPTIONAL { <%s> <http://dbpedia.org/ontology/label> ?labelo .
                       FILTER(lang(?labelo) = 'en') }
            OPTIONAL { <%s> <http://dbpedia.org/property/label> ?labelp .
                       FILTER(lang(?labelp) = 'en') }
            OPTIONAL { <%s> <http://www.w3.org/2000/01/rdf-schema#label> ?labels .
                       FILTER(lang(?labels) = 'en') }
            OPTIONAL { <%s> <http://purl.org/dc/terms/description> ?description .
                       FILTER(lang(?description) = 'en') }
            OPTIONAL { <%s> <http://xmlns.com/foaf/0.1/homepage> ?homepage . }
            OPTIONAL { <%s> <http://dbpedia.org/ontology/abstract> ?abstract .
                       FILTER(lang(?abstract) = 'en') }
            OPTIONAL { <%s> dbo:author ?o. ?o rdfs:label ?authorlabs.
                       FILTER(lang(?authorlabs) = 'en') }
            OPTIONAL { <%s> dbo:author ?o. ?o dbo:label ?authorlabo.
                       FILTER(lang(?authorlabo) = 'en') }
            OPTIONAL { <%s> dbo:author ?o. ?o dbp:label ?authorlabp.
                       FILTER(lang(?authorlabp) = 'en') }
        }""" % (uri, uri, uri, uri, uri, uri, uri, uri, uri))
        sparql.setReturnFormat(JSON)
        try:
            # check whether the query returns a non-empty list
            result_raw = sparql.query().convert()['results']['bindings'][0]
            result = {}
            for key, value in result_raw.items():
                result[key] = value['value']

            # at least one label must be there
            missing = 0
            for label_key in ('labels', 'labelp', 'labelo'):
                try:
                    result['label'] = result[label_key]
                except KeyError:
                    missing += 1
            if missing == 3:
                result = None

            # same with the author label
            missing = 0
            for author_key in ('authorlabs', 'authorlabp', 'authorlabo'):
                try:
                    result['author'] = result[author_key]
                except KeyError:
                    missing += 1
            if missing == 3:
                result = None

            # either abstract or description must be there
            if 'abstract' not in result.keys() and 'description' not in result.keys():
                result = None

            if not thumbnail_exists:  # scrape Google Images for a thumbnail
                out = subprocess.check_output(
                    ["googleimagesdownload", "--keywords",
                     "\"%s %s %s\"" % (result['label'].replace(',', ''), result['author'], item_type),
                     "--print_urls", "-l", "1"])
                url = out.decode('utf-8').split('\n')[4].replace('Image URL: ', '')
                result['thumbnail'] = url
                if not result['thumbnail']:  # skip the item if there is no thumbnail
                    result = None
        except Exception:
            result = None
        return result
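A short instantiation sketch for the class above (not from the original source). The 'movies' dataset key, the config-file name, and the entity URIs are assumptions; the config JSON is expected to list the properties to query under [dataset]['content'].

# Hypothetical driver for the Sparql class above; file names, the 'movies'
# dataset key and the entity URIs are assumptions, not from the source.
entities = ["http://dbpedia.org/resource/Pulp_Fiction",
            "http://dbpedia.org/resource/The_Godfather"]
sparql_client = Sparql(entities,
                       config_file='properties.json',  # must contain ['movies']['content']
                       dataset='movies',
                       endpoint='http://dbpedia.org/sparql',
                       default_graph='http://dbpedia.org')
sparql_client.get_property_graphs()  # writes datasets/movies/graphs/<property>.edgelist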
"""Virtuoso driver for graph database The UWKGM project :copyright: (c) 2020 Ichise Laboratory at NII & AIST :author: Rungsiman Nararatwong """ from dorest import env from SPARQLWrapper import SPARQLWrapper, JSON from database.database.graph import default_graph_uri config = env.resolve('database.virtuoso') if 'port' in config: client = SPARQLWrapper('%s:%s/sparql/' % (config['address'], config['port'])) else: client = SPARQLWrapper('%s/sparql/' % config['address']) client.addDefaultGraph(default_graph_uri) client.setReturnFormat(JSON)
from flask import Flask
from SPARQLWrapper import SPARQLWrapper, JSON
import os
from dotenv import load_dotenv
from flask import request
from flask import abort

app = Flask(__name__)
load_dotenv()

# Set up the SPARQL connection
sparql = SPARQLWrapper(os.getenv('SPARQL_ENDPOINT'))
sparql.setReturnFormat(JSON)
sparql.addDefaultGraph('http://*****:*****


@app.route('/productcategory/', methods=['GET'])
def get_product_category():
    """Get the category of a product given its name.

    Args:
        product_name (str): The product name. It should be URL encoded,
            for example "Charcoal, sack" -> Charcoal%2C%20sack

    Returns:
        str: The category of the queried product name.
    """
    product_name = request.args.get('product_name')
    product_name = product_name.replace('"', '')
    print(product_name)
    q = (
        f"""
        PREFIX reachIT: <http://www.reach-it.com/ontology/>
        SELECT str(?c) as ?type
class Sparql(object):

    def __init__(self, entities, config_file, dataset, endpoint, default_graph, entity_class):
        self.entities = entities  # file containing a list of entities
        self.dataset = dataset
        self.wrapper = SPARQLWrapper(endpoint)
        self.wrapper.setReturnFormat(JSON)
        if default_graph:
            self.default_graph = default_graph
            self.wrapper.addDefaultGraph(self.default_graph)
        self.entity_class = entity_class
        self.query_prop = "SELECT ?s ?o WHERE {?s %s ?o. }"
        self.query_prop_uri = "SELECT ?s ?o WHERE {?s %s ?o. FILTER (?s = %s)}"
        self._define_properties(config_file)

    def _define_properties(self, config_file):
        with codecs.open(config_file, 'r', encoding='utf-8') as config_read:
            property_file = json.loads(config_read.read())
        try:
            self.properties = [i for i in property_file[self.dataset]]
            print(self.properties)
        except KeyError:
            print("No set of properties provided in the dataset")
            if not self.entity_class:
                query_all_prop = ("SELECT distinct ?p "
                                  "WHERE {?s ?p ?o. FILTER(!isLiteral(?o) && "
                                  "regex(STR(?p),\"dbpedia.org/ontology\"))}")
                self._get_properties(query_all_prop)
            else:
                query_category_prop = ("select distinct ?p "
                                       "where { ?s a dbo:Band. ?s ?p ?o. "
                                       "FILTER(!isLiteral(?o) && regex(STR(?p),\"dbpedia.org/ontology\"))} ")
                self._get_properties(query_category_prop)

    def _get_properties(self, query):
        # get all the properties from the sparql endpoint if a list is not provided in the config file
        self.properties = []
        self.wrapper.setQuery(query)
        self.wrapper.setReturnFormat(JSON)
        for results in self.wrapper.query().convert()['results']['bindings']:
            self.properties.append(results['p']['value'])
        self.properties.append("dct:subject")
        self.properties.append("rdf:type")

    def get_property_graphs(self):
        properties = self.properties
        if 'feedback' in properties:
            properties.remove('feedback')  # don't query for the feedback property

        for prop in properties:  # iterate on the properties
            prop_short = prop
            prop_namespace = prop
            if '/' in prop:  # avoid creating a file with a '/' in the name
                prop_short = prop.split('/')[-1]
            if prop.startswith("http"):  # if it is actually a URI, surround it with "<>"
                prop_namespace = '<' + prop + '>'
            try:
                mkdir('datasets/%s/' % self.dataset)
                mkdir('datasets/%s/graphs' % self.dataset)
            except OSError:
                pass  # directories already exist
            # open a property graph file
            with codecs.open('datasets/%s/graphs/%s.edgelist' % (self.dataset, prop_short),
                             'w', encoding='utf-8') as prop_graph:
                if self.entities == "all":
                    self.wrapper.setQuery(self.query_prop % prop_namespace)
                    for result in self.wrapper.query().convert()['results']['bindings']:
                        subj = result['s']['value']
                        obj = result['o']['value']
                        print((subj, obj))
                        prop_graph.write('%s %s\n' % (subj, obj))
                else:
                    # open the entity file, select only those entities
                    with codecs.open('%s' % self.entities, 'r', encoding='utf-8') as f:
                        for uri in f:  # for each entity
                            uri = uri.strip('\n')
                            uri = '<' + uri + '>'
                            self.wrapper.setQuery(self.query_prop_uri % (prop_namespace, uri))
                            for result in self.wrapper.query().convert()['results']['bindings']:
                                subj = result['s']['value']
                                obj = result['o']['value']
                                print((subj, obj))
                                prop_graph.write('%s %s\n' % (subj, obj))
                        f.seek(0)  # reset the file iterator for the next property

    @staticmethod
    def get_uri_from_wiki_id(wiki_id):
        sparql = SPARQLWrapper("http://dbpedia.org/sparql")
        sparql.setQuery("""select ?s where {?s <http://dbpedia.org/ontology/wikiPageID> %d }""" % int(wiki_id))
        sparql.setReturnFormat(JSON)
        try:
            uri = sparql.query().convert()['results']['bindings'][0]['s']['value']
        except Exception:
            uri = None
        return uri
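One thing worth noting about the class above: entity_class is stored in __init__, but query_category_prop hard-codes dbo:Band. A hedged sketch of how the category filter could be parameterized instead (the helper name is hypothetical, not part of the original class):

# Hypothetical variant that substitutes self.entity_class (e.g. "dbo:Band" or a
# full URI wrapped in <>) instead of the hard-coded dbo:Band.
def _category_property_query(self):
    return ("select distinct ?p where { ?s a %s. ?s ?p ?o. "
            "FILTER(!isLiteral(?o) && "
            "regex(STR(?p), \"dbpedia.org/ontology\"))}" % self.entity_class)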
config.read('config.ini')

URI = sys.argv[1]
classType = sys.argv[2]

endpoint_uri = config['Mandatory']['endpointURI']
graph_uri = config['Mandatory']['graphURI']

# Set up endpoint and access to triple store
sparql = SPARQLWrapper(endpoint_uri)
sparql.setReturnFormat(JSON)
sparql.setMethod(POST)
store = SPARQLUpdateStore(endpoint_uri, endpoint_uri)

# Specify the (named) graph we're working with
sparql.addDefaultGraph(graph_uri)

# Create an in-memory graph
g = Graph(store, identifier=graph_uri)

query = "select ?p ?o where {<" + URI + "> ?p ?o}"
properties = g.query(query)

# Configuration mappings
mapping = ConfigParser()
mapping.read('mapping_fields.ini')

propURI = ""
props = ""

for row in properties:
    propURI = str(row[0])