Example #1
def contact_sparql_endpoint():
    # Contact the SPARQL endpoint for a given dataset (dataset name = /data).
    # SPARQL service = SPARQL endpoint = the triplestore that stores the annotations.
    # Results are requested in JSON format.

    # The project's local SPARQL endpoint is reachable at: http://localhost:3030/data/query
    # sparql_endpoint = SPARQLWrapper("http://localhost:3030/data/query", returnFormat="json")

    # The project's official SPARQL endpoint is reachable at: http://tweb2015.cs.unibo.it:8080/data
    # Each group has a graph on this endpoint; its IRI is: http://vitali.web.cs.unibo.it/raschietto/graph/ltw1537
    # Authentication is required with user=ltw1537, password=
    # sparql_endpoint = SPARQLWrapper("http://tweb2015.cs.unibo.it:8080/data", returnFormat="json")

    # example SPARQL endpoint
    sparql_endpoint = SPARQLWrapper("http://dbpedia.org/sparql", returnFormat="json")

    # example query
    query = """
        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
        SELECT ?label
        WHERE { <http://dbpedia.org/resource/Asturias> rdfs:label ?label }
    """

    # set the SPARQL query
    sparql_endpoint.setQuery(query)

    # execute the query
    results = sparql_endpoint.query().convert()

    # display the results
    for result in results["results"]["bindings"]:
        print(result["label"])
Example #2
def Inizialier(endpoint):
#     mongo.startTest(endpoint) # add Lock
    q = queryGenerator.QueryGenerator()
    sparql = SPARQLWrapper(endpoint['url'])
    sparql.setTimeout(300)
    
    return endpoint, q, sparql
Example #3
class SPARQLKB(KB):
    '''SPARQL endpoint knowledge base'''
    def __init__(self, sentence=None, endpoint=None):
        '''Constructor method
        Usage:
        sentence - RDF triple to be added to KB (not implemented; default:None)
        endpoint - URL of endpoint to query'''
        self.sparql = SPARQLWrapper(endpoint)
        if sentence:
            self.tell(sentence)

    def tell(self, sentence):
        '''Adding triples to RDF store - not implemented'''
        raise NotImplementedError('Adding sentences to RDF knowledge bases is not implemented')

    def ask(self, query):
        '''Queries the endpoint'''
        self.sparql.setQuery(query)
        self.sparql.setReturnFormat(XML)
        res = self.sparql.query().convert().getElementsByTagName('result')
        res = [dict([(bin.attributes['name'].nodeValue, bin.firstChild.firstChild.nodeValue) for bin in node.getElementsByTagName('binding')]) for node in res]
        return res

    def retract(self, sentence):
        '''Removing triples from RDF store - not implemented'''
        raise NotImplementedError('Removing sentences from RDF knowledge bases is not implemented')

    def _encode(self, key, value):
        '''Encoding a value in the triple store - not implemented'''
        raise NotImplementedError('Encoding values in RDF knowledge bases is not implemented')

    def _decode(self, key):
        '''Decoding a value from the triple store - not implemented'''
        raise NotImplementedError('Decoding values from RDF knowledge bases is not implemented')
Example #4
def sparql():
    query = request.args.get("query", None)
    inferencing = request.args.get("inferencing")

    # If the query and inference variables are not empty, send the sparql query
    # to the stardog database and return the results as JSON
    if query and inferencing:

        sparql = SPARQLWrapper(TRIPLE_STORE + "/query")

        sparql.setQuery(query)

        sparql.setReturnFormat(JSON)
        sparql.addParameter("Accept", "application/sparql-results+json")

        sparql.addParameter("reasoning", inferencing)

        try:
            response = sparql.query().convert()

            return jsonify(response)
        except Exception:
            return jsonify({"result": "Error"})
    else:
        return jsonify({"result": "Error"})
Example #5
def describe(request, type, path, format='rdf'):
    uri = '<http://doc.metalex.eu/{0}/{1}>'.format(type, path)
#    q = "DESCRIBE {0}".format(uri)

    # Get a symmetric concise bounded description (SCBD)
    q = "CONSTRUCT {"+uri+" ?p ?o . ?s ?p2 "+uri+" .} WHERE { {"+uri+" ?p ?o .} UNION {?s ?p2 "+uri+" .} }"
    
    
    sparql = SPARQLWrapper(SPARQL_ENDPOINT)
    sparql.setQuery(q)
    
    cg = sparql.queryAndConvert()    
    
    cg = setNamespaces(cg)
    
    if format == 'ttl':
        response = HttpResponse(cg.serialize(format='turtle'))
        response['Content-Type'] = 'application/x-turtle'
    elif format == 'n3':
        response = HttpResponse(cg.serialize(format='n3'))
        response['Content-Type'] = 'text/rdf+n3'
    elif format == 'rdf':
        response = HttpResponse(sparql.query())
        response['Content-Type'] = 'application/rdf+xml'
    else:
        t = get_template('message.html')
        html = t.render(RequestContext(request, { 'title': 'Oops', 'text' : 'We do not serve content of this type for this URI'}))
        return HttpResponse(html)        
        
    return response
Example #6
class QueryManager:

    def __init__(self, endpoint=settings.ENDPOINT, updateEndpoint=settings.UPDATE, graph=settings.GRAPH, format=JSON):
        self.endpoint = SPARQLWrapper(endpoint)
        self.endpoint.setReturnFormat(format)
        # not using SPARQLWrapper because update endpoints don't seem to work
        self.updateEndpoint = updateEndpoint
        self.graph = graph

    def query(self, query):
        self.endpoint.setQuery(query)
        return self.endpoint.query().convert()
        
    def update(self, query):
        # not using SPARQLWrapper because update endpoints don't seem to work;
        # the update string is form-encoded before POSTing (assumes urllib is imported)
        response = urllib2.urlopen(self.updateEndpoint,
                                   data=urllib.urlencode({'update': query})).read()
        if 'error' in response:
            return False
        else:
            return True

    def insert(self, query):
        self.update('INSERT DATA { GRAPH <' + self.graph + '> ' + query + '}')

        
    def ask(self, uri):
        q = "ASK { GRAPH <" + self.graph + "> { <" + uri + "> ?p ?o . } }"
        return self.query(q)['boolean']

    def describe(self, uri):
        if self.ask(uri):
            q = "DESCRIBE <" + uri + "> FROM <" + self.graph + ">"
            return self.query(q)[str(uri)]
        else:
            return False
Example #7
def sparqlOneEndpoint(endpoint, query, apikey=None):

    out = None

    try:
        sparql = SPARQLWrapper(endpoint)
        if apikey:
            sparql.addCustomParameter("apikey", apikey)
        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)
        sparql.setTimeout(30)
        # print "\n", "# " * 7, "\n", query, "\n", "# " * 7, "\n"
        out = sparql.query().convert()
    except Exception:
        print "Could not process formulated query on indicated endpoint."

    return out
Example #8
class sparqlquerier:
	def __init__(self):
		self.sparql = SPARQLWrapper("https://linkeddata1.calcul.u-psud.fr/sparql")
		self.sparql.setReturnFormat(JSON)
		self.baseq = '''
					select  *
					where {
					        <http://yago-knowledge.org/resource/%s> ?property ?valueOrObject .
					        FILTER regex(str(?property ), "^http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
					        FILTER regex(str(?valueOrObject ), "^http://yago-knowledge.org/resource/wikicat_")
					      } 
					LIMIT 100
					'''

	def query(self,name):
		name = "_".join(name.split())
		self.sparql.setQuery(self.baseq % (name))
		result = self.sparql.query().convert()
		return result

	def wikicat(self,socialtags):
		wcdict = {}
		for tag in socialtags:
			try:
				result = self.query(tag)
				result = result["results"]["bindings"]
			except Exception as e:
				print("query failed %s" % e)
				#logging.debug("query failed %s \n %s" % (e,result))
				continue
			for i in result:
				wcdict.setdefault(i["valueOrObject"]["value"],[])
				wcdict[i["valueOrObject"]["value"]].append(tag) # each wikicat maps a list of socialTag
		return wcdict
Example #9
class SPARQLEntityLinker(Linker):

    def __init__(self, url="http://dbpedia.org/sparql"):
        self.url = url
        self.sparql = SPARQLWrapper(url)
        self.sparql.setReturnFormat(JSON)
        self.query_processor = Query()
        self.query = '''select distinct ?uri where {
                    ?uri rdfs:label "%s"@en .
                    {?uri rdf:type <http://dbpedia.org/ontology/Place>} UNION
                    {?uri rdf:type <http://dbpedia.org/ontology/Person>} UNION
                    {?uri rdf:type <http://dbpedia.org/ontology/Agent>}
                    }'''
        self.category = {
            "GPE":"http://dbpedia.org/ontology/Country",
            "PERSON":"http://dbpedia.org/ontology/Person",
                         }

    def linking(self, query):
        tokens = self.query_processor.tokenization(query)
        entities = []
        for i in range(len(tokens)):
            entities.append(tokens[i])
        for i in range(len(tokens)-1):
            entities.append(' '.join([tokens[i], tokens[i+1]]))
        e_links = []
        for e in entities:
            q = self.query % e.title()
            self.sparql.setQuery(q)
            results = self.sparql.query().convert()
            e_links += [result["uri"]["value"] for result in results["results"]["bindings"]]
        return e_links
Example #10
def construct():
    app.logger.debug('You arrived at ' + url_for('construct'))
    app.logger.debug('I received the following arguments' + str(request.args))

    endpoint = request.args.get('endpoint', None)
    symptomArray = request.args.getlist('symptomArray[]')
    atonomyArray = request.args.getlist('atonomyArray[]')
    b = bool(int(request.args.get('b', 1)))

    id_tuple = [(x, True) for x in symptomArray] + [(x, False) for x in atonomyArray]

    triple = create_equivelant_class(id_tuple)

    prefix = '\n'.join(['prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>',
                        'prefix owl: <http://www.w3.org/2002/07/owl#>',
                        'prefix xsd: <http://www.w3.org/2001/XMLSchema#>',
                        'prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>'])


    query = '%s \n INSERT DATA { %s }' % (prefix, triple)

    if not b:
        sparql = SPARQLWrapper(endpoint)
        sparql.setQuery(query)
        sparql.query().convert()
        yourDisease = queryYourDisease(endpoint)

        if yourDisease is not None:
            return yourDisease
        else:
            return queryDatabase(endpoint, array=[symptomArray, atonomyArray])
    else:
        return queryDatabase(endpoint, array=[symptomArray, atonomyArray])
Example #11
class SparqlEndpoint(object):

    def __init__(self, endpoint, prefixes={}):
        self.sparql = SPARQLWrapper(endpoint)
        self.prefixes = {
            "dbpedia-owl": "http://dbpedia.org/ontology/",
            "owl": "http://www.w3.org/2002/07/owl#",
            "xsd": "http://www.w3.org/2001/XMLSchema#",
            "rdfs": "http://www.w3.org/2000/01/rdf-schema#",
            "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
            "foaf": "http://xmlns.com/foaf/0.1/",
            "dc": "http://purl.org/dc/elements/1.1/",
            "dbpedia2": "http://dbpedia.org/property/",
            "dbpedia": "http://dbpedia.org/",
            "skos": "http://www.w3.org/2004/02/skos/core#",
            "foaf": "http://xmlns.com/foaf/0.1/",
            }
        self.prefixes.update(prefixes)
        self.sparql.setReturnFormat(JSON)

    def query(self, q):
        lines = ["PREFIX %s: <%s>" % (k, r) for k, r in self.prefixes.iteritems()]
        lines.extend(q.split("\n"))
        query = "\n".join(lines)
        print query
        self.sparql.setQuery(query)
        results = self.sparql.query().convert()
        return results["results"]["bindings"]
Example #12
class DbpediaReader:
    def __init__(self, db):
        self.db = db
        self.sparql = SPARQLWrapper("http://dbpedia.org/sparql")

    @staticmethod
    def __print_query_results(title, offset):
        print(title + " " + str(offset))

    def __read_results_from_query_resource(self, resource_name, *args):
        query = get_resource(resource_name).format(*args)
        results = self.__exec_query(query)
        return results['results']['bindings']

    def __save_results_from_query_resource_batched(self, save_method, resource_name, *args):
        offset = 0
        while True:
            batch = self.__read_results_from_query_resource(resource_name, *args, offset)
            save_method(batch)
            DbpediaReader.__print_query_results(resource_name, offset)

            if len(batch) < 10000:
                break
            offset += 10000

    def __exec_query(self, query):
        self.sparql.setQuery(query)
        self.sparql.setReturnFormat(JSON)
        return self.sparql.query().convert()

    def save_raw_persons(self):
        return self.__save_results_from_query_resource_batched(self.db.insert_raw_persons, 'person_query.txt')

    def save_raw_roles(self):
        return self.__save_results_from_query_resource_batched(self.db.insert_raw_roles, 'role_query.txt')

    def save_raw_relations(self):
        for relation in Relation:
            self.__save_raw_relations_for_type(relation)

    def __save_raw_relations_for_type(self, relation):
        names = relation.get_relations_names()
        for name in names:
            self.__save_results_from_query_resource_batched(
                lambda data: self.db.insert_raw_relations(DbpediaReader.__create_relation_dict(relation.name, data)),
                'relation_query.txt', name)

    def save_raw_redirects(self):
        self.__save_results_from_query_resource_batched(
            lambda data: self.db.insert_raw_relations(DbpediaReader.__create_relation_dict(Relation.OTHER.name, data)),
            'wiki_redirect_query.txt')

    def save_raw_types(self):
        self.__save_results_from_query_resource_batched(self.db.save_raw_types, 'type_query.txt')

    @staticmethod
    def __create_relation_dict(name, relations):
        # print(relations)
        return dict(type=name, relations=relations)
Example #13
def get_places_within(upper, lower):
    sparql = SPARQLWrapper(app.config['endpoint'])
    sparql.setReturnFormat(JSON)
    sparql.addParameter('Accept', 'application/sparql-results+json')
    sparql.addParameter('reasoning', 'true')
    prefixes = '''
            prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
            prefix owl: <http://www.w3.org/2002/07/owl#>
            prefix xsd: <http://www.w3.org/2001/XMLSchema#>
            prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            '''
    query = '''
        select ?place ?lat ?lon where {
               ?place a scr:Place .
               ?place geo:lat ?lat .
               ?place geo:long ?lon .
               FILTER(xsd:float(?lat) < %f &&
                      xsd:float(?lon) < %f &&
                      xsd:float(?lat) > %f &&
                      xsd:float(?lon) > %f )
               }
            '''

    sparql.setQuery(prefixes + query % (upper[0], upper[1], lower[0], lower[1]))  # noqa

    response = sparql.query().convert()
    if response['results']['bindings']:
        return response['results']['bindings']
    else:
        return []
Example #14
class Sparql():
	def __init__(self, resource):
		self.PROPERTIES = ['mother', 'father', 'spouse', 'abstract']
		self.wrapper = SPARQLWrapper("http://dbpedia.org/sparql")
		self.result = self.queryResource(resource)


	def queryResource(self, resource):
		rs = {}
		resource = "<http://dbpedia.org/resource/" + resource + ">"
		self.wrapper.setQuery("""
		PREFIX db: <http://dbpedia.org/resource/>
		select ?property ?value
		where {
			""" + resource + """ ?property ?value .
		}
		""")
		self.wrapper.setReturnFormat(JSON)
		results = self.wrapper.query().convert()

		for result in results['results']['bindings']:
			if any(prop in result['property']['value'] for prop in self.PROPERTIES):
				# keep only English-language literals; non-literals have no xml:lang
				if 'xml:lang' in result['value'] and result['value']['xml:lang'] != 'en':
					continue
				rs[self.cleanProperty(result['property']['value'])] = (
					self.cleanProperty(result['value']['value']), result['value']['type'])

		return rs

	def cleanProperty(self, prop):
		return str(prop.split("/")[-1])
Example #15
def describe(endpoint, query):
    sparql = SPARQLWrapper(endpoint)
    sparql.setQuery(query)
    try:
        return sparql.query().convert()
    except RuntimeWarning:
        pass
Example #16
class ReaderPlugin(RDFQueryReader):
    def __init__(self, *args, **kwargs):
        RDFQueryReader.__init__(self, *args, **kwargs)

        self.__endpoint = kwargs['endpoint'] if 'endpoint' in kwargs else None
        self.__results_format = JSON

        self.__sparql_wrapper = SPARQLWrapper(self.__endpoint, self.__results_format)
        if kwargs.get("use_keepalive", "").lower().strip() == "true":
            if hasattr(SPARQLWrapper, "setUseKeepAlive"):
                self.__sparql_wrapper.setUseKeepAlive()

        # Try to use cjson
        try:
            import cjson
            jsonlayer.use("cjson")
            self.log.info("using cjson")
        except ImportError:
            self.log.warning("cjson not available, falling back on slower simplejson")

    endpoint = property(lambda self: self.__endpoint)
    results_format = property(lambda self: self.__results_format)

    def _to_table(self, result):
        if not isinstance(result, dict):
            return result

        if not "results" in result:
            return result

        converted = []
        for binding in result["results"]["bindings"]:
            rdf_item = {}
            for key, obj in binding.items():
                try:
                    rdf_item[key] = toRdflib(obj)
                except ValueError:
                    continue

            converted.append(rdf_item)

        return converted

    def _ask(self, result):
        '''
        returns the boolean value of a ASK query
        '''

        return result.get("boolean")

    def execute_sparql(self, q_string, format = 'JSON'):
        try:
            self.log.debug(q_string)
            self.__sparql_wrapper.setQuery(q_string)
            return self.__sparql_wrapper.query().convert()
        except EndPointNotFound, _:
            raise SparqlReaderException("Endpoint not found"), None, sys.exc_info()[2]
        except QueryBadFormed, _:
            raise SparqlReaderException("Bad query: %s" % q_string), None, sys.exc_info()[2]
Example #17
	def query_SPARQL_Endpoint(self, endpoint_URI, query_str):
		try:
			sparql = SPARQLWrapper(endpoint_URI)
			sparql.setQuery(query_str)
			results = sparql.query().info()
			return results
		except Exception, e:
			if LinkedDataProfiler.DEBUG: print('I was not able to execute the SPARQL query against %s\nReason: %s' %(endpoint_URI,e))
Example #18
	def __init__(self, url, query_head, query_foot):
		'''init SPARQLWrapper with the specified URL, set the return format,
			set the size max_body_size of the parameters of our batch query'''
		SPARQLWrapper.__init__(self, url)
		self.setReturnFormat(JSON)
		self.query_head = query_head
		self.query_foot = query_foot
		self.max_body_size = self.MAX_QUERY_SIZE - len(self.query_head) - len(self.query_foot)
Example #19
class halo:
	def __init__(self):
		self.config = json.load(open("../config/config.json"))
		self.sparql = SPARQLWrapper("http://dbpedia.org/sparql")
		self.termDB = MongoClient()["semantified"]["terms"]
		self.halodb = MongoClient()["halo"]["halos"]
	def run(self,query):
		try:
			self.sparql.setQuery(query)
			self.sparql.setReturnFormat(JSON)
			result = self.sparql.query()
			#jsonlayer.use('cjson')
			body = result.response.read().encode('ascii','ignore')
			fixed_body = body.decode("ascii")
			result = jsonlayer.decode(fixed_body)
			return result["results"]["bindings"]
		except Exception:
			print(query)
			time.sleep(60)
			return self.run(query)

	def makeQuery(self,uri,querykey):
		return  self.config[querykey] % (uri)
	def insert(self,obj):
		self.halodb.update({"_id":obj["_id"]},obj,True)
	def isprocessed(self,uri):
		return len(list(self.halodb.find({"_id" : uri}))) > 0
	def getHalo(self,uri):
		if not self.isprocessed(uri):
			query = self.makeQuery(uri,"queryone")
			result = self.run(query)
			query = self.makeQuery(uri,"querytwo")
			result.extend(self.run(query))
			halo = {}
			halo["_id"] = uri
			halo["uri"] = uri
			halo["halo"] = {}
			for each in result:
				halouri = each["aura"]["value"]
				halo["halo"][halouri.replace(".","$")] = {}
				obj = {}
				obj["halouri"] = halouri
				obj["count"] = each["auraCount"]["value"]
				obj["label"] = each["label"]["value"]
				halo["halo"][halouri.replace(".","$")] = obj
			self.insert(halo)
			print("processed halo for : " + uri)
		else:
			print("previously processed uri : " + uri )
	def getdatadb(self):
		return self.termDB.find(timeout=False)
	
	def processhalofromdb(self):
		data = self.getdatadb()
		for each in data :
			alluri = each["allURI"]
			map(self.getHalo,alluri)
Example #20
 def get_triples(self):
     sparql = SPARQLWrapper(self.endpoint)
     sparql.setQuery(self.queries["describe"] % (self.uri, self.graph))
     g = sparql.query().convert()
     logging.debug("Returning %d triples describing resource <%s>" % (len(g), self.uri))
     #FIXME: enrich with metadata
     for prefix, namespace in self.conf.data.namespaces():
         g.bind(prefix, namespace) 
     return g
Example #21
def sendSparqlQuery(query, endpoint, reasoning='false'):
    sparql = SPARQLWrapper(endpoint)
    sparql.setQuery(query)

    sparql.setReturnFormat(JSON)
    sparql.addParameter('Accept','application/sparql-results+json')
    sparql.addParameter('reasoning', reasoning)

    return sparql.query().convert()
Example #22
def query_tunnel(request):
    query = request.GET.get('query')
    sparql = SPARQLWrapper(endpoint)
    if bg_user and bg_pw:
        sparql.setHTTPAuth(BASIC)
        sparql.setCredentials(bg_user, bg_pw)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    return JsonResponse(results, safe=False)
Example #23
 def insert(self, insQuery):
     print insQuery
     sparql = SPARQLWrapper("http://localhost:8890/sparql")
     sparql.setQuery("""
         PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
         INSERT DATA
         INTO <http://localhost:8890/DAV/home/smob>
         { <s> <p> <q> }
     """)
     results = sparql.query().convert()
Example #24
 def get_proxy(self, uri):
     '''
     Returns the description of a proxy entity
     '''
     logger.info('Get proxy data about {}'.format(uri))
     query = self._queries['get_proxy.rq'].replace("__URI__", uri)
     sparql = SPARQLWrapper(self.sparql)
     sparql.setQuery(query)
     data = sparql.query().convert()
     return data
Example #25
class ReactomeDataSource(object):
    def __init__(self):
        self.sparql = SPARQLWrapper("https://www.ebi.ac.uk/rdf/services/reactome/sparql")

    def create_reactions(self, taxon, model):
        self.sparql.setQuery("""

PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX dc: <http://purl.org/dc/elements/1.1/>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX biopax3: <http://www.biopax.org/release/biopax-level3.owl#>

SELECT DISTINCT ?pathway ?pathwayname ?rea ?lid ?rid
WHERE
{
 ?pathway rdf:type biopax3:Pathway . 
 ?pathway biopax3:displayName ?pathwayname . 
 ?pathway biopax3:organism <http://identifiers.org/taxonomy/""" + taxon + """> .
 ?pathway biopax3:pathwayComponent ?rea .
 ?rea biopax3:left ?l .
 ?l biopax3:entityReference ?lid .
 ?rea biopax3:right ?r .
 ?r biopax3:entityReference ?rid .
}

        """)
        self.sparql.setReturnFormat(JSON)
        results = self.sparql.query().convert()

        lefts = defaultdict(list)
        rights = defaultdict(list)
        for r in results['results']['bindings']:
            lefts[r['rea']['value']].append(r['lid']['value'])
            rights[r['rea']['value']].append(r['rid']['value'])

        for rea in lefts.keys():
            rr = ReactionRule()
            for l in set(lefts[rea]):
                #print(l.split("/")[-1])
                try:
                    rr.add_reactant(Species(l.split("/")[-1]))
                except:
                    pass
            # use a distinct loop variable so the reaction key is not clobbered
            for rhs in set(rights[rea]):
                #print(rhs.split("/")[-1])
                try:
                    rr.add_product(Species(rhs.split("/")[-1]))
                except:
                    pass
            model.add_reaction_rule(rr)
Example #26
def query(endpoint='', query=''):
    sparql = SPARQLWrapper(endpoint, returnFormat="json")
    sparql.setQuery(query)

    try:
        ret = sparql.queryAndConvert()
    except Exception:
        print('Query Error')
        return []

    return ret['results']['bindings']
Example #27
class VirtuosoBackend(Backend):
    def __init__(self, address):
        self.endpoint = SPARQLWrapper(address + "/sparql/")

    def query(self, query):
        self.endpoint.setQuery(query)

        self.endpoint.setReturnFormat(JSON)
        results = self.endpoint.query().convert()

        return results
Example #28
 def insert(self, insQuery):
     """This function takes in an insert statement and returns
     whether it was executed fine or not"""
     print "Triples: " + insQuery
     sparql = SPARQLWrapper("http://localhost:8890/sparql")
     sparql.setQuery("""
         PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
         INSERT DATA
         INTO <http://localhost:8890/DAV/home/test>
         { """ + insQuery + """ }
     """)
     results = sparql.query().convert()
Example #29
 def insert(self, insQuery):
     """This function takes in an insert statement and returns
     whether it was executed fine or not"""
     print "Triples: " + insQuery
     sparql = SPARQLWrapper("http://knoesis-twit.cs.wright.edu:8890/sparql")
     sparql.setQuery("""
         PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
         INSERT DATA
         INTO <http://knoesis-twit.cs.wright.edu/dav/dbpedia/categories>
         { """ + insQuery + """ }
     """)
     results = sparql.query().convert()
Example #30
class SPARQLEndpoint(object):

    # init with endpoint URL
    def __init__(self, endpoint):
        self.sparql = SPARQLWrapper(endpoint)
        self.sparql.setReturnFormat(JSON)

    # delegate SPARQL query to endpoint
    def query(self, q):
        self.sparql.setQuery(q)
        return self.sparql.query().convert()
Example #31
def findParents(URI):
    # Returns a pathList which includes all parents per hop in tuples [(child,parent),(child,parent)]
    global iup, pathList, endpoint
    list_out = []
    iup += 1
    if iup == 1:
        sparql = SPARQLWrapper(endpoint)
        sparql.addCustomParameter("infer", "false")
        sparql.setReturnFormat(JSON)
        querystring = 'PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> SELECT DISTINCT ?super WHERE { <' + URI[
            iup - 1][0] + '> rdfs:subClassOf ?super . FILTER isURI(?super) }'
        sparql.setQuery(querystring)
        results = sparql.query().convert()
        for x in results["results"]["bindings"]:
            list_out.append((URI[iup - 1][0], x["super"]["value"]))
    else:
        for i in range(len(URI[iup - 1])):
            sparql = SPARQLWrapper(endpoint)
            sparql.addCustomParameter("infer", "false")
            sparql.setReturnFormat(JSON)
            querystring = 'PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> SELECT DISTINCT ?super WHERE { <' + URI[
                iup -
                1][i][1] + '> rdfs:subClassOf ?super . FILTER isURI(?super) }'
            sparql.setQuery(querystring)
            results = sparql.query().convert()
            for x in results["results"]["bindings"]:
                list_out.append((URI[iup - 1][i][1], x["super"]["value"]))

    if len(list_out) > 0:
        URI.append(list_out)
        findParents(URI)
    else:
        iup = 0
        pathList = URI
        return pathList
Example #32
        try:
            query = query + ' LIMIT %s' % int(getattr(context, LIMIT))
        except (ValueError, TypeError, AttributeError):
            pass
        try:
            query = query + ' OFFSET %s' % int(getattr(context, OFFSET))
        except (ValueError, TypeError, AttributeError):
            pass

        self.resetQuery()
        if self._is_contextual(context):
            self.addParameter("default-graph-uri", context.identifier)
        self.timeout = self._timeout
        self.setQuery(query)

        doc = ElementTree.parse(SPARQLWrapper.query(self).response)
        # ElementTree.dump(doc)
        for rt, vars in _traverse_sparql_result_dom(
                doc, as_dictionary=True,
                node_from_result=self.node_from_result):
            yield (rt.get(s, s), rt.get(p, p), rt.get(o, o)), None

    def triples_choices(self, (subject, predicate, object_), context=None):
        """
        A variant of triples that can take a list of terms instead of a
        single term in any slot.  Stores can implement this to optimize
        the response time from the import default 'fallback' implementation,
        which will iterate over each term in the list and dispatch to
        triples.
        """
        raise NotImplementedError('Triples choices currently not supported')
Example #33
def get_types_d(resource):
    '''
    given a single resource
    return every predicate
    in a common dict with the predicate name as key
    and the corresponding url and ns
    e.g: Jacques_Tati
    '''
    ns = "http://dbpedia.org"
    dtype = "resource"
    #prefix db-owl: <http://dbpedia.org/ontology/>
    q = '''
    prefix db-owl: <http://dbpedia.org/ontology/>
    SELECT ?type WHERE {

        <http://dbpedia.org/resource/%s> rdf:type ?type .
    }

    ''' % (resource)
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    sparql.setQuery(q)
    sparql.setReturnFormat(JSON)
    is_type_of = {}
    results = sparql.query().convert()
    nb_results = len(results["results"]["bindings"])
    if nb_results == 0:
        raise Exception("No results found for %s" %resource)
    type_urls = [r["type"]["value"] for r in results["results"]["bindings"]]

    for val in type_urls:
    # for r in results["results"]["bindings"]:
        if "#" in val:
            ns = val.split("#")[0]
            type_v = val.split("#")[-1]
            type_v = re.sub('s$', '', type_v)

        else:
            ns = "/".join(val.split("/")[:-1])

            if "entity" in val:
                #wikidata IDS
                r = requests.get(val)
                r_json =r.json()
                type_v = [e["labels"]["en"]["value"] for e in r_json["entities"].values()][0].lower()
            else:
                m = re.match('(?P<name>.*?)(?P<id>\d+)$', val)
                if m is not None:
                    type_v = m.group("name").split("/")[-1]
                else:
                    if "Yago" in val or "Wikicat" in val:
                        type_v = re.split("/(Yago|Wikicat)", val)[-1]
                    else:
                        type_v = val.split("/")[-1]
        #finally mapping
        if type_v in is_type_of.keys():
            is_type_of[type_v]["urls"].append(val)
            is_type_of[type_v]["ns"].append(ns)
        else:
            is_type_of[type_v] = {"urls": [val], "ns":[ns]}
        is_type_of[type_v]["resource"] = resource

    return is_type_of
Example #34
class SEARCH:
    def __init__(self, url_pbg):
        #define cache directory
        self.cache = "cache/"
        #define url
        self.url_pbg = url_pbg
        #define sparql
        self.sparql_pbg = SPARQLWrapper(self.url_pbg)
        self.sparql_pbg.setReturnFormat(JSON)

    def cache_name(self, method, parameters):
        key = method + "_" + hashlib.md5(pickle.dumps(parameters)).hexdigest()
        return (key)

    def get_location(self, id):
        filename = self.cache + self.cache_name("get_location", id)
        try:
            infile = open(filename, "rb")
            new_object = pickle.load(infile)
            infile.close()
            return (new_object)
        except FileNotFoundError:
            file = open("queries/gene_location.sparql", "r")
            query = file.read()
            file.close()
            self.sparql_pbg.setQuery(query % id)
            # JSON example
            response = self.sparql_pbg.query().convert()
            result = []
            if response["results"]["bindings"]:
                for item in response["results"]["bindings"]:
                    result.append([
                        item["gene_id"]["value"], item["chromosome"]["value"],
                        item["begin_ref"]["value"], item["begin_pos"]["value"],
                        item["end_ref"]["value"], item["end_pos"]["value"]
                    ])
                df = pd.DataFrame(result)
                df.columns = [
                    "gene_id", "chromosome", "begin_ref", "begin_pos",
                    "end_ref", "end_pos"
                ]
                df = df.set_index("gene_id")
                df["begin_pos"] = pd.to_numeric(df["begin_pos"])
                df["end_pos"] = pd.to_numeric(df["end_pos"])
                #cache
                outfile = open(filename, "wb")
                pickle.dump(df, outfile)
                outfile.close()
                return df
            else:
                return pd.DataFrame()

    def compute_interval(self, g1, g2):
        locations = pd.concat([self.get_location(g1), self.get_location(g2)])
        display(locations[["location"]])
        if (len(locations.index) != 2):
            print("unexpected number of rows in locations:",
                  len(locations.index))
        elif (locations.iloc[0]['end_pos'] >
              locations.iloc[1]['begin_pos']) & (g1 != g2):
            print("unexpected order", locations.index[0], "and",
                  locations.index[1])
        else:
            result = []
            if locations.iloc[0]["end_pos"] > locations.iloc[0]["begin_pos"]:
                result.append([
                    "begin", locations.iloc[0]["end_ref"],
                    locations.iloc[0]["end_pos"]
                ])
            else:
                result.append([
                    "begin", locations.iloc[0]["begin_ref"],
                    locations.iloc[0]["begin_pos"]
                ])
            if locations.iloc[1]["begin_pos"] < locations.iloc[1]["end_pos"]:
                result.append([
                    "end", locations.iloc[1]["begin_ref"],
                    locations.iloc[1]["begin_pos"]
                ])
            else:
                result.append([
                    "end", locations.iloc[1]["end_ref"],
                    locations.iloc[1]["end_pos"]
                ])
            df = pd.DataFrame(result)
            df.columns = ["type", "ref", "pos"]
            df = df.set_index("type")
            return df

    def make_interval(self, ref, start, end):
        result = []
        result.append(["begin", ref, start])
        result.append(["end", ref, end])
        df = pd.DataFrame(result)
        df.columns = ["type", "ref", "pos"]
        df = df.set_index("type")
        return df

    def interval_genes(self, interval):
        filename = self.cache + self.cache_name("interval_genes", interval)
        try:
            infile = open(filename, "rb")
            new_object = pickle.load(infile)
            infile.close()
            return (new_object)
        except FileNotFoundError:
            file = open("queries/interval_genes.sparql", "r")
            query = file.read()
            file.close()
            self.sparql_pbg.setQuery(
                query % {
                    "beginRef": interval.loc["begin"]["ref"],
                    "beginPos": interval.loc["begin"]["pos"],
                    "endRef": interval.loc["end"]["ref"],
                    "endPos": interval.loc["end"]["pos"]
                })
            # JSON example
            response = self.sparql_pbg.query().convert()
            result = []
            if response["results"]["bindings"]:
                for item in response["results"]["bindings"]:
                    row = []
                    row.append(item["gene_id"]["value"])
                    row.append(item["chromosome"]["value"])
                    row.append(item["begin_pos"]["value"])
                    row.append(item["end_pos"]["value"])
                    result.append(row)
                df = pd.DataFrame(result)
                df.columns = ["gene_id", "chromosome", "start", "end"]
                #cache
                outfile = open(filename, "wb")
                pickle.dump(df, outfile)
                outfile.close()
                return df
            else:
                return pd.DataFrame()

    def go_genes(self, graphEnsembl, graphUniprot, go):
        filename = self.cache + self.cache_name(
            "go_genes", [graphEnsembl, graphUniprot, go])
        try:
            infile = open(filename, "rb")
            new_object = pickle.load(infile)
            infile.close()
            return (new_object)
        except FileNotFoundError:
            file = open("queries/go_genes.sparql", "r")
            query = file.read()
            file.close()
            self.sparql_pbg.setQuery(
                query % {
                    "graphEnsembl": graphEnsembl,
                    "graphUniprot": graphUniprot,
                    "go": go
                })
            # JSON example
            response = self.sparql_pbg.query().convert()
            result = []
            if response["results"]["bindings"]:
                for item in response["results"]["bindings"]:
                    row = []
                    row.append(item["gene_count"]["value"])
                    row.append(item["gene_with_go_count"]["value"])
                    result.append(row)
                df = pd.DataFrame(result)
                df.columns = ["gene_count", "gene_with_go_count"]
                #cache
                outfile = open(filename, "wb")
                pickle.dump(df, outfile)
                outfile.close()
                return df
            else:
                return pd.DataFrame()

    def gene_goterms(self, id):
        filename = self.cache + self.cache_name("gene_goterms", id)
        try:
            infile = open(filename, "rb")
            new_object = pickle.load(infile)
            infile.close()
            return (new_object)
        except FileNotFoundError:
            file = open("queries/gene_goterm.sparql", "r")
            query = file.read()
            file.close()
            self.sparql_pbg.setQuery(query % id)
            # JSON example
            response = self.sparql_pbg.query().convert()
            result = []
            if response["results"]["bindings"]:
                for item in response["results"]["bindings"]:
                    row = []
                    row.append(item["gene_id"]["value"])
                    row.append(item["go_id"]["value"])
                    row.append(item["go_term"]["value"])
                    row.append(item["go_cat"]["value"])
                    row.append(item["graph_ensembl"]["value"])
                    row.append(item["graph_uniprot"]["value"])
                    result.append(row)
                df = pd.DataFrame(result)
                df.columns = [
                    "gene_id", "go_id", "go_term", "go_cat", "graph_ensembl",
                    "graph_uniprot"
                ]
                #cache
                outfile = open(filename, "wb")
                pickle.dump(df, outfile)
                outfile.close()
                return df
            else:
                return pd.DataFrame()

    def genes_goterms(self, ids):
        list = []
        for id in ids:
            list.append(self.gene_goterms(id))
        return pd.concat(list).reset_index(drop=True)

    def get_go_numbers(self, goterms, genes):
        #construct the number of genes with/without goterm
        graphs = list(
            goterms.groupby(["graph_ensembl", "graph_uniprot"]).indices.keys())
        golist = goterms["go_id"].unique()
        #construct df
        df = pd.DataFrame(goterms.groupby("go_id").size(),
                          columns=["interval_genes_annotated"])
        #add gene numbers
        df["interval_genes_not_annotated"] = len(
            genes.index) - df["interval_genes_annotated"]
        df["outside_genes_annotated"] = 0
        df["outside_genes_not_annotated"] = 0
        df["total_genes"] = 0
        for go in golist:
            for graph in graphs:
                result = self.go_genes(graph[0], graph[1], go)
                df.loc[go, "outside_genes_annotated"] = df.loc[
                    go, "outside_genes_annotated"] + int(
                        result.loc[0, "gene_with_go_count"])
                df.loc[go, "total_genes"] = df.loc[go, "total_genes"] + int(
                    result.loc[0, "gene_count"])
            df.loc[go, "outside_genes_annotated"] = df.loc[
                go,
                "outside_genes_annotated"] - df.loc[go,
                                                    "interval_genes_annotated"]
            df.loc[go, "outside_genes_not_annotated"] = df.loc[
                go, "total_genes"] - df.loc[
                    go, "outside_genes_annotated"] - df.loc[
                        go, "interval_genes_annotated"] - df.loc[
                            go, "interval_genes_not_annotated"]
        #do fisher tests
        for go in golist:
            m = [[
                df.loc[go, "interval_genes_annotated"],
                df.loc[go, "outside_genes_annotated"]
            ],
                 [
                     df.loc[go, "interval_genes_not_annotated"],
                     df.loc[go, "outside_genes_not_annotated"]
                 ]]
            df.loc[go, "p_less"] = stats.fisher_exact(m, alternative="less")[1]
            df.loc[go,
                   "p_greater"] = stats.fisher_exact(m,
                                                     alternative="greater")[1]
        df["p_adjusted"] = p_adjust(df["p_greater"], method="BH")
        return df
Example #35
class WDSparqlQueries(object):
    """
    params: optional depending on type of query (for qid provide prop and string, for label provide qid)
    extendable wrapper for sparql queries in WD
    """
    def __init__(self, qid=None, prop=None, string=None):
        self.qid = qid
        self.prop = prop
        self.string = string
        self.endpoint = SPARQLWrapper("https://query.wikidata.org/bigdata/namespace/wdq/sparql")
        self.wd = 'PREFIX wd: <http://www.wikidata.org/entity/>'
        self.wdt = 'PREFIX wdt: <http://www.wikidata.org/prop/direct/>'

    def execute_query(self, query):
        self.endpoint.setQuery(query)
        self.endpoint.setReturnFormat(JSON)
        return self.endpoint.query().convert()

    def wd_prop2qid(self):
        """
        :param prop: 'P351' Entrez gene id (ex. print( SPARQL_for_qidbyprop('P351','899959')))
        :param string: '899959' String value
        :return: QID Q21514037
        """
        arguments = '?gene wdt:{} "{}"'.format(self.prop, self.string)
        select_where = 'SELECT * WHERE {{{}}}'.format(arguments)
        query = self.wdt + " " + select_where
        results = self.execute_query(query)
        final_qid = []
        try:
            rawqid = results['results']['bindings'][0]['gene']['value']
            qid_list = rawqid.split('/')
            final_qid.append(qid_list[-1])
        except Exception:
            final_qid.append('None')
        return final_qid[0]

    def wd_qid2label(self):
        """
        :param string: 'Q2458943' String value
        :return: QID 'Label'
        """
        arguments = ' wd:{} rdfs:label ?label. Filter (LANG(?label) = "en") .'.format(self.qid)
        select_where = 'SELECT ?label WHERE {{{}}}'.format(arguments)
        query = self.wd + " " + select_where
        results = self.execute_query(query)
        final_qid = []
        try:
            rawqid = results['results']['bindings'][0]['label']['value']
            final_qid.append(rawqid)
        except Exception:
            final_qid.append('None')
        return final_qid[0]

    def wd_qid2property(self):
        """
        :param string: 'Q2458943' String value
        :return: "property value'
        """
        arguments = ' wd:{} wdt:{} ?prop.'.format(self.qid, self.prop)
        select_where = 'SELECT ?prop WHERE {{{}}}'.format(arguments)
        query = self.wd + " " + select_where
        results = self.execute_query(query)
        final_qid = []
        try:
            rawqid = results['results']['bindings'][0]['prop']['value']
            final_qid.append(rawqid)
        except Exception:
            final_qid.append('None')
        return final_qid[0].split("/")[-1]
Example #36
#!/usr/bin/python
# -*- coding: utf-8 -*-

import rdflib
import os
import shutil
from json import JSONEncoder
from SPARQLWrapper import SPARQLWrapper, JSON

tps_graph = "http://vitali.web.cs.unibo.it/raschietto/graph/ltw1543"

query = """SELECT ?s ?p ?o {
    GRAPH <%s> {?s ?p ?o .}
}""" % (tps_graph)

# NB: use 'DELETE' instead of 'INSERT' to remove
# the data from the triplestore

sparql = SPARQLWrapper("http://tweb2015.cs.unibo.it:8080/data/query", returnFormat="json")
sparql.setQuery(query)
sparql.setMethod('POST')
q = sparql.query()
print JSONEncoder().encode(q.convert())
Example #37
def generate_hcls_from_sparql(sparql_endpoint, rdf_distribution_uri,
                              g=Graph()):
    """Query the provided SPARQL endpoint to compute HCLS metadata"""
    sparql = SPARQLWrapper(sparql_endpoint)
    root = pathlib.Path(__file__).parent.resolve()
    with open(root / '../FAIRMETADATA_FAILED_QUERIES.md', 'w') as f:
        f.write('# Failing HCLS SPARQL queries\n\n\n')
    with open(root / '../FAIRMETADATA_SUCCESS_QUERIES.md', 'w') as f:
        f.write('# Generated HCLS metadata\n\n\n')

    query_prefixes = """PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX dqv: <http://www.w3.org/ns/dqv#>
PREFIX hcls: <http://www.w3.org/hcls#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX dctypes: <http://purl.org/dc/dcmitype/>
PREFIX dcat: <http://www.w3.org/ns/dcat#>
PREFIX void: <http://rdfs.org/ns/void#>
PREFIX void-ext: <http://ldf.fi/void-ext#>\n"""

    query_select_all_graphs = 'SELECT DISTINCT ?graph WHERE { GRAPH ?graph {?s ?p ?o} }'
    sparql.setQuery(query_select_all_graphs)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    # print('Get all graphs query Results:')
    # print(results)
    select_all_graphs_results = results["results"]["bindings"]

    # Compute HCLS metadata per graph
    for graph_row in select_all_graphs_results:
        graph = graph_row['graph']['value']
        print('Computing HCLS metadata for graph ' + graph)
        for filename in os.listdir(
                pkg_resources.resource_filename('fair_metadata', 'queries')):
            with open(
                    pkg_resources.resource_filename('fair_metadata',
                                                    'queries/' + filename),
                    'r') as f:
                if (graph):
                    sparql_query = f.read().replace('?_graph_uri', graph)
                    sparql_query = sparql_query.replace(
                        '<?_graph_start>', 'GRAPH <' + graph + '> {')
                    sparql_query = sparql_query.replace('<?_graph_end>', '}')
                else:
                    sparql_query = f.read().replace('?_graph_uri',
                                                    rdf_distribution_uri)
                    sparql_query = sparql_query.replace('<?_graph_start>', '')
                    sparql_query = sparql_query.replace('<?_graph_end>', '')

                complete_query = query_prefixes + sparql_query
                # print(complete_query)

                try:
                    sparql.setQuery(complete_query)
                    sparql.setReturnFormat(TURTLE)
                    # sparql.setReturnFormat(JSONLD)
                    results = sparql.query().convert()
                    # g.parse(data=results, format="turtle")
                    # g.parse(data=results, format="json-ld")

                    hcls_graph = Graph()
                    hcls_graph.parse(data=results, format="turtle")
                    g += hcls_graph
                    with open(root / '../FAIRMETADATA_SUCCESS_QUERIES.md',
                              'a') as f:
                        f.write('## Returned RDF \n\n```turtle\n' +
                                results.decode('utf-8') + "\n```\n\n" +
                                'Query: \n\n```sparql\n' + complete_query +
                                "\n```\n\n" + 'In SPARQL endpoint: ' +
                                sparql_endpoint + "\n\n---\n")
                except Exception as e:
                    print('SPARQL query failed:')
                    print(complete_query)
                    print(e)
                    with open(root / '../FAIRMETADATA_FAILED_QUERIES.md',
                              'a') as f:
                        f.write('## Query failed \n\n```sparql\n' +
                                complete_query + "\n```\n\n" +
                                'In SPARQL endpoint: ' + sparql_endpoint +
                                "\n> " + str(e) + "\n\n---\n")

    # print(g.serialize(format='json-ld', indent=4))
    # print(g.serialize(format='turtle', indent=4))
    return g


# {
#   "@context": "/contexts/GraphMap",
#   "@id": "/graph_maps",
#   "@type": "hydra:Collection",
#   "hydra:member": [
#     {
#       "@id": "/graph_maps/3",
#       "@type": "http://example.org/GraphMap",
#       "subjectType": "http://www.w3.org/2000/01/rdf-schema#Resource",
#       "predicate": "http://semanticscience.org/resource/has-participant",
#       "objectType": "http://www.w3.org/2000/01/rdf-schema#Resource",
#       "dataset": "/datasets/3",
#       "id": 3
#     },
#     {
#       "@id": "/graph_maps/4",
#       "@type": "http://example.org/GraphMap",
#       "subjectType": "http://www.w3.org/2000/01/rdf-schema#Resource",
#       "predicate": "http://www.w3.org/1999/02/22-rdf-syntax-ns#type",
#       "objectType": "http://www.ebi.ac.uk/efo/EFO_0001067",
#       "dataset": "/datasets/3",
#       "id": 4
#     }
#   ],
#   "hydra:totalItems": 2
# }

# {
#   "@context": "/contexts/Dataset",
#   "@id": "/datasets",
#   "@type": "hydra:Collection",
#   "hydra:member": [
#     {
#       "@id": "/datasets/3",
#       "@type": "http://www.w3.org/ns/dcat#Dataset",
#       "identifier": "mw1",
#       "title": "Infections",
#       "description": "A dataset of infections",
#       "publisher": "http://fairdata.systems",
#       "license": "http://fairdata.systems/dataset/infections/license",
#       "publicationDate": "2020-11-12T11:25:00+00:00",
#       "publisher_name": "Mark Wilkinson",
#       "graphmaps": [
#         "/graph_maps/3",
#         "/graph_maps/4"
#       ],
#       "dataservices": [
#         "/data_services/1"
#       ],
#       "id": 3
#     }
#   ],
#   "hydra:totalItems": 1
# }

# {
#   "@context": "/contexts/DataService",
#   "@id": "/data_services",
#   "@type": "hydra:Collection",
#   "hydra:member": [
#     {
#       "@id": "/data_services/1",
#       "@type": "http://www.w3.org/ns/dcat#DataService",
#       "name": "Infections endpoint",
#       "description": "A SPARQL endpoint with infection data",
#       "url": "http://fairdata.systems:8990/sparql",
#       "serviceType": "SPARQL",
#       "conformsTo": "https://www.w3.org/TR/sparql11-overview/",
#       "publisher": "http://fairdata.systems",
#       "dataset": "/datasets/3",
#       "id": 1
#     }
#   ],
#   "hydra:totalItems": 1
# }
Example #38
class WriterPlugin(RDFWriter):
    def __init__(self, reader, *args, **kwargs):
        super(WriterPlugin, self).__init__(reader, *args, **kwargs)

        if isinstance(self.reader, ReaderPlugin):
            self._endpoint = self.reader.endpoint
        else:
            self._endpoint = kwargs.get("endpoint")

        self._combine_queries = kwargs.get("combine_queries")
        self._results_format = JSON

        self._sparql_wrapper = SPARQLWrapper(self._endpoint,
                                             returnFormat=self._results_format)
        user = kwargs.get('user', None)
        password = kwargs.get('password', None)
        if user is not None and password is not None:
            self._sparql_wrapper.setCredentials(user, password)

        self._sparql_wrapper.setMethod("POST")

        default_graph = kwargs.get('default_graph', None)
        if default_graph:
            self._sparql_wrapper.addDefaultGraph(default_graph)

    @property
    def endpoint(self):
        return self._endpoint

    def _save(self, *resources):
        for context, items in _group_by_context(resources).items():
            # Deletes all triples with matching subjects.
            remove_query = _prepare_delete_many_query(items, context)
            insert_query = _prepare_add_many_query(items, context)
            self._execute(remove_query, insert_query)

    def _update(self, *resources):
        for context, items in _group_by_context(resources).items():
            # Explicitly enumerates triples for deletion.
            remove_query = _prepare_selective_delete_query(items, context)
            insert_query = _prepare_add_many_query(items, context)
            self._execute(remove_query, insert_query)

    def _remove(self, *resources, **kwargs):
        for context, items in _group_by_context(resources).items():
            # Deletes all triples with matching subjects.
            inverse = kwargs.get("inverse")
            query = _prepare_delete_many_query(items, context, inverse)
            self._execute(query)

    def _size(self):
        """ Return total count of triples, not implemented. """
        raise NotImplementedError

    def _add_triple(self, s=None, p=None, o=None, context=None):
        self._add(s, p, o, context)

    def _set_triple(self, s=None, p=None, o=None, context=None):
        self._remove_from_endpoint(s, p, context=context)
        self._add(s, p, o, context)

    def _remove_triple(self, s=None, p=None, o=None, context=None):
        self._remove_from_endpoint(s, p, o, context)

    def _execute(self, *queries):
        """ Execute several queries. """

        translated = [str(query) for query in queries]
        if self._combine_queries:
            translated = ["\n".join(translated)]

        try:
            for query_str in translated:
                debug(query_str)

                self._sparql_wrapper.setQuery(query_str)
                self._sparql_wrapper.query()

            return True

        except EndPointNotFound as _:
            raise_(SparqlWriterException, "Endpoint not found",
                   sys.exc_info()[2])
        except QueryBadFormed as _:
            raise_(SparqlWriterException, "Bad query: %s" % query_str,
                   sys.exc_info()[2])
        except Exception as e:
            msg = "Exception: %s (query: %s)" % (e, query_str)
            raise_(SparqlWriterException, msg, sys.exc_info()[2])

    def _add_many(self, triples, context=None):
        debug("ADD several triples")
        query = insert()

        if context:
            query.into(context)

        for s, p, o in triples:
            query.template((s, p, o))

        query_str = str(query)
        try:
            debug(query_str)
            self._sparql_wrapper.setQuery(query_str)
            self._sparql_wrapper.query().convert()
            return True

        except EndPointNotFound as _:
            raise_(SparqlWriterException, "Endpoint not found",
                   sys.exc_info()[2])
        except QueryBadFormed as _:
            raise_(SparqlWriterException, "Bad query: %s" % query_str,
                   sys.exc_info()[2])
        except Exception as e:
            raise_(SparqlWriterException, "Exception: %s" % e,
                   sys.exc_info()[2])

    def _add(self, s, p, o, context=None):
        return self._add_many([(s, p, o)], context)

    def _remove_from_endpoint(self, s=None, p=None, o=None, context=None):
        debug('REM : %s, %s, %s, %s' % (s, p, o, context))

        query = delete()
        try:
            if s is None and p is None and o is None and context:
                query = clear().graph(context)
            else:
                if context:
                    query = delete().from_(context)

                query.template(("?s", "?p", "?o"))

                if context:
                    where_group = NamedGroup(context)
                else:
                    where_group = Group()

                where_group.append(("?s", "?p", "?o"))
                filters = self.__build_filter(s, p, o)
                if filters:
                    filter = Filter("({0})".format(filters))
                    where_group.append(filter)
                query.where(where_group)

            query_str = str(query)
            debug(query_str)
            self._sparql_wrapper.setQuery(query_str)
            self._sparql_wrapper.query().convert()
            return True
        except EndPointNotFound as _:
            error("SPARQL endpoint not found")
        except QueryBadFormed as _:
            error("Bad-formed SPARQL query")
        except SPARQLWrapperException as _:
            error("SPARQLWrapper exception")

        return None

    def __build_filter(self, s, p, o):
        vars = [(s, '?s'), (p, '?p'), (o, '?o')]
        parts = []
        for var in vars:
            if var[0] is not None:
                parts.append("%s = %s" % (var[1], self._term(var[0])))

        return " and ".join(parts)

    def index_triples(self, **kwargs):
        """
        Index the triples if such functionality is present;
        return True if the operation succeeded.
        """
        # SPARQL/Update does not support indexing operation
        return False

    def load_triples(self, source=None, context=None):
        """
        Load resources on the web into the triple-store.

        :param str source: path to the sources of triples to load
        :param context: the given context
        :return: True if successful
        :rtype: bool
        """
        if source:
            query = load()
            query.load(remote_uri=source)

            if context:
                query.into(context)

            query_str = str(query)
            debug(query_str)
            self._sparql_wrapper.setQuery(query_str)
            self._sparql_wrapper.query().convert()
            return True

        return False

    def _clear(self, context=None):
        """
        Clear the triple-store.
        """
        self._remove_from_endpoint(None, None, None, context=context)

    def _term(self, term):
        if isinstance(term, (URIRef, BNode)):
            return u'{0:s}'.format(term.n3())
        elif isinstance(term, str):
            if term.startswith('?'):
                return u'{0:s}'.format(term)
            elif is_uri(term):
                return u'<{0:s}>'.format(term)
            else:
                return u'"{0:s}"'.format(term)
        elif type(term) is Literal:
            return term.n3()
        elif isinstance(term, (list, tuple)):
            return '"{0:s}"@{1:s}'.format(term[0], term[1])
        elif type(term) is type and hasattr(term, 'uri'):
            return u'{0:s}'.format(term.uri)
        elif hasattr(term, 'subject'):
            return u'{0:s}'.format(term.subject)

        return term.__str__()
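# The _term() helper above mirrors rdflib's own N3 serialization. A minimal
# sketch (assuming rdflib is installed) of what each term type serializes to:
from rdflib import URIRef, BNode, Literal

print(URIRef("http://example.org/a").n3())  # <http://example.org/a>
print(BNode("b0").n3())                     # _:b0
print(Literal("chat", lang="fr").n3())      # "chat"@fr
print(Literal(42).n3())                     # "42"^^<http://www.w3.org/2001/XMLSchema#integer>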
Ejemplo n.º 39
0
    def run_query(self, query):
        all_results = []
        tries = []

        app.logger.debug(self.endpoints)

        get_tries = [(endpoint, 'GET') for endpoint in self.endpoints]
        post_tries = [(endpoint, 'POST') for endpoint in self.endpoints]

        app.logger.debug(get_tries)
        app.logger.debug(post_tries)

        tries.extend(post_tries)
        tries.extend(get_tries)

        app.logger.debug(tries)
        for (endpoint, method) in tries:
            try:
                sw = SPARQLWrapper(endpoint)
                sw.setMethod(method)
                sw.setReturnFormat(JSON)
                sw.setQuery(query)

                app.logger.debug("Calling endpoint {}".format(endpoint))

                # Will give problems if e.g. the GET URI is too long, or the endpoint does not respond within reasonable time.
                results = sw.queryAndConvert()

                app.logger.debug("Done")

                # Will give problems if the return type is not what we expected (e.g. XML instead of JSON)
                if "results" in results:
                    all_results.extend(results["results"]["bindings"])

                    app.logger.debug("Found {} results".format(len(results)))

            except Exception:
                app.logger.warning(
                    "Endpoint at {} did not work as expected. Maybe it's down?"
                    .format(endpoint))
                exc_type, exc_value, exc_traceback = sys.exc_info()

                traceback.print_exception(exc_type,
                                          exc_value,
                                          exc_traceback,
                                          limit=2,
                                          file=sys.stdout)
                app.logger.debug("Continuing with next endpoint...")
                continue
            else:
                if not self.all:
                    app.logger.debug(
                        "Finally, after calling {}".format(endpoint))
                    app.logger.debug(all_results)
                    break
                else:
                    app.logger.debug(
                        "Continuing with next endpoint... (calling all)")
                    continue

        app.logger.debug("Returning results from run_query")
        app.logger.debug(all_results)
        return all_results
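# A hypothetical caller for run_query(); the enclosing class is not shown, so
# the class name and constructor arguments below are assumptions based on the
# `endpoints` and `all` attributes used above.
runner = EndpointPool(endpoints=["https://dbpedia.org/sparql"], all=False)  # hypothetical class
bindings = runner.run_query("SELECT ?s WHERE { ?s ?p ?o } LIMIT 5")
for binding in bindings:
    print(binding["s"]["value"])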
Ejemplo n.º 40
0
from typing import List, Tuple
from SPARQLWrapper import SPARQLWrapper, JSON
import json
import urllib
from pathlib import Path
from tqdm import tqdm

sparql = SPARQLWrapper("http://cse-cnc238814s.cse.ohio-state.edu:3093/sparql")
sparql.setReturnFormat(JSON)

path = str(Path(__file__).parent.absolute())

with open(path + '/../ontology/fb_roles', 'r') as f:
    contents = f.readlines()

roles = set()
for line in contents:
    fields = line.split()
    roles.add(fields[1])


def execute_query(query: str) -> List[str]:
    sparql.setQuery(query)
    try:
        results = sparql.query().convert()
    except urllib.error.URLError:
        print(query)
        exit(0)
    rtn = []
    for result in results['results']['bindings']:
        assert len(result) == 1  # only select one variable
        for var in result:
            rtn.append(result[var]['value'])

    return rtn
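# A sketch of a call; any SELECT that projects a single variable fits the
# assert above, whatever data the endpoint actually serves.
values = execute_query("SELECT DISTINCT ?s WHERE { ?s ?p ?o } LIMIT 10")
print(values)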
Ejemplo n.º 41
0
class QueryMaker:

    # Init function
    def __init__(self):
        self.query = ""
        self.order = ""
        self.paramsList = []
        if not (hasattr(self, "graph")):
            self.normalGraph = Graph()
            self.appGraph = Graph()
            self.sparql = SPARQLWrapper("http://localhost:9000/sparql")
            self.graph = self.normalGraph
        # END IF

    # END FUNCTION

    # addSelect(*string) -> ()
    #   Allows user to choose which parameters will be retrieved
    #   example: addSelect("?Measure", "?Station")
    def addSelect(self, *paramsToSelect: str):
        self.query = "SELECT DISTINCT\n\t"
        for param in paramsToSelect:
            self.query = self.query + param + " "
        # END FOR
        self.query = self.query[0:len(self.query) - 1]
        self.query = self.query + "\nWHERE {\n\t"

    # END FUNCTION

    # addParam(string, string, string) -> ()
    #   Inserts into the query the triple (subject, predicate, object)
    #   example: addParam("?Measure", "rdf:type", "ns:Measurement")
    def addParam(self, s: str, p: str, o: str):
        self.paramsList.append((s, p, o))

    # END FUNCTION

    # addFilter(string) -> ()
    #   Inserts into the query a filtering sentence
    #   example: addFilter("REGEX (?StLabel, \"Moratalaz\")")
    def addFilter(self, filter: str):
        filter = self.fixDate(filter)
        self.paramsList.append(("\tFILTER", filter))

    # END FUNCTION

    # addOrder(string) -> ()
    #   Orders the result of the query with the ordering sentence passed
    #   example: addOrder("xsd:integer(?Code)")
    def addOrder(self, order: str):
        self.order = self.order + "ORDER BY " + order

    # END FUNCTION

    # executeQuery () -> List<Dictionary>
    #   Queries the graph and returns a list with the dictionary for each row
    #   example: executeQuery() -> [{"Measure":"http:/...", "Station":"http:/..."},
    #                               {"Measure":"http:/...", "Station":"http:/..."}
    #                              ]
    def executeQuery(self):
        for param in self.paramsList:
            for item in param:
                self.query = self.query + item + " "
            # END FOR
            if "\tFILTER" in param:
                self.query = self.query[0:len(self.query) - 1]
            else:
                self.query = self.query + "."
            # END IF-ELSE
            self.query = self.query + "\n\t"
        # END FOR
        self.query = self.query[0:len(self.query) - 1]
        self.query = self.query + "}"
        if not (self.order == ""):
            self.query = self.query + "\n" + self.order
        # END IF
        self.query = self.getNamespaces() + self.query

        self.sparql.setQuery(self.query)
        self.sparql.setReturnFormat(JSON)
        results = self.sparql.query().convert()
        listResult = []

        for row in results["results"]["bindings"]:
            rowDict = {}
            for key in row.keys():
                rowDict[key] = row[key]["value"]
                if (key == "Date"):
                    rowDict[key] = row[key]["value"][0:len(row[key]["value"]) -
                                                     10]
                # END IF
            # END FOR
            listResult.append(rowDict)
        # END FOR
        return listResult

    # END FUNCTION

    ## appQuery() -> List<Dictionary>
    #   Queries the measurements with given filters (or not)
    #   example: appQuery([False, False, False], []) -> List of all measurements
    #   example: appQuery([True, False, False], [{"Place":"District","ID":"#districtID"}]) -> List of measurements in given district
    #   example: appQuery([True, False, False], [{"Place":"Street","ID":"#streetID"}]) -> List of measurements in given street
    #   example: appQuery([True, False, False], [{"Place":"Station","ID":"#stationCode"}]) -> List of measurements in given station
    #   example: appQuery([False, True, False], ["2014"]) -> List of measurements in 2014
    #   example: appQuery([False, True, False], ["2014-04"]) -> List of measurements in April 2014
    #   example: appQuery([False, True, False], ["2014-04-26"]) -> List of measurements in 26th April 2014
    #   example: appQuery([False, False, True], ["#magnitudeID"]) -> List of measurements of given magnitude
    def appQuery(self, paramsUsed: list, paramsList: list):
        paramsList.reverse()
        self.addSelect("?Measure", "?StationLb", "?Date", "?MagnitudeLbEs",
                       "?MagnitudeLbEn", "?MagnitudeCode", "?Value")
        self.addParam("?Measure", "rdf:type", "ns:Measurement")
        self.addParam("?Measure", "ns:measuredAt", "?Station")
        self.addParam("?Station", "rdfs:label", "?StationLb")
        self.addParam("?Measure", "ns:dateOfMeasure", "?Date")
        self.addParam("?Magnitude", "rdf:type", "ns:Magnitude")
        self.addParam("?Magnitude", "rdfs:label",
                      "?MagnitudeLbEs , ?MagnitudeLbEn")
        self.addFilter(
            "(LANG(?MagnitudeLbEn) = \'en\' && LANG(?MagnitudeLbEs) = \'es\')")
        self.addParam("?Measure", "ns:measuredMagnitude", "?Magnitude")
        self.addParam("?Measure", "ns:measureValue", "?Value")
        self.addParam("?Magnitude", "ns:measureCode", "?MagnitudeCode")
        if paramsUsed[0] == True:
            dictionary = paramsList.pop()
            placeType = dictionary["Place"]
            identifier = dictionary["ID"]
            if placeType == "District":
                self.addParam("?District", "rdf:type", "ns:District")
                self.addParam(
                    "?District", "ns:districtID",
                    "\"{}\"^^<http://www.w3.org/2001/XMLSchema#integer>".
                    format(identifier))
                self.addParam("?Station", "ns:inDistrict", "?District")
            elif placeType == "Street":
                self.addParam("?Street", "rdf:type", "ns:Street")
                self.addParam(
                    "?Street", "ns:streetID",
                    "\"{}\"^^<http://www.w3.org/2001/XMLSchema#integer>".
                    format(identifier))
                self.addParam("?Station", "ns:inStreet", "?Street")
            elif placeType == "Station":
                self.addParam("?Station", "rdf:type", "ns:Station")
                self.addParam(
                    "?Station", "ns:stationCode",
                    "\"{}\"^^<http://www.w3.org/2001/XMLSchema#string>".format(
                        identifier))
            else:
                print("Place " + placeType + " not identified")
                exit()
            # END IF
        # END IF
        if paramsUsed[1] == True:
            date = paramsList.pop()
            splitted = date.split("-")
            if len(splitted) == 1:
                self.addFilter("REGEX (STR(?Date), \"^{}\", \"i\")".format(
                    splitted[0]))
            elif len(splitted) == 2:
                self.addFilter("REGEX (STR(?Date), \"^{}-{}\", \"i\")".format(
                    splitted[0], splitted[1]))
            elif len(splitted) == 3:
                self.addFilter(
                    "REGEX (STR(?Date), \"^{}-{}-{}\", \"i\")".format(
                        splitted[0], splitted[1], splitted[2]))
            else:
                print("Date " + date + " wrong formatted (use YYYY-MM-DD)")
                exit()
            # END IF
        # END IF
        if paramsUsed[2] == True:
            magnitude = paramsList.pop()
            self.addParam(
                "?Magnitude", "ns:measureCode",
                "\"{}\"^^<http://www.w3.org/2001/XMLSchema#string>".format(
                    magnitude))
        # END IF
        self.addOrder("asc(?Date)")
        listResult = self.executeQuery()
        return listResult

    # END FUNCTION

    # cleanQuery () -> ()
    #   Flushes the current query and params in order to prepare a new one
    def cleanQuery(self):
        self.__init__()

    # END FUNCTION

    # [private function] getNamespaces() -> string
    #   Returns the PREFIX declarations needed by the current query
    #   example: getNamespaces() -> "PREFIX ns: <...>\nPREFIX rdfs: <...>\n"
    def getNamespaces(self):
        initNs = ""
        if (self.query.find("ns:") > 0):
            initNs = initNs + "PREFIX ns: <http://www.semanticweb.org/group16/ontologies/air-quality#>\n" ""
        if (self.query.find("wiki:") > 0):
            initNs = initNs + "PREFIX wiki: <http://www.wikidata.org/entity/>\n" ""
        if (self.query.find("rdf:") > 0):
            initNs = initNs + "PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>\n" ""
        if (self.query.find("rdfs:") > 0):
            initNs = initNs + "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>\n" ""
        if (self.query.find("owl:") > 0):
            initNs = initNs + "PREFIX owl: <http://www.w3.org/2002/07/owl#>\n" ""
        if (self.query.find("sc:") > 0):
            initNs = initNs + "PREFIX sc: <https://schema.org/>\n" ""
        return initNs

    # END FUNCTION

    # [private function] fixDate(string) -> string
    #   Returns a fixed filter for date queries
    #   example: fixDate("REGEX (STR(?Date), "^2012-1-30", "i")") -> "REGEX (STR(?Date), "^2012-01-30", "i")"
    def fixDate(self, filter: str):
        idx = filter.find("^")
        if idx > 0:
            substr = filter[idx + 1:]
            splitted = substr.split("\"")
            date = splitted[0]
            splitted = date.split("-")
            if len(splitted) > 1 and splitted[1].find("0") < 0 and len(
                    splitted[1]) == 1:
                splitted[1] = "0" + splitted[1]
            if len(splitted) > 2 and splitted[2].find("0") < 0 and len(
                    splitted[2]) == 1:
                splitted[2] = "0" + splitted[2]
            if len(splitted) == 1:
                date = splitted[0]
            elif len(splitted) == 2:
                date = splitted[0] + "-" + splitted[1]
            else:
                date = splitted[0] + "-" + splitted[1] + "-" + splitted[2]
            filter = "REGEX (STR(?Date), \"^{}\", \"i\")".format(date)
        return filter
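# A usage sketch built only from the methods above; it assumes the local
# endpoint at http://localhost:9000/sparql holds the air-quality graph that
# the ns: prefix points to.
qm = QueryMaker()
qm.addSelect("?Measure", "?StationLb")
qm.addParam("?Measure", "rdf:type", "ns:Measurement")
qm.addParam("?Measure", "ns:measuredAt", "?Station")
qm.addParam("?Station", "rdfs:label", "?StationLb")
qm.addOrder("asc(?StationLb)")
for row in qm.executeQuery():
    print(row)
qm.cleanQuery()  # reset the builder before composing the next query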
Ejemplo n.º 42
0
 def update_query(self, repository_name, query):
     sparql = SPARQLWrapper(self.repository_url_update_tmplt %
                            repository_name)
     sparql.setQuery(query.strip())
     sparql.setMethod('POST')
     sparql.query()
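# A hypothetical call; repository_url_update_tmplt is not shown, but a
# GraphDB-style template such as "http://localhost:7200/repositories/%s/statements"
# would fit. `store` stands in for an instance of the enclosing class.
store.update_query("my-repo", """
    INSERT DATA { <http://example.org/s> <http://example.org/p> "o" }
""")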
Ejemplo n.º 43
0
 def __init__(self,
              endpoint="https://query.wikidata.org/sparql",
              simplified: bool = True):
     self.sparql = SPARQLWrapper(endpoint, agent=UserAgent().random)
     self.sparql.setReturnFormat(JSON)
     self.simplified = simplified
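# UserAgent presumably comes from the fake_useragent package. A hypothetical
# instantiation (the enclosing class name is an assumption); note that
# Wikidata's user-agent policy favours a descriptive, contactable agent
# string over a random one.
client = WikidataClient(simplified=False)  # hypothetical class name
client.sparql.setQuery("SELECT ?cat WHERE { ?cat wdt:P31 wd:Q146 } LIMIT 3")
print(client.sparql.query().convert()["results"]["bindings"])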
Ejemplo n.º 44
0
def fetch_dbpedia_triples(entity_labels, ignored_properties=None):
    """
    Fetch all DBpedia triples for the entities with the given labels (in English),
    caching triples per entity and batching uncached entities into chunked
    HTTP requests (50 entity URIs per request).
    """

    with open('config.yaml') as config_file:
        config = yaml.safe_load(config_file)
    db_config = config.get('defaults', {}).get('db', {})

    if db_config.get('type', 'mongo') != 'mongo':
        db_config = {}

    host = db_config.get('location', 'localhost')
    db_name = db_config.get('name', 'army_ant')

    mongo = MongoClient(host)

    cache = mongo[db_name]['entity_triples']
    cache.create_index('label')

    if ignored_properties is None:
        ignored_properties = ['http://dbpedia.org/ontology/wikiPageWikiLink']

    triples = set([])

    entity_uris = set([])
    cached_count = 0
    for entity_label in entity_labels:
        cached_entity = cache.find_one({'label': entity_label})
        if cached_entity:
            if 'triples' in cached_entity and len(cached_entity['triples']) > 0:
                s = (cached_entity['uri'], cached_entity['label'])
                for triple in cached_entity['triples']:
                    p = (triple['predicate']['uri'], triple['predicate']['label'])
                    o = (triple['object']['uri'], triple['object']['label'])
                    triples.add((s, p, o))
            cached_count += 1
        else:
            entity_uris.add(
                '<http://dbpedia.org/resource/%s>' % urllib.parse.quote_plus(entity_label.replace(' ', '_')))

    logger.debug("%d out of %d entities with cached triples" % (cached_count, len(entity_labels)))

    if len(entity_uris) == 0:
        return triples

    sparql = SPARQLWrapper(dbpedia_sparql_url)

    for entity_uris_chunk in chunks(list(entity_uris), 50):
        query = '''
                SELECT ?s ?sLabel ?p ?pLabel ?o ?oLabel
                WHERE {
                VALUES ?s { %s }
                ?s ?p ?o .
                ?s rdfs:label ?sLabel .
                ?p rdfs:label ?pLabel .
                ?o rdfs:label ?oLabel .
                FILTER (langMatches(lang(?sLabel), 'en')
                    && langMatches(lang(?pLabel), 'en')
                    && langMatches(lang(?oLabel), 'en'))
                }
            ''' % ' '.join(entity_uris_chunk)

        # print(query)
        sparql.setQuery(query)
        sparql.setReturnFormat(JSON)

        result = sparql.query()
        data = result.response.read()
        # print(data.decode('utf-8'))
        data = json.loads(data.decode('utf-8'))

        cache_data = {}

        for binding in data['results']['bindings']:
            if ignored_properties and binding['p']['value'] in ignored_properties:
                continue

            s = (binding['s']['value'], binding['sLabel']['value'])
            p = (binding['p']['value'], binding['pLabel']['value'])
            o = (binding['o']['value'], binding['oLabel']['value'])

            if s not in cache_data:
                cache_data[s] = []

            cache_data[s].append({
                'predicate': {'uri': p[0], 'label': p[1]},
                'object':    {'uri': o[0], 'label': o[1]}
            })

            triples.add((s, p, o))

        for k, v in cache_data.items():
            cache.insert_one({
                'uri': k[0],
                'label': k[1],
                'triples': v
            })

        # cache_data is keyed by (uri, label) pairs, so compare on the labels
        cached_labels = {label for _, label in cache_data}
        for entity_label in set(entity_labels).difference(cached_labels):
            cache.insert_one({
                'label': entity_label,
                'triples': []
            })

    return list(triples)
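# A usage sketch; dbpedia_sparql_url, chunks, logger and the Mongo settings
# are assumed to be defined elsewhere in the module.
triples = fetch_dbpedia_triples(["Lisbon", "Porto"])
for s, p, o in triples:
    print(s[1], p[1], o[1])  # each element is a (uri, label) pair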
Ejemplo n.º 45
0
def get_results(endpoint_url, query):
    sparql = SPARQLWrapper(endpoint_url)
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()
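# Example call against the public Wikidata endpoint (wd:Q23397 is the item
# for "lake"):
endpoint_url = "https://query.wikidata.org/sparql"
query = """SELECT ?lake ?lakeLabel WHERE {
  ?lake wdt:P31 wd:Q23397 .
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en" . }
} LIMIT 5"""
for row in get_results(endpoint_url, query)["results"]["bindings"]:
    print(row["lakeLabel"]["value"])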
Ejemplo n.º 46
0
# https://rdflib.github.io/sparqlwrapper/
from SPARQLWrapper import SPARQLWrapper, JSON
import json
import pprint

# wikidata query for all the lakes in the US
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
sparql.setQuery("""#List of all the lakes in US
PREFIX schema: <http://schema.org/>
SELECT  ?lake ?lakeLabel ?article ?coordinate_location ?lake_inflows ?lake_outflow
        ?elevation_above_sea_level ?area ?length ?width ?volume_as_quantity ?watershed_area
        ?perimeter ?residence_time_of_water ?vertical_depth ?GNIS_ID ?GeoNames_ID
WHERE { ?lake (wdt:P31/wdt:P279*) wd:Q23397.
        ?lake wdt:P17 wd:Q30.
  SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
  OPTIONAL { ?article schema:about ?lake.
             ?article schema:inLanguage "en".
             ?article schema:isPartOf <https://en.wikipedia.org/>. }
  OPTIONAL { ?lake wdt:P625 ?coordinate_location. }
  OPTIONAL { ?lake wdt:P200 ?lake_inflows. }
  OPTIONAL { ?lake wdt:P201 ?lake_outflow. }
  OPTIONAL { ?lake wdt:P2044 ?elevation_above_sea_level. }
  OPTIONAL { ?lake wdt:P2046 ?area. }
  OPTIONAL { ?lake wdt:P2043 ?length. }
  OPTIONAL { ?lake wdt:P2049 ?width. }
  OPTIONAL { ?lake wdt:P2234 ?volume_as_quantity. }
  OPTIONAL { ?lake wdt:P2053 ?watershed_area. }
  OPTIONAL { ?lake wdt:P2547 ?perimeter. }
  OPTIONAL { ?lake wdt:P3020 ?residence_time_of_water. }
  OPTIONAL { ?lake wdt:P4511 ?vertical_depth. }
  OPTIONAL { ?lake wdt:P590 ?GNIS_ID. }
  OPTIONAL { ?lake wdt:P1566 ?GeoNames_ID. }  # P1566 = GeoNames ID
}""")

sparql.setReturnFormat(JSON)
results = sparql.query().convert()

for result in results["results"]["bindings"]:
    print(result["lakeLabel"]["value"])
Ejemplo n.º 47
0
#RecupAbstractVillesOcDBpedia.py
# created: 10/02/2018
# by Eve Séguier
# The aim of this program is to fetch, from dbpedia, the label and comment of French towns whose comment contains the word 'occitan', and to write the result to a file

from SPARQLWrapper import SPARQLWrapper, JSON
import json
import csv

endpoint = "http://fr.dbpedia.org/sparql"

with open('abstractvillesOcDBpedia.csv', 'w', newline='',
          encoding='utf-8') as csvfile:

    spamwriter = csv.writer(csvfile, delimiter=',', quotechar='"')
    sparql = SPARQLWrapper(endpoint)

    querystring = """
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX db-owl: <http://dbpedia.org/ontology/>
SELECT ?ville ?label ?comment ?code
WHERE {
    ?ville db-owl:country <http://fr.dbpedia.org/resource/France> ;
        db-owl:inseeCode ?code ;
        rdf:type db-owl:Settlement ;
        rdfs:comment ?comment ;
        rdfs:label ?label

FILTER regex(?comment,".*occitan.*")
FILTER langmatches(lang(?label),"fr")
FILTER langmatches(lang(?comment),"fr")
}
"""

    sparql.setQuery(querystring)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    for result in results["results"]["bindings"]:
        spamwriter.writerow([result["ville"]["value"],
                             result["label"]["value"],
                             result["comment"]["value"],
                             result["code"]["value"]])
Ejemplo n.º 48
0
def getNodes(URI):
    # Empties context, returns context
    global context
    context = []
    c = conn2.cursor()
    c.execute('SELECT * FROM nci WHERE URI=?', (URI, ))
    result = c.fetchall()
    c.close()

    if len(result) > 0:
        # the cached value was written with str(); ast.literal_eval would be a
        # safer way to read it back than eval
        context = eval(result[0][1])
        c.close()
        return context
    else:
        sparql = SPARQLWrapper(endpoint)
        sparql.setReturnFormat(JSON)
        print(URI.rsplit('/')[-1], "has", end=" ")

        # URI is_a X
        querystring = """
        PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
        SELECT DISTINCT ?s WHERE { <""" + str(
            URI) + """> rdfs:subClassOf ?s . FILTER ( isURI(?s )) . }"""
        sparql.setQuery(querystring)
        results = sparql.query().convert()
        for x in results["results"]["bindings"]:
            context.append([URI, "is a", x["s"]["value"]])

        # X is a URI
        querystring = """
        PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
        SELECT DISTINCT ?o WHERE { ?o rdfs:subClassOf <""" + str(
            URI) + """> . FILTER (isURI(?o )) . }"""
        sparql.setQuery(querystring)
        results = sparql.query().convert()
        for x in results["results"]["bindings"]:
            context.append([x["o"]["value"], 'is a', URI])

        # URI part_of X
        querystring = """
        PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
        PREFIX owl:<http://www.w3.org/2002/07/owl#>

        SELECT DISTINCT ?s ?p WHERE {
        <""" + str(URI) + """> rdfs:subClassOf ?b1 . FILTER ( isBLANK(?b1)) .
        ?b1 owl:someValuesFrom ?s .
        ?b1 owl:onProperty ?p . }"""
        sparql.setQuery(querystring)
        results = sparql.query().convert()
        for x in results["results"]["bindings"]:
            if "part_of" in x["p"]["value"].lower():
                context.append([URI, x["p"]["value"], x["s"]["value"]])

        # X part_of URI
        querystring = """
        PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
        PREFIX owl:<http://www.w3.org/2002/07/owl#>
        
        SELECT DISTINCT ?o ?p WHERE {
        ?blank owl:someValuesFrom <""" + str(
            URI) + """> . FILTER ( isBLANK(?blank)) .
        ?blank owl:onProperty ?p .
        ?o rdfs:subClassOf ?blank . FILTER ( isURI(?o )) . }"""
        sparql.setQuery(querystring)
        results = sparql.query().convert()
        for x in results["results"]["bindings"]:
            if "part_of" in x["p"]["value"].lower():
                context.append([x["o"]["value"], x["p"]["value"], URI])

        print(len(context), "neighbours (to db)")
        c = conn2.cursor()
        t = (URI, str(context))
        c.execute('insert into nci values (?,?)', t)
        conn2.commit()
        c.close()
    return context
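# A usage sketch; `endpoint`, the sqlite connection `conn2` and its `nci`
# cache table are set up elsewhere in the script, and the URI below is a
# placeholder.
neighbours = getNodes("http://example.org/ontology#SomeClass")  # placeholder URI
for subj, relation, obj in neighbours:
    print(subj, relation, obj)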
Ejemplo n.º 49
0
def get_paths(Book1, Book2, id1, id2, out):
    sparql = SPARQLWrapper("http://eculture2.cs.vu.nl:6543/sparql/")
    paths = []
    Book1 = re.sub("'", "%27", Book1)
    Book2 = re.sub("'", "%27", Book2)
    query_paths_L1 = "SELECT DISTINCT ?prop WHERE {<" + Book1 + "> ?prop <" + Book2 + "> .}"
    query_paths_L2_1 = "SELECT DISTINCT ?prop1 ?t2 ?prop2 ?v1 WHERE {<" + Book1 + "> ?prop1 ?v1 . <" + Book2 + "> ?prop2 ?v1. ?v1 rdf:type ?t2 .}"
    query_paths_L2_2 = "SELECT DISTINCT ?prop1 ?t2 ?prop2 ?v1 WHERE {<" + Book1 + "> ?prop1 ?v1 . ?v1 ?prop2 <" + Book2 + ">. ?v1 rdf:type ?t2 .}"
    query_paths_L3_1 = "SELECT DISTINCT ?prop1 ?t2 ?prop2 ?t3 ?prop3 WHERE {<" + Book1 + "> ?prop1 ?v1 .  ?v1 ?prop2 ?v2 .  ?v2 ?prop3 <" + Book2 + "> .?v1 rdf:type ?t2 . ?v2 rdf:type ?t3 .}"

    #print 'L1'
    #print query_paths_L1
    sparql.setQuery(query_paths_L1)
    sparql.setReturnFormat(JSON)
    L1 = sparql.query().convert()

    for row in L1["results"]["bindings"]:
        res1 = '"' + id1 + '","' + id2 + '",'
        res = '"' + row["prop"]["value"] + '"\n'
        #if id1 not in book_paths.keys():
        #	book_paths[id1]={}
        #if id2 not in book_paths[id1].keys():
        #	book_paths[id1][id2]=[]
        #book_paths[id1][id2].append(res)
        paths.append(res1 + res)
        #out.write(res)
    try:
        sparql.setQuery(query_paths_L2_1)
        sparql.setReturnFormat(JSON)
        L2 = sparql.query().convert()

        for row in L2["results"]["bindings"]:
            res1 = '"' + id1 + '","' + id2 + '",'
            res = '"' + row["prop1"]["value"] + ','
            res += row["t2"]["value"] + ','
            res += row["prop2"]["value"] + '"\n'
            #out.write(res)
            #if id1 not in book_paths.keys():
            #	book_paths[id1]={}
            #if id2 not in book_paths[id1].keys():
            #	book_paths[id1][id2]=[]
            #book_paths[id1][id2].append(res)
            paths.append(res1 + res)
    except Exception:
        print('L2_1 wrong')

    sparql.setQuery(query_paths_L2_2)
    sparql.setReturnFormat(JSON)
    L2 = sparql.query().convert()

    for row in L2["results"]["bindings"]:
        res1 = '"' + id1 + '","' + id2 + '",'
        res = '"' + row["prop1"]["value"] + ','
        res += row["t2"]["value"] + ','
        res += row["prop2"]["value"] + '"\n'
        #out.write(res)

        #if id1 not in book_paths.keys():
        #	book_paths[id1]={}
        #if id2 not in book_paths[id1].keys():
        #		book_paths[id1][id2]=[]
        #	book_paths[id1][id2].append(res)
        paths.append(res1 + res)
    return paths
Ejemplo n.º 50
0
def getRecetteList():
    parameters = request.args

    # filter of the SPARQL query
    filter_clause = ""

    # filter on multiple ingredients
    filter_ingredients = ""

    # filter on multiple keywords
    filter_keywords = ""

    # filter on note
    # Add the filter only if the note is provided
    note = parameters.get('note')
    if note is not None:
        if filter_clause == "":
            filter_clause = "FILTER( xsd:float(?ratingValue)>" + note + " "
        else:
            filter_clause += "&& xsd:float(?ratingValue)>" + note + " "

    # filter on tempDePrep
    # Add the filter only if the tempDePrep is provided
    tempDePrep = parameters.get('tempDePrep')
    if tempDePrep is not None:
        if filter_clause == "":
            filter_clause = 'FILTER( "' + tempDePrep + '"^^xsd:duration > xsd:duration(?totalTime) '
        else:
            filter_clause += '&& "' + tempDePrep + '"^^xsd:duration > xsd:duration(?totalTime) '

    # filter on typeCuisine
    # Add the filter only if the typeCuisine is provided
    typeCuisine = parameters.get('typeCuisine')
    if typeCuisine is not None:
        if filter_clause == "":
            filter_clause = "FILTER( CONTAINS(str(?cuisine),'" + typeCuisine + "' ) "
        else:
            filter_clause += "&& CONTAINS(str(?cuisine), '" + typeCuisine + "' ) "

    # Close the parenthesis at the end of the clause
    if filter_clause != "":
        filter_clause += ")."

    # filter on ingredient
    # Add the filter only if the ingredient is provided
    ingredientsList = parameters.get('ingredients')
    if ingredientsList is not None:
        ingredients = ingredientsList.split(',')
        for ingredient in ingredients:
            if filter_ingredients == "":
                filter_ingredients = "FILTER( CONTAINS(str(?ingredients), '" + ingredient + "' ) "
            else:
                filter_ingredients += "&& CONTAINS(str(?ingredients), '" + ingredient + "' ) "

    # Close the parenthesis at the end of the clause
    if filter_ingredients != "":
        filter_ingredients += ")."

    # filter on keyword
    # Add the filter only if the keyword is provided
    keywordsList = parameters.get('keywords')
    if (keywordsList is not None) and (keywordsList != ''):
        keywords = keywordsList.split(' ')
        for keyword in keywords:
            if filter_keywords == "":
                filter_keywords = "FILTER( CONTAINS(LCASE(str(?keywords)), '" + keyword + "' ) "
            else:
                filter_keywords += "|| CONTAINS(LCASE(str(?keywords)), '" + keyword + "' ) "

    # Close the parenthesis at the end of the clause
    if filter_keywords != "":
        filter_keywords += ")."

    query = """SELECT DISTINCT
            ?name 
            ?desc 
            ?img
            ?totalTime
            ?ratingValue
            ?source
        WHERE {
            {
            SELECT 
                ?desc 
                ?name 
                ?img
                ?totalTime
                ?ratingValue
                Min(?source) AS  ?source
                (group_concat(DISTINCT ?ingredients;separator = ";") as ?ingredients)
                (group_concat(DISTINCT ?keywords;separator = ";") as ?keywords)
            WHERE {
                SELECT DISTINCT
                    ?desc 
                    ?name 
                    ?img
                    ?ingredients
                    ?totalTime
                    ?ratingValue
                    ?source
                    ?keywords
                WHERE
                {
                    ?recipe a schema:Recipe;
                    schema:description ?desc;
                    schema:name ?name;
                    schema:image ?img;
                    schema:recipeCuisine ?cuisine;
                    schema:ingredients ?ingredients;
                    schema:keywords ?keywords;               
                    schema:ratingValue ?ratingValue;
                    schema:totalTime ?totalTime;
                    wdrs:describedby ?source.
                    """ + filter_clause + """
                }
            }
            GROUP BY ?desc ?name ?img ?totalTime ?ratingValue
            }
        """ + filter_ingredients + """  """ + filter_keywords + """
        } """

    # get the result of the query in json
    sparql = SPARQLWrapper("http://linkeddata.uriburner.com/sparql")
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    # get summary for each recette
    results = mappingSmallSummary(results)
    resp = make_response(results)
    resp.headers.set('Access-Control-Allow-Origin', '*')
    return resp
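# The handler reads its filters from the query string, so a client call might
# look like this (the route path and port are assumptions, since only the
# handler body is shown):
import requests

resp = requests.get("http://localhost:5000/recettes",  # hypothetical route
                    params={"note": "4", "ingredients": "tomate,basilic"})
print(resp.text)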
Ejemplo n.º 51
0
 def _select_datasets_in_triplestore_base(self, query, datastore_name):
     """
     Create a new dataset in the triplestore
     :param query: query of the sparql request
     :param datastore_name: name of the dtastore
     """
     if not datastore_name:
         LOGGER.debug(u'No datastore name is given! Skipping...')
         return
     sparql_wrapper = SPARQLWrapper(
         self._get_query_endpoint(datastore_name))
     sparql_wrapper.setQuery(query)
     sparql_wrapper.setMethod(POST)
     sparql_wrapper.setTimeout(10)
     sparql_wrapper.setReturnFormat(JSON)
     return sparql_wrapper.query()
Ejemplo n.º 52
0
                    ((int(rowdf['Value 1']) -
                      (localTimestamp & 0xFFFF)) & 0xFFFF)) - localTimeDif
        rowTime = startDate + timedelta(seconds=tempTime)
        rowDate = str(rowTime).split(" ", 1)[0]
        rowTime = str(rowTime).split(" ", 1)[1]
        rowValue = rowdf['Value 2']
        if rowValue != 0:
            the_file.write(
                '<http://example.org/#measurement' + rowDate + "T" + rowTime +
                'FR235> <http://example.org/hasType> <http://example.org/Type/heartRateMeasurement> ;\n'
            )
            the_file.write('    <http://example.org/hasValue> ' +
                           str(int(rowValue)) + ' ;\n')
            the_file.write(
                '    <http://example.org/device> <http://example.org/#GarminFR235> ;\n'
            )
            the_file.write('    <http://example.org/measuredOn> "' + rowDate +
                           "T" + rowTime + '"^^xsd:dateTime .\n')
    the_file.write('}\n\n')
    the_file.close()

# read in turtle file and insert into triple store
with open(fileStem + ".ttl", 'r') as ttl_file:
    data = ttl_file.read()
sparql = SPARQLWrapper("http://localhost:3030/TDB/update")
sparql.setQuery(data)
sparql.setMethod('POST')
sparql.setReturnFormat(JSON)
results = sparql.query()
Ejemplo n.º 53
0
import sys
import re
import os
import requests
import wget
from w3lib.html import replace_entities
import json
import csv
import shutil
from SPARQLWrapper import SPARQLWrapper, JSON
import time
from rdflib import Graph, Namespace, BNode, URIRef, Literal

#query wikidata
sparql = SPARQLWrapper(
    "https://query.wikidata.org/sparql",
    agent='LexBib-Bibliodata-enrichment-script (lexbib.org)')
sparql.setQuery(
    """SELECT ?isocode ?lang ?langName (lang(?langName) as ?langNamelang)
				WHERE
					{
					?lang wdt:P220 ?isocode .
					?lang rdfs:label ?langName .
					filter regex(str(lang(?langName)) , "^en|^de|^es|^eu|^ca|^gl|^sl|^fr|^nl|^hr|^cs|^da|^et|^fi|^el|^hu|^ga|^it|^lv|^lb|^mt|^nb|^nn|^pl|^pt|^sk|^sv")


					} """)
sparql.setReturnFormat(JSON)
#wdquerycount = wdquerycount + 1

time.sleep(1.5)
Ejemplo n.º 54
0
import sys
from KafNafParserPy import *
import redis
from SPARQLWrapper import SPARQLWrapper, JSON
from collections import defaultdict, Counter
import ast
import subprocess
from rdflib import Graph, URIRef
import jsonrpclib
from simplejson import loads
server = jsonrpclib.Server("http://localhost:3456/")

def tokensToOffsets(words, startToken, endToken):
        return words[startToken][1]['CharacterOffsetBegin'], words[endToken][1]['CharacterOffsetEnd']

sparql = SPARQLWrapper("http://dbpedia.org/sparql")
nones=["none", "nil", "--nme--"]

def getCorefChains(g):
	documentText=getNIFString(g)
#	if not documentText.startswith("BADMINTON - WORLD GRAND PRIX RESULTS. BALI 1996-12-06 Results") and not documentText.startswith("CRICKET - 1997 ASHES INTINERARY. LONDON 1996-08-30 Australia"):
	if False:
		result = loads(server.parse(documentText))
		chains=[]
		sentences=result['sentences']
		if 'coref' in result:
			coref=result['coref']
			for chain in coref:
				offsetChain=set()
				for pair in chain:
					for phrase in pair:
Ejemplo n.º 55
0
class QuestionSolver:
    def __init__(self):
        self.sparql = SPARQLWrapper('https://query.wikidata.org/sparql')
        self.wiki_api_url = 'https://www.wikidata.org/w/api.php'
        self.nlp = spacy.load('en_core_web_md')
        self.matcher = self.init_matcher()
        self.stop_words = {'a', 'by', 'of', 'the', '\'s', '"'}
        # simple translation dictionary to convert some phrasings into query keywords
        self.trans_dict = {
            'direct': 'director',
            'write': 'author',
            'compose': 'composer',
            'invent': 'inventor',
            'bear': 'birth',
            'die': 'death',
        }

    def init_matcher(self):
        matcher = Matcher(self.nlp.vocab)
        matcher.add('WHEN_WHERE', None, [{
            'LOWER': {
                'IN': ['when', 'where']
            }
        }, {
            'DEP': {
                'IN': ['ROOT', 'aux', 'auxpass']
            }
        }])
        matcher.add('X_OF_Y', None, [{
            'DEP': 'attr',
            'LOWER': {
                'IN': ['who', 'what']
            }
        }, {
            'LOWER': {
                'IN': ['is', 'are', 'was', 'were']
            }
        }])
        matcher.add('WHO_DID_X', None, [{
            'DEP': 'nsubj',
            'LOWER': 'who'
        }, {
            'DEP': 'ROOT'
        }])
        return matcher

    def answer_question(self, question):
        try:
            parsed_question = self.parse_question(question.strip().strip(' ?'))
            for answer in self.query_answer(parsed_question[0],
                                            parsed_question[1]):
                answer = answer['answerLabel']['value']
                try:
                    date = datetime.strptime(answer, '%Y-%m-%dT%H:%M:%SZ')
                    print(date.strftime('%m/%d/%Y'))
                except ValueError:
                    print(answer)

        except NoAnswerError as err:
            print(err)

    def parse_question(self, question):
        result = self.nlp(question)
        results = self.matcher(result)

        try:
            match_id, start, end = results[0]
        except IndexError:
            raise NoAnswerError(
                'Question is ill-formed, cannot answer this question')

        if result.vocab.strings[match_id] == 'WHEN_WHERE':
            entity = [
                w.text
                for w in next(w for w in result
                              if w.dep_ in ['nsubj', 'nsubjpass']).subtree
            ]
            prop_one = result[0].lemma_
            prop_two = result[-1].lemma_
            prop = [prop_one, prop_two]

        elif result.vocab.strings[match_id] == 'X_OF_Y':
            prop_ent = next(w for w in result if w.dep_ == 'pobj')
            prop = [w.text for w in prop_ent.head.head.lefts
                    ] + [prop_ent.head.head.text]
            entity = [w.text for w in prop_ent.subtree]

        elif result.vocab.strings[match_id] == 'WHO_DID_X':
            prop = ['who', next(w for w in result if w.dep_ == 'ROOT').lemma_]
            entity = [w.text for w in result[end:]]

        prop = self.translate_query(prop)

        entity = ' '.join(w for w in entity if w not in self.stop_words)

        return prop, entity

    def translate_query(self, query):
        query = [w for w in query if w not in self.stop_words]
        new_query = ' '.join(query)  # default is to simply join the words

        # in some cases, the words in questions must be "translated"
        if 'members' in query:
            return 'has part'

        if len(query) < 2:
            return new_query

        if query[1] in ['direct', 'write', 'compose', 'invent']:
            if query[0] == 'who':
                new_query = self.trans_dict[query[1]]
            if query[0] == 'when':
                new_query = 'inception'

        elif query[1] in ['bear', 'die']:
            if query[0] == 'when':
                new_query = 'date of ' + self.trans_dict[query[1]]
            elif query[0] == 'where':
                new_query = 'place of ' + self.trans_dict[query[1]]

        elif query[1] in ['publish', 'release']:
            if query[0] == 'who':
                new_query = 'publisher'
            elif query[0] == 'when':
                new_query = 'publication date'

        return new_query

    def query_wikidata_api(self, string, prop_search=False):
        params = {
            'action': 'query',
            'format': 'json',
            'list': 'search',
            'srsearch': unidecode(string),
            'srnamespace': 120 if prop_search else 0,
            'srlimit': 5,
            'srprop': '',
        }

        results = get(self.wiki_api_url, params).json()['query']['search']

        if results:
            return [
                res['title'][9:] if prop_search else res['title']
                for res in results
            ]

        return None

    def query_answer(self, prop, entity):
        wikidata_props = self.query_wikidata_api(prop, True)
        wikidata_entities = self.query_wikidata_api(entity)

        if wikidata_props is None or wikidata_entities is None:
            raise NoAnswerError

        for wikidata_entity in wikidata_entities:
            for wikidata_prop in wikidata_props:
                query_string = ('SELECT ?answerLabel WHERE {{ '
                                '  wd:{} wdt:{} ?answer . '
                                '  SERVICE wikibase:label {{ '
                                '    bd:serviceParam wikibase:language "en" .'
                                '  }}'
                                '}}'.format(wikidata_entity, wikidata_prop))

                self.sparql.setQuery(query_string)
                self.sparql.setReturnFormat(JSON)
                results = self.sparql.query().convert()['results']['bindings']

                if not results:
                    continue

                return results

        raise NoAnswerError
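# A usage sketch; it assumes the spaCy model has been fetched beforehand with
# `python -m spacy download en_core_web_md`.
solver = QuestionSolver()
solver.answer_question("Who directed Jaws?")
solver.answer_question("When was Albert Einstein born?")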
Ejemplo n.º 56
0
 def login(self, user, password):
     if '/sparql' not in self._server:
         self._server += '/sparql'
     p = self._server.find('/sparql')
     resource = self._server[:p]
     login_endpoint = SPARQLWrapper(resource + '/login')
     login_endpoint.setMethod(POST)
     login_endpoint.addCustomHttpHeader(
         'Content-Type', 'application/x-www-form-urlencoded')
     login_endpoint.addCustomHttpHeader('Accept', 'text/plain')
     login_endpoint.addCustomHttpHeader('charset', 'utf-8')
     login_endpoint.addParameter('email', user)
     login_endpoint.addParameter('password', password)
     self.user = user
     self.authentication_key = login_endpoint.query().response.read(
     ).decode("utf-8")
Ejemplo n.º 57
0
# --- imports ---
from pprint import pprint
import re
from SPARQLWrapper import SPARQLWrapper, JSON
sparql = SPARQLWrapper("http://dbpedia.org/sparql")
sparql.addDefaultGraph("http://dbpedia.org")

# --- constants ---
VERBOSE = False

# ontology
SPARQ_AUTHOR_NAME = """
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX dbp: <http://dbpedia.org/ontology/>
    SELECT ?person
    WHERE {{
        ?person a dbp:Person .
        ?person foaf:name "{}"@en .
    }}
    LIMIT 100
"""
SPARQ_MOVEMENTS = """
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX dbp: <http://dbpedia.org/ontology/>
    PREFIX dct: <http://purl.org/dc/terms/>
    SELECT ?genre ?genre_name
    WHERE {{
        <{}> dbp:genre ?genre .
        ?genre dct:subject dbc:Literary_movements .
        ?genre rdfs:label ?genre_name .
    }}
    LIMIT 100
"""
Ejemplo n.º 58
0
def retrieve_from_wikidata_second_round(seen_player_id_dict):
    sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
    query = """
        SELECT ?player ?playerLabel
        WHERE {
            ?player wdt:P106/(wdt:P279|wdt:P31)* wd:Q3665646 .
            ?player wdt:P21 wd:Q6581097 .
            ?player wdt:P27 wd:Q30 .
            OPTIONAL { ?player wdt:P1532 wd:Q30 } .
            SERVICE wikibase:label { bd:serviceParam wikibase:language "en" } .
        }
    """
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)
    wikidata_qualifier_rs = sparql.query().convert()
    qualife_result_df = pd.json_normalize(
        wikidata_qualifier_rs["results"]["bindings"])

    resultf_df_ls = []

    wikidata_qualifier_ls = qualife_result_df["player.value"]
    wikidate_label_ls = qualife_result_df["playerLabel.value"]

    new_wikidata_id_ls = []
    test = 0
    for i, wikidata_qualifier in enumerate(wikidata_qualifier_ls):
        wikidata_qualifier = get_wikidata(wikidata_qualifier)
        wikidate_label = wikidate_label_ls[i]
        wikidate_label = wikidate_label.strip()

        new_wikidata_id_ls.append(
            dict(wikidata_id=wikidata_qualifier, player_name=wikidate_label))

        # look up the corresponding information for each player

        # sparql.setQuery(query_each_person)
        # sparql.setReturnFormat(JSON)
        # wikidata_rs = sparql.query().convert()
        # wiki_df = pd.json_normalize(wikidata_rs["results"]["bindings"])
        #
        # resultf_df_ls.append(wiki_df)
        # if len(resultf_df_ls) == 10000:
        #     break

    conn = None
    cursor = None
    try:
        sql = """
            INSERT IGNORE INTO WIKIDATA_INDEX (wikidata_id, player_name)
            VALUE (%(wikidata_id)s, %(player_name)s)
        """
        print(datetime.datetime.now())
        conn = get_mysql_conn()
        cursor = conn.cursor(buffered=True)
        cursor.executemany(sql, new_wikidata_id_ls)
        conn.commit()
        print("committed new wikidata id to database")
    except Exception:
        traceback.print_exc()
        if conn:
            conn.rollback()
    finally:
        if cursor:
            cursor.close()
        if conn:
            conn.close()
Ejemplo n.º 59
0
    def is_endpoint(links: list, first_crawl=True):
        for link in links:
            print(f"\nCurrent Website : {link}")
            # Site to be checked & Query & Timeout configuration
            sparql = SPARQLWrapper(f"{link}", returnFormat=JSON)
            sparql.setQuery("ASK WHERE { ?s ?p ?o. }")
            sparql.setTimeout(30)
            sparql.setOnlyConneg(True)
            link_domain = urlparse(link).netloc

            try:
                # Execute query and convert results to the returnFormat which is JSON.
                query_result = sparql.queryAndConvert()
                if query_result[
                        "boolean"] and not Database.in_the_endpoints_collection(
                            link_domain):
                    Database.insert_to_endpoints_collection(link, link_domain)
                    print("Endpoint written on DB.")
                else:
                    if Database.in_the_endpoints_collection(link_domain):
                        print("Endpoint already exist in DB.")
                    else:
                        print("This site isn't a SPARQL endpoint.")

            except (EndPointNotFound, EndPointInternalError,
                    QueryBadFormed) as e:
                if first_crawl:  # first crawl
                    if is_alive(link) and not Database.in_the_endpoints_collection(link_domain) \
                            and not Database.in_the_second_crawl_domains_collection(link_domain):
                        Database.insert_to_second_crawl_domains_collection(
                            link_domain)
                        print(
                            f"This site's domain is added for second crawl. site : {link_domain}"
                        )
                    elif Database.in_the_endpoints_collection(link_domain) \
                            or Database.in_the_second_crawl_domains_collection(link_domain):
                        print("This domain already exist in DB.")
                    else:
                        print("This site is not alive.")
                        continue
                else:  # second crawl
                    continue

            except (HTTPError, URLError) as UrllibError:
                if first_crawl:  # first crawl
                    if "503" in str(UrllibError):
                        print("This site is not alive.")
                    elif "certificate verify failed" in str(UrllibError) \
                            and not Database.in_the_endpoints_collection(link_domain) \
                            and not Database.in_the_second_crawl_domains_collection(link_domain):
                        Database.insert_to_second_crawl_domains_collection(
                            link_domain)
                        print(
                            f"This site's domain is added for second crawl. site : {link_domain}"
                        )
                    else:
                        Sparql.general_control_for_missed_endpoint(
                            link, link_domain)
                        print("Urllib Error.")
                else:  # second crawl
                    Sparql.general_control_for_missed_endpoint_in_second_crawl(
                        link, link_domain)
                    continue

            except (SPARQLWrapperException, URITooLong,
                    Unauthorized) as WrapperException:
                print("Error while wrapping endpoint: ", WrapperException)
                print("WrapperException")

            except TypeError:
                if first_crawl:  # first crawl
                    Sparql.general_control_for_missed_endpoint(
                        link, link_domain)
                    print("Type Error")
                else:  # second crawl
                    Sparql.general_control_for_missed_endpoint_in_second_crawl(
                        link, link_domain)
                    continue

            except Exception:
                if first_crawl:  # first crawl
                    Sparql.general_control_for_missed_endpoint(
                        link, link_domain)
                    print('Exception: ')
                else:  # second crawl
                    Sparql.general_control_for_missed_endpoint_in_second_crawl(
                        link, link_domain)
                    continue
Ejemplo n.º 60
0
# -*- coding: UTF-8 -*-
'''
Created on 20181025
 
@author: Hansen
'''

from SPARQLWrapper import SPARQLWrapper, JSON

sparql = SPARQLWrapper("http://localhost:3030/movies/sparql")
sparql.setQuery("""
    PREFIX : <http://www.neohope.com/hansen/ontologies/2018/movies#>
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

    SELECT ?title WHERE {
        ?aPerson rdf:type :Person.
        ?aPerson  :personName '巩俐'.
        ?aPerson  :hasActedIn ?aMovie.
        ?aMovie :movieTitle ?title.
        ?aMovie :movieRating ?rating.
        FILTER (?rating>=7)
    }
    LIMIT 10
""")

sparql.setReturnFormat(JSON)
results = sparql.query().convert()

for result in results["results"]["bindings"]:
    print(result["title"]["value"])