Beispiel #1
0
def clear():
    """Drop every triple in the configured named graph.

    Reads the endpoint and graph URIs from ``config.ini`` and issues a
    ``CLEAR GRAPH`` update against the SPARQL endpoint.
    """
    # Load endpoint configuration from disk.
    cfg = ConfigParser()
    cfg.read('config.ini')
    endpoint = cfg['Mandatory']['endpointURI']
    graph = cfg['Mandatory']['graphURI']

    # Wrapper configured for an update request (POST, JSON response),
    # scoped to the target named graph.
    sparql = SPARQLWrapper(endpoint)
    sparql.setReturnFormat(JSON)
    sparql.setMethod(POST)
    sparql.addDefaultGraph(graph)

    # Triple-store-backed graph handle for the same named graph.
    store = SPARQLUpdateStore(endpoint, endpoint)
    g = Graph(store, identifier=graph)

    # Issue the CLEAR GRAPH update; the response is discarded.
    sparql.setQuery("CLEAR GRAPH <" + graph + ">")
    sparql.query().convert()

    # Release the local graph handle.
    g.close()
def clear():
    """Remove all triples from the graph named in ``config.ini``."""
    config = ConfigParser()
    config.read('config.ini')

    endpoint_uri = config['Mandatory']['endpointURI']
    graph_uri = config['Mandatory']['graphURI']

    # Endpoint wrapper: JSON results, POST method, scoped to our graph.
    sparql = SPARQLWrapper(endpoint_uri)
    sparql.setReturnFormat(JSON)
    sparql.setMethod(POST)
    sparql.addDefaultGraph(graph_uri)

    # Graph instance backed by the triple store (query and update
    # endpoints share the same URI here).
    g = Graph(SPARQLUpdateStore(endpoint_uri, endpoint_uri),
              identifier=graph_uri)

    # Wipe the graph's contents, then close the local handle.
    sparql.setQuery("CLEAR GRAPH <"+graph_uri+">")
    sparql.query().convert()
    g.close()
Beispiel #3
0
def run_query(endpoint_URL, query, timeout):
    """Run a SPARQL SELECT query against *endpoint_URL*.

    Args:
        endpoint_URL: URL of the SPARQL endpoint chosen by the user.
        timeout: timeout in seconds, or None/0 to disable it.

    Returns:
        The JSON result dict when bindings were found, otherwise one of the
        (Italian) status strings used by the caller.
    """
    # Wrapper around the user-selected endpoint.
    sparql = SPARQLWrapper(endpoint_URL)
    # Default graph setup (only verified for DBpedia).
    if endpoint_URL == "http://dbpedia.org/sparql":
        sparql.addDefaultGraph(endpoint_URL.replace('/sparql', ''))
    # BUG FIX: the original used `or`, which is always true, so a None
    # timeout crashed with TypeError on `timeout + 10`.  Only apply a
    # timeout when a non-zero one was actually supplied.
    if timeout is not None and timeout != 0:
        # Socket timeout with a small grace period over the server-side one.
        sparql.setTimeout(timeout + 10)
        # Server-side timeout is passed as a request parameter (ms).
        sparql.addParameter("timeout", str(timeout * 1000))
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)

    response_data = None
    try:
        response_data = sparql.query().convert()
    except socket.timeout:
        response_data = "ERROR: Timeout superato"
    except Exception:
        response_data = "ERROR: Errore generico"
    finally:
        # Error strings pass through unchanged; empty result sets are
        # collapsed into the "no results" marker.
        return response_data if type(response_data) is not str and response_data['results']['bindings'] != [] else "Nessun Risultato"
def get_triples(endpoint_uri, graph_uri, qs, triple_format):
    """Run query *qs* against *graph_uri* and serialize per *triple_format*.

    Args:
        triple_format: one of "N3", "JSON", "XML", "RDFXML", "CSV", "TSV".

    Returns:
        The converted result (format-dependent), or "bad format!" for an
        unknown *triple_format*.
    """
    sparql = SPARQLWrapper(endpoint_uri)
    sparql.addDefaultGraph(graph_uri)
    sparql.setQuery(qs)

    if triple_format == "N3":
        sparql.setReturnFormat(N3)
        return sparql.query().convert()
    elif triple_format == "JSON":
        sparql.setReturnFormat(JSON)
        return sparql.query().convert()
    elif triple_format == "XML":
        sparql.setReturnFormat(XML)
        # XML results are returned as serialized text.
        return sparql.query().convert().toxml()
    elif triple_format == "RDFXML":
        sparql.setReturnFormat(RDFXML)
        return sparql.query().convert().serialize()
    elif triple_format == "CSV":
        sparql.setReturnFormat(CSV)
        return sparql.query().convert()
    elif triple_format == "TSV":
        # NOTE(review): the original requests CSV for "TSV" as well;
        # behavior preserved, but this looks like it should use a TSV format.
        sparql.setReturnFormat(CSV)
        return sparql.query().convert()
    else:
        # BUG FIX: a trailing unreachable `return` after this else was removed.
        return "bad format!"
def _execute_query(query, endpoint, return_format=JSON, default_graph=None):
    """Send *query* to *endpoint* and return the raw query response."""
    wrapper = SPARQLWrapper(endpoint)
    wrapper.setReturnFormat(return_format)
    # Scope to a default graph only when the caller supplied one.
    if default_graph is not None:
        wrapper.addDefaultGraph(default_graph)
    wrapper.setQuery(query)
    return wrapper.query()
def _execute_query(query, endpoint, return_format=JSON, default_graph=None):
    """Execute a SPARQL query; the caller converts/consumes the response."""
    ep = SPARQLWrapper(endpoint)
    ep.setQuery(query)
    ep.setReturnFormat(return_format)
    if default_graph is not None:
        # Restrict the query to the given default graph.
        ep.addDefaultGraph(default_graph)
    results = ep.query()
    return results
 def getSparqlObject(self, graphName=None, query=None):
     """Build a preconfigured SPARQLWrapper for *query* on *graphName*."""
     wrapper = SPARQLWrapper(SparqlStore.SPARQL_ENDPOINT)
     wrapper.setQuery(query)
     wrapper.setMethod(POST)
     wrapper.setReturnFormat(JSON)
     wrapper.addDefaultGraph(self.getGraphURI(graphName))
     # Force SELECT semantics regardless of what setQuery detected.
     wrapper.queryType = SELECT
     wrapper.setTimeout(0.1)
     return wrapper
Beispiel #8
0
 def _wrapper(self):
     """Create a SPARQLWrapper reflecting this connection's settings."""
     w = SPARQLWrapper(self.url)
     w.user = self.user
     w.passwd = self.passwd
     # Optional settings are applied only when configured.
     if self.default_graph:
         w.addDefaultGraph(self.default_graph)
     if self.http_auth:
         w.setHTTPAuth(self.http_auth)
     return w
Beispiel #9
0
 def _wrapper(self):
     """Build the underlying SPARQLWrapper for this endpoint."""
     sw = SPARQLWrapper(self.url)
     # Credentials are set as plain attributes, matching SPARQLWrapper's API.
     sw.user = self.user
     sw.passwd = self.passwd
     if self.default_graph:
         sw.addDefaultGraph(self.default_graph)
     if self.http_auth:
         sw.setHTTPAuth(self.http_auth)
     return sw
Beispiel #10
0
class Search(object):

    """
    Search for entities (found in a previous step) in the DBpedia dataset.

    Entities that cannot be found are collected as new entities.

    Attributes:
        new: a list of new entities (not found in DBpedia).
        existing: a list of entities that exist in DBpedia.
        sparql: wrapper around the DBpedia SPARQL service; builds the query
            URI and converts results into a manageable format.
    """
    def __init__(self):
        self.sparql = SPARQLWrapper("http://dbpedia.org/sparql")
        self.sparql.addDefaultGraph("http://dbpedia.org")
        self.new = []
        self.existing = []

    def query(self, entities):
        """
        Check which entities exist in the DBpedia knowledge base.

        Args:
            entities: a list of tuples; words, and their labels.

        Returns:
            A tuple ``(new, existing)``: words not found in DBpedia, and
            words found in DBpedia.  Entities in both lists are treated as
            nodes in the Heterogeneous Textual Graph.
        """
        for i in entities:
            # Only plain alphabetic/underscore tokens are worth querying.
            if re.match("^[a-zA-Z_]*$", i[0]):
                self.sparql.setQuery("""
                SELECT DISTINCT ?item ?label WHERE{
                            ?item rdfs:label ?label .
                            FILTER (lang(?label) = 'en').
                            ?label bif:contains '%s' .
                            ?item dct:subject ?sub
                    }
                """ % i[0])
                try:
                    self.sparql.setReturnFormat(CSV)
                    results = self.sparql.query()
                    triples = results.convert()
                # BUG FIX: the original bare `except:` also swallowed
                # KeyboardInterrupt/SystemExit; catch ordinary errors only.
                except Exception:
                    triples = '\n'
                    print("query failed")
                # Heuristic: a CSV payload longer than the header alone
                # means at least one match came back.
                if len(triples) > 15:
                    self.existing.append(i)
                else:
                    self.new.append(i)
        return self.new, self.existing
def query_dbpedia_relation(entities):
    """Append ``e1\\tpredicate\\te2`` rows to ``dataset.txt`` for every
    DBpedia property/ontology predicate shared by both entities.

    Args:
        entities: pair of DBpedia resource URIs (subject, object).
    """
    e1, e2 = entities[0], entities[1]

    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    sparql.setReturnFormat(JSON)
    sparql.addDefaultGraph("http://dbpedia.org")

    # BUG FIX: these were previously assigned only inside the `if` branches
    # below, raising UnboundLocalError at the final check whenever a query
    # returned no matching bindings.
    e1_res = []
    e2_res = []

    # Outgoing predicates of e1.
    sparql.setQuery("""
                SELECT DISTINCT ?p
                WHERE
                {
                <%s> ?p ?o
                }
            """ % e1)

    results = sparql.query().convert()

    if len(results["results"]["bindings"]):
        # Keep only dbpedia property/ontology predicates, deduplicated.
        e1_res = list(
            set([
                x['p']['value'] for x in results["results"]["bindings"]
                if ('property' in x['p']['value']
                    or 'ontology' in x['p']['value'])
            ]))

    # Incoming predicates of e2.
    sparql.setQuery("""
                SELECT DISTINCT ?p
                WHERE
                {
                ?s ?p <%s>
                }
            """ % e2)

    results = sparql.query().convert()

    if len(results["results"]["bindings"]):
        e2_res = list(
            set([
                x['p']['value'] for x in results["results"]["bindings"]
                if ('property' in x['p']['value']
                    or 'ontology' in x['p']['value'])
            ]))

    if e1_res and e2_res:
        for predicate in intersection(e1_res, e2_res):
            # Skip wiki-internal predicates (e.g. wikiPageWikiLink).
            if 'wiki' not in predicate:
                with open('dataset.txt', 'a+', encoding='utf8') as dest:
                    dest.write("{}\t{}\t{}\n".format(e1, predicate, e2))
Beispiel #12
0
def execute_sparql_query(query):
    """Run *query* against DBpedia, memoizing results in the module cache."""
    # Serve from the cache when possible.
    cached = cache.get(query)
    if cached is not None:
        return cached

    endpoint = SPARQLWrapper("http://dbpedia.org/sparql")
    endpoint.addDefaultGraph("http://dbpedia.org")
    endpoint.setReturnFormat(JSON)
    endpoint.setQuery(query)

    # Cache the raw bindings list before returning it.
    bindings = endpoint.query().convert()["results"]["bindings"]
    cache.put(query, bindings)
    return bindings
Beispiel #13
0
 def _graph_sparql(self, named_graph, query):
     """Execute SPARQL *query* on the Graph Store.

     Returns:
         The query result serialized as XML text.

     Raises:
         Re-raises any error from the SPARQL request after logging it.
     """
     store_api = "{0}/query".format(self.request_address)
     try:
         sparql = SPARQLWrapper(store_api)
         # add a default graph, though that can also be in the query string
         sparql.addDefaultGraph(named_graph)
         sparql.setQuery(query)
         data = sparql.query().convert()
     except Exception as error:
         app_logger.error('Something is wrong: {0}'.format(error))
         raise
     else:
         # BUG FIX: log message typo 'Execture' corrected to 'Execute'.
         app_logger.info(
             'Execute SPARQL query on named graph: {0}.'.format(
                 named_graph))
         return data.toxml()
Beispiel #14
0
def execute_sparql_query(query, cached=True):
    """Query DBpedia, optionally consulting the cache first.

    Returns the result bindings list, or None when the query matched nothing.
    """
    if cached:
        hit = cache.get(query)
        if hit is not None:
            return hit

    # Cache miss (or caching disabled): go to the endpoint.
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    sparql.addDefaultGraph("http://dbpedia.org")
    sparql.setReturnFormat(JSON)
    sparql.setQuery(query)
    result = sparql.query().convert()["results"]["bindings"]

    # Cache even empty results so repeated misses stay cheap.
    cache.put(query, result)

    return result if result else None
Beispiel #15
0
def _sparql_super_class(type: str):
    """Return the rdfs:subClassOf superclasses of *type* from DBpedia.

    Args:
        type: URI of the class to look up.  (Parameter name kept for
            backward compatibility even though it shadows the builtin.)

    Returns:
        List of superclass URIs, or None when the query fails.
    """
    # BUG FIX: local renamed from `super`, which shadowed the builtin.
    super_classes = []
    sparql = SPARQLWrapper(sparql_help["dbpedia"]["endpoint"])
    sparql.addDefaultGraph(sparql_help["dbpedia"]["default_graph"])
    ressource_uri = "<" + type + ">"
    query = (
        "PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#> select ?super where {"
        + ressource_uri + "rdfs:subClassOf ?super}")
    sparql.setQuery(query)
    try:
        sparql.setReturnFormat(JSON)
        results = sparql.query()
        triples = results.convert()
        for t in triples["results"]["bindings"]:
            super_classes.append(t["super"]["value"])
    except Exception:
        print("query failed")
        return None
    return super_classes
Beispiel #16
0
def run_query():
    """Flask handler: run the submitted query against DBpedia, return JSON."""
    graph_uri = request.form['inputGraphURI']
    query_text = request.form['inputQuery']

    # Echo the submitted inputs for debugging.
    pprint.pprint(graph_uri)
    pprint.pprint(query_text)

    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    sparql.addDefaultGraph(graph_uri)
    sparql.setQuery(query_text)
    sparql.setReturnFormat(JSON)

    return json.dumps(sparql.query().convert())
Beispiel #17
0
def returnTypeDBpedia():
    """Collect the short names of every owl:Class in DBpedia.

    Returns the 5th '/'-separated segment of each class URI.
    """
    sparql = SPARQLWrapper("http://dbpedia.org/sparql")
    sparql.addDefaultGraph("http://dbpedia.org")

    sparql.setQuery("select ?type {" + "   ?type a owl:Class ." + "}")
    sparql.setReturnFormat(JSON)

    bindings = sparql.query().convert()["results"]["bindings"]

    # split('/')[4] picks the last segment of URIs shaped like
    # http://dbpedia.org/ontology/<Name> -- assumes that namespace layout,
    # TODO confirm for other namespaces.
    return [str(b["type"]["value"]).split('/')[4] for b in bindings]
def execute_query(query_path, dataset):
    """Time a SPARQL query (read from *query_path*) against the local
    Virtuoso endpoint for the given LinGBM *dataset*.

    Returns:
        Elapsed wall-clock seconds for the request (response is discarded).
    """
    sparql = SPARQLWrapper("http://127.0.0.1:8890/sparql")

    # BUG FIX: the query file was opened but never closed; use a context
    # manager so the handle is released even on error.
    with open(query_path, "r") as f:
        query = f.read()

    sparql.addDefaultGraph("http://" + dataset.lower() +
                           ".lingbm.morphgraphql.oeg-upm.net/")
    sparql.setQuery(query)
    sparql.setReturnFormat(JSON)

    # Only the request latency matters here, not the result.
    start = time.time()
    sparql.query()
    return time.time() - start
Beispiel #19
0
def get_types_from_endpoint(query_info: dict, ressource_uri: str):
    """Query an endpoint for up to 50 rdf:type values of *ressource_uri*.

    Args:
        query_info: endpoint description containing "endpoint" and
            "default_graph"; may also carry "additional_prefix" and
            "resource_prefix".
        ressource_uri: the resource whose types are requested.

    Returns:
        List of type URIs (empty when the query fails).
    """
    types = []
    sparql = SPARQLWrapper(query_info["endpoint"])
    sparql.addDefaultGraph(query_info["default_graph"])

    additional_prefix = query_info.get("additional_prefix", "")

    # With a resource_prefix the URI is written prefixed; otherwise it is
    # wrapped as a full IRI.
    if "resource_prefix" in query_info:
        subject = query_info["resource_prefix"] + ressource_uri
    else:
        subject = "<" + ressource_uri + ">"

    query = (
        additional_prefix +
        "\nPREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> select ?type where {"
        + subject + " rdf:type ?type } LIMIT 50")
    sparql.setQuery(query)
    try:
        sparql.setReturnFormat(JSON)
        triples = sparql.query().convert()
        for t in triples["results"]["bindings"]:
            types.append(t["type"]["value"])
    except Exception:
        print("query failed")
        print(query)
    return types
Beispiel #20
0
def main():
    """Harvest RDF from the configured pool of URLs into the named graph.

    Reads endpoint/graph/pool/type settings from ``config.ini``.  JSON
    sources ('jsonEstonia') are mapped via ``mapping_estonia.ini``; RDF
    sources ('xml', 'turtle', 'nt') are parsed and copied triple-by-triple.
    """
    headers = {'content-type': 'application/json'}  # HTTP header content type

    # Configurations
    config = ConfigParser()
    config.read('config.ini')

    endpoint_uri = config['Mandatory']['endpointURI']
    graph_uri = config['Mandatory']['graphURI']
    pool_uri = (config['Mandatory']['poolURI']).split(',')
    type_uri = (config['Mandatory']['typeURI']).split(',')

    # Set up endpoint and access to triple store
    sparql = SPARQLWrapper(endpoint_uri)
    sparql.setReturnFormat(JSON)
    sparql.setMethod(POST)
    store = SPARQLUpdateStore(endpoint_uri, endpoint_uri)

    # Specify the (named) graph we're working with
    sparql.addDefaultGraph(graph_uri)

    # Create an in memory graph
    g = Graph(store, identifier=graph_uri)

    def rdf(urlrdf, f):
        """Parse RDF at *urlrdf* (format *f*) and copy its triples into g."""
        # BUG FIX: local renamed from `input`, which shadowed the builtin.
        source = Graph()
        source.open("store2", create=True)
        source.parse(urlrdf, format=f)

        for s, p, o in source:
            g.add((s, p, o))

        source.close()

    # Walk the pool; each URI is paired with its declared source type.
    for uri, kind in zip(pool_uri, type_uri):
        print(uri, kind)
        if kind == 'jsonEstonia':
            try:
                # Fetch the JSON data
                response = requests.get(uri, headers=headers).json()

                # Map the JSON payload onto the graph using the Estonian mapping.
                configJSON = ConfigParser()
                configJSON.read('mapping_estonia.ini')
                json_to_rdf(uri, response, g, configJSON)

            except ValueError as e:
                print(e)

        if kind in ('xml', 'turtle', 'nt'):
            rdf(uri, kind)

    # Iterate over triples in store and print them out.
    print('\r\nNumber of triples added: %d' % len(g))

    # Cleanup the graph instance
    g.close()
# Script body: fetch all predicate/object pairs for the resource given on
# the command line and prepare the field-mapping configuration.
# NOTE(review): `config` is assumed to be a ConfigParser created earlier in
# the file -- confirm against the full script.
config.read('config_3.ini')

# Command-line arguments: the resource URI and its class type.
URI = sys.argv[1]
classType = sys.argv[2]

endpoint_uri = config['Mandatory']['endpointURI']
graph_uri = config['Mandatory']['graphURI']

# Set up endpoint and access to triple store
sparql = SPARQLWrapper(endpoint_uri)
sparql.setReturnFormat(JSON)
sparql.setMethod(POST)
store = SPARQLUpdateStore(endpoint_uri, endpoint_uri)

# Specify the (named) graph we're working with
sparql.addDefaultGraph(graph_uri)

# Create an in memory graph
g = Graph(store, identifier=graph_uri)

# Fetch every (predicate, object) pair of the resource.
query = "select ?p ?o where {<"+ URI +"> ?p ?o}"
properties = g.query (query)

# Configurations mappings
mapping = ConfigParser()
mapping.read('mapping_fields.ini')

# Accumulators for the property loop; row[0] is the predicate URI.
propURI = ""
props = ""
for row in properties:
	propURI = str(row[0])
Beispiel #22
0
class BrickSparql(object):
    """Client for a Brick-schema SPARQL endpoint.

    Wraps a SPARQLWrapper instance, maintains the Brick namespaces for the
    configured schema version, and offers query/update/triple helpers.
    """

    def __init__(
        self,
        sparql_url,
        brick_version,
        graph,
        base_ns,
        username='******',
        password='******',
        update_url=None,
        httpauth_type=DIGEST,
    ):
        self.BRICK_VERSION = brick_version
        self.sparql_url = sparql_url
        self.update_url = update_url
        self.BASE = Namespace(base_ns)
        self.base_graph = graph
        self.BRICK = Namespace(
            'https://brickschema.org/schema/{0}/Brick#'\
            .format(self.BRICK_VERSION))
        self.BRICK_USE = Namespace(
            'https://brickschema.org/schema/{0}/BrickUse#'\
            .format(self.BRICK_VERSION))
        self.BF = Namespace(
            'https://brickschema.org/schema/{0}/BrickFrame#'\
            .format(self.BRICK_VERSION))
        self.BRICK_TAG = Namespace(
            'https://brickschema.org/schema/{0}/BrickTag#'\
            .format(self.BRICK_VERSION))

        PROV = Namespace('http://www.w3.org/ns/prov#')

        # Prefix -> namespace map used to build the query preamble.
        self.namespaces = {
            '': self.BASE,
            'base': self.BASE,
            'brick': self.BRICK,
            'bf': self.BF,
            'brick_tag': self.BRICK_TAG,
            'brick_use': self.BRICK_USE,
            'rdfs': RDFS,
            'rdf': RDF,
            'owl': OWL,
            'foaf': FOAF,
            'prov': PROV,
        }

        self.init_q_prefix()

        self.init_sparql(self.sparql_url, username, password, httpauth_type)

    def init_sparql(self, sparql_url, username, password, httpauth_type):
        """Create the SPARQLWrapper with credentials and HTTP auth."""
        if not self.update_url:
            # Convention: the authenticated update endpoint is '<url>-auth'.
            update_url = sparql_url + '-auth'
        else:
            update_url = self.update_url
        self.sparql = SPARQLWrapper(endpoint=sparql_url, updateEndpoint=update_url)
        if self.base_graph:
            self.sparql.addDefaultGraph(self.base_graph)
        self.sparql.queryType = SELECT
        self.sparql.setCredentials(username, password)
        self.sparql.setHTTPAuth(httpauth_type)

    def init_q_prefix(self):
        """(Re)build the PREFIX preamble prepended to every query."""
        self.q_prefix = ''
        for prefix, ns in self.namespaces.items():
            # rdflib Namespace vs. namespace-module objects expose the
            # underlying URI differently.
            if 'uri' in dir(ns):
                ns_n3 = ns.uri.n3()
            else:
                ns_n3 = ns[''].n3()

            self.q_prefix += 'prefix {0}: {1}\n'.format(prefix, ns_n3)
        self.q_prefix += '\n'

    def _get_sparql(self):
        # If need to optimize accessing sparql object.
        return self.sparql

    def _format_select_res(self, raw_res):
        """Flatten SELECT JSON results into {'var_names', 'tuples'}."""
        var_names = raw_res['head']['vars']
        tuples = [[row[var_name]['value'] for var_name in var_names]
              for row in raw_res['results']['bindings']]
        #TODO: Below line is a hack.
        var_names = ['?'+var_name for var_name in var_names]
        return {
            'var_names': var_names,
            'tuples': tuples
        }

    def parse_result(self, res):
        """Placeholder: returns (common, raw) views of the same result."""
        raw_res = res
        common_res = res
        return common_res, raw_res

    def add_graphs_to_select_qstr(self, qstr, graphs=[]):
        """Inject FROM <graph> clauses before the WHERE of a SELECT query."""
        if not graphs:
            return qstr
        [prefix, body] = re.split(re.compile('where', re.IGNORECASE), qstr)
        graph_body = '\n'
        for graph in graphs:
            graph_body += 'FROM <{0}>\n'.format(graph)
        return prefix + graph_body + 'where ' + body

    def add_graphs_to_insert_qstr(self, qstr, graphs=[]):
        """Prefix an update with WITH <graph> (first graph or base graph)."""
        if graphs:
            graph = graphs[0]
        else:
            graph = self.base_graph
        qstr = 'WITH <{0}>\n'.format(graph) + qstr
        return qstr

    def add_graphs_to_insert_qstr_dep(self, qstr, graphs=[]):
        """Deprecated variant that rewrites INSERT{...} into GRAPH form."""
        assert len(graphs) <= 1, 'Cannot insert into multiple graphs. Choose a graph or no graph'
        #if not graphs:
        #    return qstr
        if graphs:
            graph = graphs[0]
        else:
            graph = self.base_graph
        [graph_prefix, body] = re.split(re.compile('insert{', re.IGNORECASE), qstr)
        graph_prefix += 'INSERT{\n'
        graph_prefix += 'GRAPH <{0}> {{'.format(graph)
        splitted_body = re.split('}', body)
        insert_body = splitted_body[0]
        augmented_qstr = graph_prefix + insert_body + '  }\n}' + '}'.join(splitted_body[1:])
        return augmented_qstr

    def update(self, qstr, graphs=[]):
        """Run an update (INSERT etc.) with the standard prefix preamble."""
        sparql = self._get_sparql()
        sparql.setMethod(POST)
        sparql.setReturnFormat(JSON)
        query_type = qstr[:6]
        if query_type.upper() == 'INSERT':
            qstr = self.add_graphs_to_insert_qstr(qstr, graphs)
        elif graphs:
            raise Exception('not implemented yet')
        qstr = self.q_prefix + qstr
        sparql.setQuery(qstr)
        raw_res = sparql.query().convert()
        if sparql.queryType == SELECT:
            res = self._format_select_res(raw_res)
        elif sparql.queryType in [INSERT, LOAD, DELETE]:
            res = raw_res # TODO: Error handling here
        return res

    def raw_query(self, qstr):
        return self.query(qstr)  # TODO: How to handle different graphs?

    def query(self, qstr, graphs=[], is_update=False):
        """Run a query with the prefix preamble; SELECT results flattened."""
        sparql = self._get_sparql()
        sparql.setMethod(POST)
        sparql.setReturnFormat(JSON)
        qstr = self.q_prefix + qstr
        if not is_update:  # TODO: Implement this for update as well.
            qstr = self.add_graphs_to_select_qstr(qstr, graphs)
        sparql.setQuery(qstr)
        raw_res = sparql.query().convert()
        if sparql.queryType == SELECT:
            res = self._format_select_res(raw_res)
        elif sparql.queryType in [INSERT, LOAD, DELETE]:
            res = raw_res # TODO: Error handling here
        return res

    def _create_insert_query(self, triples, graph=None):
        """Build an INSERT DATA query for *triples* in *graph*."""
        if not graph:
            graph = self.base_graph
        q = 'INSERT DATA {\n'
        if graph:
            q += '  GRAPH <{0}> {{'.format(graph)
        for triple in triples:
            triple_str = ' '.join([term.n3() for term in triple]) + ' .\n'
            q += triple_str
        q += '}\n'
        if graph:
            q += '}'
        return q

    def _create_delete_query(self, triples, graph=None):
        """Build a DELETE DATA query for *triples* in *graph*."""
        if not graph:
            graph = self.base_graph
        q = """
            DELETE DATA {{
                GRAPH <{0}> {{
            """.format(graph)
        for triple in triples:
            triple_str = ' '.join([term.n3() for term in triple]) + ' .\n'
            q += triple_str
        q += """}
            }
            """
        return q

    def _is_bool(self, s):
        """True when the string *s* spells a boolean (case-insensitive)."""
        s = s.lower()
        if s == 'true' or s == 'false':
            return True
        else:
            return False

    def _str2bool(self, s):
        """Convert 'true'/'false' (any case) to bool; raise otherwise."""
        s = s.lower()
        if s == 'true':
            return True
        elif s == 'false':
            return False
        else:
            raise Exception('{0} is not convertible to boolean'.format(s))

    def _is_float(self, s):
        """True when *s* parses as a float."""
        try:
            float(s)
            return True
        except ValueError:
            return False

    def _parse_term(self, term):
        """Coerce *term* into an rdflib node (URIRef or Literal)."""
        if isinstance(term, rdflib.term.Identifier):
            return term
        elif isinstance(term, str):
            if 'http' == term[0:4]:
                node = URIRef(term)
            elif ':' in term: #TODO: This condition is dangerous.
                [ns, id_] = term.split(':')
                ns = self.namespaces[ns]
                node = ns[id_]
            else:
                # BUG FIX: the original converted digit/float strings first
                # and then called _is_bool on the converted int/float, which
                # raised AttributeError on .lower(); it also called
                # _str2bool without `self.`, a NameError.  Use a single
                # elif chain over the original string instead.
                if term.isdigit():
                    term = int(term)
                elif self._is_float(term):
                    term = float(term)
                elif self._is_bool(term):
                    term = self._str2bool(term)
                node = Literal(term)
        else:
            node = Literal(term)
        return node

    def add_ns_prefix(self, ns, prefix):
        """Register a namespace under *prefix* and refresh the preamble."""
        ns = Namespace(ns)
        self.namespaces[prefix] = ns
        self.init_q_prefix()

    def make_triple(self, pseudo_s, pseudo_p, pseudo_o, graph=None):
        """Parse three pseudo-terms into an rdflib (s, p, o) triple."""
        if not graph:
            graph = self.base_graph
        s = self._parse_term(pseudo_s)
        p = self._parse_term(pseudo_p)
        o = self._parse_term(pseudo_o)
        return (s, p, o)

    def add_triple(self, pseudo_s, pseudo_p, pseudo_o, graph=None):
        self.add_triples([(pseudo_s, pseudo_p, pseudo_o)], graph)

    def add_triples(self, pseudo_triples, graph=None):
        """Insert the given pseudo-triples into *graph* (or the base graph)."""
        if not graph:
            graph = self.base_graph
        triples = [self.make_triple(*pseudo_triple) for pseudo_triple in pseudo_triples]
        q = self._create_insert_query(triples, graph)
        res = self.query(q, is_update=True)

    def delete_triple(self, pseudo_s, pseudo_p, pseudo_o, graph=None):
        self.delete_triples([(pseudo_s, pseudo_p, pseudo_o)], graph)

    def delete_triples(self, pseudo_triples, graph=None):
        """Delete the given pseudo-triples from *graph* (or the base graph)."""
        if not graph:
            graph = self.base_graph
        triples = [self.make_triple(*pseudo_triple) for pseudo_triple in pseudo_triples]
        q = self._create_delete_query(triples, graph)
        res = self.query(q, is_update=True)

    def load_schema(self):
        """LOAD the Brick schema TTL file(s) for the configured version."""
        if semver_compare(self.BRICK_VERSION, '1.1.0') < 0:
            schema_ns = [self.BRICK, self.BRICK_USE, self.BF, self.BRICK_TAG]
        else:
            schema_ns = [self.BRICK]
        schema_urls = [str(ns)[:-1] + '.ttl' for ns in schema_ns]
        load_query_template = 'LOAD <{schema_url}>'
        if self.base_graph:
            load_query_template += ' into <{0}>'.format(self.base_graph)
        for schema_url in schema_urls:
            # https is downgraded to http for the LOAD target.
            qstr = load_query_template.format(schema_url=schema_url.replace('https', 'http'))
            res = self.query(qstr)

    def load_rdffile(self, f, graph=None):
        """Load a local turtle file/StringIO into *graph* in batches of 500."""
        if not graph:
            graph = self.base_graph
        if (isinstance(f, str) and os.path.isfile(f)) or isinstance(f, StringIO):
            # TODO: Optimize this with using Virtuoso API directly
            new_g = rdflib.Graph()
            new_g.parse(f, format='turtle')
            res = [row for row in new_g.query('select ?s ?p ?o where {?s ?p ?o.}')]
            for rows in striding_windows(res, 500):
                self.add_triples(rows, graph=graph)
        elif isinstance(f, str) and validators.url(f):
            raise Exception('Load ttl not implemented for {0}'.format('url'))
        else:
            raise Exception('Load ttl not implemented for {0}'.format(type(f)))

    def add_brick_instance(self, entity_id, tagset, ns_prefix=None, graph=None):
        """Assert rdf:type BRICK[tagset] for *entity_id*; return the entity."""
        if not isinstance(entity_id, URIRef):
            if ns_prefix:
                ns = self.namespaces[ns_prefix]
                entity = ns[entity_id]
            else:
                entity = URIRef(entity_id)
        else:
            entity = entity_id
        tagset = self.BRICK[tagset]
        triples = [(entity, RDF.type, tagset)]
        self.add_triples(triples, graph)
        return entity
Beispiel #23
0
class Spar(object):
    '''
    class to query DBpedia for URI identity;
    URIs are identical when connected by
    page redirects
    '''
    def __init__(self):
        '''
        constructor
        '''

        self.sparql = SPARQLWrapper("http://dbpedia.org/sparql")
        self.sparql.addDefaultGraph("http://dbpedia.org")

    def isEqualTest(self, uri1, uri2):
        '''
        check for identity of URIs (test); returns the raw query result
        '''

        # ASK whether uri1 reaches uri2 via any chain of page redirects
        # (in either direction).
        spar = ("ASK {<" + uri1 + "> "
                "(<http://dbpedia.org/ontology/wikiPageRedirects>|"
                "^<http://dbpedia.org/ontology/wikiPageRedirects>)* "
                "<" + uri2 + ">}")

        return self.evalQuery(spar)

    def isEqual(self, uri1, uri2):
        '''
        check for identity of URIs; returns a boolean (False on failure)
        '''

        spar = ("ASK {<" + uri1 + "> "
                "(<http://dbpedia.org/ontology/wikiPageRedirects>|"
                "^<http://dbpedia.org/ontology/wikiPageRedirects>)* "
                "<" + uri2 + ">}")
        # Simplified from the original redundant `True and ...`.
        return self.evalRes(self.evalQuery(spar))

    def evalQuery(self, query):
        '''
        evaluate query remotely; returns the JSON result or False on error
        '''

        time.sleep(1)  # run one query per second
        try:
            self.sparql.setReturnFormat(JSON)
            self.sparql.setQuery(query)
            # BUG FIX: Python 2 print statements converted to Python 3
            # calls, consistent with the rest of the file.
            print("running", query)
            return self.sparql.query().convert()
        # BUG FIX: bare except narrowed to Exception.
        except Exception:
            return False

    def evalRes(self, results):
        '''
        check for value of ask query; passes a False failure marker through
        '''

        if results == False:
            return results
        else:
            return results['boolean']

    def printRes(self, results):
        '''
        print raw JSON results (tests)
        '''

        print(results)

    def test(self):
        '''
        test if it works
        '''

        uri1 = "http://dbpedia.org/resource/NaN"
        uri2 = "NaN"
        for i in range(20):
            print("query", i, self.isEqual(uri1, uri2))

# # execute
# if __name__ == '__main__':
#     sp = Spar()
#     sp.test()
Beispiel #24
0
from SPARQLWrapper import SPARQLWrapper, JSON ,XML , POST, DIGEST
import json

# Module-level SPARQL endpoint shared by the department query helpers below,
# scoped to the departments named graph.
sparql = SPARQLWrapper("http://localhost:8890/sparql")
sparql.addDefaultGraph("http://www.Gokdepartments.org")

def selDeptList():
    """Return a JSON array of {name, id} for every department in the graph."""
    sparql.setQuery("""
        SELECT *
        {?Org <http://www.w3.org/ns/org#Name> ?name.
        ?Org <http://www.w3.org/ns/org#DepartmentID> ?ID.}
        """)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()
    # One {'name', 'id'} record per result binding.
    dept = [
        {'name': row["name"]["value"], 'id': row["ID"]["value"]}
        for row in results["results"]["bindings"]
    ]
    return (json.dumps(dept))

def selDeptDetail(deptId):
    sparql.setQuery("""
        PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>	
        PREFIX org: <http://www.w3.org/ns/org#>
        SELECT ?name ?ID ?altname ?location ?url ?phone ?mail ?pincode ?addresse 
        where
        {
        ?d rdf:type <http://www.w3.org/ns/org#FormalOrganization>.
        ?d org:Name ?name.
Beispiel #25
0
class WriterPlugin(RDFWriter):
    """RDFWriter that persists triples to a SPARQL/Update endpoint via
    SPARQLWrapper, reusing the companion reader's endpoint when available."""

    def __init__(self, reader, *args, **kwargs):
        super(WriterPlugin, self).__init__(reader, *args, **kwargs)

        # Reuse the reader's endpoint so reads and writes hit the same store.
        if isinstance(self.reader, ReaderPlugin):
            self._endpoint = self.reader.endpoint
        else:
            self._endpoint = kwargs.get("endpoint")

        self._combine_queries = kwargs.get("combine_queries")
        self._results_format = JSON

        self._sparql_wrapper = SPARQLWrapper(self._endpoint,
                                             returnFormat=self._results_format)
        user = kwargs.get('user', None)
        password = kwargs.get('password', None)
        if user is not None and password is not None:
            self._sparql_wrapper.setCredentials(user, password)

        # SPARQL/Update requires POST.
        self._sparql_wrapper.setMethod("POST")

        default_graph = kwargs.get('default_graph', None)
        if default_graph:
            self._sparql_wrapper.addDefaultGraph(default_graph)

    @property
    def endpoint(self):
        """The SPARQL endpoint URL this writer talks to."""
        return self._endpoint

    def _save(self, *resources):
        """Replace each resource: delete all triples with its subject, then
        insert the current ones, grouped per context."""
        for context, items in _group_by_context(resources).items():
            # Deletes all triples with matching subjects.
            remove_query = _prepare_delete_many_query(items, context)
            insert_query = _prepare_add_many_query(items, context)
            self._execute(remove_query, insert_query)

    def _update(self, *resources):
        """Like _save, but only removes the explicitly enumerated triples."""
        for context, items in _group_by_context(resources).items():
            # Explicitly enumerates triples for deletion.
            remove_query = _prepare_selective_delete_query(items, context)
            insert_query = _prepare_add_many_query(items, context)
            self._execute(remove_query, insert_query)

    def _remove(self, *resources, **kwargs):
        """Delete all triples whose subject matches each resource."""
        for context, items in _group_by_context(resources).items():
            # Deletes all triples with matching subjects.
            inverse = kwargs.get("inverse")
            query = _prepare_delete_many_query(items, context, inverse)
            self._execute(query)

    def _size(self):
        """ Return total count of triples, not implemented. """
        raise NotImplementedError

    def _add_triple(self, s=None, p=None, o=None, context=None):
        self._add(s, p, o, context)

    def _set_triple(self, s=None, p=None, o=None, context=None):
        # Replace any existing (s, p, *) with the new object.
        self._remove_from_endpoint(s, p, context=context)
        self._add(s, p, o, context)

    def _remove_triple(self, s=None, p=None, o=None, context=None):
        self._remove_from_endpoint(s, p, o, context)

    def _execute(self, *queries):
        """ Execute several queries, optionally combined into one request.

        Raises SparqlWriterException (chained to the original cause) on
        endpoint or query errors.
        """

        translated = [str(query) for query in queries]
        if self._combine_queries:
            translated = ["\n".join(translated)]

        try:
            for query_str in translated:
                debug(query_str)

                self._sparql_wrapper.setQuery(query_str)
                self._sparql_wrapper.query()

            return True

        except EndPointNotFound as _:
            raise_(SparqlWriterException, "Endpoint not found",
                   sys.exc_info()[2])
        except QueryBadFormed as _:
            raise_(SparqlWriterException, "Bad query: %s" % query_str,
                   sys.exc_info()[2])
        except Exception as e:
            msg = "Exception: %s (query: %s)" % (e, query_str)
            raise_(SparqlWriterException, msg, sys.exc_info()[2])

    def _add_many(self, triples, context=None):
        """Insert an iterable of (s, p, o) triples, optionally into a
        named graph *context*."""
        debug("ADD several triples")
        query = insert()

        if context:
            query.into(context)

        for s, p, o in triples:
            query.template((s, p, o))

        query_str = str(query)
        try:
            debug(query_str)
            self._sparql_wrapper.setQuery(query_str)
            self._sparql_wrapper.query().convert()
            return True

        except EndPointNotFound as _:
            raise_(SparqlWriterException, "Endpoint not found",
                   sys.exc_info()[2])
        except QueryBadFormed as _:
            raise_(SparqlWriterException, "Bad query: %s" % query_str,
                   sys.exc_info()[2])
        except Exception as e:
            raise_(SparqlWriterException, "Exception: %s" % e,
                   sys.exc_info()[2])

    def _add(self, s, p, o, context=None):
        return self._add_many([(s, p, o)], context)

    def _remove_from_endpoint(self, s=None, p=None, o=None, context=None):
        """Delete triples matching the given (possibly partial) pattern.

        With no pattern and only a context, clears the whole graph.
        Returns True on success, None on error (errors are logged, not
        re-raised).
        """
        debug('REM : %s, %s, %s, %s' % (s, p, o, context))

        query = delete()
        try:
            if s is None and p is None and o is None and context:
                query = clear().graph(context)
            else:
                if context:
                    query = delete().from_(context)

                query.template(("?s", "?p", "?o"))

                if context:
                    where_group = NamedGroup(context)
                else:
                    where_group = Group()

                where_group.append(("?s", "?p", "?o"))
                # Renamed from ``filter`` to avoid shadowing the builtin.
                filter_expr = Filter("({0})".format(
                    self.__build_filter(s, p, o)))
                where_group.append(filter_expr)
                query.where(where_group)

            query_str = str(query)
            debug(query_str)
            self._sparql_wrapper.setQuery(query_str)
            self._sparql_wrapper.query().convert()
            return True
        except EndPointNotFound as _:
            error("SPARQL endpoint not found")
        except QueryBadFormed as _:
            error("Bad-formed SPARQL query")
        except SPARQLWrapperException as _:
            error("SPARQLWrapper exception")

        return None

    def __build_filter(self, s, p, o):
        """Build a FILTER expression pinning ?s/?p/?o to the given terms."""
        vars = [(s, '?s'), (p, '?p'), (o, '?o')]
        parts = []
        for var in vars:
            if var[0] is not None:
                parts.append("%s = %s" % (var[1], self._term(var[0])))

        return " and ".join(parts)

    def index_triples(self, **kwargs):
        """
        performs index of the triples if such functionality is present,
        returns True if operation successful
        """
        # SPARQL/Update does not support indexing operation
        return False

    def load_triples(self, source=None, context=None):
        """
        Load resources on the web into the triple-store.

        :param str source: path to the sources of triples to load
        :param context: the given context
        :return: True if successful
        :rtype: bool
        """
        if source:
            query = load()
            query.load(remote_uri=source)

            if context:
                query.into(context)

            query_str = str(query)
            debug(query_str)
            self._sparql_wrapper.setQuery(query_str)
            self._sparql_wrapper.query().convert()
            return True

        return False

    def _clear(self, context=None):
        """
        Clear the triple-store.
        """
        self._remove_from_endpoint(None, None, None, context=context)

    def _term(self, term):
        """Serialize *term* into its SPARQL textual representation.

        Fixes the original, which returned the bound ``str.format`` method
        (not a string) for URIRef/BNode, class and resource terms.
        """
        if isinstance(term, (URIRef, BNode)):
            # n3() renders URIRefs as <uri> and blank nodes as _:id,
            # which is valid SPARQL syntax.
            return term.n3()
        elif isinstance(term, str):
            if term.startswith('?'):
                # Already a SPARQL variable.
                return u'{0:s}'.format(term)
            elif is_uri(term):
                return u'<{0:s}>'.format(term)
            else:
                return u'"{0:s}"'.format(term)
        elif type(term) is Literal:
            return term.n3()
        elif isinstance(term, (list, tuple)):
            # (value, language-tag) pair.
            return '"{0:s}"@{1:s}'.format(term[0], term[1])
        elif type(term) is type and hasattr(term, 'uri'):
            # Resource classes expose their URI via ``uri``.
            return u'{0:s}'.format(term.uri)
        elif hasattr(term, 'subject'):
            return u'{0:s}'.format(term.subject)

        return term.__str__()
class BrickEndpoint(object):
    """Convenience wrapper around a Virtuoso SPARQL endpoint for working
    with Brick schema graphs (querying, inserting triples, loading the
    schema, and classifying tagsets)."""

    def __init__(self,
                 sparql_url,
                 brick_version,
                 base_ns='',
                 load_schema=True):
        self.BRICK_VERSION = brick_version
        self.sparql_url = sparql_url
        # Virtuoso exposes updates on the authenticated '-auth' endpoint.
        self.sparql = SPARQLWrapper(endpoint=self.sparql_url,
                                    updateEndpoint=self.sparql_url + '-auth')
        self.sparql.queryType = SELECT
        self.sparql.setCredentials('dba', 'dba')
        self.sparql.setHTTPAuth(DIGEST)
        if not base_ns:
            base_ns = 'http://example.com/'
        self.BASE = Namespace(base_ns)
        self.base_graph = base_ns.strip('/')
        self.sparql.addDefaultGraph(self.base_graph)
        self.sparql.addParameter("default-graph-uri", self.base_graph)
        self.BRICK = Namespace(
            'https://brickschema.org/schema/{0}/Brick#'\
            .format(self.BRICK_VERSION))
        self.BRICK_USE = Namespace(
            'https://brickschema.org/schema/{0}/BrickUse#'\
            .format(self.BRICK_VERSION))
        self.BF = Namespace(
            'https://brickschema.org/schema/{0}/BrickFrame#'\
            .format(self.BRICK_VERSION))
        self.BRICK_TAG = Namespace(
            'https://brickschema.org/schema/{0}/BrickTag#'\
            .format(self.BRICK_VERSION))

        self.namespaces = {
            '': self.BASE,
            'base': self.BASE,
            'brick': self.BRICK,
            'bf': self.BF,
            'brick_tag': self.BRICK_TAG,
            'brick_use': self.BRICK_USE,
            'rdfs': RDFS,
            'rdf': RDF,
            'owl': OWL,
            'foaf': FOAF
        }
        # PREFIX header prepended to every query.
        self.q_prefix = ''
        #self.q_prefix = 'DEFINE input:inference <adxrules>\n'
        for prefix, ns in self.namespaces.items():
            if 'uri' in dir(ns):
                ns_n3 = ns.uri.n3()
            else:
                ns_n3 = ns[''].n3()

            self.q_prefix += 'prefix {0}: {1}\n'.format(prefix, ns_n3)
        self.q_prefix += '\n'

        self._init_brick_constants()
        if load_schema:
            self.load_schema()
        self.init_topclasses()

    def add_namespace(self, prefix, ns):
        """Register *ns* under *prefix* and extend the query PREFIX header."""
        ns = Namespace(ns)
        self.namespaces[prefix] = ns
        if 'uri' in dir(ns):
            ns_n3 = ns.uri.n3()
        else:
            ns_n3 = ns[''].n3()

        self.q_prefix += 'prefix {0}: {1}\n'.format(prefix, ns_n3)

    def _init_brick_constants(self):
        self.HAS_LOC = URIRef(self.BF + 'hasLocation')

    def _get_sparql(self):
        # If need to optimize accessing sparql object.
        return self.sparql

    def update(self, qstr):
        """Run *qstr* as a SPARQL update (POST)."""
        return self.query(qstr, is_update=True)

    def _format_select_res(self, raw_res):
        """Flatten a SELECT JSON response into [var_names, value_rows];
        unbound variables become None."""
        var_names = raw_res['head']['vars']
        values = [[
            row[var_name]['value'] if var_name in row else None
            for var_name in var_names
        ] for row in raw_res['results']['bindings']]
        var_names = ['?' + var_name for var_name in var_names]
        return [var_names, values]

    def parse_result(self, res):
        raw_res = res
        common_res = res
        return common_res, raw_res

    def raw_query(self, qstr):
        return self.query(qstr)

    def query(self, qstr, is_update=False):
        """Execute *qstr* (prefixed with the standard PREFIX header) and
        return a parsed result for SELECT, the raw response otherwise."""
        sparql = self._get_sparql()
        if is_update:
            sparql.setMethod(POST)
        else:
            sparql.setMethod(GET)
        sparql.setReturnFormat(JSON)
        qstr = self.q_prefix + qstr
        # NOTE: a stray no-op ``sparql.setHTTPAuth`` attribute access was
        # removed here; HTTP auth is configured once in __init__.
        sparql.setQuery(qstr)
        raw_res = sparql.query().convert()
        if sparql.queryType == SELECT:
            res = self._format_select_res(raw_res)
        elif sparql.queryType == INSERT:
            res = raw_res  # TODO: Error handling here
        elif sparql.queryType == 'LOAD':
            res = raw_res  # TODO: Error handling here
        else:
            # Fall back to the raw response instead of raising
            # UnboundLocalError for other query types.
            res = raw_res
        return res

    def _create_insert_query(self, triples):
        """Build an INSERT DATA query for *triples* targeting base_graph."""
        q = """
            INSERT DATA {{
                GRAPH <{0}> {{
            """.format(self.base_graph)
        for triple in triples:
            triple_str = ' '.join([term.n3() for term in triple]) + ' .\n'
            q += triple_str
        q += """}
            }
            """
        return q

    def _is_bool(self, s):
        """Return True if *s* is 'true' or 'false' (case-insensitive)."""
        s = s.lower()
        if s == 'true' or s == 'false':
            return True
        else:
            return False

    def _str2bool(self, s):
        """Convert 'true'/'false' (case-insensitive) to bool; raise
        otherwise."""
        s = s.lower()
        if s == 'true':
            return True
        elif s == 'false':
            return False
        else:
            raise Exception('{0} is not convertible to boolean'.format(s))

    def _is_float(self, s):
        """Return True if *s* parses as a float."""
        try:
            float(s)
            return True
        except (TypeError, ValueError):
            # Narrowed from a bare except: only conversion failures mean
            # "not a float".
            return False

    def _parse_term(self, term):
        """Coerce a pseudo-term (rdflib term, URI string, 'prefix:id'
        string, or plain value) into an rdflib node."""
        if isinstance(term, URIRef) or isinstance(term, Literal):
            return term
        elif isinstance(term, str):
            if 'http' == term[0:4]:
                node = URIRef(term)
            elif ':' in term:  #TODO: This condition is dangerous.
                [ns, id_] = term.split(':')
                ns = self.namespaces[ns]
                node = ns[id_]
            else:
                if term.isdigit():
                    term = int(term)
                elif self._is_float(term):
                    term = float(term)
                elif self._is_bool(term):
                    term = self._str2bool(term)
                else:
                    # Otherwise, just str
                    pass
                node = Literal(term)
        else:
            node = Literal(term)
        return node

    def add_triple(self, pseudo_s, pseudo_p, pseudo_o):
        triple = self.make_triple(pseudo_s, pseudo_p, pseudo_o)
        return self._add_triples([triple])

    def make_triple(self, pseudo_s, pseudo_p, pseudo_o):
        s = self._parse_term(pseudo_s)
        p = self._parse_term(pseudo_p)
        o = self._parse_term(pseudo_o)
        return (s, p, o)

    def add_triples(self, pseudo_triples):
        if not pseudo_triples:
            # TODO: Define the right format same ass _add_triples.
            return True
        triples = [
            self.make_triple(*pseudo_triple)
            for pseudo_triple in pseudo_triples
        ]
        return self._add_triples(triples)

    def _add_triples(self, triples):
        q = self._create_insert_query(triples)
        res = self.update(q)
        return res

    def add_brick_instance(self, entity_name, tagset):
        """Add ``entity_name rdf:type brick:tagset`` and return the
        entity's URI as a string."""
        entity = URIRef(self.BASE + entity_name)
        tagset = URIRef(self.BRICK + tagset)
        triples = [(entity, RDF.type, tagset)]
        self._add_triples(triples)
        return str(entity)

    def load_ttlfile(self, filepath):
        """Bulk-load a local Turtle file into the base graph."""
        q = """
        load <file://{0}> into <{1}>
        """.format(filepath, self.base_graph)
        res = self.update(q)

    def load_schema(self):
        """LOAD the Brick, BrickUse, BrickFrame and BrickTag ontologies
        into the base graph."""
        schema_urls = [
            str(ns)[:-1] + '.ttl'
            for ns in [self.BRICK, self.BRICK_USE, self.BF, self.BRICK_TAG]
        ]
        load_query_template = 'LOAD <{0}> into <{1}>'
        for schema_url in schema_urls:
            # Virtuoso's LOAD is used over plain http here.
            qstr = load_query_template.format(
                schema_url.replace('https', 'http'), self.base_graph)
            res = self.update(qstr)

    def init_topclasses(self, force=False):
        """Build (or load from cache) the tagset -> topclass map for
        Point/Equipment/Location subclasses."""
        topclasses_file = 'Brick/topclasses.json'
        if os.path.isfile(topclasses_file) and not force:
            with open(topclasses_file, 'r') as fp:
                self.topclasses = json.load(fp)
        else:
            self.topclasses = {}
            target_topclasses = ['Point', 'Equipment', 'Location']
            qstr_template = """
            select ?tagset where {{
            ?tagset rdfs:subClassOf* brick:{0}.
            }}
            """
            for topclass in target_topclasses:
                qstr = qstr_template.format(topclass)
                res = self.query(qstr)
                for [tagset] in res[1]:
                    self.topclasses[tagset.split('#')
                                    [-1].lower()] = topclass.lower()
            with open(topclasses_file, 'w') as fp:
                json.dump(self.topclasses, fp, indent=2)

    def sparqlres2df(self, res):
        """Convert a _format_select_res result into a DataFrame."""
        column_names = res[0]
        data = res[1]
        return pd.DataFrame(data=data, columns=column_names)

    def sparqlres2csv(self, res, filename):
        self.sparqlres2df(res).to_csv(filename)

    def get_top_class(self, tagset):
        pass

    def get_tagset_type(self, tagset):
        """Classify a tagset as 'point', 'equipment', 'location',
        'networkadapter' or 'unidentified' using the topclass map and
        common point postfixes."""
        pure_tagset = tagset.split('-')[0]
        postfix = pure_tagset.split('_')[-1].lower()
        if postfix in [
                'server',
                'networkadapter',
        ]:
            return 'networkadapter'
        else:
            topclass = self.topclasses.get(pure_tagset, None)
            if topclass:
                return topclass.lower()
            else:
                if postfix in [
                        'sensor',
                        'setpoint',
                        'status',
                        'alarm',
                        'command',
                ]:
                    return 'point'
                else:
                    return 'unidentified'

    def normalize2uri(self, s):
        """Collapse *s* into a URI-safe token: alphanumeric runs joined
        by underscores."""
        return '_'.join(re.findall('[a-zA-Z0-9]+', s))

    def serialize_graph(self, filename, nobrick=True):
        """Dump all triples in the store to *filename* as Turtle.

        NOTE(review): every term is wrapped in URIRef, including literal
        objects — confirm whether literals need preserving.
        """
        g = rdflib.Graph()
        qstr = """
        select ?s ?p ?o where {
        ?s ?p ?o.
        }
        """
        res = self.query(qstr)
        for row in res[1]:
            g.add((URIRef(row[0]), URIRef(row[1]), URIRef(row[2])))
        g.serialize(filename, format='turtle')

    def check_tag_in_tagset(self, tag, tagset):
        """Not implemented. The original body was a bare ``TODO`` name,
        which raised NameError at call time; raise explicitly instead."""
        raise NotImplementedError
Beispiel #27
0
class Sparql(object):
    """Extracts per-property edge lists for a set of entities from a
    SPARQL endpoint and writes them under datasets/<dataset>/graphs/."""

    def __init__(self, entities, config_file, dataset, endpoint, default_graph,
                 entity_class):

        self.entities = entities  # file containing a list of entities

        self.dataset = dataset

        self.wrapper = SPARQLWrapper(endpoint)

        self.wrapper.setReturnFormat(JSON)

        if default_graph:
            # NOTE: self.default_graph only exists when a graph was given.
            self.default_graph = default_graph

            self.wrapper.addDefaultGraph(self.default_graph)

        self.entity_class = entity_class

        # Query templates: all (s, o) pairs for a property, optionally
        # restricted to a single subject URI.
        self.query_prop = "SELECT ?s ?o  WHERE {?s %s ?o. }"

        self.query_prop_uri = "SELECT ?s ?o  WHERE {?s %s ?o. FILTER (?s = %s)}"

        self._define_properties(config_file)

    def _define_properties(self, config_file):
        """Read the property list for self.dataset from *config_file*;
        fall back to querying the endpoint when the dataset is missing."""

        with codecs.open(config_file, 'r', encoding='utf-8') as config_read:

            property_file = json.loads(config_read.read())

        try:

            self.properties = list(property_file[self.dataset])
            print(self.properties)

        except KeyError:

            print("No set of properties provided in the dataset")

            if not self.entity_class:

                query_all_prop = "SELECT distinct ?p " \
                                 "WHERE {?s ?p ?o. FILTER(!isLiteral(?o) && regex(STR(?p),\"dbpedia.org/ontology\"))}"

                self._get_properties(query_all_prop)

            else:

                query_category_prop = "select distinct ?p " \
                                      "where { ?s a dbo:Band. ?s ?p ?o. " \
                                      "FILTER(!isLiteral(?o) && regex(STR(?p),\"dbpedia.org/ontology\"))} "

                self._get_properties(query_category_prop)

    def _get_properties(
        self, query
    ):  # get all the properties from sparql endpoint if a list is not provided in config file

        self.properties = []

        self.wrapper.setQuery(query)

        self.wrapper.setReturnFormat(JSON)

        for results in self.wrapper.query().convert()['results']['bindings']:

            self.properties.append(results['p']['value'])

        self.properties.append("dct:subject")

        self.properties.append("rdf:type")

    def get_property_graphs(self):
        """Query every property and write one edgelist file per property
        under datasets/<dataset>/graphs/."""

        # Copy the list: the original aliased self.properties, so the
        # remove() below permanently mutated the instance state.
        properties = list(self.properties)

        if 'feedback' in properties:
            properties.remove(
                'feedback')  # don't query for the feedback property

        for prop in properties:  # iterate on the properties

            prop_short = prop

            prop_namespace = prop

            if '/' in prop:

                # avoid creating file with a '/' in the name
                prop_short = prop.split('/')[-1]

                # if it is actually a URI, surround by "<>"
                if prop.startswith("http"):
                    prop_namespace = '<' + prop + '>'

            try:
                mkdir('datasets/%s/' % self.dataset)
                mkdir('datasets/%s/graphs' % self.dataset)

            except OSError:
                # Directories already exist.
                pass

            with codecs.open('datasets/%s/graphs/%s.edgelist' %
                             (self.dataset, prop_short),
                             'w',
                             encoding='utf-8'
                             ) as prop_graph:  #open a property file graph

                if self.entities == "all":

                    self.wrapper.setQuery(self.query_prop % prop_namespace)

                    for result in self.wrapper.query().convert(
                    )['results']['bindings']:

                        subj = result['s']['value']

                        obj = result['o']['value']

                        print((subj, obj))

                        prop_graph.write('%s %s\n' % (subj, obj))

                else:

                    with codecs.open(
                            '%s' % self.entities, 'r', encoding='utf-8'
                    ) as f:  # open entity file, select only those entities

                        for uri in f:  # for each entity

                            uri = uri.strip('\n')

                            uri = '<' + uri + '>'

                            self.wrapper.setQuery(self.query_prop_uri %
                                                  (prop_namespace, uri))

                            for result in self.wrapper.query().convert(
                            )['results']['bindings']:

                                subj = result['s']['value']

                                obj = result['o']['value']

                                print((subj, obj))

                                prop_graph.write('%s %s\n' % (subj, obj))

                        f.seek(0)  # reinitialize iterator

    @staticmethod
    def get_uri_from_wiki_id(wiki_id):
        """Resolve a Wikipedia page ID to its DBpedia URI, or None on
        any failure (network error, no binding)."""

        sparql = SPARQLWrapper("http://dbpedia.org/sparql")

        sparql.setQuery(
            """select ?s where {?s <http://dbpedia.org/ontology/wikiPageID> %d
           }""" % int(wiki_id))

        sparql.setReturnFormat(JSON)

        try:
            uri = sparql.query().convert(
            )['results']['bindings'][0]['s']['value']

        except Exception:
            # Narrowed from a bare except; still deliberately broad
            # (best-effort lookup over the network).
            uri = None

        return uri
Beispiel #28
0
class Graph_Linker():
    """Runs SPARQL INSERT queries against the configured endpoint/graph to
    attach rdf:type links to nodes already present in the store."""

    def __init__(self, *args, **kwargs):
        self.sparql = SPARQLWrapper(os.getenv('SPARQL_ENDPOINT'))
        self.sparql.addDefaultGraph(os.getenv('SPARQL_GRAPH'))
        # Plain call instead of ``return super().__init__(...)``:
        # __init__ must return None and object.__init__ does anyway.
        super().__init__(*args, **kwargs)

    def link_authors(self):
        """Type every node that wrote a paper as dblp:Author."""
        print('Linking authors...')
        self.sparql.setQuery("""
            PREFIX dblp: <https://dblp.org/ontologies/>
            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

            INSERT {
                ?a rdf:type dblp:Author
            }

            WHERE {
                SELECT DISTINCT ?a
                WHERE {
                    ?a dblp:write ?p .
                    FILTER NOT EXISTS {
                        ?a rdf:type dblp:Author
                    }
                }
            }
        """)
        self.sparql.query()
        print('Authors linked to https://dblp.org/ontologies/Author.')

    def link_papers(self):
        """Type every written paper as bibo:AcademicArticle."""
        print('Linking papers...')
        self.sparql.setQuery("""
            PREFIX dblp: <https://dblp.org/ontologies/>
            PREFIX bibo: <http://purl.org/ontology/bibo/>
            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

            INSERT {
                ?p rdf:type bibo:AcademicArticle
            }

            WHERE {
                SELECT DISTINCT ?p
                WHERE {
                    ?a dblp:write ?p .
                    FILTER NOT EXISTS {
                        ?p rdf:type bibo:AcademicArticle
                    }
                }
            }
        """)
        self.sparql.query()
        print(
            'Papers linked to http://purl.org/ontology/bibo/AcademicArticle.')

    def link_reviewers(self):
        """Type every node that reviewed an academic article as
        dblp:Reviewer."""
        print('Linking reviewers...')
        self.sparql.setQuery("""
            PREFIX dblp: <https://dblp.org/ontologies/>
            PREFIX bibo: <http://purl.org/ontology/bibo/>
            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

            INSERT {
                ?s rdf:type dblp:Reviewer
            }

            WHERE {
                SELECT DISTINCT ?s
                WHERE {
                    ?s dblp:writeReview ?r .
                    ?r dblp:about ?p .
                    ?p rdf:type bibo:AcademicArticle .
                    FILTER NOT EXISTS {
                        ?s rdf:type dblp:Reviewer
                    }
                }
            }
        """)
        self.sparql.query()
        print('Reviewers linked to https://dblp.org/ontologies/Reviewer.')

    def link_schools(self):
        """Type affiliation targets whose URI contains '/schools/' as
        dblp:School."""
        print('Linking schools...')
        self.sparql.setQuery("""
            PREFIX dblp: <https://dblp.org/ontologies/>
            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

            INSERT {
                ?s rdf:type dblp:School
            }

            WHERE {
                SELECT DISTINCT ?s
                WHERE {
                    ?a dblp:affiliatedWith ?s .
                    FILTER ( regex(str(?s), "/schools/" )) .
                    FILTER NOT EXISTS {
                        ?s rdf:type dblp:School
                    }
                }
            }
        """)
        self.sparql.query()
        print('Schools linked to https://dblp.org/ontologies/School.')

    def link_journals(self):
        """Type publication venues whose URI contains '/journals/' as
        dbo:AcademicJournal."""
        print('Linking journals...')
        # BUG FIX: the regex filter tested the unbound variable ?s
        # (copy-pasted from link_schools), so the filter never matched;
        # it must test ?j, the variable actually selected here.
        self.sparql.setQuery("""
            PREFIX dblp: <https://dblp.org/ontologies/>
            PREFIX dbo: <http://dbpedia.org/ontology/>
            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

            INSERT {
                ?j rdf:type dbo:AcademicJournal
            }

            WHERE {
                SELECT DISTINCT ?j
                WHERE {
                    ?p dblp:publishedIn ?j .
                    FILTER ( regex(str(?j), "/journals/" )) .
                    FILTER NOT EXISTS {
                        ?j rdf:type dbo:AcademicJournal
                    }
                }
            }
        """)
        self.sparql.query()
        print(
            'Journals linked to http://dbpedia.org/ontology/AcademicJournal.')

    def link_conferences(self):
        """Type publication venues whose URI contains '/conf/' as
        dbo:AcademicConference."""
        print('Linking conferences...')
        self.sparql.setQuery("""
            PREFIX dblp: <https://dblp.org/ontologies/>
            PREFIX dbo: <http://dbpedia.org/ontology/>
            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

            INSERT {
                ?c rdf:type dbo:AcademicConference
            }

            WHERE {
                SELECT DISTINCT ?c
                WHERE {
                    ?p dblp:publishedIn ?c .
                    FILTER ( regex(str(?c), "/conf/" )) .
                    FILTER NOT EXISTS {
                        ?c rdf:type dbo:AcademicConference
                    }
                }
            }
        """)
        self.sparql.query()
        print(
            'Conferences linked to http://dbpedia.org/ontology/AcademicConference.'
        )

    def link_random_open_access_journals(self):
        """Mark up to 200 random, not-yet-classified journals as
        dblp:OpenAccessJournal."""
        print('Generating and linking random open access journals...')
        self.sparql.setQuery("""
            PREFIX dblp: <https://dblp.org/ontologies/>
            PREFIX dbo: <http://dbpedia.org/ontology/>
            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

            INSERT {
                ?j rdf:type dblp:OpenAccessJournal
            }

            WHERE {
                SELECT DISTINCT ?j
                WHERE {
                    ?j rdf:type dbo:AcademicJournal .
                    FILTER NOT EXISTS {
                        ?j rdf:type dblp:OpenAccessJournal .
                        ?j rdf:type dblp:CloseAccessJournal
                    }
                }
                ORDER BY RAND()
                LIMIT 200
            }
        """)
        self.sparql.query()
        print(
            'Open access journals generated and linked to https://dblp.org/ontologies/OpenAccessJournal.'
        )

    def link_random_close_access_journals(self):
        """Mark up to 200 random, not-yet-classified journals as
        dblp:CloseAccessJournal."""
        print('Generating and linking random close access journals...')
        self.sparql.setQuery("""
            PREFIX dblp: <https://dblp.org/ontologies/>
            PREFIX dbo: <http://dbpedia.org/ontology/>
            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

            INSERT {
                ?j rdf:type dblp:CloseAccessJournal
            }

            WHERE {
                SELECT DISTINCT ?j
                WHERE {
                    ?j rdf:type dbo:AcademicJournal .
                    FILTER NOT EXISTS {
                        ?j rdf:type dblp:OpenAccessJournal .
                        ?j rdf:type dblp:CloseAccessJournal
                    }
                }
                ORDER BY RAND()
                LIMIT 200
            }
        """)
        self.sparql.query()
        print(
            'Close access journals generated and linked to https://dblp.org/ontologies/CloseAccessJournal.'
        )

    def link_algorithm_conferences(self):
        """Type conferences with papers keyworded 'algorithm' as
        dblp:AlgorithmConference."""
        print('Linking algorithm conferences...')
        self.sparql.setQuery("""
            PREFIX dblp: <https://dblp.org/ontologies/>
            PREFIX dbo: <http://dbpedia.org/ontology/>
            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

            INSERT {
                ?c rdf:type dblp:AlgorithmConference
            }

            WHERE {
                SELECT DISTINCT ?c
                WHERE {
                    ?c rdf:type dbo:AcademicConference .
                    ?p dblp:publishedIn ?c .
                    ?p dblp:keyword ?k .
                    FILTER(str(?k) IN ('algorithm')) .
                    FILTER NOT EXISTS {
                        ?c rdf:type dblp:AlgorithmConference
                    }
                }
            }
        """)
        self.sparql.query()
        print(
            'Algorithm conferences linked to https://dblp.org/ontologies/AlgorithmConference.'
        )

    def link_network_conferences(self):
        """Type conferences with networking-related paper keywords as
        dblp:NetworkConference."""
        print('Linking network conferences...')
        self.sparql.setQuery("""
            PREFIX dblp: <https://dblp.org/ontologies/>
            PREFIX dbo: <http://dbpedia.org/ontology/>
            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

            INSERT {
                ?c rdf:type dblp:NetworkConference
            }

            WHERE {
                SELECT DISTINCT ?c
                WHERE {
                    ?c rdf:type dbo:AcademicConference .
                    ?p dblp:publishedIn ?c .
                    ?p dblp:keyword ?k .
                    FILTER(str(?k) IN ('network', 'networks', 'cloud', 'internet', 'wlans')) .
                    FILTER NOT EXISTS {
                        ?c rdf:type dblp:NetworkConference
                    }
                }
            }
        """)
        self.sparql.query()
        print(
            'Network conferences linked to https://dblp.org/ontologies/NetworkConference.'
        )

    def link_database_conferences(self):
        """Type conferences with database-related paper keywords as
        dblp:DatabaseConference."""
        print('Linking database conferences...')
        self.sparql.setQuery("""
            PREFIX dblp: <https://dblp.org/ontologies/>
            PREFIX dbo: <http://dbpedia.org/ontology/>
            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>

            INSERT {
                ?c rdf:type dblp:DatabaseConference
            }

            WHERE {
                SELECT DISTINCT ?c
                WHERE {
                    ?c rdf:type dbo:AcademicConference .
                    ?p dblp:publishedIn ?c .
                    ?p dblp:keyword ?k .
                    FILTER(str(?k) IN ('data', 'database', 'databases')) .
                    FILTER NOT EXISTS {
                        ?c rdf:type dblp:DatabaseConference
                    }
                }
            }
        """)
        self.sparql.query()
        print(
            'Database conferences linked to https://dblp.org/ontologies/DatabaseConference.'
        )
Beispiel #29
0
class BrickEndpoint(object):
    """Digest-authenticated SPARQL client for a Brick schema triple store.

    Each instance owns a uniquely named graph (``urn:<uuid>``) on the
    endpoint and provides helpers to insert triples, create Brick
    instances, and load ontology (TTL) files.
    """

    def __init__(self,
                 sparql_url,
                 brick_version,
                 base_ns='',
                 load_schema=True):
        """Connect to *sparql_url* and optionally load the Brick schema.

        Args:
            sparql_url: Base URL of the SPARQL query endpoint. Updates go
                to ``sparql_url + '-auth'`` (Virtuoso convention).
            brick_version: Brick schema version label (stored for reference;
                it was previously assigned to an unused local).
            base_ns: Base namespace for new entities; defaults to
                'http://example.com/' when empty.
            load_schema: When True, load the Brick and BrickFrame TTL files.
        """
        self.brick_version = brick_version
        self.sparql_url = sparql_url
        self.sparql = SPARQLWrapper(endpoint=self.sparql_url,
                                    updateEndpoint=self.sparql_url + '-auth')
        self.sparql.queryType = SELECT
        self.sparql.setCredentials('dba', 'dba')
        self.sparql.setHTTPAuth(DIGEST)
        if not base_ns:
            base_ns = 'http://example.com/'
        # Each endpoint instance works in its own uniquely-named graph.
        self.base_graph = 'urn:' + str(gen_uuid())
        self.sparql.addDefaultGraph(self.base_graph)
        self.namespaces = {
            '': BASE,
            'brick': BRICK,
            'bf': BF,
            'rdfs': RDFS,
            'rdf': RDF,
            'owl': OWL,
            'foaf': FOAF
        }
        # Prefix header prepended to every query string. Stored on the
        # instance: it was previously a local variable, which made query()
        # raise NameError on every call. Populate from self.namespaces if
        # SPARQL prefixes are desired.
        self.sparql_prefix = ''

        self._init_brick_constants()
        if load_schema:
            self.load_schema()

    def _init_brick_constants(self):
        # Commonly used Brick predicates.
        self.HAS_LOC = URIRef(BF + 'hasLocation')

    def _get_sparql(self):
        # Hook point if access to the sparql object ever needs optimizing.
        return self.sparql

    def update(self, qstr):
        """Run *qstr* as a SPARQL UPDATE (POST) and return the result."""
        return self.query(qstr, is_update=True)

    def _format_select_res(self, raw_res):
        """Flatten a SPARQL-JSON SELECT result into a list of value dicts.

        Unbound variables map to None.
        """
        var_names = raw_res['head']['vars']
        return [{
            var_name: row[var_name]['value'] if var_name in row else None
            for var_name in var_names
        } for row in raw_res['results']['bindings']]

    def parse_result(self, res):
        """Return a (common, raw) result pair; currently both are *res*."""
        return res, res

    def raw_query(self, qstr):
        """Run *qstr* without any additional post-processing of the call."""
        return self.query(qstr)

    def query(self, qstr, is_update=False):
        """Execute *qstr* against the endpoint.

        Args:
            qstr: Query text; the instance's prefix header is prepended.
            is_update: Use POST (required for updates) instead of GET.

        Returns:
            SELECT results flattened via _format_select_res; the raw JSON
            response for every other query type.
        """
        sparql = self._get_sparql()
        sparql.setMethod(POST if is_update else GET)
        sparql.setReturnFormat(JSON)
        # Fixed: previously referenced the undefined local `sparql_prefix`
        # (NameError); the stray no-op `sparql.setHTTPAuth` attribute access
        # was removed — auth is configured once in __init__.
        sparql.setQuery(self.sparql_prefix + qstr)
        raw_res = sparql.query().convert()
        if sparql.queryType == SELECT:
            res = self._format_select_res(raw_res)
        elif sparql.queryType == INSERT:
            res = raw_res  # TODO: Error handling here
        elif sparql.queryType == 'LOAD':
            res = raw_res  # TODO: Error handling here
        else:
            res = raw_res
        return res

    def _create_insert_query(self, triples):
        """Build an INSERT DATA query wrapping *triples* in the base graph."""
        q = """
            INSERT DATA {{
                GRAPH <{0}> {{
            """.format(self.base_graph)
        for triple in triples:
            q += ' '.join(
                ['<{0}>'.format(str(term)) for term in triple]) + ' .\n'
        q += """}
            }
            """
        return q

    def _is_bool(self, s):
        """Return True when *s* is the string 'true' or 'false' (any case)."""
        return s.lower() in ('true', 'false')

    def _str2bool(self, s):
        """Convert 'true'/'false' (any case) to bool; raise otherwise."""
        s = s.lower()
        if s == 'true':
            return True
        elif s == 'false':
            return False
        else:
            raise Exception('{0} is not convertible to boolean'.format(s))

    def _is_float(self, s):
        """Return True when *s* can be parsed as a float."""
        try:
            float(s)
            return True
        except (TypeError, ValueError):
            return False

    def _parse_term(self, term):
        """Coerce a pseudo-term into an rdflib node (URIRef or Literal).

        Strings starting with 'http' become URIRefs; 'prefix:id' strings
        are resolved through self.namespaces; anything else becomes a
        Literal, with numeric/boolean strings coerced to typed values.
        """
        if isinstance(term, (URIRef, Literal)):
            return term
        elif isinstance(term, str):
            if 'http' == term[0:4]:
                node = URIRef(term)
            elif ':' in term:  # TODO: This condition is dangerous.
                [ns, id_] = term.split(':')
                ns = self.namespaces[ns]
                node = ns[id_]
            else:
                # The coercion branches are mutually exclusive: previously
                # an int/float result was still passed to _is_bool, whose
                # str.lower() crashed on non-strings, and _str2bool was
                # called without `self.` (NameError).
                if term.isdigit():
                    term = int(term)
                elif self._is_float(term):
                    term = float(term)
                elif self._is_bool(term):
                    term = self._str2bool(term)
                node = Literal(term)
        else:
            node = Literal(term)
        return node

    def add_triple(self, pseudo_s, pseudo_p, pseudo_o):
        """Parse and insert a single (s, p, o) pseudo-triple."""
        triple = self.make_triple(pseudo_s, pseudo_p, pseudo_o)
        self.add_triples([triple])

    def add(self, triple):
        """Insert one pseudo-triple given as a 3-tuple."""
        self.add_triples([triple])

    def make_triple(self, pseudo_s, pseudo_p, pseudo_o):
        """Parse the three pseudo-terms and return an rdflib triple."""
        s = self._parse_term(pseudo_s)
        p = self._parse_term(pseudo_p)
        o = self._parse_term(pseudo_o)
        return (s, p, o)

    def add_triples(self, pseudo_triples):
        """Parse and insert an iterable of pseudo-triples in one update."""
        triples = [
            self.make_triple(*pseudo_triple)
            for pseudo_triple in pseudo_triples
        ]
        self._add_triples(triples)

    def _add_triples(self, triples):
        # Single INSERT DATA round-trip for all triples.
        q = self._create_insert_query(triples)
        res = self.update(q)

    def add_brick_instance(self, entity_name, tagset):
        """Create *entity_name* typed as brick:*tagset*; return its URI."""
        entity = URIRef(BASE + entity_name)
        tagset = URIRef(BRICK + tagset)
        triples = [(entity, RDF.type, tagset)]
        self._add_triples(triples)
        return str(entity)

    def load_ttlfile(self, filepath):
        """LOAD a local TTL file into the instance's base graph."""
        q = """
        load <file://{0}> into <{1}>
        """.format(filepath, self.base_graph)
        res = self.update(q)

    def load_schema(self):
        """Load the Brick and BrickFrame schema files into the graph."""
        self.load_ttlfile(BRICK_FILE)
        self.load_ttlfile(BF_FILE)

    def parse(self, filepath, format=None):
        """rdflib-like alias for load_ttlfile; *format* is ignored."""
        self.load_ttlfile(filepath)

    def serialize(self):
        """Return all triples whose subject starts with the base namespace."""
        qstr = """
        select ?s ?p ?o where{
        ?s ?p ?o .
        FILTER(STRSTARTS(STR(?s), "%s"))
        }
        """ % (BASE)
        res = self.raw_query(qstr)
        return res

    def __add__(self, other):
        """Copy all BASE-namespace triples from *other* into this endpoint."""
        assert isinstance(other, BrickEndpoint)
        qstr = """
        select ?s ?p ?o where{
        ?s ?p ?o .
        FILTER(STRSTARTS(STR(?s), "%s"))
        }
        """ % (BASE)
        res = other.raw_query(qstr)
        triples = [(URIRef(row['s']), URIRef(row['p']), URIRef(row['o']))
                   for row in res]
        # Insert in chunks to keep individual INSERT queries small.
        triple_chunks = chunks(triples, 300)
        for chunk in triple_chunks:
            self._add_triples(chunk)
        return self
Beispiel #30
0
class WriterPlugin(RDFWriter):
    """SuRF RDF writer that persists resources via a SPARQL Update endpoint.

    NOTE(review): this class uses Python 2-only constructs
    (``except Exc, _:``, three-argument ``raise``, ``unicode``,
    ``iteritems``) and will not parse under Python 3.
    """
    def __init__(self, reader, *args, **kwargs):
        super(WriterPlugin, self).__init__(reader, *args, **kwargs)

        # Reuse the reader's endpoint when one is available so reads and
        # writes target the same store; otherwise take it from kwargs.
        if isinstance(self.reader, ReaderPlugin):
            self._endpoint = self.reader.endpoint
        else:
            self._endpoint = kwargs.get("endpoint")

        # When truthy, _execute joins several queries into one request.
        self._combine_queries = kwargs.get("combine_queries")
        self._results_format = JSON

        self._sparql_wrapper = SPARQLWrapper(self._endpoint, returnFormat=self._results_format)
        user = kwargs.get('user', None)
        password = kwargs.get('password', None)
        if user is not None and password is not None:
            self._sparql_wrapper.setCredentials(user, password)

        # SPARQL updates must be sent over POST.
        self._sparql_wrapper.setMethod("POST")

        default_graph = kwargs.get('default_graph',None)
        if default_graph:
            self._sparql_wrapper.addDefaultGraph(default_graph)

    @property
    def endpoint(self):
        """The SPARQL endpoint URL this writer targets."""
        return self._endpoint

    def _save(self, *resources):
        """Overwrite resources: delete all triples per subject, re-insert."""
        for context, items in _group_by_context(resources).iteritems():
            # Deletes all triples with matching subjects.
            remove_query = _prepare_delete_many_query(items, context)
            insert_query = _prepare_add_many_query(items, context)
            self._execute(remove_query, insert_query)

    def _update(self, *resources):
        """Update resources: delete only the enumerated triples, re-insert."""
        for context, items in _group_by_context(resources).iteritems():
            # Explicitly enumerates triples for deletion.
            remove_query = _prepare_selective_delete_query(items, context)
            insert_query = _prepare_add_many_query(items, context)
            self._execute(remove_query, insert_query)

    def _remove(self, *resources, **kwargs):
        """Delete resources; `inverse` also removes triples pointing at them."""
        for context, items in _group_by_context(resources).iteritems():
            # Deletes all triples with matching subjects.
            inverse = kwargs.get("inverse")
            query = _prepare_delete_many_query(items, context, inverse)
            self._execute(query)

    def _size(self):
        """ Return total count of triples, not implemented. """
        raise NotImplementedError

    def _add_triple(self, s=None, p=None, o=None, context=None):
        """Insert a single triple into *context*."""
        self._add(s, p, o, context)

    def _set_triple(self, s=None, p=None, o=None, context=None):
        """Replace any (s, p, *) triples in *context* with (s, p, o)."""
        self._remove_from_endpoint(s, p, context=context)
        self._add(s, p, o, context)

    def _remove_triple(self, s=None, p=None, o=None, context=None):
        """Delete the matching triple(s) from *context*."""
        self._remove_from_endpoint(s, p, o, context)

    def _execute(self, *queries):
        """ Execute several queries. """

        translated = [unicode(query) for query in queries]
        if self._combine_queries:
            # Single network round-trip for all queries.
            translated = ["\n".join(translated)]

        try:
            for query_str in translated:
                debug(query_str)

                self._sparql_wrapper.setQuery(query_str)
                self._sparql_wrapper.query()

            return True

        # Py2 exception translation: re-raise endpoint errors as writer
        # exceptions while preserving the original traceback.
        except EndPointNotFound, _:
            raise SparqlWriterException("Endpoint not found"), None, sys.exc_info()[2]
        except QueryBadFormed, _:
            raise SparqlWriterException("Bad query: %s" % query_str), None, sys.exc_info()[2]
Beispiel #31
0
async def prenten(request):
    """Serve the results of SPARQL_QUERY against the configured graph as JSON."""
    wrapper = SPARQLWrapper(SPARQL_URI, returnFormat=JSON)
    wrapper.addDefaultGraph("http://lod.kb.nl/gvn/ubl01/")
    wrapper.setQuery(SPARQL_QUERY)
    payload = wrapper.query().convert()
    return web.json_response(data=payload)
Beispiel #32
0
class Spar(object):
    """Query DBpedia to decide whether two URIs identify the same resource.

    Two URIs count as identical when they are connected by a chain of
    wikiPageRedirects links, followed in either direction.
    """

    def __init__(self):
        """Set up a SPARQL wrapper on the public DBpedia endpoint."""
        self.sparql = SPARQLWrapper("http://dbpedia.org/sparql")
        self.sparql.addDefaultGraph("http://dbpedia.org")

    def isEqualTest(self, uri1, uri2):
        """Identity check (test variant): return the raw ASK response."""
        spar = ("ASK {<" + uri1 + "> "
                "(<http://dbpedia.org/ontology/wikiPageRedirects>|"
                "^<http://dbpedia.org/ontology/wikiPageRedirects>)* "
                "<" + uri2 + ">}")
        return self.evalQuery(spar)

    def isEqual(self, uri1, uri2):
        """Return True when uri1 and uri2 are redirect-connected, else False."""
        spar = ("ASK {<" + uri1 + "> "
                "(<http://dbpedia.org/ontology/wikiPageRedirects>|"
                "^<http://dbpedia.org/ontology/wikiPageRedirects>)* "
                "<" + uri2 + ">}")
        # `return True and ...` was redundant; evalRes already yields a
        # boolean (or False on query failure).
        return self.evalRes(self.evalQuery(spar))

    def evalQuery(self, query):
        """Run *query* remotely; return parsed JSON, or False on any error."""
        time.sleep(1)  # throttle: run at most one query per second
        try:
            self.sparql.setReturnFormat(JSON)
            self.sparql.setQuery(query)
            print("running", query)
            return self.sparql.query().convert()
        except Exception:
            # Best-effort: treat any transport/parse failure as "no answer".
            return False

    def evalRes(self, results):
        """Extract the boolean of an ASK result; pass a False input through."""
        if results == False:
            return results
        else:
            return results['boolean']

    def printRes(self, results):
        """Print raw JSON results (debugging aid)."""
        print(results)

    def test(self):
        """Smoke test: repeatedly compare two fixed URIs."""
        uri1 = "http://dbpedia.org/resource/NaN"
        uri2 = "NaN"
        for i in range(20):
            print("query", i, self.isEqual(uri1, uri2))
    
# # execute    
# if __name__ == '__main__':
#     sp = Spar()
#     sp.test()   
Beispiel #33
0
class Sparql(object):

    """SPARQL queries to define property list and get property-specific subgraphs"""

    def __init__(self, entities, config_file, dataset, endpoint, default_graph):
        """Prepare the SPARQL wrapper and the property list for *dataset*.

        Args:
            entities: iterable of entity URIs to query for.
            config_file: JSON config mapping datasets to property lists.
            dataset: dataset name; also used to build output paths.
            endpoint: SPARQL endpoint URL.
            default_graph: optional default graph URI.
        """

        self.entities = entities  # file containing a list of entities

        self.dataset = dataset

        self.wrapper = SPARQLWrapper(endpoint)

        self.wrapper.setReturnFormat(JSON)

        if default_graph:

            self.default_graph = default_graph

            self.wrapper.addDefaultGraph(self.default_graph)

        # Query templates: all (s, o) pairs for a property, optionally
        # restricted to a single subject URI.
        self.query_prop = "SELECT ?s ?o  WHERE {?s %s ?o. }"

        self.query_prop_uri = "SELECT ?s ?o  WHERE {?s %s ?o. FILTER (?s = %s)}"

        self._define_properties(config_file)

    def _define_properties(self, config_file):
        """Read this dataset's property names from the JSON *config_file*."""

        self.properties = []

        with codecs.open(config_file, 'r', encoding='utf-8') as config_read:

            property_file = json.loads(config_read.read())

            for property_name in property_file[self.dataset]['content']:

                # 'feedback_<prop>' entries map to the plain property name.
                if 'feedback_' in property_name:

                    property_name = property_name.replace('feedback_', '')

                self.properties.append(property_name)

    def get_property_graphs(self):
        """Write one '<prop>.edgelist' file per property under
        datasets/<dataset>/graphs, querying the endpoint per entity."""

        properties = self.properties

        if 'feedback' in properties:
            properties.remove('feedback')  # don't query for the feedback property

        for prop in properties:  # iterate on the properties

            prop_short = prop

            prop_namespace = prop

            if '/' in prop:

                # avoid creating file with a '/' in the name
                prop_short = prop.split('/')[-1]

                # if it is actually a URI, surround by "<>"
                if prop.startswith("http"):
                    prop_namespace = '<' + prop + '>'

            try:
                mkdir('datasets/%s/' % self.dataset)
                mkdir('datasets/%s/graphs' % self.dataset)

            except:
                # directories may already exist
                pass

            with codecs.open('datasets/%s/graphs/%s.edgelist' % (self.dataset, prop_short), 'w',
                             encoding='utf-8') as prop_graph:  # open a property file graph

                for uri in self.entities:

                    uri = '<' + uri + '>'

                    self.wrapper.setQuery(self.query_prop_uri % (prop_namespace, uri))

                    for result in self.wrapper.query().convert()['results']['bindings']:

                        subj = result['s']['value']

                        obj = result['o']['value']

                        print((subj, obj))

                        prop_graph.write('%s %s\n' % (subj, obj))

    @staticmethod
    def get_uri_from_wiki_id(wiki_id):
        """Resolve a Wikipedia page id to its DBpedia URI; None on failure."""

        sparql = SPARQLWrapper("http://dbpedia.org/sparql")

        sparql.setQuery("""select ?s where {?s <http://dbpedia.org/ontology/wikiPageID> %d
           }""" % int(wiki_id))

        sparql.setReturnFormat(JSON)

        try:
            uri = sparql.query().convert()['results']['bindings'][0]['s']['value']

        except:
            # no binding returned (or endpoint error)
            uri = None

        return uri

    @staticmethod
    def get_item_metadata(uri, item_type, thumbnail_exists):
        """Fetch label/author/abstract metadata for *uri* from DBpedia.

        Returns a dict with 'label', 'author', an 'abstract' or
        'description' key, and (when scraped) a 'thumbnail' URL — or None
        when metadata is incomplete or any step fails.
        """

        sparql = SPARQLWrapper("http://dbpedia.org/sparql")

        # Fetch every candidate label/author variant in one query; all
        # patterns are OPTIONAL so missing values just leave gaps.
        sparql.setQuery("""select ?labelo ?labelp ?labels ?description ?abstract ?homepage ?authorlabo ?authorlabp ?authorlabs
                        where {

                        OPTIONAL {
                          <%s> <http://dbpedia.org/ontology/label> ?labelo .
                          FILTER(lang(?labelo) = 'en' )
                         }

                        OPTIONAL {
                          <%s> <http://dbpedia.org/property/label> ?labelp .
                          FILTER(lang(?labelp) = 'en' )
                        }

                        OPTIONAL {
                          <%s> <http://www.w3.org/2000/01/rdf-schema#label> ?labels.
                          FILTER(lang(?labels) = 'en' )
                        }

                        OPTIONAL {
                        <%s> <http://purl.org/dc/terms/description> ?description .
                        FILTER (lang(?description) = 'en')
                        }
                        OPTIONAL{
                        <%s> <http://xmlns.com/foaf/0.1/homepage> ?homepage .
                        }
                        OPTIONAL {
                          <%s> <http://dbpedia.org/ontology/abstract> ?abstract .
                          FILTER (lang(?abstract) = 'en')
                        }
                        OPTIONAL {
                        <%s> dbo:author ?o.
                        ?o rdfs:label ?authorlabs.
                        FILTER (lang(?authorlabs) = 'en')
                        }
                        OPTIONAL {
                        <%s> dbo:author ?o.
                        ?o dbo:label ?authorlabo.
                        FILTER (lang(?authorlabo) = 'en')
                        }

                        OPTIONAL {
                        <%s> dbo:author ?o.
                        ?o dbp:label ?authorlabp.
                        FILTER (lang(?authorlabp) = 'en')
                        }

                        }""" % (uri, uri, uri, uri, uri, uri, uri, uri, uri))


        sparql.setReturnFormat(JSON)

        try:  # check whether it does not return an empty list

            result_raw = sparql.query().convert()['results']['bindings'][0]
            result = {}

            for key, value in result_raw.items():

                result[key] = value['value']

            # Pick a label: later assignments overwrite earlier ones, so
            # priority is labels < labelp < labelo. `c` counts the misses.
            c = 0

            try:

                result['label'] = result['labels']

            except KeyError:
                c+=1
                pass

            try:

                result['label'] = result['labelp']

            except KeyError:
                c+=1
                pass

            try:

                result['label'] = result['labelo']

            except KeyError:
                c+=1
                pass

            # at least one label must be there
            if c == 3: 
                result = None

            # same with author
            # NOTE(review): if result was set to None above, the
            # result['author'] assignments below raise TypeError, which the
            # outer bare except turns into a None return — likely by luck
            # rather than design. Confirm before refactoring.
            c = 0

            try:

                result['author'] = result['authorlabs']

            except KeyError:
                c+=1
                pass

            try:

                result['author'] = result['authorlabp']

            except KeyError:
                c+=1
                pass

            try:

                result['author'] = result['authorlabo']

            except KeyError:
                c+=1
                pass

            # at least one label must be there
            if c == 3: 
                result = None

            # either abstract or description must be there
            if 'abstract' not in result.keys() and 'description' not in result.keys():
                result = None

            if not thumbnail_exists:

                # scrape google for thumbnail

                out = subprocess.check_output(["googleimagesdownload", "--keywords", "\"%s %s %s\"" % (result['label'].replace(',',''), result['author'], item_type), "--print_urls", "-l", "1"])

                # The image URL is on a fixed line of the tool's output.
                url = out.decode('utf-8').split('\n')[4].replace('Image URL: ','')

                result['thumbnail'] = url

                if not result['thumbnail']:  # skip item if there is not thumbnail
                    result = None

        except:

            # Any failure above (empty result set, missing keys, scrape
            # error) yields None.
            result = None

        return result
Beispiel #34
0
"""Virtuoso driver for graph database

The UWKGM project
:copyright: (c) 2020 Ichise Laboratory at NII & AIST
:author: Rungsiman Nararatwong
"""

from dorest import env
from SPARQLWrapper import SPARQLWrapper, JSON

from database.database.graph import default_graph_uri

# Resolve the virtuoso section of the environment configuration.
config = env.resolve('database.virtuoso')

# Build the endpoint URL; the port entry is optional in the config.
if 'port' in config:
    client = SPARQLWrapper('%s:%s/sparql/' %
                           (config['address'], config['port']))
else:
    client = SPARQLWrapper('%s/sparql/' % config['address'])

# Module-level client shared by the driver: set default graph and JSON results.
client.addDefaultGraph(default_graph_uri)
client.setReturnFormat(JSON)
Beispiel #35
0
from flask import Flask
from SPARQLWrapper import SPARQLWrapper, JSON
import os
from dotenv import load_dotenv
from flask import request
from flask import abort


app = Flask(__name__)
load_dotenv()

# Set Up SPARQL Connection
sparql = SPARQLWrapper(os.getenv('SPARQL_ENDPOINT'))
sparql.setReturnFormat(JSON)
sparql.addDefaultGraph('http://*****:*****@app.route('/productcategory/', methods=['GET'])
def get_product_category():
    product_name = request.args.get('product_name')
    product_name = product_name.replace('"','')
    print(product_name)
    """Get category of product given product name.
    Args:
        product_name (str): The product name. It should be url encoded, for example "Charcoal, sack" -> Charcoal%2C%20sack

    Returns:
        str: The category of queried product name.
    """
    q = ( f"""
    PREFIX reachIT: <http://www.reach-it.com/ontology/>
    SELECT str(?c) as ?type
Beispiel #36
0
class Sparql(object):
    """SPARQL helper: build per-property edgelist files for a set of
    entities, discovering the property list when none is configured."""

    def __init__ (self, entities, config_file, dataset, endpoint, default_graph, entity_class):
        """Prepare the SPARQL wrapper and the property list.

        Args:
            entities: path to a file of entity URIs, or the string "all".
            config_file: JSON config mapping datasets to property lists.
            dataset: dataset name; also used to build output paths.
            endpoint: SPARQL endpoint URL.
            default_graph: optional default graph URI.
            entity_class: when set, property discovery is restricted to a
                class-specific query (see _define_properties).
        """

        self.entities = entities  # file containing a list of entities

        self.dataset = dataset

        self.wrapper = SPARQLWrapper(endpoint)

        self.wrapper.setReturnFormat(JSON)

        if default_graph:

            self.default_graph = default_graph

            self.wrapper.addDefaultGraph(self.default_graph)

        self.entity_class = entity_class

        # Query templates: all (s, o) pairs for a property, optionally
        # restricted to a single subject URI.
        self.query_prop = "SELECT ?s ?o  WHERE {?s %s ?o. }"

        self.query_prop_uri = "SELECT ?s ?o  WHERE {?s %s ?o. FILTER (?s = %s)}"

        self._define_properties(config_file)

    def _define_properties(self, config_file):
        """Load the property list from config, or discover it via SPARQL."""

        with codecs.open(config_file, 'r', encoding='utf-8') as config_read:

            property_file = json.loads(config_read.read())

        try:

            self.properties = [i for i in property_file[self.dataset]]
            print(self.properties)

        except KeyError:

            print("No set of properties provided in the dataset")

            if not self.entity_class:

                # All dbpedia.org/ontology object properties in the store.
                query_all_prop = "SELECT distinct ?p " \
                                 "WHERE {?s ?p ?o. FILTER(!isLiteral(?o) && regex(STR(?p),\"dbpedia.org/ontology\"))}"

                self._get_properties(query_all_prop)

            else:

                # NOTE(review): the class is hard-coded to dbo:Band here;
                # self.entity_class is not interpolated — confirm intent.
                query_category_prop = "select distinct ?p " \
                                      "where { ?s a dbo:Band. ?s ?p ?o. " \
                                      "FILTER(!isLiteral(?o) && regex(STR(?p),\"dbpedia.org/ontology\"))} "

                self._get_properties(query_category_prop)

    def _get_properties(self, query):  # get all the properties from sparql endpoint if a list is not provided in config file

        self.properties = []

        self.wrapper.setQuery(query)

        self.wrapper.setReturnFormat(JSON)

        for results in self.wrapper.query().convert()['results']['bindings']:

            self.properties.append(results['p']['value'])

        # Always include subject categories and types.
        self.properties.append("dct:subject")

        self.properties.append("rdf:type")

    def get_property_graphs(self):
        """Write one '<prop>.edgelist' file per property under
        datasets/<dataset>/graphs, for all subjects or the listed entities."""

        properties = self.properties

        if 'feedback' in properties:
            properties.remove('feedback')  # don't query for the feedback property

        for prop in properties:  # iterate on the properties

            prop_short = prop

            prop_namespace = prop

            if '/' in prop:

                # avoid creating file with a '/' in the name
                prop_short = prop.split('/')[-1]

                # if it is actually a URI, surround by "<>"
                if prop.startswith("http"):
                    prop_namespace = '<'+prop+'>'

            try:
                mkdir('datasets/%s/'% self.dataset)
                mkdir('datasets/%s/graphs' % self.dataset)

            except:
                # directories may already exist
                pass

            with codecs.open('datasets/%s/graphs/%s.edgelist' %(self.dataset, prop_short),'w', encoding='utf-8') as prop_graph: #open a property file graph

                if self.entities == "all":

                    # One query per property, over every subject.
                    self.wrapper.setQuery(self.query_prop%prop_namespace)

                    for result in self.wrapper.query().convert()['results']['bindings']:

                        subj = result['s']['value']

                        obj = result['o']['value']

                        print((subj, obj))

                        prop_graph.write('%s %s\n' %(subj, obj))

                else:

                    with codecs.open('%s'%self.entities,'r', encoding='utf-8') as f:  # open entity file, select only those entities

                        for uri in f:  # for each entity

                            uri = uri.strip('\n')

                            uri = '<'+uri+'>'

                            self.wrapper.setQuery(self.query_prop_uri%(prop_namespace,uri))

                            for result in self.wrapper.query().convert()['results']['bindings']:

                                subj = result['s']['value']

                                obj = result['o']['value']

                                print((subj, obj))

                                prop_graph.write('%s %s\n' %(subj, obj))

                        f.seek(0)  # reinitialize iterator

    @staticmethod
    def get_uri_from_wiki_id(wiki_id):
        """Resolve a Wikipedia page id to its DBpedia URI; None on failure."""

        sparql = SPARQLWrapper("http://dbpedia.org/sparql")

        sparql.setQuery("""select ?s where {?s <http://dbpedia.org/ontology/wikiPageID> %d
           }""" %int(wiki_id))

        sparql.setReturnFormat(JSON)

        try:
            uri = sparql.query().convert()['results']['bindings'][0]['s']['value']

        except:
            # no binding returned (or endpoint error)
            uri = None

        return uri
# Read endpoint settings; `config` and the imports are defined earlier
# in the file (outside this chunk).
config.read('config.ini')

# Command-line arguments: the subject URI and its class type.
URI = sys.argv[1]
classType = sys.argv[2]

endpoint_uri = config['Mandatory']['endpointURI']
graph_uri = config['Mandatory']['graphURI']

# Set up endpoint and access to triple store
sparql = SPARQLWrapper(endpoint_uri)
sparql.setReturnFormat(JSON)
sparql.setMethod(POST)
store = SPARQLUpdateStore(endpoint_uri, endpoint_uri)

# Specify the (named) graph we're working with
sparql.addDefaultGraph(graph_uri)

# Create an in memory graph
g = Graph(store, identifier=graph_uri)

# Fetch every predicate/object pair of the given subject.
query = "select ?p ?o where {<"+ URI +"> ?p ?o}"
properties = g.query (query)

# Configurations mappings
mapping = ConfigParser()
mapping.read('mapping_fields.ini')

# Accumulators used by the property loop that follows this chunk.
propURI = ""
props = ""
for row in properties:
	propURI = str(row[0])