def set_limit(self, limit):
    """
    Cap the number of results returned by the query.

    :param limit: int, the maximum number of results (must be >= 1)
    :raises QueryException: if ``limit`` is not exactly an ``int`` or if it
        is lower than 1
    """
    # Guard clauses: reject anything that is not a strictly positive int.
    if type(limit) is not int:
        raise QueryException(
            u" Bad limit type. Must be an int, got %s instead." % type(limit))
    if limit < 1:
        raise QueryException(u" Bad limit value. Must be greater than 0.")

    # The limit is stored as a ready-to-use 'LIMIT n' clause.
    limit_clause = u'LIMIT %i' % limit
    self.limit = limit_clause
    logging.debug("Adding limit = %i to query.", limit)
def __init__(self,
             full_name=None,
             last_name=None,
             first_name=None,
             url=None,
             query_language=Lang.DEFAULT,
             endpoints=None,  # SPARQL endpoints where the query should be sent
             class_name=u'Person'):
    """
    Build a Person query from name information or from a URL.

    At least one of the following must be provided: ``full_name``, both
    ``first_name`` and ``last_name``, or ``url``.

    :param full_name: The full name of the person
    :param last_name: The last name of the person
    :param first_name: The first name of the person
    :param url: The URL/URI of the person
    :param query_language: The language of the query
    :param endpoints: The endpoints where the query should be sent
    :param class_name: The name of the current class
    :raises QueryException: if there is not enough information to identify
        the person
    """
    # NOTE: the birth_year / death_year parameters are currently disabled.
    if not full_name and not (first_name and last_name) and not url:
        raise QueryException(
            "There is not enough information provided to find this person."
            " Provide full name information.")

    # Every provided name is normalized; missing names are stored as None.
    self.has_full_name = None if not full_name else normalize_str(full_name)
    self.has_last_name = None if not last_name else normalize_str(last_name)
    self.has_first_name = None if not first_name else normalize_str(first_name)

    super(Person, self).__init__(
        url=url,
        query_language=query_language,
        endpoints=endpoints,
        class_name=class_name,
    )
def __init__(self,
             title=None,
             author=None,
             publisher=None,
             publication_date=None,
             gallica_url=None,
             url=None,
             query_language=Lang.DEFAULT,
             endpoints=None,  # SPARQL endpoints where the query should be sent
             class_name=u'Book'):
    """
    Build a Book query.

    The book must be identifiable either by a link (``url`` or
    ``gallica_url``) or by its ``title`` combined with at least one of
    ``author``, ``publication_date`` or ``publisher``.

    :param title: The title of the book
    :param author: The author of the book (forwarded to the parent class)
    :param publisher: The publisher of the book
    :param publication_date: The publication date (forwarded as ``date``)
    :param gallica_url: The Gallica URL of the book
    :param url: The URL/URI of the book
    :param query_language: The language of the query
    :param endpoints: The endpoints where the query should be sent
    :param class_name: The name of the current class
    :raises QueryException: if there is not enough information to identify
        the book
    """
    identified_by_link = url or gallica_url
    identified_by_title = title and (author or publication_date or publisher)
    if not (identified_by_link or identified_by_title):
        raise QueryException(
            "There is not enough information provided to find this creative work."
            " Provide more information.")

    self.has_publisher = publisher
    self.has_gallica_url = gallica_url
    self.has_url = url

    super(Book, self).__init__(
        title=title,
        author=author,
        date=publication_date,
        url=url,
        query_language=query_language,
        endpoints=endpoints,
        class_name=class_name,
    )
def __init__(self,
             title=None,
             author=None,
             author_is_organisation=False,
             date=None,
             url=None,
             query_language=Lang.DEFAULT,
             endpoints=None,  # SPARQL endpoints where the query should be sent
             class_name=u'CreativeWork'):
    """
    Build a CreativeWork query.

    :param title: The title of the creative work
    :param author: Either an already built Person/Thing object, or the name
        of the author (which is then looked up to get its URIs)
    :param author_is_organisation: When ``author`` is a name, whether it
        designates an organisation rather than a person
    :param date: The date of the creative work
    :param url: The URL/URI of the creative work
    :param query_language: The language of the query
    :param endpoints: The endpoints where the query should be sent
    :param class_name: The name of the current class
    :raises QueryException: if the given author object has no URI, or if
        the author lookup fails
    """
    # Always define the attribute, like the other ``has_*`` attributes, so
    # that reading it never raises AttributeError when no author was given.
    self.has_author = None

    # The author may be a Person or an Organization
    if author:
        if isinstance(author, (Person, Thing)):
            # TODO: Implement organisation
            self.has_author = author.get_uris()
            if not self.has_author:
                raise QueryException(
                    "The given author is invalid (no URI found).")
        elif author_is_organisation:
            self.has_author = self.find_author_organisation(
                author, query_language)
        else:
            self.has_author = self.find_author_person(
                author, query_language)

    self.has_title = title
    self.has_date = date
    self.has_url = url

    super(CreativeWork, self).__init__(
        url=url,
        query_language=query_language,
        endpoints=endpoints,
        class_name=class_name,
    )
def _validate_arguments(self):
    """
    Check that the query arguments can be used in a valid SPARQL query.

    Side effects: adds ``Endpoint.DEFAULT`` to ``self.endpoints`` when no
    endpoint is set, and replaces ``self.alternate_triples`` by a list in
    which every group of more than 10 triples has been reduced to a set.

    :raises QueryException: if there are no triples at all, or if one of
        the prefixes is not a NameSpace
    """
    # The only mandatory argument to put in our template is the list
    # of rdf triples.
    if not (self.triples or self.alternate_triples):
        raise QueryException(
            u"The query can't be instantiated without rdf triples in the WHERE clause"
        )

    # Check prefixes, which is a list of namespace.NameSpace
    if not all(isinstance(p, NameSpace) for p in self.prefixes):
        raise QueryException(
            u"At least one of the prefixes given are NOT of type %s" %
            NameSpace.__name__)

    # Check that there is at least one endpoint specified.
    # If not, adding the default endpoint.
    if not self.endpoints:
        self.endpoints.add(Endpoint.DEFAULT)
        logging.warning("No endpoint were set - Using DEFAULT (%s)" %
                        highlight_str(Endpoint.DEFAULT.value))

    # If there are too many alternate triples, the query fails
    # because the length of generated SQL text generated by some
    # Virtuoso engines exceeds 10000 lines of code.
    # In those cases, we decrease the length of the alternate triples
    # by removing some language-specific DBpedia URLs
    max_group_size = 10  # This value was arbitrarily chosen
    # Raw string: '\.' in a plain string literal is an invalid escape
    # sequence (SyntaxWarning on recent Python versions).
    dbpedia_pattern = r'<http://(?P<lang>..)\.dbpedia\.org/'

    new_alternate_triple = []
    for alternate_triple in self.alternate_triples:
        if len(alternate_triple) <= max_group_size:
            # Small groups are kept untouched (same object).
            new_alternate_triple.append(alternate_triple)
            continue

        cleaned_alternate_triples = set()
        for triple in alternate_triple:
            matched = search(dbpedia_pattern, triple.object)
            # Triples that do not point to a language-specific DBpedia are
            # always kept; for the others, we wish to keep only the one in
            # the specified language.
            if not matched or matched.group('lang') == triple.language.value:
                cleaned_alternate_triples.add(triple)
        new_alternate_triple.append(cleaned_alternate_triples)

    self.alternate_triples = new_alternate_triple
def add_endpoint(self, endpoint):
    """
    Add an :class:`Endpoint` to the current query.

    The query will be sent to every listed endpoint and the results will be
    aggregated. For the list of supported endpoints, see enum.Endpoints.

    :param endpoint: the endpoint to add
    :raises QueryException: if ``endpoint`` is not an Endpoint
    """
    # Reject anything that is not exactly an Endpoint.
    if type(endpoint) is not Endpoint:
        raise QueryException(
            u" Bad endpoint type. Must be an Endpoint, got %s instead." %
            type(endpoint))

    self.endpoints.add(endpoint)
def __init__(self,
             query_language=Lang.DEFAULT,
             label=None,
             url=None,
             endpoints=None,  # SPARQL endpoints where the query should be sent
             limit=1500,
             class_name=u'Thing'):
    """
    Initialize the query state of a Semantic Web object.

    :param query_language: The language of the query
    :param label: The label which should be queried
    :param url: The URL/URI of the Semantic Web object we wish to query
    :param endpoints: The endpoints where the query should be sent
    :param limit: The limit puts an upper bound on the number of solutions
        returned by the query that will be stored.
    :param class_name: The name of the current class
    :raises QueryException: if ``query_language`` is not a ``Lang``
    """
    if not isinstance(query_language, Lang):
        raise QueryException(
            "The language of the query must be of type enum.LanguagesIso6391."
        )

    # Search criteria
    self.has_label = label
    self.has_url = url

    # Class-specific vocabulary, looked up by class name
    self.class_name = class_name
    self.rdf_types = rdf_types[class_name]
    self.voc_attributes = voc_attributes[class_name]

    # Names of the variables used for the subject/predicate/object
    self.args = dict(
        subject=u'?%s' % class_name,
        predicate=u'?pred',
        object=u'?obj',
    )

    self.query_builder = GenericSPARQLQuery()
    self.query_limit = limit
    self.query_language = query_language

    # Adding Endpoints (an empty set when none is given)
    self.endpoints = set(endpoints or ())

    # Initializing results set
    self.attributes = {}

    # Initializing literals by languages
    self.labels_by_languages = {}
def _send_requests(self):
    """
    Send the current query to the SPARQL endpoints declared in
    self.endpoints.

    The arguments are validated first, then one query is generated and sent
    per endpoint. Responses whose HTTP status is not 200 are logged and
    left out of the result.

    :return: the list of http responses (status 200 only) for the current
        query, one for each SPARQL endpoint that answered successfully.
    :raises QueryException: if the arguments are invalid or if no query
        was generated for an endpoint
    """
    responses = []
    self._validate_arguments()

    # The headers are the same for every endpoint.
    headers = {'Accept': 'application/json'}

    for endpoint in self.endpoints:
        # Depending on the endpoint, queries may be slightly different,
        # especially concerning language information of literals.
        self._querify(endpoint)

        query = self.queries.get(endpoint)
        if not query:
            raise QueryException(
                u"No query was generated for endpoint %s." % str(endpoint))

        logging.debug(
            "Sending query %s to endpoint %s ..." %
            (highlight_str(query, highlight_type='query'), endpoint.value))

        # Using POST instead of GET because it is slightly more efficient
        # for big queries (POST queries are not cached).
        # The query is sent in the form-encoded request body (``data=``):
        # with ``params=`` it would be appended to the URL, which defeats
        # the purpose of POST for big queries (URL length limits).
        response = requests.post(endpoint.value,
                                 data={"query": query},
                                 headers=headers)

        if response.status_code == 200:
            responses.append(response)
        else:
            logging.warning(
                "Query %s returned http code %s when sent to %s." %
                (highlight_str(query, highlight_type='query'),
                 highlight_str(response.status_code), endpoint.value))

    return responses
def find_author_person(self,
                       author,
                       query_language,
                       strict_mode=True,
                       check_type=True):
    """
    Find a Person corresponding to an author name.

    The person is queried on every known Endpoint.

    :param author: Full name of the author
    :param query_language: The language declared in the Book query
    :param strict_mode: forwarded to the Person query
    :param check_type: forwarded to the Person query
    :return: the URIs of the person found (result of ``get_uris()``)
    :raises QueryException: if the query did not retrieve any attribute
    """
    person = Person(full_name=author,
                    query_language=query_language,
                    endpoints=list(Endpoint))
    person.query(strict_mode=strict_mode, check_type=check_type)

    if person.attributes:
        return person.get_uris()

    raise QueryException(
        "The author could not be found on the Semantic Web.")
def find_author_organisation(self,
                             author,
                             query_language,
                             strict_mode=True,
                             check_type=True):
    """
    Find an Organisation corresponding to an author name.

    There is no dedicated 'Organisation' class yet, so the organisation is
    looked up as a generic Thing, by label, on every known Endpoint.

    :param author: Full name of the Organisation
    :param query_language: The language declared in the Book query
    :param strict_mode: forwarded to the Thing query
    :param check_type: forwarded to the Thing query
    :return: the URIs of the organisation found (result of ``get_uris()``)
    :raises QueryException: if the query did not retrieve any attribute
    """
    organisation = Thing(label=author,
                         query_language=query_language,
                         endpoints=list(Endpoint))
    organisation.query(strict_mode=strict_mode, check_type=check_type)

    if not organisation.attributes:
        # The message is raised when nothing was found: "could NOT be found".
        raise QueryException(
            "The authoring organisation could not be found on the Semantic Web."
        )
    return organisation.get_uris()