Beispiel #1
0
    def set_limit(self, limit):
        """Limit the number of results the query returns.

        The rendered ``LIMIT <n>`` clause is stored in ``self.limit``.

        :param limit: strictly positive integer (an ``int`` or an ``int``
            subclass; booleans are rejected)
        :raises QueryException: if ``limit`` is not an integer, or is lower
            than 1
        """
        # bool is a subclass of int, but True/False is never a meaningful
        # limit: keep rejecting it, as the former exact-type check did.
        if isinstance(limit, bool) or not isinstance(limit, int):
            raise QueryException(
                u" Bad limit type. Must be an int, got %s instead." %
                type(limit))
        if limit < 1:
            raise QueryException(u" Bad limit value. Must be greater than 0.")

        self.limit = u'LIMIT %i' % limit
        # Lazy %-args: the message is only formatted if DEBUG is enabled.
        logging.debug("Adding limit = %i to query.", limit)
Beispiel #2
0
    def __init__(
            self,
            full_name=None,
            last_name=None,
            first_name=None,
            url=None,
            query_language=Lang.DEFAULT,
            endpoints=None,
            class_name=u'Person'):
        """Create a person to look up, identified by name and/or URL.

        At least one of the following must be provided: ``full_name``,
        both ``first_name`` and ``last_name``, or ``url``.

        :param full_name: full name of the person
        :param last_name: last name of the person
        :param first_name: first name of the person
        :param url: URL of the person, forwarded to the parent constructor
        :param query_language: language of the query, forwarded to the
            parent constructor
        :param endpoints: SPARQL endpoints where the query should be sent,
            forwarded to the parent constructor
        :param class_name: name of the current class, forwarded to the
            parent constructor
        :raises QueryException: if the identifying information is missing
        """
        # NOTE: birth/death year criteria are currently disabled.
        has_both_names = first_name and last_name
        if not full_name and not has_both_names and not url:
            raise QueryException(
                "There is not enough information provided to find this person."
                " Provide full name information.")

        # Every name attribute always exists; it only holds a normalised
        # value when the corresponding argument was given.
        self.has_full_name = None
        self.has_last_name = None
        self.has_first_name = None
        if full_name:
            self.has_full_name = normalize_str(full_name)
        if last_name:
            self.has_last_name = normalize_str(last_name)
        if first_name:
            self.has_first_name = normalize_str(first_name)

        super(Person, self).__init__(url=url,
                                     query_language=query_language,
                                     endpoints=endpoints,
                                     class_name=class_name)
Beispiel #3
0
    def __init__(self,
                 title=None,
                 author=None,
                 publisher=None,
                 publication_date=None,
                 gallica_url=None,
                 url=None,
                 query_language=Lang.DEFAULT,
                 endpoints=None,
                 class_name=u'Book'):
        """Create a book to look up.

        The book must be identified either by a link (``url`` or
        ``gallica_url``) or by its ``title`` together with at least one of
        ``author``, ``publication_date`` or ``publisher``.

        :param title: title of the book, forwarded to the parent constructor
        :param author: author of the book, forwarded to the parent
            constructor
        :param publisher: publisher of the book
        :param publication_date: publication date, forwarded to the parent
            constructor as ``date``
        :param gallica_url: Gallica URL of the book
        :param url: URL of the book
        :param query_language: language of the query
        :param endpoints: SPARQL endpoints where the query should be sent
        :param class_name: name of the current class
        :raises QueryException: if the identifying information is missing
        """
        identified_by_link = url or gallica_url
        identified_by_description = title and (author or publication_date
                                               or publisher)
        if not identified_by_link and not identified_by_description:
            raise QueryException(
                "There is not enough information provided to find this creative work."
                " Provide more information.")

        self.has_publisher = publisher
        self.has_gallica_url = gallica_url
        self.has_url = url

        parent_arguments = dict(title=title,
                                author=author,
                                date=publication_date,
                                url=url,
                                query_language=query_language,
                                endpoints=endpoints,
                                class_name=class_name)
        super(Book, self).__init__(**parent_arguments)
Beispiel #4
0
    def __init__(self,
                 title=None,
                 author=None,
                 author_is_organisation=False,
                 date=None,
                 url=None,
                 query_language=Lang.DEFAULT,
                 endpoints=None,
                 class_name=u'CreativeWork'):
        """Create a creative work to look up.

        :param title: title of the work
        :param author: either a ``Person``/``Thing`` instance, whose URIs
            are read with ``get_uris()``, or any other truthy value, which
            is resolved through ``find_author_organisation`` or
            ``find_author_person``
        :param author_is_organisation: when ``author`` is not a
            ``Person``/``Thing`` instance, selects the organisation lookup
            instead of the person lookup
        :param date: date of the work
        :param url: URL of the work
        :param query_language: language of the query
        :param endpoints: SPARQL endpoints where the query should be sent
        :param class_name: name of the current class
        :raises QueryException: if the given ``Person``/``Thing`` author
            has no URI
        """
        # NOTE(review): when ``author`` is falsy, ``has_author`` is not
        # assigned by this constructor.
        if author:
            if isinstance(author, (Person, Thing)):
                # TODO: Implement organisation
                self.has_author = author.get_uris()
                if not self.has_author:
                    raise QueryException(
                        "The given author is invalid (no URI found).")
            elif author_is_organisation:
                self.has_author = self.find_author_organisation(
                    author, query_language)
            else:
                self.has_author = self.find_author_person(
                    author, query_language)

        self.has_title = title
        self.has_date = date
        self.has_url = url

        parent_arguments = dict(url=url,
                                query_language=query_language,
                                endpoints=endpoints,
                                class_name=class_name)
        super(CreativeWork, self).__init__(**parent_arguments)
Beispiel #5
0
    def _validate_arguments(self):
        """Check that the query arguments can be used in a valid SPARQL query"""

        # The only mandatory argument to put in our template is the list
        # of rdf triples.
        if not (self.triples or self.alternate_triples):
            raise QueryException(
                u"The query can't be instantiated without rdf triples in the WHERE clause"
            )

        # Check prefixes, which is a list of namespace.NameSpace
        if not all(isinstance(p, NameSpace) for p in self.prefixes):
            raise QueryException(
                u"At least one of the prefixes given are NOT of type %s" %
                NameSpace.__name__)

        # Check that there is at least one endpoint specified. If not, adding the default endpoint.
        if len(self.endpoints) == 0:
            self.endpoints.add(Endpoint.DEFAULT)
            logging.warning("No endpoint were set - Using DEFAULT (%s)" %
                            highlight_str(Endpoint.DEFAULT.value))

        # If there are too many alternate triples, the query fails
        # because the length of generated SQL text generated by some
        # Virtuoso engines exceeds 10000 lines of code.
        # In those cases, we decrease the length of the alternate triples
        # by removing some language-specific DBpedia URLs
        new_alternate_triple = []
        for alternate_triple in self.alternate_triples:
            if len(alternate_triple
                   ) <= 10:  # This value was arbitrarily chosen
                new_alternate_triple.append(alternate_triple)
            else:
                cleaned_alternate_triples = set([])
                for triple in alternate_triple:
                    matched = search('<http://(?P<lang>..)\.dbpedia.org/',
                                     triple.object)
                    if matched:
                        # We wish to keep only the one in the specified language
                        if triple.language.value in matched.groupdict().values(
                        ):
                            cleaned_alternate_triples.add(triple)
                    else:
                        cleaned_alternate_triples.add(triple)
                new_alternate_triple.append(cleaned_alternate_triples)

        self.alternate_triples = new_alternate_triple
Beispiel #6
0
    def add_endpoint(self, endpoint):
        """
        Add an :class:`Endpoint` to the current query. This query will be sent
        to every listed endpoint. The results will be aggregated.
        For the list of supported endpoints, see enum.Endpoints

        :param endpoint: the endpoint to add
        :raises QueryException: if ``endpoint`` is not an :class:`Endpoint`
        """
        # isinstance() rather than an exact type comparison, like the other
        # argument checks of this package.
        if not isinstance(endpoint, Endpoint):
            raise QueryException(
                u" Bad endpoint type. Must be an Endpoint, got %s instead." %
                type(endpoint))
        self.endpoints.add(endpoint)
Beispiel #7
0
    def __init__(self,
                 query_language=Lang.DEFAULT,
                 label=None,
                 url=None,
                 endpoints=None,
                 limit=1500,
                 class_name=u'Thing'):
        """Create a generic Semantic Web object to look up.

        :param query_language: The language of the query (a ``Lang`` member)
        :param label: The label which should be queried
        :param url: The URL/URI of the Semantic Web object we wish to query
        :param endpoints: The SPARQL endpoints where the query should be sent
        :param limit: Upper bound on the number of solutions returned by the
            query that will be stored
        :param class_name: The name of the current class; it is also the key
            used to look up ``rdf_types`` and ``voc_attributes``
        :raises QueryException: if ``query_language`` is not a ``Lang``
        """
        language_is_valid = isinstance(query_language, Lang)
        if not language_is_valid:
            raise QueryException(
                "The language of the query must be of type enum.LanguagesIso6391."
            )

        # Search criteria
        self.has_label = label
        self.has_url = url

        # Class-specific vocabulary
        self.class_name = class_name
        self.rdf_types = rdf_types[class_name]
        self.voc_attributes = voc_attributes[class_name]

        # Names of the variables of the generic triple
        self.args = dict(subject=u'?%s' % class_name,
                         predicate=u'?pred',
                         object=u'?obj')

        # Query settings
        self.query_builder = GenericSPARQLQuery()
        self.query_limit = limit
        self.query_language = query_language

        # Endpoints are de-duplicated; no endpoint given means an empty set
        self.endpoints = set(endpoints or ())

        # Results set
        self.attributes = dict()

        # Literals by languages
        self.labels_by_languages = dict()
Beispiel #8
0
    def _send_requests(self):
        """
        Send the current query to the SPARQL endpoints declared in self.endpoints.

        The arguments are validated first with ``self._validate_arguments()``.
        A query is then generated per endpoint with ``self._querify`` and
        sent as a form-encoded HTTP POST.

        :return: the list of http responses with status code 200, one per
            SPARQL endpoint that answered successfully. Other responses are
            logged as warnings and left out.
        :raises QueryException: if no query was generated for an endpoint
        """
        self._validate_arguments()

        responses = []
        headers = {'Accept': 'application/json'}

        for endpoint in self.endpoints:

            # Depending on the endpoint, queries may be slightly different,
            # especially concerning language information of literals.
            self._querify(endpoint)
            query = self.queries.get(endpoint)
            if not query:
                raise QueryException(
                    u"No query was generated for endpoint %s." % str(endpoint))

            printable_query = highlight_str(query, highlight_type='query')
            logging.debug("Sending query %s to endpoint %s ...",
                          printable_query, endpoint.value)

            # POST is used because queries can be big. The query therefore
            # has to travel in the request body (``data=``): with ``params=``
            # it would be appended to the URL, exactly like a GET, and be
            # subject to URL length limits.
            response = requests.post(endpoint.value,
                                     data={"query": query},
                                     headers=headers)

            if response.status_code == 200:
                responses.append(response)
            else:
                logging.warning(
                    "Query %s returned http code %s when sent to %s.",
                    printable_query,
                    highlight_str(response.status_code), endpoint.value)
        return responses
Beispiel #9
0
    def find_author_person(self,
                           author,
                           query_language,
                           strict_mode=True,
                           check_type=True):
        """Find a Person corresponding to an author name.

        A ``Person`` is built from the name and queried on every member of
        ``Endpoint``.

        :param author: Full name of the author
        :param query_language: The language declared in the Book query
        :param strict_mode: forwarded to ``Person.query``
        :param check_type: forwarded to ``Person.query``
        :return: the result of ``get_uris()`` on the person found
        :raises QueryException: if the query leaves the person without
            attributes
        """
        every_endpoint = list(Endpoint)
        person = Person(full_name=author,
                        query_language=query_language,
                        endpoints=every_endpoint)
        person.query(strict_mode=strict_mode, check_type=check_type)

        if person.attributes:
            return person.get_uris()

        raise QueryException(
            "The author could not be found on the Semantic Web.")
Beispiel #10
0
    def find_author_organisation(self,
                                 author,
                                 query_language,
                                 strict_mode=True,
                                 check_type=True):
        """Find an Organisation corresponding to an author name.

        There is no dedicated organisation class: a generic ``Thing`` is
        built with the name as its label and queried on every member of
        ``Endpoint``.

        :param author: Full name of the Organisation
        :param query_language: The language declared in the Book query
        :param strict_mode: forwarded to ``Thing.query``
        :param check_type: forwarded to ``Thing.query``
        :return: the result of ``get_uris()`` on the organisation found
        :raises QueryException: if the query leaves the organisation without
            attributes
        """
        organisation = Thing(label=author,
                             query_language=query_language,
                             endpoints=list(Endpoint))
        organisation.query(strict_mode=strict_mode, check_type=check_type)
        if not organisation.attributes:
            # The message used to read "could be found", i.e. the opposite
            # of the failure being reported.
            raise QueryException(
                "The authoring organisation could not be found on the Semantic Web."
            )
        return organisation.get_uris()