# Example 1
    def __init__(self,
                 full_name=None, last_name=None, first_name=None,
                 url=None,
                 # birth_year=None, death_year=None,
                 query_language=Lang.DEFAULT,
                 endpoints=None,  # SPARQL endpoints the query will be sent to
                 class_name=u'Person'
                 ):
        """Build a Person query object.

        At least one of *full_name*, the (*first_name*, *last_name*) pair,
        or *url* must be provided; otherwise a QueryException is raised.
        """
        enough_information = full_name or (first_name and last_name) or url  # or birth_year or death_year
        if not enough_information:
            raise QueryException("There is not enough information provided to find this person."
                                 " Provide full name information.")

        # Normalize each provided name part; unset parts stay None.
        for attribute, raw_value in ((u'has_full_name', full_name),
                                     (u'has_last_name', last_name),
                                     (u'has_first_name', first_name)):
            setattr(self, attribute, normalize_str(raw_value) if raw_value else None)

        # self.has_birth_year = birth_year
        # self.has_death_year = death_year
        super(Person, self).__init__(url=url,
                                     query_language=query_language,
                                     endpoints=endpoints,
                                     class_name=class_name)
    def _normalize_result(self, result_dict):
        """Normalize a single SPARQL result binding into a readable string.

        :param result_dict: a SPARQL JSON result binding, i.e. a dict with at
            least a u'value' key and optionally u'type' and 'xml:lang' keys.
        :return: the normalized string value, with the language tag appended
            for language-tagged literals (e.g. u"Paris _(@fr)").
        """

        value = translate_to_legible_wikidata_properties(result_dict[u'value'])

        # 'rdf_type' avoids shadowing the builtin 'type'; comparing directly
        # to u'literal' already handles a missing/None type.
        rdf_type = result_dict.get(u'type')
        if rdf_type == u'literal':
            lang = result_dict.get('xml:lang')
            if lang:
                # Keep track of the literal's language tag.
                value = "%s _(@%s)" % (value, lang)

        return normalize_str(value)
# Example 3
def test_utils_normalize_str():
    """Utils - Naive string normalisation: whitespace runs are collapsed."""

    raw = ("   A form of logical, intuitive reasoning   to deduce the nature of an uncertain thing or"
           " situation, usually in the absence or in     spite of concrete evidence. Adapted from the saying, "
           "If it looks like     a duck, swims like a duck, and   quacks like a duck, then it's probably a "
           "                                      duck.                                                       "
           ""
           ""
           "                                                                                                  ")

    expected = (u"A form of logical, intuitive reasoning to deduce the nature of an uncertain"
                u" thing or situation, usually in the absence or in spite of concrete evidence."
                u" Adapted from the saying, If it looks like a duck, swims like a duck, "
                u"and quacks like a duck, then it's probably a duck.")

    assert normalize_str(raw) == expected
# Example 4
    def _extract_prefix_from_rdf_element(self, element):
        """Normalize one element of an RDF triple and register its prefix.

        The element can be of the form:
            1- 1789                                       (an integer)
            2- '<http://purl.org/dc/elements/1.1/title>'  (a full namespace)
            3- 'http://example.org/page'                  (a plain URL)
            4- 'dc:title'                                 (a prefixed element)

        Case 1: Return the unchanged integer.

        Case 2: The element contains a namespace. The function replaces it with the
        corresponding prefix if it exists in the vocabulary. If so, the corresponding
        prefix is added to self.prefixes.
        Eg: the predicate <http://purl.org/dc/elements/1.1/title> will become
        dc:title and the prefix vocabulary.NameSpace.dc will be added to self.prefixes.
        If the namespace cannot be identified, then it is just normalized and returned.

        Case 3: The element is a URL; it is returned wrapped in angle brackets.

        Case 4: The element contains a prefix. If this prefix is already listed in
        self.prefixes, no change is made. If the prefix is not yet in the list,
        we add it if possible (raise a NameSpaceException otherwise).

        Other cases: Just normalize the string (spaces, unicode) and return it.

        :param element: One of the 3 elements of an RDF triple (subject, object or predicate).
        :return: Normalized string of an rdf triple element if it is a string,
            the element itself if it is an integer.
        :raises NameSpaceException: when the element carries a prefix that is
            not declared in the standard vocabulary.
        """

        # Case 1 - The element is an integer, and should remain so
        if isinstance(element, int):
            return element

        # Case 2 - The element contains a namespace.
        if element[0] == u'<' and element[-1] == u'>':
            ns_element = element[1:-1]
            matches = RE_PREFIX_SEPARATOR.search(ns_element)
            if matches:
                limit = matches.end()
                pref = ns_element[0:limit]

                # If the namespace is listed in the vocabulary
                # then the element is shortened using the namespace prefix
                # and the prefix is added to the list of prefixes.
                try:
                    short_prefix = NameSpace(pref)
                except ValueError:
                    # BUGFIX: an unknown namespace used to raise out of the
                    # Enum lookup; per the contract above, fall through and
                    # just normalize the element.
                    short_prefix = None
                if short_prefix:
                    self.add_prefix(short_prefix)
                    element = u'%s:%s' % (short_prefix.name, ns_element[limit:])

        # Case 3 - The element is a URL (http or https).
        elif element.startswith((u'http://', u'https://')):
            return u'<%s>' % element

        # Case 4 - The element contains a prefix
        elif u':' in element:
            # maxsplit=1: the local part may itself contain colons.
            pref, elem = element.split(u':', 1)
            known_pref = NameSpace.__members__.get(pref)

            if known_pref:
                # The prefix is known. Then we add it.
                self.add_prefix(known_pref)

                # Enums labels with duplicate values are turned into aliases for the first such label.
                # This can lead to Namespaces prefix differences in the prefix list and the element.
                # In order to avoid this unwanted behavior, we transform the element prefix to its main alias.
                # E.g: element "dbpedia_owl:birthDate" will be transformed to "dbo:birthDate" (known_pref.name).
                if known_pref.name != pref:
                    # Duplicate names in NameSpace => using the main alias for the duplicate namespaces.
                    element = element.replace(u'%s:' % pref, u'%s:' % known_pref.name)
            else:
                # BUGFIX: interpolate the unknown prefix itself (was
                # 'known_pref', which is always None in this branch).
                raise NameSpaceException(
                    u"In the standard vocabulary, %s can't be found. "
                    u"Without a prior declaration in the prefixes, it can't be used."
                    % pref)
        # Other cases
        return normalize_str(element)
# Example 5
    def _build_standard_query(self,
                              entities_names,
                              check_type=True,
                              strict_mode=False):
        """
        Updates the query_builder of the object.
        The query options rely on the dictionaries contained in pyneql/utils/vocabulary.py.

        :param entities_names: the class variables beginning with 'has_' which have a value instantiated

        :param check_type: Boolean.
        Check the type of the object (e.g: Book, Person, Location,…)
        directly in the SPARQL queries.
        If True, the restriction of the object's type is done in the query.
        For instance, if the object is a Book, activating type checking will build queries where the object
        to find (?Book) is constrained by an union of RDF triples checking that ?Book is a Book:
        ``[…] { ?Book a fabio:Book  } UNION […] UNION { ?Book a schemaorg:Book  } .``

        :param strict_mode: Boolean.
        Check the type of the object's attributes (e.g: label, first name,…)
        directly in the SPARQL queries.
        If True, the predicates of the triplet whose values are instantiated will have their types checked
        against the allowed types listed in ``self.voc_attributes``.
        Let's take an example:
        We are looking for a Thing whose *label* is "አዲስ አበባ".

        - Non strict mode will have its query restrained to elements satisfying
        the triplet ``?Thing ?has_label "አዲስ አበባ".``.
        The predicate is left undetermined (``?has_label`` is a variable).

        - In strict mode, we are strict about the types of predicates of the triplet.
         For the current class, those predicates will be listed in
         ``self.voc_attributes['has_label']`` and combined in the SPARQL query.
         Here, for the example, we set 'has_label' allowed the RDF predicates 'rdfs:label' and u'wdt:P1813'.

         >>> print(self.voc_attributes['has_label'])
         >>> [u'rdfs:label', u'wdt:P1813']

         So in strict_mode, the query will be constrained to:

         ``[…]{ ?Thing rdfs:label "አዲስ አበባ"  } UNION { ?Thing wdt:P1813 "አዲስ አበባ"  }.[…]``
        """
        if check_type:
            # Restricting the query to elements of the current type.
            # This builds a query with a union of RDF triples checking the type (e.g. Book):
            # [...] { ?Book a fabio:Book  } UNION [...] UNION { ?Book a schemaorg:Book  } .
            to_unite = {
                RDFTriple(subject=self.args['subject'],
                          predicate=u'a',
                          object=class_type,
                          language=self.query_language)
                for class_type in self.rdf_types
            }
            self.query_builder.add_query_alternative_triples(to_unite)

        # Adding query delimiters, that are the parameters given for query
        # (i.e stored in the instance variables beginning with "has_").
        for entity_name in entities_names:
            entity_values = self.__dict__.get(entity_name, None)
            if is_listlike(entity_values):
                # TODO: alternative triples should be created here instead.
                self.create_triples_for_multiple_element(
                    entity_name, entity_values)

            else:
                entity_value = normalize_str(entity_values)
                self.create_triples_for_single_element(entity_name,
                                                       entity_value,
                                                       strict_mode)

        # Fetching everything about that Thing
        self.query_builder.add_query_triple(
            RDFTriple(subject=self.args['subject'],
                      predicate=self.args['predicate'],
                      object=self.args['object'],
                      language=self.query_language))