Beispiel #1
0
def test_utils_is_listlike():
    """Check that ``is_listlike`` accepts non-string iterables and rejects the rest.

    Tuples, lists, sets and iterators must be reported as list-like;
    integers and (unicode) strings must not.
    """
    ok = [(1, 2, 3), [1, 2, 3], {1, 3, 2}, iter([1, 2, 3])]
    not_ok = [123, "123", u'123']

    # Use all(): comparing the length of the result list with the length of
    # the input would hold whatever values ``is_listlike`` returns.
    assert all(is_listlike(e) for e in ok)
    assert not any(is_listlike(e) for e in not_ok)
Beispiel #2
0
    def _build_url_query(self):
        """Add to the query builder the triple(s) whose subject is ``self.has_url``.

        When ``self.has_url`` is list-like, one triple per URL is built and the
        whole set is registered as alternatives, i.e.
        ``{ <URL1> ?pred ?obj } UNION { <URL2> ?pred ?obj } .``
        Otherwise a single triple ``<URL> ?pred ?obj .`` is registered.
        The predicate and object come from ``self.args``.
        """

        def triple_for(url):
            # Same predicate/object/language for every URL; only the subject varies.
            return RDFTriple(subject=url,
                             predicate=self.args['predicate'],
                             object=self.args['object'],
                             language=self.query_language)

        if not is_listlike(self.has_url):
            # A single URL: it is the subject of one plain triple.
            self.query_builder.add_query_triple(triple_for(self.has_url))
            return

        # Several URLs: each one becomes an alternative subject.
        alternatives = {triple_for(url) for url in self.has_url}
        self.query_builder.add_query_alternative_triples(alternatives)
Beispiel #3
0
    def _process_subject_url_results(self, pred, obj):
        """Return a dictionary of results for a query containing an URL as the subject of a triple."""
        things = {}
        result_dict_list = [{
            a: a1,
            b: b1
        } for (a, a1), (b, b1) in self.query_builder.results]
        result_dict_list = [{
            get_shortened_uri(item.get(pred)): item.get(obj)
        } for item in result_dict_list]
        if result_dict_list:
            urls = self.__dict__.get('has_url') \
                if is_listlike(self.__dict__.get('has_url')) \
                else [self.__dict__.get('has_url')]

            for url in urls:
                things[url] = reduce(merge_two_dicts_in_sets, result_dict_list)
                # If we retrieved the thing using its URL,
                # we are sure the results correspond to the right thing.
                things[url][u'validated'] = 1
        return things
Beispiel #4
0
    def _process_any_results(self, subj, pred, obj, check_type=True):
        """ Return a dictionary of results for standard types of queries.
        TODO: Document better that part."""

        # The dict 'thing' will keep track of all the semantic web objects
        # that are returned by the query
        things = {}

        # We need to check only the object's instantiated "has_..." values
        values_to_check = {}
        for element, value in self.__dict__.items():
            if element.startswith('has_') and self.__dict__.get(element, None):
                if is_listlike(value):
                    for v in value:
                        values_to_check[v] = element
                else:
                    values_to_check[value] = element

        # If check_type is set to True, we need to check the type of the responses
        for result in self.query_builder.results:

            shortened_result = {}
            dict_results = {
                arg_name: get_shortened_uri(arg_value)
                for (arg_name, arg_value) in result
            }
            thing = dict_results.pop(subj, None)

            # Checking that the result is of the right type
            if check_type\
                    and dict_results[pred] in ['wdt:P31', 'rdf:type', 'rdfs:subClassOf']\
                    and dict_results[obj] in self.rdf_types:
                shortened_result[u'validated'] = 1

            shortened_result[dict_results[pred]] = dict_results[obj]

            things[thing] = merge_two_dicts_in_sets(things.get(thing, {}),
                                                    shortened_result)

        return things
Beispiel #5
0
    def _build_standard_query(self,
                              entities_names,
                              check_type=True,
                              strict_mode=False):
        """
        Populate ``self.query_builder`` with the triples of a standard query.

        The query options are documented as relying on the dictionaries of
        pyneql/utils/vocabulary.py.

        :param entities_names: names of the instance variables beginning with
            'has_' whose value is instantiated.

        :param check_type: Boolean.
            If True, the type of the object (e.g: Book, Person, Location,…) is
            constrained directly in the SPARQL query: the subject must match
            one of ``self.rdf_types``. For a Book this gives an union such as
            ``[…] { ?Book a fabio:Book  } UNION […] UNION { ?Book a schemaorg:Book  } .``

        :param strict_mode: Boolean.
            Forwarded to ``create_triples_for_single_element`` for every
            attribute holding a single value; it is not forwarded for
            list-like values.
            Let's take an example: we are looking for a Thing whose *label*
            is "አዲስ አበባ".

            - In non strict mode the predicate is left undetermined
              (``?has_label`` is a variable):
              ``?Thing ?has_label "አዲስ አበባ".``

            - In strict mode the predicate is restricted to the ones allowed
              for that attribute, listed in ``self.voc_attributes``:

              >>> print(self.voc_attributes['has_label'])
              [u'rdfs:label', u'wdt:P1813']

              so the query is constrained to
              ``[…]{ ?Thing rdfs:label "አዲስ አበባ"  } UNION { ?Thing wdt:P1813 "አዲስ አበባ"  }.[…]``
        """
        if check_type:
            # One `?subject a <type>` triple per accepted type, united as
            # alternatives so that any of the types satisfies the query.
            type_constraints = {
                RDFTriple(subject=self.args['subject'],
                          predicate=u'a',
                          object=class_type,
                          language=self.query_language)
                for class_type in self.rdf_types
            }
            self.query_builder.add_query_alternative_triples(type_constraints)

        # The query delimiters are the parameters given for the query
        # (i.e. stored in the instance variables beginning with "has_").
        for entity_name in entities_names:
            entity_values = self.__dict__.get(entity_name, None)
            if not is_listlike(entity_values):
                entity_value = normalize_str(entity_values)
                self.create_triples_for_single_element(entity_name,
                                                       entity_value,
                                                       strict_mode)
                continue

            # TODO: alternate triples should be created here
            self.create_triples_for_multiple_element(entity_name,
                                                     entity_values)

        # Finally, fetch everything about that Thing.
        self.query_builder.add_query_triple(
            RDFTriple(subject=self.args['subject'],
                      predicate=self.args['predicate'],
                      object=self.args['object'],
                      language=self.query_language))