def test_utils_is_listlike():
    """Check that ``is_listlike`` accepts non-string iterables only.

    Tuples, lists, sets and iterators must be reported as list-like, while
    integers and (unicode) strings must not.
    """
    ok = [(1, 2, 3), [1, 2, 3], {1, 3, 2}, iter([1, 2, 3])]
    not_ok = [123, "123", u'123']

    # Every positive sample must be accepted. Comparing the length of the
    # list of results with len(ok) would always succeed, whatever
    # is_listlike returns, so the truth values themselves are checked.
    assert all(is_listlike(e) for e in ok)
    # None of the negative samples may be accepted.
    assert not any(is_listlike(e) for e in not_ok)
def _build_url_query(self):
    """Register the triple(s) whose subject is this object's URL(s).

    When the current object is already identified by an URL/URI (or by a
    list of them), it can be queried unambiguously: each URL is used as the
    subject of an RDF triple whose predicate and object come from
    ``self.args``.
    """
    if not is_listlike(self.has_url):
        # A single URL: the RDF triple will be `<URL> ?pred ?obj .`
        self.query_builder.add_query_triple(
            RDFTriple(subject=self.has_url,
                      predicate=self.args['predicate'],
                      object=self.args['object'],
                      language=self.query_language))
        return

    # A list/set/tuple of URLs: the triples are handed over as alternatives,
    # i.e. `{ <URL1> ?pred ?obj } UNION { <URL2> ?pred ?obj } .`
    alternatives = {
        RDFTriple(subject=uri,
                  predicate=self.args['predicate'],
                  object=self.args['object'],
                  language=self.query_language)
        for uri in self.has_url
    }
    self.query_builder.add_query_alternative_triples(alternatives)
def _process_subject_url_results(self, pred, obj):
    """Return a dictionary of results for a query whose triple subject is an URL.

    Each row of ``self.query_builder.results`` is unpacked as exactly two
    ``(name, value)`` pairs. The value bound to ``pred`` (shortened with
    ``get_shortened_uri``) becomes a key and the value bound to ``obj`` its
    value; the per-row dictionaries are then folded together with
    ``merge_two_dicts_in_sets``.

    :param pred: name under which the predicate is bound in a result row
    :param obj: name under which the object is bound in a result row
    :return: a dict mapping every URL of ``has_url`` to the merged results,
        flagged with ``u'validated': 1``; an empty dict when there is no
        result row.
    """
    things = {}

    rows = []
    for (name_a, value_a), (name_b, value_b) in self.query_builder.results:
        bindings = {name_a: value_a, name_b: value_b}
        rows.append({get_shortened_uri(bindings.get(pred)): bindings.get(obj)})

    if not rows:
        return things

    # has_url may hold one URL or a collection of URLs; always iterate.
    has_url = self.__dict__.get('has_url')
    urls = has_url if is_listlike(has_url) else [has_url]

    for url in urls:
        merged = reduce(merge_two_dicts_in_sets, rows)
        things[url] = merged
        # The thing was retrieved through its URL, so the results are
        # known to correspond to the right thing.
        merged[u'validated'] = 1

    return things
def _process_any_results(self, subj, pred, obj, check_type=True):
    """Return a dictionary of results for standard types of queries.

    Each row of ``self.query_builder.results`` is read as an iterable of
    ``(name, value)`` pairs whose values are shortened with
    ``get_shortened_uri``. Rows are grouped by the value bound to ``subj``
    and accumulated with ``merge_two_dicts_in_sets``.

    NOTE: the rows are not filtered against the instance's ``has_...``
    values; every row returned by the query builder is kept.

    :param subj: name under which the subject is bound in a result row
    :param pred: name under which the predicate is bound in a result row
    :param obj: name under which the object is bound in a result row
    :param check_type: Boolean. If True, a row whose predicate is one of
        'wdt:P31', 'rdf:type' or 'rdfs:subClassOf' and whose object is in
        ``self.rdf_types`` marks its thing with ``u'validated': 1``.
    :return: a dict mapping each subject (``None`` when a row has no
        ``subj`` binding) to its merged ``{predicate: object}`` results.
    :raises KeyError: if a row has no binding for ``pred`` or ``obj``.
    """
    # Predicates that state the type of a thing.
    type_predicates = ('wdt:P31', 'rdf:type', 'rdfs:subClassOf')

    # Keeps track of all the semantic web objects returned by the query.
    things = {}

    for result in self.query_builder.results:
        dict_results = {
            arg_name: get_shortened_uri(arg_value)
            for (arg_name, arg_value) in result
        }
        thing = dict_results.pop(subj, None)
        predicate = dict_results[pred]
        value = dict_results[obj]

        shortened_result = {}
        # Checking that the result is of the right type.
        if check_type and predicate in type_predicates and value in self.rdf_types:
            shortened_result[u'validated'] = 1
        shortened_result[predicate] = value

        things[thing] = merge_two_dicts_in_sets(things.get(thing, {}), shortened_result)

    return things
def _build_standard_query(self, entities_names, check_type=True, strict_mode=False):
    """Update the query_builder of the object with a standard query.

    The query options rely on the dictionaries contained in
    pyneql/utils/vocabulary.py.

    :param entities_names: the class variables beginning with 'has_' which
        have a value instantiated.
    :param check_type: Boolean. Check the type of the object (e.g: Book,
        Person, Location,…) directly in the SPARQL queries. If True, the
        object to find is constrained by an union of RDF triples, one per
        entry of ``self.rdf_types``. For a Book, for instance:
        ``[…] { ?Book a fabio:Book } UNION […] UNION { ?Book a schemaorg:Book } .``
    :param strict_mode: Boolean. Check the type of the object's attributes
        (e.g: label, first name,…) directly in the SPARQL queries. It is
        forwarded to ``create_triples_for_single_element`` only; list-like
        values are handled without it.

        Example: we are looking for a Thing whose *label* is "አዲስ አበባ".

        - In non strict mode the query is restrained to elements satisfying
          the triplet ``?Thing ?has_label "አዲስ አበባ".``; the predicate is
          left undetermined (``?has_label`` is a variable).
        - In strict mode the allowed predicates are those listed in
          ``self.voc_attributes['has_label']``, combined in the query.
          With ``self.voc_attributes['has_label']`` equal to
          ``[u'rdfs:label', u'wdt:P1813']``, the query is constrained to:
          ``[…]{ ?Thing rdfs:label "አዲስ አበባ" } UNION { ?Thing wdt:P1813 "አዲስ አበባ" }.[…]``
    """
    if check_type:
        # Restrict the query to elements of the current type with an union
        # of triples checking the type (e.g. for a Book):
        # [...] { ?Book a fabio:Book } UNION [...] UNION { ?Book a schemaorg:Book } .
        type_alternatives = {
            RDFTriple(subject=self.args['subject'],
                      predicate=u'a',
                      object=rdf_type,
                      language=self.query_language)
            for rdf_type in self.rdf_types
        }
        self.query_builder.add_query_alternative_triples(type_alternatives)

    # Add the query delimiters, i.e. the parameters given for the query
    # (stored in the instance variables beginning with "has_").
    for name in entities_names:
        values = self.__dict__.get(name)
        if not is_listlike(values):
            normalized = normalize_str(values)
            self.create_triples_for_single_element(name, normalized, strict_mode)
            continue
        # TODO: alternate triples should be created here
        self.create_triples_for_multiple_element(name, values)

    # Fetch everything about that Thing.
    self.query_builder.add_query_triple(
        RDFTriple(subject=self.args['subject'],
                  predicate=self.args['predicate'],
                  object=self.args['object'],
                  language=self.query_language))