def __init__(self,
             full_name=None,
             last_name=None,
             first_name=None,
             url=None,
             # birth_year=None, death_year=None,
             query_language=Lang.DEFAULT,
             endpoints=None,  # SPARQL endpoints where the query should be sent
             class_name=u'Person'
             ):
    if not (full_name or (first_name and last_name) or url):  # or birth_year or death_year
        raise QueryException("Not enough information was provided to find this person."
                             " Provide at least a full name, a first and last name, or a URL.")

    self.has_full_name = normalize_str(full_name) if full_name else None
    self.has_last_name = normalize_str(last_name) if last_name else None
    self.has_first_name = normalize_str(first_name) if first_name else None
    # self.has_birth_year = birth_year
    # self.has_death_year = death_year

    super(Person, self).__init__(
        url=url,
        query_language=query_language,
        endpoints=endpoints,
        class_name=class_name
    )
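# Usage sketch for the constructor above (illustration only: it assumes Person,
# Lang and QueryException are already imported from pyneql; only the keyword
# arguments visible in the signature above are relied upon).
#
# >>> duras = Person(full_name=u'Marguerite Duras', query_language=Lang.DEFAULT)
# >>> Person(first_name=u'Marguerite')   # no last name, no full name, no URL
# QueryException is raised: not enough information to find this person.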
def _normalize_result(self, result_dict):
    """Normalize a single SPARQL result binding.

    Translates opaque Wikidata property identifiers into legible labels and,
    for literal values, appends the language tag when one is present.

    :param result_dict: A result binding returned by the SPARQL endpoint
        (a dict with at least a 'value' key, and optionally 'type' and 'xml:lang').
    :return: The normalized string value.
    """
    value = translate_to_legible_wikidata_properties(result_dict[u'value'])
    value_type = result_dict.get(u'type')
    if value_type == u'literal':
        lang = result_dict.get('xml:lang')
        if lang:
            value = "%s _(@%s)" % (value, lang)
    return normalize_str(value)
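# Sketch of the expected behaviour of _normalize_result on a typical SPARQL
# result binding (illustration only; it assumes that
# translate_to_legible_wikidata_properties leaves a plain literal unchanged).
#
# >>> binding = {u'type': u'literal', u'xml:lang': u'fr', u'value': u'Marguerite Duras'}
# >>> self._normalize_result(binding)
# u'Marguerite Duras _(@fr)'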
def test_utils_normalize_str():
    """Utils - Naïve string normalisation: Should pass"""
    no_norm = " A form of logical, intuitive reasoning to deduce the nature of an uncertain thing or" \
              " situation, usually in the absence or in spite of concrete evidence. Adapted from the saying, " \
              "If it looks like a duck, swims like a duck, and quacks like a duck, then it's probably a " \
              " duck. " \
              "" \
              "" \
              "   "
    normalised = normalize_str(no_norm)
    well_normalised = u"A form of logical, intuitive reasoning to deduce the nature of an uncertain" \
                      u" thing or situation, usually in the absence or in spite of concrete evidence." \
                      u" Adapted from the saying, If it looks like a duck, swims like a duck, " \
                      u"and quacks like a duck, then it's probably a duck."
    assert well_normalised == normalised
def _extract_prefix_from_rdf_element(self, element):
    """ The element can be of the form:
    1- 1789
    2- '<http://purl.org/dc/elements/1.1/title>'
    3- 'http://purl.org/dc/elements/1.1/title'
    4- 'dc:title'
    5- '?anything'

    Case 1: Return the unchanged integer.
    Case 2: The element contains a namespace. The function replaces it with the
        corresponding prefix if it exists in the vocabulary, and the prefix is
        added to self.prefixes.
        E.g. the predicate <http://purl.org/dc/elements/1.1/title> will become
        dc:title and the prefix vocabulary.NameSpace.dc will be added to
        self.prefixes. If the namespace cannot be identified, the element is
        just normalized and returned.
    Case 3: The element is a bare URL. It is returned wrapped in angle brackets.
    Case 4: The element contains a prefix. If this prefix is already listed in
        self.prefixes, no change is made. If the prefix is not yet in the list,
        it is added if possible (a NameSpaceException is raised otherwise).
    Other cases (e.g. form 5): Just normalize the string (spaces, unicode) and return it.

    :param element: One of the 3 elements of an RDF triple (subject, predicate or object).
    :return: Normalized string of an RDF triple element if it is a string.
    """
    # Case 1 - The element is an integer, and should remain so
    if isinstance(element, int):
        return element

    # Case 2 - The element contains a namespace.
    if element[0] == u'<' and element[-1] == u'>':
        ns_element = element[1:-1]
        matches = RE_PREFIX_SEPARATOR.search(ns_element)
        if matches:
            limit = matches.end()
            pref = ns_element[0:limit]
            # If the namespace is listed in the vocabulary,
            # the element is shortened using the namespace prefix
            # and the prefix is added to the list of prefixes.
            short_prefix = NameSpace(pref)
            if short_prefix:
                self.add_prefix(short_prefix)
                element = u'%s:%s' % (short_prefix.name, ns_element[limit:])

    # Case 3 - The element is a URL.
    elif element[0:7] == 'http://':
        return u'<%s>' % element

    # Case 4 - The element contains a prefix
    elif u':' in element:
        pref, elem = element.split(u':')
        known_pref = NameSpace.__members__.get(pref)
        if known_pref:
            # The prefix is known, so we add it.
            self.add_prefix(known_pref)
            # Enum labels with duplicate values are turned into aliases of the first such label.
            # This can lead to namespace prefix differences between the prefix list and the element.
            # To avoid this unwanted behaviour, the element's prefix is rewritten to its main alias.
            # E.g. the element "dbpedia_owl:birthDate" is transformed into "dbo:birthDate" (known_pref.name).
            if not known_pref.name == pref:
                # Duplicate names in NameSpace => use the main alias for the duplicate namespace.
                element = element.replace(u'%s:' % pref, u'%s:' % known_pref.name)
        else:
            raise NameSpaceException(
                u"In the standard vocabulary, %s can't be found. "
                u"Without a prior declaration in the prefixes, it can't be used." % pref)

    # Other cases
    return normalize_str(element)
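# Sketch of the behaviour of _extract_prefix_from_rdf_element for each case
# (illustration only; the expected outputs assume that the 'dc' namespace is
# present in vocabulary.NameSpace, as in the docstring example above).
#
# >>> self._extract_prefix_from_rdf_element(1789)                                        # Case 1
# 1789
# >>> self._extract_prefix_from_rdf_element(u'<http://purl.org/dc/elements/1.1/title>')  # Case 2
# u'dc:title'
# >>> self._extract_prefix_from_rdf_element(u'http://purl.org/dc/elements/1.1/title')    # Case 3
# u'<http://purl.org/dc/elements/1.1/title>'
# >>> self._extract_prefix_from_rdf_element(u'dc:title')                                 # Case 4
# u'dc:title'
# >>> self._extract_prefix_from_rdf_element(u'?anything')                                # Other cases
# u'?anything'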
def _build_standard_query(self, entities_names, check_type=True, strict_mode=False):
    """Update the query_builder of the object.

    The query options rely on the dictionaries defined in pyneql/utils/vocabulary.py.

    :param entities_names: The class variables beginning with 'has_' which have an instantiated value.
    :param check_type: Boolean. Check the type of the object (e.g. Book, Person, Location, …)
        directly in the SPARQL queries.
        If True, the restriction of the object's type is done in the query.
        For instance, if the object is a Book, activating type checking will build queries where
        the object to find (?Book) is constrained by a union of RDF triples checking that ?Book
        is a Book:
        ``[…] { ?Book a fabio:Book } UNION […] UNION { ?Book a schemaorg:Book } .``
    :param strict_mode: Boolean. Check the type of the object's attributes (e.g. label, first name, …)
        directly in the SPARQL queries.
        If True, the predicates of the triples whose values are instantiated will have their types
        checked against the allowed types listed in ``self.voc_attributes``.

        Let's take an example: we are looking for a Thing whose *label* is "አዲስ አበባ".

        - In non-strict mode, the query is only restrained to elements satisfying the triple
          ``?Thing ?has_label "አዲስ አበባ".``. The predicate is left undetermined
          (``?has_label`` is a variable).
        - In strict mode, the types of the predicates of the triple are constrained.
          For the current class, those predicates are listed in ``self.voc_attributes['has_label']``
          and combined in the SPARQL query. Here, for the example, 'has_label' allows the RDF
          predicates 'rdfs:label' and u'wdt:P1813':

          >>> print(self.voc_attributes['has_label'])
          [u'rdfs:label', u'wdt:P1813']

          So in strict mode, the query will be constrained to:
          ``[…]{ ?Thing rdfs:label "አዲስ አበባ" } UNION { ?Thing wdt:P1813 "አዲስ አበባ" }.[…]``
    """
    if check_type:
        # Restrict the query to elements of the current type.
        # This builds a query with a union of RDF triples checking the type (e.g. Book):
        # [...] { ?Book a fabio:Book } UNION [...] UNION { ?Book a schemaorg:Book } .
        to_unite = set([])
        for class_type in self.rdf_types:
            to_unite.add(
                RDFTriple(subject=self.args['subject'],
                          predicate=u'a',
                          object=class_type,
                          language=self.query_language))
        self.query_builder.add_query_alternative_triples(to_unite)

    # Add query delimiters, i.e. the parameters given for the query
    # (stored in the instance variables beginning with "has_").
    for entity_name in entities_names:
        entity_values = self.__dict__.get(entity_name, None)
        if is_listlike(entity_values):
            # TODO: alternative triples should be created here
            self.create_triples_for_multiple_element(entity_name, entity_values)
        else:
            entity_value = normalize_str(entity_values)
            self.create_triples_for_single_element(entity_name, entity_value, strict_mode)

    # Fetch everything about that Thing
    self.query_builder.add_query_triple(
        RDFTriple(subject=self.args['subject'],
                  predicate=self.args['predicate'],
                  object=self.args['object'],
                  language=self.query_language))
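# Rough sketch of the query pieces this method assembles (illustration only;
# the SPARQL fragments are taken or simplified from the docstring above, and
# the entities_names list is built the way callers are assumed to build it,
# i.e. from the instantiated 'has_' attributes).
#
# >>> entities = [k for k, v in self.__dict__.items() if k.startswith('has_') and v]
# >>> self._build_standard_query(entities, check_type=True, strict_mode=False)
#
# The query builder then holds, roughly:
#   { ?Book a fabio:Book } UNION ... UNION { ?Book a schemaorg:Book } .   # from check_type
#   ?Book ?has_label "..." .                    # non-strict: predicate left free
#   ?Book ?predicate ?object .                  # final "fetch everything" triple
#
# With strict_mode=True, the middle line becomes a union over the predicates
# allowed in self.voc_attributes['has_label'], e.g.
#   { ?Book rdfs:label "..." } UNION { ?Book wdt:P1813 "..." } .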