Beispiel #1
0
    def _process_results(self, check_type=True):
        """Fold the results of the SPARQL query for this Thing into ``self.attributes``.

        The candidate things come from ``_process_subject_url_results`` when
        the instance has a truthy ``has_url`` attribute, and from
        ``_process_any_results`` otherwise. Each retained candidate is recorded
        under ``owl:sameAs`` and ``skos:exactMatch``, its own attributes are
        merged into ``self.attributes``, and the labels are finally organised
        by language.

        :param check_type: Boolean. Check the type of the object (e.g: Book,
            Person, Location, ...) in the SPARQL query results. When True,
            every candidate that is not flagged ``validated`` is excluded.
            When False, every candidate is kept and a warning is logged.

        :Example:

        If the object is a Book, activating type checking will build queries
        where the object to find (?Book) is constrained by a union of RDF
        triples checking that ?Book is a Book::

            [...] { ?Book a fabio:Book } UNION [...] UNION { ?Book a schemaorg:Book } .
        """
        if not check_type:
            message = "Type checking is disabled for object %s" % str(
                self.__dict__)
            logging.warning(message)

        # Drop the first character of each query argument (presumably the
        # leading '?' of a SPARQL variable name -- TODO confirm).
        subj, obj, pred = (
            self.args[role][1:] for role in ('subject', 'object', 'predicate'))

        if self.__dict__.get('has_url', None):
            things = self._process_subject_url_results(pred, obj)
        else:
            things = self._process_any_results(subj, pred, obj, check_type)

        # Wikimedia entities (keys containing 'wd:') are accepted without type
        # validation for now. TODO: filter them properly.
        wikimedia_uris = [uri for uri in things if 'wd:' in uri]
        for wikimedia_uri in wikimedia_uris:
            things[wikimedia_uri][u'validated'] = 1

        # Iterate over a snapshot, since rejected candidates are removed from
        # ``things`` along the way.
        for uri, found_attributes in list(things.items()):
            if check_type and not found_attributes.get(u'validated', False):
                # Wrong type: discard this candidate
                things.pop(uri)
                continue

            # Identity of the accepted candidate
            identity = {u'owl:sameAs': uri, u'skos:exactMatch': uri}
            self.attributes = merge_two_dicts_in_sets(self.attributes,
                                                      identity)

            # Everything else that was learnt about it
            self.attributes = merge_two_dicts_in_sets(self.attributes,
                                                      found_attributes)

        # Fetching all the literals and organizing them by language
        self._organise_labels_by_language()
Beispiel #2
0
    def _recursively_find_more_about(self, seen=None):
        """Enrich ``self.attributes`` with the attributes of equivalent entities.

        Every URI found under ``skos:exactMatch`` or ``owl:sameAs`` in
        ``self.attributes`` that has not been visited yet is queried as a new
        ``Thing`` against this object's endpoints, and the attributes it
        yields are merged into ``self.attributes``. Since that merge may
        reveal further equivalent URIs, the method calls itself again until
        no unvisited URI is left.

        :param seen: set of URIs that were already visited; it is updated in
            place. When omitted, a fresh empty set is used for this call.
        :return: None
        """
        if seen is None:
            # A mutable default (``seen=set([])``) is created once and shared
            # by every call on every instance: URIs visited while processing
            # one Thing would be silently skipped for all the following ones.
            seen = set()

        # Gather the URIs of the entities declared equivalent to this one.
        # Each attribute holds either a set of values or a single value.
        same_entity_uris = set()
        for same_entities_key in (u'skos:exactMatch', u'owl:sameAs'):
            value = self.attributes.get(same_entities_key)
            if isinstance(value, set):
                same_entity_uris.update(value)
            else:
                same_entity_uris.add(value)

        # A missing attribute shows up as None
        same_entity_uris.discard(None)

        to_see = same_entity_uris - seen
        if not to_see:
            return

        for same_entity_uri in to_see:
            seen.add(same_entity_uri)
            same_entity = Thing(url=same_entity_uri)
            same_entity.add_query_endpoints(self.endpoints)
            same_entity.query()
            self.attributes = merge_two_dicts_in_sets(self.attributes,
                                                      same_entity.attributes)

        # The merged attributes may reference equivalent entities not seen yet
        self._recursively_find_more_about(seen)
Beispiel #3
0
    def _get_life_info(self, life_event):
        """Collect what the linked data says about one event of the person's life.

        Every attribute whose lower-cased key contains ``life_event`` is
        inspected. The first value that holds a parseable date is stored
        under ``'date'``. Every other value is merged under ``'place'``,
        ``'name'``, ``'cause/manner'`` or ``'other'`` depending on the words
        found in the attribute key.

        :param life_event: An event of the life of a person (e.g.: birth, death)
        :return: a dict of information concerning the given life event
        """
        details = {}
        date_found = False  # Turns True once a full date has been parsed
        for raw_key, raw_value in self.attributes.items():
            key = raw_key.lower()
            if life_event not in key:
                continue

            # An attribute holds either a set of values or a single value
            candidates = raw_value if isinstance(raw_value, set) else {raw_value}

            # Where the non-date values of this attribute are filed; this
            # depends on the key only.
            if 'place' in key:
                category = 'place'
            elif 'name' in key:
                category = 'name'
            elif 'cause' in key or 'manner' in key:
                category = 'cause/manner'
            else:
                category = 'other'

            for candidate in candidates:
                # Values with more than four '-' characters are ignored
                if candidate.count('-') > 4:
                    continue

                if not contains_a_date(candidate):
                    details = merge_two_dicts_in_sets(details,
                                                      {category: candidate})
                    continue

                # Only the first successfully parsed date is kept
                if date_found:
                    continue
                try:
                    details['date'] = parsedate(candidate)
                except ValueError:
                    # No available date info to parse
                    continue
                date_found = True

        return details
Beispiel #4
0
def test_utils_merge_two_dicts_in_sets2():
    """Flat dicts: values of a shared key end up in a set, other keys are kept as is."""

    left = {'both1': 'botha1x', 'both2': 'botha2', 'only_x': 'only_x'}
    right = {'both1': 'botha1y', 'both2': 'botha2', 'only_y': 'only_y'}

    expected = {
        # Shared key, different values: both are kept
        'both1': {'botha1x', 'botha1y'},
        # Shared key, same value: a one-element set
        'both2': {'botha2'},
        # Keys present on one side only keep their plain value
        'only_x': 'only_x',
        'only_y': 'only_y',
    }

    assert merge_two_dicts_in_sets(left, right) == expected
Beispiel #5
0
def test_utils_merge_two_dicts_in_sets1():
    """Nested dicts: the merge recurses into a dict shared by both sides."""

    second = {
        'both': {'both_y_diff': 'bar', 'both_same': 'same_y'},
        'only_y': 'only_y',
    }
    first = {
        'both': {'both_x_diff': 'foo', 'both_same': 'same_x'},
        'only_x': {'only_x': 'baz'},
    }

    expected = {
        'both': {
            # Same nested key on both sides: values gathered in a set
            'both_same': {'same_x', 'same_y'},
            # Nested keys present on one side only are kept untouched
            'both_x_diff': 'foo',
            'both_y_diff': 'bar',
        },
        'only_x': {'only_x': 'baz'},
        'only_y': 'only_y',
    }

    assert merge_two_dicts_in_sets(first, second) == expected
Beispiel #6
0
    def _process_any_results(self, subj, pred, obj, check_type=True):
        """Return a dictionary of results for standard types of queries.

        Each entry of ``self.query_builder.results`` is read as an iterable
        of ``(name, value)`` pairs, every value being shortened with
        ``get_shortened_uri``. The value bound to ``subj`` identifies a thing;
        the values bound to ``pred`` and ``obj`` give one attribute of that
        thing. The attributes of a same thing are accumulated with
        ``merge_two_dicts_in_sets``.

        :param subj: name under which the subject is bound in a result
        :param pred: name under which the predicate is bound in a result
        :param obj: name under which the object is bound in a result
        :param check_type: Boolean. If True, a thing is flagged with
            ``u'validated': 1`` when one of its results has a typing predicate
            (``wdt:P31``, ``rdf:type`` or ``rdfs:subClassOf``) whose object
            belongs to ``self.rdf_types``.
        :return: a dict mapping each thing to the dict of its attributes
        :raises KeyError: if a result has no binding for ``pred`` or ``obj``
        """
        # Predicates stating the type of a thing
        type_predicates = ('wdt:P31', 'rdf:type', 'rdfs:subClassOf')

        # The dict 'things' will keep track of all the semantic web objects
        # that are returned by the query
        things = {}

        for result in self.query_builder.results:
            bindings = {
                arg_name: get_shortened_uri(arg_value)
                for (arg_name, arg_value) in result
            }

            # NOTE(review): a result without subject binding is filed under
            # the key None -- confirm whether that can actually happen.
            thing = bindings.pop(subj, None)
            predicate = bindings[pred]
            value = bindings[obj]

            shortened_result = {}

            # Checking that the result is of the right type
            if check_type \
                    and predicate in type_predicates \
                    and value in self.rdf_types:
                shortened_result[u'validated'] = 1

            shortened_result[predicate] = value

            things[thing] = merge_two_dicts_in_sets(things.get(thing, {}),
                                                    shortened_result)

        return things