Example #1
0
def extract_country_or_none(entity_extract, location_attributes=None):
    """Look up the country (or countries) associated with an entity.

    The entity is identified by its Wikidata id, which is read from
    `entity_extract`; the lookup itself is delegated to
    `ParseItemPage.get_country_from_any`.

    :param entity_extract: either a mapping holding the id under the key
        'wikidata_id' (e.g. the output of
        eWRT.ws.wikidata.extract_meta.collect_attributes_from_wp_and_wd),
        or an object exposing the id as `.id` (used when subscripting
        raises a TypeError).
    :param location_attributes: attributes handed on as `local_attributes`
        to the country lookup; the module-level `local_attributes` is used
        when this is None.
    :returns: whatever `get_country_from_any` returned (a warning is issued
        when it holds more than one entry), or None when that call raised
        a ValueError.
    """
    attributes = (local_attributes if location_attributes is None
                  else location_attributes)

    try:
        wikidata_id = entity_extract['wikidata_id']
    except TypeError:
        # subscripting failed: fall back to the `id` attribute
        wikidata_id = entity_extract.id
    page = pywikibot.ItemPage(WIKIDATA_SITE, title=wikidata_id)

    try:
        countries = ParseItemPage.get_country_from_any(
            page, local_attributes=attributes, languages=RELEVANT_LANGUAGES)
        if len(countries) > 1:
            message = 'More than one country found for entity {}'
            warnings.warn(message.format(wikidata_id))
        return countries
    except ValueError:
        message = 'Unable to determine country for entity {}!'
        warnings.warn(message.format(wikidata_id))
        return None
Example #2
0
def test_attribute_preferred_value():
    """Check the behaviour when no claim is ranked as preferred.

    test_complete_claim_details already covers the case where a preferred
    value exists. Here every copied 'P735' claim is set to rank 'normal';
    the result of complete_claim_details must then lack a 'preferred' key,
    and calling attribute_preferred_value directly must raise a ValueError.
    """
    # work on shallow copies so the rank change is applied to the copies
    demoted_names = [copy.copy(claim) for claim in entity_mock.claims['P735']]
    for claim in demoted_names:
        claim.rank = 'normal'

    details = ParseItemPage.complete_claim_details(
        'P735', demoted_names, ['en'], ['labels'])
    assert 'preferred' not in details

    try:
        attribute_preferred_value(demoted_names)
    except ValueError:
        pass
    else:
        raise AssertionError(
            'This should raise a ValueError: No item marked preferred!')
Example #3
0
def test_extract_literal_properties(literal_type, language, expected):
    """Extract one literal type in one language from the mock entity and
    compare the outcome with the expected value.

    :param literal_type: literal kind to request (passed as a one-element
        `literals` list)
    :param language: language to request (passed as a one-element
        `languages` list)
    :param expected: value the extraction has to be equal to
    """
    extracted = ParseItemPage.extract_literal_properties(
        entity=entity_mock, languages=[language], literals=[literal_type])
    assert extracted == expected
Example #4
0
def item_with_country(wikipedia_title, language):
    """Fetch the item for a Wikipedia title and attach its country label.

    :param wikipedia_title: title handed to `wikidata_from_wptitle`
    :param language: language handed to `wikidata_from_wptitle`
    :returns: the looked-up item, with the English country label stored
        under the key 'country'
    """
    item = wikidata_from_wptitle(wikipedia_title, language=language)
    country_entity = extract_country_or_none(item)

    # NOTE(review): extract_country_or_none may yield None (or a list of
    # results) - confirm that extract_literal_properties copes with that.
    country_literals = ParseItemPage.extract_literal_properties(
        country_entity, RELEVANT_LANGUAGES)
    item['country'] = country_literals['labels']['en']

    return item
Example #5
0
 def test_complete_claim_details(self):
     """Run complete_claim_details on the 'P735' claims of the mock entity
     and compare the outcome with `self.expected_names_result` (a list of
     values - first and second name - with the first marked as preferred).

     A MaxlagTimeoutError is downgraded to a warning.
     """
     try:
         given_name_claims = self.entity_mock.text['claims']['P735']
         actual = ParseItemPage.complete_claim_details(
             'P735', given_name_claims, ['en'], ['labels'])
         assert actual == self.expected_names_result
     except pywikibot.exceptions.MaxlagTimeoutError:
         warnings.warn('External API unreachable')
Example #6
0
def test_complete_claim_details():
    """Run complete_claim_details on the 'P735' claims of the mock entity
    and compare the outcome with `expected_names_result` (a list of values
    - first and second name - with the first marked as preferred)."""
    given_name_claims = entity_mock.text['claims']['P735']
    actual = ParseItemPage.complete_claim_details(
        'P735', given_name_claims, ['en'], ['labels'])
    assert actual == expected_names_result
Example #7
0
def test_parseItemPage_all():
    """Parse the sample item page with several option sets and compare.

    Covered: parsing with and without attribute labels (same keys, same
    literals, no 'labels' in the label-free variant, equality with
    `sample_output` once timestamps are stripped) and parsing with
    `resolve_country=True`, which must yield the expected 'country' entry.
    """
    page = itempage
    import pprint

    details_plain = ParseItemPage(
        page,
        include_literals=True,
        languages=['en', 'de', 'sv'],
        resolve_country=False,
        include_attribute_labels=False,
    ).details
    details_labelled = ParseItemPage(
        page,
        include_literals=True,
        languages=['en', 'de', 'sv'],
        resolve_country=False,
        include_attribute_labels=True,
    ).details

    # both variants expose the same attributes ...
    assert set(details_labelled.keys()) == set(details_plain.keys())
    # ... but the label-free one must not carry any 'labels'
    for attribute in details_plain.values():
        assert 'labels' not in attribute
    # the converse check on the labelled variant is currently disabled:
    # assert any(('labels' in val for val in parsed_with_attribute_labels.values()))
    for literal in ('labels', 'descriptions', 'aliases'):
        assert details_labelled[literal] == details_plain[literal]
    pprint.pprint(details_labelled)

    assert result_without_timestamp(
        details_labelled) == result_without_timestamp(sample_output)
    for attribute in details_labelled.values():
        # only attributes with a value list are checked; P18 is exempt
        if 'values' not in attribute or 'P18' in attribute['url']:
            continue
        for sub_value in attribute['values']:
            assert 'labels' in sub_value

    details_with_country = ParseItemPage(
        page,
        include_literals=False,
        wd_parameters={},
        languages=['en', 'de', 'sv'],
        resolve_country=True,
        include_attribute_labels=True,
        qualifiers_of_interest=[],
        entity_type='person',
    ).details
    assert 'country' in details_with_country
    pprint.pprint(details_with_country['country'])
    pprint.pprint(details_with_country)

    expected_country = {
        'url': 'https://www.wikidata.org/wiki/Property:P17',
        'values': [
            {
                'claim_id': u'q350$8E72D3A5-A067-47CB-AF45-C73ED7CFFF9E',
                'derived': True,
                'labels': {
                    'de': u'Vereinigtes K\xf6nigreich',
                    'en': u'United Kingdom',
                    'sv': u'Storbritannien',
                },
                'url': u'https://www.wikidata.org/wiki/Q145',
            },
        ],
    }
    assert details_with_country['country'] == expected_country
Example #8
0
def test_attribute_preferred_value():
    """Verify handling of claims among which none is ranked 'preferred'.

    The positive case (a preferred value is present and gets marked) is
    implicitly covered by test_complete_claim_details. This test sets the
    rank of every copied 'P735' claim to 'normal' and expects
    complete_claim_details to return a result without a 'preferred' key,
    and attribute_preferred_value to raise a ValueError.
    """
    normal_rank_claims = []
    for original in entity_mock.claims['P735']:
        normal_rank_claims.append(copy.copy(original))
    for duplicate in normal_rank_claims:
        duplicate.rank = 'normal'

    completed = ParseItemPage.complete_claim_details(
        'P735',
        normal_rank_claims,
        ['en'],
        ['labels'],
    )
    assert 'preferred' not in completed

    try:
        attribute_preferred_value(normal_rank_claims)
    except ValueError:
        pass
    else:
        raise AssertionError(
            'This should raise a ValueError: No item marked preferred!')
Example #9
0
def extract_country_or_none(entity_extract):
    """Look up the country (or countries) associated with an entity.

    The lookup is delegated to `ParseItemPage.get_country_from_any`, using
    the module-level `local_attributes` and requesting labels only.

    :param entity_extract: either a mapping holding the Wikidata id under
        the key 'wikidata_id' (e.g. the output of
        eWRT.ws.wikidata.extract_meta.collect_attributes_from_wd_and_wd),
        or an object exposing the id as `.id` (used when subscripting
        raises a TypeError).
    :returns: whatever `get_country_from_any` returned (a warning is issued
        when it holds more than one entry), or None when that call raised
        a ValueError.
    """
    try:
        wikidata_id = entity_extract['wikidata_id']
    except TypeError:
        # subscripting failed: fall back to the `id` attribute
        wikidata_id = entity_extract.id
    page = pywikibot.ItemPage(WIKIDATA_SITE, title=wikidata_id)

    try:
        countries = ParseItemPage.get_country_from_any(
            page,
            local_attributes=local_attributes,
            languages=RELEVANT_LANGUAGES,
            literals=['labels'],
        )
        if len(countries) > 1:
            message = 'More than one country found for entity {}'
            warnings.warn(message.format(wikidata_id))
        return countries
    except ValueError:
        message = 'Unable to determine country for entity {}!'
        warnings.warn(message.format(wikidata_id))
        return None
Example #10
0
def collect_attributes_from_wd_and_wd(itempage,
                                      languages,
                                      wd_parameters,
                                      include_literals=True):
    """Merge the Wikipedia and Wikidata details of one entity into a dict.

    :param itempage: ItemPage from which to collect information
    :param languages: list of languages in which to include literals and
            Wikipedia information
    :param wd_parameters: list of wikidata properties (Pxxx codes) to be
            included, if present (passed on as `claims_of_interest`)
    :param include_literals: Include properties and alternate names. If
            false, only labels are included.
    :returns: a dictionary of the collected details about this entity from
            both Wikipedia and Wikidata. It holds the preferred 'url', one
            '<language>wiki' entry per Wikipedia result, the parsed
            Wikidata details and the 'wikidata_id'.
    :raises ValueError: when `wp_summary_from_wdid` returns nothing for the
            entity.
    """
    wikipedia_data = wp_summary_from_wdid(itempage.id,
                                          languages=languages,
                                          sitelinks=itempage.sitelinks)
    if not wikipedia_data:
        # give the caller something to diagnose instead of a bare error
        raise ValueError(
            'No Wikipedia data found for entity {} in languages {}'.format(
                itempage.id, languages))

    # use the Wikipedia article in the first language found as the entity's
    # unique preferred `url`.
    entity_extracted_details = {'url': wikipedia_data[0]['url']}
    for wiki_page in wikipedia_data:
        # key each result by its language code, e.g. 'enwiki'
        entity_extracted_details[wiki_page['language'] + 'wiki'] = wiki_page

    entity = ParseItemPage(itempage,
                           include_literals=include_literals,
                           claims_of_interest=wd_parameters,
                           languages=languages)
    entity_extracted_details.update(entity.details)
    entity_extracted_details['wikidata_id'] = itempage.id

    return entity_extracted_details
Example #11
0
def test_get_country_from_location():
    """Check country resolution from differently ordered local attributes.

    Two negative cases must raise a ValueError (no usable attribute on the
    person entity); two positive cases must resolve to exactly one country
    that depends on the order of the location attributes.

    The negative cases use try/except/else with an AssertionError: raising
    a ValueError inside the `try` to signal failure would be swallowed by
    the very `except ValueError` that is meant to catch the expected error.
    """
    # we expect a ValueError when the only local attribute tried is
    # P17 = country - Douglas Adams doesn't have a country attribute
    try:
        ParseItemPage.get_country_from_any(
            entity_mock,
            local_attributes=['P17'],
            languages=['en'])
    except ValueError:
        pass
    else:
        raise AssertionError(
            'Country should not be identified, entity contains no '
            'attribute P17!')

    # still a ValueError with local attributes not applicable to persons
    local_attributes = OrderedDict([
        ("P17", u"country"),
        ("P131", u"located in the administrative territorial entity"),
        ("P159", u"headquarters location"),
        ("P740", u"location of formation"),
    ])
    try:
        ParseItemPage.get_country_from_any(
            entity_mock,
            local_attributes=local_attributes,
            languages=['en'])
    except ValueError:
        pass
    else:
        raise AssertionError(
            'Country should not be identified, wrong type of '
            'location attributes for person entity Douglas Adams!')

    # with birth place ranked higher than residence, we expect
    # UK
    local_attributes = OrderedDict([
        ("P17", u"country"),
        ("P131", u"located in the administrative territorial entity"),
        ("P19", u"place of birth"),
        ("P551", u"residence"),
        ("P27", u"country of citizenship"),
        ("P159", u"headquarters location"),
        ("P740", u"location of formation"),
    ])
    country_found = ParseItemPage.get_country_from_any(
        entity_mock,
        local_attributes=local_attributes,
        languages=['en'])
    assert len(country_found) == 1
    assert country_found[0]['url'] == u'https://www.wikidata.org/wiki/Q145'
    assert country_found[0]['labels'] == {'en': 'United Kingdom'}

    # with the attributes reordered, i. e. residence before place of birth,
    # this should return the United States (last residence: Santa Barbara)
    local_attributes = OrderedDict([
        ("P17", u"country"),
        ("P131", u"located in the administrative territorial entity"),
        ("P551", u"residence"),
        ("P19", u"place of birth"),
        ("P27", u"country of citizenship"),
        ("P159", u"headquarters location"),
        ("P740", u"location of formation"),
    ])

    country_found = ParseItemPage.get_country_from_any(
        entity_mock,
        local_attributes=local_attributes,
        languages=['en'])
    assert len(country_found) == 1
    assert country_found == \
        [{'url': u'https://www.wikidata.org/wiki/Q30',
          'labels': {'en': u'United States of America'},
          'claim_id': u'q159288$0D0A08B9-BC36-4B45-B1CF-5547215DEFCB'
          # this claim is actually about Santa Barbara being in the US, not
          # about Adams per se
          }
         ]
Example #12
0
    def test_extract_literal_properties_freestanding(self):
        """Extract the English label of an entity that is not the item
        under test itself.

        The entity is obtained in two ways: as the target of a claim built
        from a hard-coded JSON statement, and as an ItemPage created from
        the entity URI of 'Q350'. In both cases the extracted English label
        must be 'Cambridge'.

        A MaxlagTimeoutError is downgraded to a warning.

        :return: None
        """
        try:
            # Claim fixture: a statement with four reference groups whose
            # main snak (see the end of the literal) is property P19
            # pointing at the item with numeric id 350.
            claim = Claim.fromJSON(
                DataSite("wikidata", "wikidata"), {
                    u'type':
                    u'statement',
                    u'references': [{
                        u'snaks': {
                            u'P248': [{
                                u'datatype': u'wikibase-item',
                                u'datavalue': {
                                    u'type': u'wikibase-entityid',
                                    u'value': {
                                        u'entity-type': u'item',
                                        u'numeric-id': 5375741
                                    }
                                },
                                u'property': u'P248',
                                u'snaktype': u'value'
                            }]
                        },
                        u'hash': u'355b56329b78db22be549dec34f2570ca61ca056',
                        u'snaks-order': [u'P248']
                    }, {
                        u'snaks': {
                            u'P1476': [{
                                u'datatype': u'monolingualtext',
                                u'datavalue': {
                                    u'type': u'monolingualtext',
                                    u'value': {
                                        u'text': u'Obituary: Douglas Adams',
                                        u'language': u'en'
                                    }
                                },
                                u'property': u'P1476',
                                u'snaktype': u'value'
                            }],
                            u'P407': [{
                                u'datatype': u'wikibase-item',
                                u'datavalue': {
                                    u'type': u'wikibase-entityid',
                                    u'value': {
                                        u'entity-type': u'item',
                                        u'numeric-id': 1860
                                    }
                                },
                                u'property': u'P407',
                                u'snaktype': u'value'
                            }],
                            u'P813': [{
                                u'datatype': u'time',
                                u'datavalue': {
                                    u'type': u'time',
                                    u'value': {
                                        u'after': 0,
                                        u'precision': 11,
                                        u'time':
                                        u'+00000002013-12-07T00:00:00Z',
                                        u'timezone': 0,
                                        u'calendarmodel':
                                        u'http://www.wikidata.org/entity/Q1985727',
                                        u'before': 0
                                    }
                                },
                                u'property': u'P813',
                                u'snaktype': u'value'
                            }],
                            u'P1433': [{
                                u'datatype': u'wikibase-item',
                                u'datavalue': {
                                    u'type': u'wikibase-entityid',
                                    u'value': {
                                        u'entity-type': u'item',
                                        u'numeric-id': 11148
                                    }
                                },
                                u'property': u'P1433',
                                u'snaktype': u'value'
                            }],
                            u'P854': [{
                                u'datatype': u'url',
                                u'datavalue': {
                                    u'type':
                                    u'string',
                                    u'value':
                                    u'http://www.theguardian.com/news/2001/may/15/guardianobituaries.books'
                                },
                                u'property': u'P854',
                                u'snaktype': u'value'
                            }],
                            u'P577': [{
                                u'datatype': u'time',
                                u'datavalue': {
                                    u'type': u'time',
                                    u'value': {
                                        u'after': 0,
                                        u'precision': 11,
                                        u'time':
                                        u'+00000002001-05-15T00:00:00Z',
                                        u'timezone': 0,
                                        u'calendarmodel':
                                        u'http://www.wikidata.org/entity/Q1985727',
                                        u'before': 0
                                    }
                                },
                                u'property': u'P577',
                                u'snaktype': u'value'
                            }],
                            u'P50': [{
                                u'datatype': u'wikibase-item',
                                u'datavalue': {
                                    u'type': u'wikibase-entityid',
                                    u'value': {
                                        u'entity-type': u'item',
                                        u'numeric-id': 18145749
                                    }
                                },
                                u'property': u'P50',
                                u'snaktype': u'value'
                            }]
                        },
                        u'hash':
                        u'3f4d26cf841e20630c969afc0e48e5e3ef0c5a49',
                        u'snaks-order': [
                            u'P854', u'P577', u'P813', u'P1433', u'P50',
                            u'P1476', u'P407'
                        ]
                    }, {
                        u'snaks': {
                            u'P123': [{
                                u'datatype': u'wikibase-item',
                                u'datavalue': {
                                    u'type': u'wikibase-entityid',
                                    u'value': {
                                        u'entity-type': u'item',
                                        u'numeric-id': 192621
                                    }
                                },
                                u'property': u'P123',
                                u'snaktype': u'value'
                            }],
                            u'P1476': [{
                                u'datatype': u'monolingualtext',
                                u'datavalue': {
                                    u'type': u'monolingualtext',
                                    u'value': {
                                        u'text':
                                        u"Hitch Hiker's Guide author Douglas Adams dies aged 49",
                                        u'language': u'en'
                                    }
                                },
                                u'property': u'P1476',
                                u'snaktype': u'value'
                            }],
                            u'P407': [{
                                u'datatype': u'wikibase-item',
                                u'datavalue': {
                                    u'type': u'wikibase-entityid',
                                    u'value': {
                                        u'entity-type': u'item',
                                        u'numeric-id': 1860
                                    }
                                },
                                u'property': u'P407',
                                u'snaktype': u'value'
                            }],
                            u'P813': [{
                                u'datatype': u'time',
                                u'datavalue': {
                                    u'type': u'time',
                                    u'value': {
                                        u'after': 0,
                                        u'precision': 11,
                                        u'time':
                                        u'+00000002015-01-03T00:00:00Z',
                                        u'timezone': 0,
                                        u'calendarmodel':
                                        u'http://www.wikidata.org/entity/Q1985727',
                                        u'before': 0
                                    }
                                },
                                u'property': u'P813',
                                u'snaktype': u'value'
                            }],
                            u'P854': [{
                                u'datatype': u'url',
                                u'datavalue': {
                                    u'type':
                                    u'string',
                                    u'value':
                                    u'http://www.telegraph.co.uk/news/uknews/1330072/Hitch-Hikers-Guide-author-Douglas-Adams-dies-aged-49.html'
                                },
                                u'property': u'P854',
                                u'snaktype': u'value'
                            }],
                            u'P577': [{
                                u'datatype': u'time',
                                u'datavalue': {
                                    u'type': u'time',
                                    u'value': {
                                        u'after': 0,
                                        u'precision': 11,
                                        u'time':
                                        u'+00000002001-05-13T00:00:00Z',
                                        u'timezone': 0,
                                        u'calendarmodel':
                                        u'http://www.wikidata.org/entity/Q1985727',
                                        u'before': 0
                                    }
                                },
                                u'property': u'P577',
                                u'snaktype': u'value'
                            }]
                        },
                        u'hash':
                        u'51a934797fd7f7d3ee91d4d541356d4c5974075b',
                        u'snaks-order': [
                            u'P1476', u'P577', u'P123', u'P407', u'P854',
                            u'P813'
                        ]
                    }, {
                        u'snaks': {
                            u'P248': [{
                                u'datatype': u'wikibase-item',
                                u'datavalue': {
                                    u'type': u'wikibase-entityid',
                                    u'value': {
                                        u'entity-type': u'item',
                                        u'numeric-id': 36578
                                    }
                                },
                                u'property': u'P248',
                                u'snaktype': u'value'
                            }],
                            u'P813': [{
                                u'datatype': u'time',
                                u'datavalue': {
                                    u'type': u'time',
                                    u'value': {
                                        u'after': 0,
                                        u'precision': 11,
                                        u'time':
                                        u'+00000002015-07-07T00:00:00Z',
                                        u'timezone': 0,
                                        u'calendarmodel':
                                        u'http://www.wikidata.org/entity/Q1985727',
                                        u'before': 0
                                    }
                                },
                                u'property': u'P813',
                                u'snaktype': u'value'
                            }],
                            u'P227': [{
                                u'datatype': u'external-id',
                                u'datavalue': {
                                    u'type': u'string',
                                    u'value': u'119033364'
                                },
                                u'property': u'P227',
                                u'snaktype': u'value'
                            }]
                        },
                        u'hash': u'a02f3a77ddd343e6b88be25696b055f5131c3d64',
                        u'snaks-order': [u'P248', u'P227', u'P813']
                    }],
                    # the statement proper: P19 -> item 350
                    u'mainsnak': {
                        u'datatype': u'wikibase-item',
                        u'datavalue': {
                            u'type': u'wikibase-entityid',
                            u'value': {
                                u'entity-type': u'item',
                                u'numeric-id': 350
                            }
                        },
                        u'property': u'P19',
                        u'snaktype': u'value'
                    },
                    u'id':
                    u'q42$3D284234-52BC-4DA3-83A3-7C39F84BA518',
                    u'rank':
                    u'normal'
                })
            # target_id = 'Q{}'.format(claim['mainsnak']['datavalue']['value']['numeric-id'])

            # First route: use the claim's target as the entity.
            target = claim.target
            # target = pywikibot.ItemPage.from_entity_uri(site=DataSite('wikidata', 'wikidata'), uri=target_id)
            result = ParseItemPage.extract_literal_properties(
                entity=target, languages=['en'], literals=['labels'])
            print(result)
            assert result['labels']['en'] == 'Cambridge'
            # Second route: build the ItemPage from the entity URI of Q350
            # and expect the same label.
            entity_id = 'Q350'
            target = ItemPage.from_entity_uri(
                site=DataSite('wikidata', 'wikidata'),
                uri='http://www.wikidata.org/entity' + '/' + entity_id)
            print(target)
            result = ParseItemPage.extract_literal_properties(
                entity=target, languages=['en'], literals=['labels'])
            print(result)
            assert result['labels']['en'] == 'Cambridge'
        except pywikibot.exceptions.MaxlagTimeoutError:
            # NOTE(review): the test passes with only a warning when this
            # error occurs - confirm that is the intended outcome.
            warnings.warn('External API unreachable')
Example #13
0
 def test_parseItemPage_filter(self):
     """Exercise the `param_filter` option of ParseItemPage.

     The filter allows entities to be filtered by a) presence of a certain
     parameter ('has_attr') or b) maximal/minimal value (use +/- prefixed
     strings for dates!). Four scenarios are run against the sample
     itempage: a 'has_attr' filter on P39 that must reject it, two 'min'
     filters on P569 with '+1952-01-01' that must accept it, and a 'min'
     filter on P569 with '+1955-01-01' that must reject it.

     A MaxlagTimeoutError is downgraded to a warning.
     """
     try:
         # 1) Rejection expected: DoesNotMatchFilterError must be raised.
         # NOTE(review): if DoesNotMatchFilterError derives from ValueError
         # this is fine, but the reverse matters below - confirm the
         # exception hierarchy.
         try:
             filter_params = {'person': [('P39', 'has_attr', None)]}
             parsed_with_filter = ParseItemPage(
                 itempage,
                 include_literals=True,
                 languages=['en', 'de', 'sv'],
                 resolve_country=False,
                 include_attribute_labels=False,
                 param_filter=filter_params,
                 entity_type='person').details
             raise ValueError(
                 'The sample itempage does not contain a claim "P39", '
                 'this should raise an error!')
         except DoesNotMatchFilterError:
             pass
         # 2) Acceptance expected: filtered and unfiltered details must be
         # equal.
         # NOTE(review): the error message below talks about claim "P19"
         # (place of birth) while the filter actually used is a 'min'
         # filter on P569 - one of the two looks like a copy/paste
         # leftover; confirm which was intended.
         # NOTE(review): only ValueError is re-raised with the explanatory
         # message; whether a DoesNotMatchFilterError ends up here depends
         # on its base class.
         try:
             filter_params = {'person': [('P569', 'min', '+1952-01-01')]}
             parsed_with_filter = ParseItemPage(
                 itempage,
                 include_literals=True,
                 languages=['en', 'de', 'sv'],
                 resolve_country=False,
                 include_attribute_labels=False,
                 param_filter=filter_params,
                 entity_type='person').details
             parsed_without_filter = ParseItemPage(
                 itempage,
                 include_literals=True,
                 languages=['en', 'de', 'sv'],
                 resolve_country=False,
                 include_attribute_labels=False,
                 entity_type='person').details
             assert parsed_with_filter == parsed_without_filter
         except ValueError:
             raise ValueError(
                 'The sample itempage does contain a claim "P19" '
                 '(place of birth), this should pass the filter')
         # 3) Acceptance expected: same filter as in 2), here only checking
         # that no ValueError is raised.
         try:
             filter_params = {'person': [('P569', 'min', '+1952-01-01')]}
             parsed_with_filter = ParseItemPage(
                 itempage,
                 include_literals=True,
                 languages=['en', 'de', 'sv'],
                 resolve_country=False,
                 include_attribute_labels=False,
                 param_filter=filter_params,
                 entity_type='person').details
         except ValueError:
             raise ValueError(
                 'Failed to identify Douglas Adams birth date as '
                 '>= 1952')
         # 4) Rejection expected: the later minimum date must make the
         # filter raise DoesNotMatchFilterError.
         try:
             filter_params = {'person': [('P569', 'min', '+1955-01-01')]}
             parsed_with_filter = ParseItemPage(
                 itempage,
                 include_literals=True,
                 languages=['en', 'de', 'sv'],
                 resolve_country=False,
                 include_attribute_labels=False,
                 param_filter=filter_params,
                 entity_type='person').details
             raise ValueError(
                 'Douglas Adams misidentified as being younger than '
                 '1955-01-01')
         except DoesNotMatchFilterError:
             pass
     except pywikibot.exceptions.MaxlagTimeoutError:
         warnings.warn('External API unreachable')
Example #14
0
    def test_get_country_from_location(self):
        """Check country resolution from differently ordered attributes.

        Two negative cases must raise a ValueError (no usable attribute on
        the person entity); two positive cases must resolve to exactly one
        country that depends on the order of the location attributes.

        The negative cases use try/except/else with an AssertionError:
        raising a ValueError inside the `try` to signal failure would be
        swallowed by the very `except ValueError` that is meant to catch
        the expected error.

        A MaxlagTimeoutError is downgraded to a warning.
        """
        try:
            # we expect a ValueError when the only local attribute tried is
            # P17 = country - Douglas Adams doesn't have a country attribute
            try:
                ParseItemPage.get_country_from_any(
                    self.entity_mock,
                    local_attributes=['P17'],
                    languages=['en'])
            except ValueError:
                pass
            else:
                raise AssertionError(
                    'Country should not be identified, entity contains no '
                    'attribute P17!')

            # still a ValueError with local attributes not applicable to persons
            local_attributes = OrderedDict([
                ("P17", u"country"),
                ("P131", u"located in the administrative territorial entity"),
                ("P159", u"headquarters location"),
                ("P740", u"location of formation"),
            ])
            try:
                ParseItemPage.get_country_from_any(
                    self.entity_mock,
                    local_attributes=local_attributes,
                    languages=['en'])
            except ValueError:
                pass
            else:
                raise AssertionError(
                    'Country should not be identified, wrong type of '
                    'location attributes for person entity Douglas Adams!')

            # with birth place ranked higher than residence, we expect
            # UK
            local_attributes = OrderedDict([
                ("P17", u"country"),
                ("P131", u"located in the administrative territorial entity"),
                ("P19", u"place of birth"),
                ("P551", u"residence"),
                ("P27", u"country of citizenship"),
                ("P159", u"headquarters location"),
                ("P740", u"location of formation"),
            ])
            country_found = ParseItemPage.get_country_from_any(
                self.entity_mock,
                local_attributes=local_attributes,
                languages=['en'])
            assert len(country_found) == 1
            assert country_found[0][
                'url'] == u'https://www.wikidata.org/wiki/Q145'
            assert country_found[0]['labels'] == {'en': 'United Kingdom'}

            # with the attributes reordered, i. e. residence before place of
            # birth, this should return the United States (last residence:
            # Santa Barbara)
            local_attributes = OrderedDict([
                ("P17", u"country"),
                ("P131", u"located in the administrative territorial entity"),
                ("P551", u"residence"),
                ("P19", u"place of birth"),
                ("P27", u"country of citizenship"),
                ("P159", u"headquarters location"),
                ("P740", u"location of formation"),
            ])

            country_found = ParseItemPage.get_country_from_any(
                self.entity_mock,
                local_attributes=local_attributes,
                languages=['en'])
            assert len(country_found) == 1
            assert country_found == \
                [{'url': u'https://www.wikidata.org/wiki/Q30',
                  'labels': {'en': u'United States of America'},
                  'claim_id': u'q159288$0D0A08B9-BC36-4B45-B1CF-5547215DEFCB'
                  # this claim is actually about Santa Barbara being in the US, not
                  # about Adams per se
                  }
                 ]
        except pywikibot.exceptions.MaxlagTimeoutError:
            warnings.warn('External API unreachable')
Example #15
0
def collect_attributes_from_wp_and_wd(itempage,
                                      languages,
                                      include_wikipedia=False,
                                      raise_on_no_wikipage=False,
                                      delay_wikipedia_retrieval=False,
                                      entity_type=None,
                                      **kwargs):
    """Collect details about one entity from Wikidata and, optionally,
    from Wikipedia.

    This is a generator: nothing is executed (and no exception is raised)
    until the result is iterated.

    :param itempage: ItemPage from which to collect information. Either an
            object exposing `text` and `id` attributes, or a dict-like
            object that already provides an 'id' key (and a 'sitelinks'
            key when `include_wikipedia` is set).
    :param languages: list of languages in which to include literals
            and Wikipedia information (2-character ISO codes). If empty,
            the sitelinks are not filtered by language.
    :param include_wikipedia: Include information from Wikipedia pages
            on entity (summary, revision id & timestamp, exact url)
    :param raise_on_no_wikipage: Controls whether an error is raised when
            no Wikipedia page in any of the requested languages can be
            identified for this entity. Only evaluated when
            `include_wikipedia` is True. If True, a ValueError is raised
            and no further meta-data about such entities is collected from
            WikiData. If False (default), meta-data is still collected.
    :param delay_wikipedia_retrieval: Return only the sitelinks of existing
            Wikipedia pages in the relevant languages (True) or make a call
            to the Wikipedia API directly (False). The default `False` makes
            for fairly expensive operations, where possible, True should be
            used.
    :param entity_type: passed through unchanged to `ParseItemPage`.
    :param kwargs: further keyword arguments, passed through unchanged to
            `ParseItemPage`.
    :returns: yields dictionaries of the collected details about this
            entity from both Wikipedia and Wikidata: one per result of
            `merge_with_wikipedia_by_language` when Wikipedia content was
            retrieved directly, otherwise a single dictionary. Nothing is
            yielded if parsing the entity fails with an unexpected error
            (the error is logged).
    :raises ValueError: if `raise_on_no_wikipage` is set and no relevant
            sitelink exists, if the linked Wikipedia article is a redirect
            or disambiguation page, or if `ParseItemPage` fails with an
            AssertionError.
    :raises DoesNotMatchFilterError: if `ParseItemPage` rejects the entity.
    """

    if hasattr(itempage, 'text'):
        page_meta = {'id': itempage.id}
        try:
            page_meta['timestamp'] = get_wikidata_timestamp(itempage)
        except AttributeError:
            # No timestamp could be read from this page: carry on without
            # one instead of failing on an unbound variable below.
            pass
        itempage = itempage.text
        itempage.update(page_meta)

    wikipedia_data = []
    if include_wikipedia:
        sitelinks = itempage['sitelinks']
        if languages:
            wanted_wikis = set(lang + 'wiki' for lang in languages)
            relevant_sitelinks = {
                wiki: content
                for wiki, content in sitelinks.items()
                if wiki in wanted_wikis
            }
        else:
            relevant_sitelinks = sitelinks
        try:
            sitelinks = {
                wiki: content['title']
                for wiki, content in relevant_sitelinks.items()
            }
        except TypeError:
            # The sitelink values cannot be indexed by 'title' (presumably
            # they already are plain title strings): use them as they are.
            sitelinks = relevant_sitelinks

        if not sitelinks and raise_on_no_wikipage:
            raise ValueError(
                'No Wikipedia page found in any of the requested languages.')

        if delay_wikipedia_retrieval:
            wikipedia_data = {
                wiki: sitelinks[wiki]
                for wiki in relevant_sitelinks
            }
            try:
                wikipedia_data = {
                    wiki: wikipedia_data[wiki]['title']
                    for wiki in wikipedia_data
                }
            except TypeError:
                # Values are not indexable by 'title'; keep them unchanged.
                pass

        elif sitelinks:
            try:
                wikipedia_data = wp_summary_from_wdid(itempage['id'],
                                                      languages=languages,
                                                      sitelinks=sitelinks)

            except (RedirectError, DisambiguationError):
                logger.warning(
                    'Failed to determine Wikipedia article: linked '
                    'article is redirect or disambiguation page.',
                    exc_info=True)
                raise ValueError(
                    'Linked Wikipedia article is a redirect or '
                    'disambiguation page.')
            except requests.exceptions.ConnectionError:
                # Best effort: continue with Wikidata information only.
                logger.warning(
                    'Failed to get info about entity %s from '
                    'Wikipedia API!',
                    itempage['id'],
                    exc_info=True)

    try:
        entity_extracted_details = {'url': wikipedia_data[0]['url']}
    except (KeyError, IndexError):
        # fallback to Wikidata ID if no Wikipedia page has been retrieved (yet)
        entity_extracted_details = {
            'url': 'https://www.wikidata.org/wiki/' + itempage['id']
        }
    if delay_wikipedia_retrieval:
        entity_extracted_details.update(wikipedia_data)
    elif include_wikipedia:
        for language in wikipedia_data:
            entity_extracted_details[language['language'] + 'wiki'] = language

    try:
        entity = ParseItemPage(itempage,
                               languages=languages,
                               entity_type=entity_type,
                               **kwargs)
    except AssertionError:
        raise ValueError(
            'No attributes of interest identified for entity {}'.format(
                itempage['id']))
    except DoesNotMatchFilterError:
        raise DoesNotMatchFilterError(
            'entity {} does not match filter criteria'.format(itempage['id']))
    except Exception as e:
        logger.warning(
            'Uncaught Exception: %s. Entity %s will not be processed.',
            e, itempage['id'],
            exc_info=True)
        # `entity` was never bound, so there is nothing to yield; stop here
        # instead of failing on the unbound name below.
        return
    entity_extracted_details.update(entity.details)
    if include_wikipedia and not delay_wikipedia_retrieval:
        for language_result in merge_with_wikipedia_by_language(
                entity=entity_extracted_details, languages=languages):
            yield language_result
    else:
        yield entity_extracted_details
Example #16
0
def test_extract_literal_properties_freestanding():
    """Check `ParseItemPage.extract_literal_properties` on a bare entity.

    The English label is extracted twice for item Q350 and must be
    'Cambridge' both times: first for the target of a claim rebuilt from
    JSON (main snak P19, "place of birth", pointing at numeric id 350),
    then for an ItemPage created directly from the entity URI.

    NOTE(review): this presumably needs access to the live Wikidata API.
    """
    claim = Claim.fromJSON(DataSite("wikidata", "wikidata"),
                           {u'type': u'statement', u'references': [{
                               u'snaks': {
                                   u'P248': [
                                       {
                                           u'datatype': u'wikibase-item',
                                           u'datavalue': {
                                               u'type': u'wikibase-entityid',
                                               u'value': {
                                                   u'entity-type': u'item',
                                                   u'numeric-id': 5375741}},
                                           u'property': u'P248',
                                           u'snaktype': u'value'}]},
                               u'hash': u'355b56329b78db22be549dec34f2570ca61ca056',
                               u'snaks-order': [
                                   u'P248']},
                               {
                               u'snaks': {
                                   u'P1476': [
                                       {
                                           u'datatype': u'monolingualtext',
                                           u'datavalue': {
                                               u'type': u'monolingualtext',
                                               u'value': {
                                                   u'text': u'Obituary: Douglas Adams',
                                                   u'language': u'en'}},
                                           u'property': u'P1476',
                                           u'snaktype': u'value'}],
                                   u'P407': [
                                       {
                                           u'datatype': u'wikibase-item',
                                           u'datavalue': {
                                               u'type': u'wikibase-entityid',
                                               u'value': {
                                                   u'entity-type': u'item',
                                                   u'numeric-id': 1860}},
                                           u'property': u'P407',
                                           u'snaktype': u'value'}],
                                   u'P813': [
                                       {
                                           u'datatype': u'time',
                                           u'datavalue': {
                                               u'type': u'time',
                                               u'value': {
                                                   u'after': 0,
                                                   u'precision': 11,
                                                   u'time': u'+00000002013-12-07T00:00:00Z',
                                                   u'timezone': 0,
                                                   u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                                   u'before': 0}},
                                           u'property': u'P813',
                                           u'snaktype': u'value'}],
                                   u'P1433': [
                                       {
                                           u'datatype': u'wikibase-item',
                                           u'datavalue': {
                                               u'type': u'wikibase-entityid',
                                               u'value': {
                                                   u'entity-type': u'item',
                                                   u'numeric-id': 11148}},
                                           u'property': u'P1433',
                                           u'snaktype': u'value'}],
                                   u'P854': [
                                       {
                                           u'datatype': u'url',
                                           u'datavalue': {
                                               u'type': u'string',
                                               u'value': u'http://www.theguardian.com/news/2001/may/15/guardianobituaries.books'},
                                           u'property': u'P854',
                                           u'snaktype': u'value'}],
                                   u'P577': [
                                       {
                                           u'datatype': u'time',
                                           u'datavalue': {
                                               u'type': u'time',
                                               u'value': {
                                                   u'after': 0,
                                                   u'precision': 11,
                                                   u'time': u'+00000002001-05-15T00:00:00Z',
                                                   u'timezone': 0,
                                                   u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                                   u'before': 0}},
                                           u'property': u'P577',
                                           u'snaktype': u'value'}],
                                   u'P50': [
                                       {
                                           u'datatype': u'wikibase-item',
                                           u'datavalue': {
                                               u'type': u'wikibase-entityid',
                                               u'value': {
                                                   u'entity-type': u'item',
                                                   u'numeric-id': 18145749}},
                                           u'property': u'P50',
                                           u'snaktype': u'value'}]},
                               u'hash': u'3f4d26cf841e20630c969afc0e48e5e3ef0c5a49',
                               u'snaks-order': [
                                   u'P854',
                                   u'P577',
                                   u'P813',
                                   u'P1433',
                                   u'P50',
                                   u'P1476',
                                   u'P407']},
                               {
                               u'snaks': {
                                   u'P123': [
                                       {
                                           u'datatype': u'wikibase-item',
                                           u'datavalue': {
                                               u'type': u'wikibase-entityid',
                                               u'value': {
                                                   u'entity-type': u'item',
                                                   u'numeric-id': 192621}},
                                           u'property': u'P123',
                                           u'snaktype': u'value'}],
                                   u'P1476': [
                                       {
                                           u'datatype': u'monolingualtext',
                                           u'datavalue': {
                                               u'type': u'monolingualtext',
                                               u'value': {
                                                   u'text': u"Hitch Hiker's Guide author Douglas Adams dies aged 49",
                                                   u'language': u'en'}},
                                           u'property': u'P1476',
                                           u'snaktype': u'value'}],
                                   u'P407': [
                                       {
                                           u'datatype': u'wikibase-item',
                                           u'datavalue': {
                                               u'type': u'wikibase-entityid',
                                               u'value': {
                                                   u'entity-type': u'item',
                                                   u'numeric-id': 1860}},
                                           u'property': u'P407',
                                           u'snaktype': u'value'}],
                                   u'P813': [
                                       {
                                           u'datatype': u'time',
                                           u'datavalue': {
                                               u'type': u'time',
                                               u'value': {
                                                   u'after': 0,
                                                   u'precision': 11,
                                                   u'time': u'+00000002015-01-03T00:00:00Z',
                                                   u'timezone': 0,
                                                   u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                                   u'before': 0}},
                                           u'property': u'P813',
                                           u'snaktype': u'value'}],
                                   u'P854': [
                                       {
                                           u'datatype': u'url',
                                           u'datavalue': {
                                               u'type': u'string',
                                               u'value': u'http://www.telegraph.co.uk/news/uknews/1330072/Hitch-Hikers-Guide-author-Douglas-Adams-dies-aged-49.html'},
                                           u'property': u'P854',
                                           u'snaktype': u'value'}],
                                   u'P577': [
                                       {
                                           u'datatype': u'time',
                                           u'datavalue': {
                                               u'type': u'time',
                                               u'value': {
                                                   u'after': 0,
                                                   u'precision': 11,
                                                   u'time': u'+00000002001-05-13T00:00:00Z',
                                                   u'timezone': 0,
                                                   u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                                   u'before': 0}},
                                           u'property': u'P577',
                                           u'snaktype': u'value'}]},
                               u'hash': u'51a934797fd7f7d3ee91d4d541356d4c5974075b',
                               u'snaks-order': [
                                   u'P1476',
                                   u'P577',
                                   u'P123',
                                   u'P407',
                                   u'P854',
                                   u'P813']},
                               {
                               u'snaks': {
                                   u'P248': [
                                       {
                                           u'datatype': u'wikibase-item',
                                           u'datavalue': {
                                               u'type': u'wikibase-entityid',
                                               u'value': {
                                                   u'entity-type': u'item',
                                                   u'numeric-id': 36578}},
                                           u'property': u'P248',
                                           u'snaktype': u'value'}],
                                   u'P813': [
                                       {
                                           u'datatype': u'time',
                                           u'datavalue': {
                                               u'type': u'time',
                                               u'value': {
                                                   u'after': 0,
                                                   u'precision': 11,
                                                   u'time': u'+00000002015-07-07T00:00:00Z',
                                                   u'timezone': 0,
                                                   u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                                   u'before': 0}},
                                           u'property': u'P813',
                                           u'snaktype': u'value'}],
                                   u'P227': [
                                       {
                                           u'datatype': u'external-id',
                                           u'datavalue': {
                                               u'type': u'string',
                                               u'value': u'119033364'},
                                           u'property': u'P227',
                                           u'snaktype': u'value'}]},
                               u'hash': u'a02f3a77ddd343e6b88be25696b055f5131c3d64',
                               u'snaks-order': [
                                   u'P248',
                                   u'P227',
                                   u'P813']}],
                            u'mainsnak': {
                               u'datatype': u'wikibase-item',
                               u'datavalue': {
                                   u'type': u'wikibase-entityid',
                                   u'value': {
                                       u'entity-type': u'item',
                                       u'numeric-id': 350}},
                               u'property': u'P19',
                               u'snaktype': u'value'},
                            u'id': u'q42$3D284234-52BC-4DA3-83A3-7C39F84BA518',
                            u'rank': u'normal'})

    # Case 1: the entity is the target of the claim's main snak.
    target = claim.target
    result = ParseItemPage.extract_literal_properties(
        entity=target, languages=['en'], literals=['labels'])
    # print() with a single argument behaves identically on Python 2 and 3.
    print(result)
    assert result['labels']['en'] == 'Cambridge'

    # Case 2: the same entity, built directly from its entity URI.
    entity_id = 'Q350'
    target = ItemPage.from_entity_uri(
        site=DataSite('wikidata', 'wikidata'), uri='http://www.wikidata.org/entity' + '/' + entity_id)
    print(target)
    result = ParseItemPage.extract_literal_properties(
        entity=target, languages=['en'], literals=['labels'])
    print(result)
    assert result['labels']['en'] == 'Cambridge'
Example #17
0
    def test_parseItemPage_all(self):
        """Parse the module-level `itempage` with three option sets and
        compare the resulting `details`.

        * With and without attribute labels: both results must expose the
          same keys and identical literals, and the labelled result must
          match `sample_output` once timestamps have been stripped.
        * With country resolution: a 'country' entry pointing at the
          United Kingdom (Q145) is expected.

        A `MaxlagTimeoutError` from pywikibot only triggers a warning.
        """
        try:
            import pprint
            entity = itempage

            details_plain = ParseItemPage(
                entity,
                include_literals=True,
                languages=['en', 'de', 'sv'],
                resolve_country=False,
                include_attribute_labels=False).details

            details_labelled = ParseItemPage(
                entity,
                include_literals=True,
                languages=['en', 'de', 'sv'],
                resolve_country=False,
                include_attribute_labels=True).details

            # Attribute labels must not add or remove any top-level key.
            assert set(details_labelled.keys()) == set(details_plain.keys())

            # Without attribute labels, no entry may contain 'labels'.
            for attribute in details_plain.values():
                assert 'labels' not in attribute

            # The entity's own literals do not depend on the label option.
            for literal in ('labels', 'descriptions', 'aliases'):
                assert details_labelled[literal] == details_plain[literal]
            pprint.pprint(details_labelled)

            actual = self.result_without_timestamp(details_labelled)
            expected = self.result_without_timestamp(sample_output)
            assert actual == expected

            # Every value of an attribute (P18 excepted) carries labels.
            for attribute in details_labelled.values():
                if 'values' not in attribute or 'P18' in attribute['url']:
                    continue
                for value in attribute['values']:
                    assert 'labels' in value

            details_with_country = ParseItemPage(
                entity,
                include_literals=False,
                wd_parameters={},
                languages=['en', 'de', 'sv'],
                resolve_country=True,
                include_attribute_labels=True,
                qualifiers_of_interest=[],
                entity_type='person',
            ).details
            assert 'country' in details_with_country
            pprint.pprint(details_with_country['country'])
            pprint.pprint(details_with_country)

            expected_country_value = {
                'claim_id': u'q350$8E72D3A5-A067-47CB-AF45-C73ED7CFFF9E',
                'derived': True,
                'labels': {
                    'de': u'Vereinigtes K\xf6nigreich',
                    'en': u'United Kingdom',
                    'sv': u'Storbritannien'
                },
                'url': u'https://www.wikidata.org/wiki/Q145'
            }
            expected_country = {
                'url': 'https://www.wikidata.org/wiki/Property:P17',
                'values': [expected_country_value]
            }
            assert details_with_country['country'] == expected_country
        except pywikibot.exceptions.MaxlagTimeoutError:
            warnings.warn('External API unreachable')