def extract_country_or_none(entity_extract, location_attributes=None):
    """Enrich a processed item in dict form (output of
    eWRT.ws.wikidata.extract_meta.collect_attributes_from_wp_and_wd)
    with country information using whatever local attribute is available.

    :param entity_extract: input dict with attribute:value pairs
    """
    if location_attributes is None:
        location_attributes = local_attributes
    try:
        entity_id = entity_extract['wikidata_id']
    except TypeError:
        entity_id = entity_extract.id
    entity = pywikibot.ItemPage(WIKIDATA_SITE, title=entity_id)
    try:
        countries_found = ParseItemPage.get_country_from_any(
            entity,
            local_attributes=location_attributes,
            languages=RELEVANT_LANGUAGES)
        if len(countries_found) > 1:
            warnings.warn(
                'More than one country found for entity {}'.format(entity_id))
        return countries_found
    except ValueError:
        warnings.warn(
            'Unable to determine country for entity {}!'.format(entity_id))
        return None
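# Hedged usage sketch (not part of the original module): assumes the
# module-level defaults `WIKIDATA_SITE`, `RELEVANT_LANGUAGES` and
# `local_attributes` used above, plus network access via pywikibot.
def example_country_with_custom_precedence():
    """Sketch: resolve the country for Douglas Adams (Q42), preferring
    country of citizenship over place of birth. The OrderedDict order
    defines the precedence of the location attributes tried."""
    from collections import OrderedDict
    citizenship_first = OrderedDict([
        ("P27", u"country of citizenship"),
        ("P19", u"place of birth"),
    ])
    return extract_country_or_none({'wikidata_id': 'Q42'},
                                   location_attributes=citizenship_first)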
def test_attribute_preferred_value():
    """test_complete_claim_details already implicitly tests that a preferred
    value is marked when present. This test focuses on the correct behaviour
    when this is not the case: a result without a 'preferred' key from
    complete_claim_details, and an error when attribute_preferred_value is
    called directly.
    """
    names = entity_mock.claims['P735']
    names_new = [copy.copy(name) for name in names]
    for name in names_new:
        name.rank = 'normal'
    names_without_preferred = ParseItemPage.complete_claim_details(
        'P735', names_new, ['en'], ['labels'])
    assert 'preferred' not in names_without_preferred
    try:
        attribute_preferred_value(names_new)
        raise AssertionError(
            'This should raise a ValueError: no item marked preferred!')
    except ValueError:
        pass
def test_extract_literal_properties(literal_type, language, expected):
    """Test that the extraction of literals (labels, descriptions),
    parametrized for language and literal type, returns the expected and
    only the expected result."""
    result = ParseItemPage.extract_literal_properties(entity=entity_mock,
                                                      languages=[language],
                                                      literals=[literal_type])
    assert result == expected
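# Hedged sketch (assumption, not in the original module): a pytest
# parametrization like the following presumably drives
# test_extract_literal_properties; the expected dicts must match whatever
# `entity_mock` defines, so the values below are placeholders rather than
# verified fixtures.
#
# @pytest.mark.parametrize('literal_type,language,expected', [
#     ('labels', 'en', {'labels': {'en': u'Douglas Adams'}}),
#     ('descriptions', 'en', {'descriptions': {'en': u'...'}}),
# ])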
def item_with_country(wikipedia_title, language):
    """Retrieve the Wikidata item for a Wikipedia title and attach the
    English label of its (derived) country as `itempage['country']`."""
    itempage = wikidata_from_wptitle(wikipedia_title, language=language)
    country = extract_country_or_none(itempage)
    itempage['country'] = ParseItemPage.extract_literal_properties(
        country, RELEVANT_LANGUAGES)['labels']['en']
    return itempage
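# Hedged usage sketch (not part of the original module; assumes network
# access and that `wikidata_from_wptitle` resolves English Wikipedia titles):
def example_item_with_country():
    """Sketch: enrich the item for the 'Douglas Adams' article with the
    English label of its derived country."""
    enriched = item_with_country('Douglas Adams', 'en')
    return enriched['country']  # expected: u'United Kingdom'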
def test_complete_claim_details(self):
    """With the given name parameter ('P735') as an example, test that
    the result is the expected result, including a list of values (first
    and second name), one marked as preferred (the first name)."""
    try:
        entity = self.entity_mock
        names = entity.text['claims']['P735']
        names_result = ParseItemPage.complete_claim_details(
            'P735', names, ['en'], ['labels'])
        assert names_result == self.expected_names_result
    except pywikibot.exceptions.MaxlagTimeoutError:
        warnings.warn('External API unreachable')
def test_complete_claim_details():
    """With the given name parameter ('P735') as an example, test that
    the result is the expected result, including a list of values (first
    and second name), one marked as preferred (the first name)."""
    entity = entity_mock
    names = entity.text['claims']['P735']
    names_result = ParseItemPage.complete_claim_details(
        'P735', names, ['en'], ['labels'])
    assert names_result == expected_names_result
def test_parseItemPage_all():
    entity = itempage
    import pprint
    parsed_without_attribute_labels = ParseItemPage(
        entity,
        include_literals=True,
        languages=['en', 'de', 'sv'],
        resolve_country=False,
        include_attribute_labels=False).details
    parsed_with_attribute_labels = ParseItemPage(
        entity,
        include_literals=True,
        languages=['en', 'de', 'sv'],
        resolve_country=False,
        include_attribute_labels=True).details
    assert set(parsed_with_attribute_labels.keys()) == set(
        parsed_without_attribute_labels.keys())
    assert not any(
        'labels' in val for val in parsed_without_attribute_labels.values())
    # assert any(('labels' in val for val in parsed_with_attribute_labels.values()))
    assert all(parsed_with_attribute_labels[literal] ==
               parsed_without_attribute_labels[literal]
               for literal in ('labels', 'descriptions', 'aliases'))
    pprint.pprint(parsed_with_attribute_labels)
    assert result_without_timestamp(
        parsed_with_attribute_labels) == result_without_timestamp(sample_output)
    for val in parsed_with_attribute_labels.values():
        if 'values' in val and 'P18' not in val['url']:
            assert all('labels' in sub_val for sub_val in val['values'])
    parsed_with_country = ParseItemPage(
        entity,
        include_literals=False,
        wd_parameters={},
        languages=['en', 'de', 'sv'],
        resolve_country=True,
        include_attribute_labels=True,
        qualifiers_of_interest=[],
        entity_type='person',
    ).details
    assert 'country' in parsed_with_country
    pprint.pprint(parsed_with_country['country'])
    pprint.pprint(parsed_with_country)
    assert parsed_with_country['country'] == {
        'url': 'https://www.wikidata.org/wiki/Property:P17',
        'values': [
            {'claim_id': u'q350$8E72D3A5-A067-47CB-AF45-C73ED7CFFF9E',
             'derived': True,
             'labels': {'de': u'Vereinigtes K\xf6nigreich',
                        'en': u'United Kingdom',
                        'sv': u'Storbritannien'},
             'url': u'https://www.wikidata.org/wiki/Q145'}]
    }
def extract_country_or_none(entity_extract):
    """Enrich a processed item in dict form (output of
    eWRT.ws.wikidata.extract_meta.collect_attributes_from_wd_and_wd)
    with country information using whatever local attribute is available.

    :param entity_extract: input dict with attribute:value pairs
    """
    try:
        entity_id = entity_extract['wikidata_id']
    except TypeError:
        entity_id = entity_extract.id
    entity = pywikibot.ItemPage(WIKIDATA_SITE, title=entity_id)
    try:
        countries_found = ParseItemPage.get_country_from_any(
            entity,
            local_attributes=local_attributes,
            languages=RELEVANT_LANGUAGES,
            literals=['labels'])
        if len(countries_found) > 1:
            warnings.warn(
                'More than one country found for entity {}'.format(entity_id))
        return countries_found
    except ValueError:
        warnings.warn(
            'Unable to determine country for entity {}!'.format(entity_id))
        return None
def collect_attributes_from_wd_and_wd(itempage, languages, wd_parameters,
                                      include_literals=True):
    """
    :param itempage: ItemPage from which to collect information
    :param languages: list of languages in which to include literals
        and Wikipedia information
    :param wd_parameters: list of Wikidata properties (Pxxx codes) to be
        included, if present
    :param include_literals: include properties and alternate names. If
        False, only labels are included.
    :returns: a dictionary of the collected details about this entity from
        both Wikipedia and Wikidata.
    """
    # with open('wd_dump.json', 'w') as dump:
    # itempage.get()
    wikipedia_data = wp_summary_from_wdid(itempage.id,
                                          languages=languages,
                                          sitelinks=itempage.sitelinks)
    if not wikipedia_data:
        raise ValueError
    # use the Wikipedia article in the first language found as the entity's
    # unique preferred `url`.
    entity_extracted_details = {'url': wikipedia_data[0]['url']}
    for language in wikipedia_data:
        entity_extracted_details[language['language'] + 'wiki'] = language
    entity = ParseItemPage(itempage,
                           include_literals=include_literals,
                           claims_of_interest=wd_parameters,
                           languages=languages)
    entity_extracted_details.update(entity.details)
    entity_extracted_details['wikidata_id'] = itempage.id
    return entity_extracted_details
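# Hedged usage sketch (illustrative, not part of the original module):
# assumes a live pywikibot session; P569 is Wikidata's 'date of birth'.
def example_collect_douglas_adams():
    """Sketch: collect English labels, Wikipedia summary and birth date
    for Douglas Adams (Q42)."""
    import pywikibot
    site = pywikibot.Site('wikidata', 'wikidata')
    item = pywikibot.ItemPage(site.data_repository(), 'Q42')
    item.get()  # fetch claims, labels and sitelinks before parsing
    return collect_attributes_from_wd_and_wd(item,
                                             languages=['en'],
                                             wd_parameters=['P569'],
                                             include_literals=False)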
def test_get_country_from_location():
    # We expect a ValueError when the only local attribute tried is
    # P17 = country - Douglas Adams doesn't have a country attribute.
    # (The sentinel raised when no error occurs is an AssertionError, so
    # that it is not swallowed by the `except ValueError` below.)
    try:
        country_found = ParseItemPage.get_country_from_any(
            entity_mock,
            local_attributes=['P17'],
            languages=['en'])
        raise AssertionError(
            'Country should not be identified, entity contains no '
            'attribute P17!')
    except ValueError:
        pass
    # still a ValueError with local attributes not applicable to persons
    local_attributes = OrderedDict([
        ("P17", u"country"),
        ("P131", u"located in the administrative territorial entity"),
        ("P159", u"headquarters location"),
        ("P740", u"location of formation"),
    ])
    try:
        country_found = ParseItemPage.get_country_from_any(
            entity_mock,
            local_attributes=local_attributes,
            languages=['en'])
        raise AssertionError(
            'Country should not be identified, wrong type of '
            'location attributes for person entity Douglas Adams!')
    except ValueError:
        pass
    # with birth place ranked higher than residence, we expect the UK
    local_attributes = OrderedDict([
        ("P17", u"country"),
        ("P131", u"located in the administrative territorial entity"),
        ("P19", u"place of birth"),
        ("P551", u"residence"),
        ("P27", u"country of citizenship"),
        ("P159", u"headquarters location"),
        ("P740", u"location of formation"),
    ])
    country_found = ParseItemPage.get_country_from_any(
        entity_mock,
        local_attributes=local_attributes,
        languages=['en'])
    assert len(country_found) == 1
    assert country_found[0]['url'] == u'https://www.wikidata.org/wiki/Q145'
    assert country_found[0]['labels'] == {'en': 'United Kingdom'}
    # with the attributes reordered, i.e. residence before place of birth,
    # this should return the United States (last residence: Santa Barbara)
    local_attributes = OrderedDict([
        ("P17", u"country"),
        ("P131", u"located in the administrative territorial entity"),
        ("P551", u"residence"),
        ("P19", u"place of birth"),
        ("P27", u"country of citizenship"),
        ("P159", u"headquarters location"),
        ("P740", u"location of formation"),
    ])
    country_found = ParseItemPage.get_country_from_any(
        entity_mock,
        local_attributes=local_attributes,
        languages=['en'])
    assert len(country_found) == 1
    assert country_found == [
        {'url': u'https://www.wikidata.org/wiki/Q30',
         'labels': {'en': u'United States of America'},
         'claim_id': u'q159288$0D0A08B9-BC36-4B45-B1CF-5547215DEFCB'
         # this claim is actually about Santa Barbara being in the US,
         # not about Adams per se
         }
    ]
def test_extract_literal_properties_freestanding(self):
    """Resolve the target of a claim (Douglas Adams's place of birth,
    P19 -> Cambridge, Q350) and extract its English label, once from the
    claim's target and once from an ItemPage built from the entity URI."""
    try:
        claim = Claim.fromJSON(DataSite("wikidata", "wikidata"), {
            u'type': u'statement',
            u'references': [
                {u'snaks': {
                    u'P248': [{u'datatype': u'wikibase-item',
                               u'datavalue': {
                                   u'type': u'wikibase-entityid',
                                   u'value': {u'entity-type': u'item',
                                              u'numeric-id': 5375741}},
                               u'property': u'P248',
                               u'snaktype': u'value'}]},
                 u'hash': u'355b56329b78db22be549dec34f2570ca61ca056',
                 u'snaks-order': [u'P248']},
                {u'snaks': {
                    u'P1476': [{u'datatype': u'monolingualtext',
                                u'datavalue': {
                                    u'type': u'monolingualtext',
                                    u'value': {
                                        u'text': u'Obituary: Douglas Adams',
                                        u'language': u'en'}},
                                u'property': u'P1476',
                                u'snaktype': u'value'}],
                    u'P407': [{u'datatype': u'wikibase-item',
                               u'datavalue': {
                                   u'type': u'wikibase-entityid',
                                   u'value': {u'entity-type': u'item',
                                              u'numeric-id': 1860}},
                               u'property': u'P407',
                               u'snaktype': u'value'}],
                    u'P813': [{u'datatype': u'time',
                               u'datavalue': {
                                   u'type': u'time',
                                   u'value': {
                                       u'after': 0,
                                       u'precision': 11,
                                       u'time': u'+00000002013-12-07T00:00:00Z',
                                       u'timezone': 0,
                                       u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                       u'before': 0}},
                               u'property': u'P813',
                               u'snaktype': u'value'}],
                    u'P1433': [{u'datatype': u'wikibase-item',
                                u'datavalue': {
                                    u'type': u'wikibase-entityid',
                                    u'value': {u'entity-type': u'item',
                                               u'numeric-id': 11148}},
                                u'property': u'P1433',
                                u'snaktype': u'value'}],
                    u'P854': [{u'datatype': u'url',
                               u'datavalue': {
                                   u'type': u'string',
                                   u'value': u'http://www.theguardian.com/news/2001/may/15/guardianobituaries.books'},
                               u'property': u'P854',
                               u'snaktype': u'value'}],
                    u'P577': [{u'datatype': u'time',
                               u'datavalue': {
                                   u'type': u'time',
                                   u'value': {
                                       u'after': 0,
                                       u'precision': 11,
                                       u'time': u'+00000002001-05-15T00:00:00Z',
                                       u'timezone': 0,
                                       u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                       u'before': 0}},
                               u'property': u'P577',
                               u'snaktype': u'value'}],
                    u'P50': [{u'datatype': u'wikibase-item',
                              u'datavalue': {
                                  u'type': u'wikibase-entityid',
                                  u'value': {u'entity-type': u'item',
                                             u'numeric-id': 18145749}},
                              u'property': u'P50',
                              u'snaktype': u'value'}]},
                 u'hash': u'3f4d26cf841e20630c969afc0e48e5e3ef0c5a49',
                 u'snaks-order': [u'P854', u'P577', u'P813', u'P1433',
                                  u'P50', u'P1476', u'P407']},
                {u'snaks': {
                    u'P123': [{u'datatype': u'wikibase-item',
                               u'datavalue': {
                                   u'type': u'wikibase-entityid',
                                   u'value': {u'entity-type': u'item',
                                              u'numeric-id': 192621}},
                               u'property': u'P123',
                               u'snaktype': u'value'}],
                    u'P1476': [{u'datatype': u'monolingualtext',
                                u'datavalue': {
                                    u'type': u'monolingualtext',
                                    u'value': {
                                        u'text': u"Hitch Hiker's Guide author Douglas Adams dies aged 49",
                                        u'language': u'en'}},
                                u'property': u'P1476',
                                u'snaktype': u'value'}],
                    u'P407': [{u'datatype': u'wikibase-item',
                               u'datavalue': {
                                   u'type': u'wikibase-entityid',
                                   u'value': {u'entity-type': u'item',
                                              u'numeric-id': 1860}},
                               u'property': u'P407',
                               u'snaktype': u'value'}],
                    u'P813': [{u'datatype': u'time',
                               u'datavalue': {
                                   u'type': u'time',
                                   u'value': {
                                       u'after': 0,
                                       u'precision': 11,
                                       u'time': u'+00000002015-01-03T00:00:00Z',
                                       u'timezone': 0,
                                       u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                       u'before': 0}},
                               u'property': u'P813',
                               u'snaktype': u'value'}],
                    u'P854': [{u'datatype': u'url',
                               u'datavalue': {
                                   u'type': u'string',
                                   u'value': u'http://www.telegraph.co.uk/news/uknews/1330072/Hitch-Hikers-Guide-author-Douglas-Adams-dies-aged-49.html'},
                               u'property': u'P854',
                               u'snaktype': u'value'}],
                    u'P577': [{u'datatype': u'time',
                               u'datavalue': {
                                   u'type': u'time',
                                   u'value': {
                                       u'after': 0,
                                       u'precision': 11,
                                       u'time': u'+00000002001-05-13T00:00:00Z',
                                       u'timezone': 0,
                                       u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                       u'before': 0}},
                               u'property': u'P577',
                               u'snaktype': u'value'}]},
                 u'hash': u'51a934797fd7f7d3ee91d4d541356d4c5974075b',
                 u'snaks-order': [u'P1476', u'P577', u'P123', u'P407',
                                  u'P854', u'P813']},
                {u'snaks': {
                    u'P248': [{u'datatype': u'wikibase-item',
                               u'datavalue': {
                                   u'type': u'wikibase-entityid',
                                   u'value': {u'entity-type': u'item',
                                              u'numeric-id': 36578}},
                               u'property': u'P248',
                               u'snaktype': u'value'}],
                    u'P813': [{u'datatype': u'time',
                               u'datavalue': {
                                   u'type': u'time',
                                   u'value': {
                                       u'after': 0,
                                       u'precision': 11,
                                       u'time': u'+00000002015-07-07T00:00:00Z',
                                       u'timezone': 0,
                                       u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                       u'before': 0}},
                               u'property': u'P813',
                               u'snaktype': u'value'}],
                    u'P227': [{u'datatype': u'external-id',
                               u'datavalue': {u'type': u'string',
                                              u'value': u'119033364'},
                               u'property': u'P227',
                               u'snaktype': u'value'}]},
                 u'hash': u'a02f3a77ddd343e6b88be25696b055f5131c3d64',
                 u'snaks-order': [u'P248', u'P227', u'P813']}],
            u'mainsnak': {u'datatype': u'wikibase-item',
                          u'datavalue': {
                              u'type': u'wikibase-entityid',
                              u'value': {u'entity-type': u'item',
                                         u'numeric-id': 350}},
                          u'property': u'P19',
                          u'snaktype': u'value'},
            u'id': u'q42$3D284234-52BC-4DA3-83A3-7C39F84BA518',
            u'rank': u'normal'})
        # target_id = 'Q{}'.format(claim['mainsnak']['datavalue']['value']['numeric-id'])
        target = claim.target
        # target = pywikibot.ItemPage.from_entity_uri(site=DataSite('wikidata', 'wikidata'), uri=target_id)
        result = ParseItemPage.extract_literal_properties(
            entity=target, languages=['en'], literals=['labels'])
        print(result)
        assert result['labels']['en'] == 'Cambridge'
        entity_id = 'Q350'
        target = ItemPage.from_entity_uri(
            site=DataSite('wikidata', 'wikidata'),
            uri='http://www.wikidata.org/entity' + '/' + entity_id)
        print(target)
        result = ParseItemPage.extract_literal_properties(
            entity=target, languages=['en'], literals=['labels'])
        print(result)
        assert result['labels']['en'] == 'Cambridge'
    except pywikibot.exceptions.MaxlagTimeoutError:
        warnings.warn('External API unreachable')
def test_parseItemPage_filter(self):
    """Filtering method: allows filtering entities by a) presence of a
    certain parameter or b) maximal/minimal value (use +/- prefixed
    strings for dates!). See the illustrative filter spec after this
    test."""
    try:
        try:
            filter_params = {'person': [('P39', 'has_attr', None)]}
            parsed_with_filter = ParseItemPage(
                itempage,
                include_literals=True,
                languages=['en', 'de', 'sv'],
                resolve_country=False,
                include_attribute_labels=False,
                param_filter=filter_params,
                entity_type='person').details
            raise ValueError(
                'The sample itempage does not contain a claim "P39", '
                'this should raise an error!')
        except DoesNotMatchFilterError:
            pass
        try:
            filter_params = {'person': [('P569', 'min', '+1952-01-01')]}
            parsed_with_filter = ParseItemPage(
                itempage,
                include_literals=True,
                languages=['en', 'de', 'sv'],
                resolve_country=False,
                include_attribute_labels=False,
                param_filter=filter_params,
                entity_type='person').details
            parsed_without_filter = ParseItemPage(
                itempage,
                include_literals=True,
                languages=['en', 'de', 'sv'],
                resolve_country=False,
                include_attribute_labels=False,
                entity_type='person').details
            assert parsed_with_filter == parsed_without_filter
        except ValueError:
            raise ValueError(
                'The sample itempage does contain a claim "P19" '
                '(place of birth), this should pass the filter')
        try:
            filter_params = {'person': [('P569', 'min', '+1952-01-01')]}
            parsed_with_filter = ParseItemPage(
                itempage,
                include_literals=True,
                languages=['en', 'de', 'sv'],
                resolve_country=False,
                include_attribute_labels=False,
                param_filter=filter_params,
                entity_type='person').details
        except ValueError:
            raise ValueError(
                'Failed to identify Douglas Adams birth date as '
                '>= 1952')
        try:
            filter_params = {'person': [('P569', 'min', '+1955-01-01')]}
            parsed_with_filter = ParseItemPage(
                itempage,
                include_literals=True,
                languages=['en', 'de', 'sv'],
                resolve_country=False,
                include_attribute_labels=False,
                param_filter=filter_params,
                entity_type='person').details
            raise ValueError(
                'Douglas Adams misidentified as being younger than '
                '1955-01-01')
        except DoesNotMatchFilterError:
            pass
    except pywikibot.exceptions.MaxlagTimeoutError:
        warnings.warn('External API unreachable')
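# Hedged illustration (assumption, not part of the original tests): a
# `param_filter` maps an entity type to (property, operator, value)
# triples, as exercised above; whether several triples per type are
# combined is an assumption here. Dates are compared as +/- prefixed
# strings.
EXAMPLE_PERSON_FILTER = {
    'person': [
        ('P106', 'has_attr', None),      # require an occupation claim
        ('P569', 'min', '+1952-01-01'),  # born 1952-01-01 or later
    ]
}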
def test_get_country_from_location(self):
    # We expect a ValueError when the only local attribute tried is
    # P17 = country - Douglas Adams doesn't have a country attribute.
    # (The sentinel raised when no error occurs is an AssertionError, so
    # that it is not swallowed by the `except ValueError` below.)
    try:
        try:
            country_found = ParseItemPage.get_country_from_any(
                self.entity_mock,
                local_attributes=['P17'],
                languages=['en'])
            raise AssertionError(
                'Country should not be identified, entity contains no '
                'attribute P17!')
        except ValueError:
            pass
        # still a ValueError with local attributes not applicable to persons
        local_attributes = OrderedDict([
            ("P17", u"country"),
            ("P131", u"located in the administrative territorial entity"),
            ("P159", u"headquarters location"),
            ("P740", u"location of formation"),
        ])
        try:
            country_found = ParseItemPage.get_country_from_any(
                self.entity_mock,
                local_attributes=local_attributes,
                languages=['en'])
            raise AssertionError(
                'Country should not be identified, wrong type of '
                'location attributes for person entity Douglas Adams!')
        except ValueError:
            pass
        # with birth place ranked higher than residence, we expect the UK
        local_attributes = OrderedDict([
            ("P17", u"country"),
            ("P131", u"located in the administrative territorial entity"),
            ("P19", u"place of birth"),
            ("P551", u"residence"),
            ("P27", u"country of citizenship"),
            ("P159", u"headquarters location"),
            ("P740", u"location of formation"),
        ])
        country_found = ParseItemPage.get_country_from_any(
            self.entity_mock,
            local_attributes=local_attributes,
            languages=['en'])
        assert len(country_found) == 1
        assert country_found[0][
            'url'] == u'https://www.wikidata.org/wiki/Q145'
        assert country_found[0]['labels'] == {'en': 'United Kingdom'}
        # with the attributes reordered, i.e. residence before place of
        # birth, this should return the United States (last residence:
        # Santa Barbara)
        local_attributes = OrderedDict([
            ("P17", u"country"),
            ("P131", u"located in the administrative territorial entity"),
            ("P551", u"residence"),
            ("P19", u"place of birth"),
            ("P27", u"country of citizenship"),
            ("P159", u"headquarters location"),
            ("P740", u"location of formation"),
        ])
        country_found = ParseItemPage.get_country_from_any(
            self.entity_mock,
            local_attributes=local_attributes,
            languages=['en'])
        assert len(country_found) == 1
        assert country_found == [
            {'url': u'https://www.wikidata.org/wiki/Q30',
             'labels': {'en': u'United States of America'},
             'claim_id': u'q159288$0D0A08B9-BC36-4B45-B1CF-5547215DEFCB'
             # this claim is actually about Santa Barbara being in the US,
             # not about Adams per se
             }
        ]
    except pywikibot.exceptions.MaxlagTimeoutError:
        warnings.warn('External API unreachable')
def collect_attributes_from_wp_and_wd(itempage, languages,
                                      include_wikipedia=False,
                                      raise_on_no_wikipage=False,
                                      delay_wikipedia_retrieval=False,
                                      entity_type=None,
                                      **kwargs):
    """
    :param itempage: ItemPage from which to collect information
    :param languages: list of languages in which to include literals
        and Wikipedia information (2-character ISO codes).
    :param raise_on_no_wikipage: controls whether an error is raised when
        no Wikipedia page in any of the requested languages can be
        identified for this entity. If True, no further meta-data about
        such entities is collected from Wikidata; if False (the default),
        meta-data is still collected.
    :param include_wikipedia: include information from Wikipedia pages
        about the entity (summary, revision id & timestamp, exact url).
    :param delay_wikipedia_retrieval: return only the sitelinks of existing
        Wikipedia pages in the relevant languages (True) or make a call to
        the Wikipedia API directly (False). The default False makes for
        fairly expensive operations; where possible, True should be used.
    :returns: a generator of dictionaries of the collected details about
        this entity from both Wikipedia and Wikidata.
    """
    if hasattr(itempage, 'text'):
        id = itempage.id
        try:
            timestamp = get_wikidata_timestamp(itempage)
        except AttributeError:
            timestamp = None
        itempage = itempage.text
        itempage.update({'id': id, 'timestamp': timestamp})
    wikipedia_data = []
    if include_wikipedia:
        sitelinks = itempage['sitelinks']
        if languages:
            relevant_sitelinks = {
                wiki: content for wiki, content in sitelinks.items()
                if any(lang + 'wiki' == wiki for lang in languages)
            }
        else:
            relevant_sitelinks = sitelinks
        try:
            sitelinks = {
                wiki: content['title']
                for wiki, content in relevant_sitelinks.items()
            }
        except TypeError:
            sitelinks = relevant_sitelinks
        if not sitelinks and raise_on_no_wikipage:
            raise ValueError
        if delay_wikipedia_retrieval:
            wikipedia_data = {
                wiki: sitelinks[wiki] for wiki in relevant_sitelinks
            }
            try:
                wikipedia_data = {
                    wiki: wikipedia_data[wiki]['title']
                    for wiki in wikipedia_data
                }
            except TypeError:
                pass
        elif sitelinks:
            try:
                wikipedia_data = wp_summary_from_wdid(itempage['id'],
                                                      languages=languages,
                                                      sitelinks=sitelinks)
            except (RedirectError, DisambiguationError):
                logger.warning(
                    'Failed to determine Wikipedia article: linked '
                    'article is redirect or disambiguation page.',
                    exc_info=True)
                raise ValueError
            except requests.exceptions.ConnectionError:
                logger.warning(
                    'Failed to get info about entity %s from '
                    'Wikipedia API!', itempage['id'], exc_info=True)
    try:
        entity_extracted_details = {'url': wikipedia_data[0]['url']}
    except (KeyError, IndexError):
        # fall back to the Wikidata ID if no Wikipedia page has been
        # retrieved (yet)
        entity_extracted_details = {
            'url': 'https://www.wikidata.org/wiki/' + itempage['id']
        }
    if delay_wikipedia_retrieval:
        entity_extracted_details.update(wikipedia_data)
    elif include_wikipedia:
        for language in wikipedia_data:
            entity_extracted_details[language['language'] + 'wiki'] = language
    try:
        entity = ParseItemPage(itempage,
                               languages=languages,
                               entity_type=entity_type,
                               **kwargs)
    except AssertionError:
        raise ValueError(
            'No attributes of interest identified for entity {}'.format(
                itempage['id']))
    except DoesNotMatchFilterError:
        raise DoesNotMatchFilterError(
            'entity {} does not match filter criteria'.format(itempage['id']))
    except Exception as e:
        logger.warning(
            'Uncaught exception: {}. Entity {} will not be '
            'processed.'.format(e, itempage['id']),
            exc_info=True)
        raise
    entity_extracted_details.update(entity.details)
    if include_wikipedia and not delay_wikipedia_retrieval:
        for language_result in merge_with_wikipedia_by_language(
                entity=entity_extracted_details,
                languages=languages):
            yield language_result
    else:
        yield entity_extracted_details
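# Hedged usage sketch (not part of the original module):
# collect_attributes_from_wp_and_wd is a generator, so callers iterate over
# it - one dict per merged language result when Wikipedia content is fetched
# eagerly, otherwise a single combined dict.
def example_collect_wp_and_wd(itempage):
    """Sketch: cheap collection that defers Wikipedia API calls by only
    recording sitelinks (delay_wikipedia_retrieval=True)."""
    return list(collect_attributes_from_wp_and_wd(
        itempage,
        languages=['en', 'de'],
        include_wikipedia=True,
        delay_wikipedia_retrieval=True))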
def test_extract_literal_properties_freestanding():
    """Resolve the target of a claim (Douglas Adams's place of birth,
    P19 -> Cambridge, Q350) and extract its English label, once from the
    claim's target and once from an ItemPage built from the entity URI."""
    claim = Claim.fromJSON(DataSite("wikidata", "wikidata"), {
        u'type': u'statement',
        u'references': [
            {u'snaks': {
                u'P248': [{u'datatype': u'wikibase-item',
                           u'datavalue': {
                               u'type': u'wikibase-entityid',
                               u'value': {u'entity-type': u'item',
                                          u'numeric-id': 5375741}},
                           u'property': u'P248',
                           u'snaktype': u'value'}]},
             u'hash': u'355b56329b78db22be549dec34f2570ca61ca056',
             u'snaks-order': [u'P248']},
            {u'snaks': {
                u'P1476': [{u'datatype': u'monolingualtext',
                            u'datavalue': {
                                u'type': u'monolingualtext',
                                u'value': {
                                    u'text': u'Obituary: Douglas Adams',
                                    u'language': u'en'}},
                            u'property': u'P1476',
                            u'snaktype': u'value'}],
                u'P407': [{u'datatype': u'wikibase-item',
                           u'datavalue': {
                               u'type': u'wikibase-entityid',
                               u'value': {u'entity-type': u'item',
                                          u'numeric-id': 1860}},
                           u'property': u'P407',
                           u'snaktype': u'value'}],
                u'P813': [{u'datatype': u'time',
                           u'datavalue': {
                               u'type': u'time',
                               u'value': {
                                   u'after': 0,
                                   u'precision': 11,
                                   u'time': u'+00000002013-12-07T00:00:00Z',
                                   u'timezone': 0,
                                   u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                   u'before': 0}},
                           u'property': u'P813',
                           u'snaktype': u'value'}],
                u'P1433': [{u'datatype': u'wikibase-item',
                            u'datavalue': {
                                u'type': u'wikibase-entityid',
                                u'value': {u'entity-type': u'item',
                                           u'numeric-id': 11148}},
                            u'property': u'P1433',
                            u'snaktype': u'value'}],
                u'P854': [{u'datatype': u'url',
                           u'datavalue': {
                               u'type': u'string',
                               u'value': u'http://www.theguardian.com/news/2001/may/15/guardianobituaries.books'},
                           u'property': u'P854',
                           u'snaktype': u'value'}],
                u'P577': [{u'datatype': u'time',
                           u'datavalue': {
                               u'type': u'time',
                               u'value': {
                                   u'after': 0,
                                   u'precision': 11,
                                   u'time': u'+00000002001-05-15T00:00:00Z',
                                   u'timezone': 0,
                                   u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                   u'before': 0}},
                           u'property': u'P577',
                           u'snaktype': u'value'}],
                u'P50': [{u'datatype': u'wikibase-item',
                          u'datavalue': {
                              u'type': u'wikibase-entityid',
                              u'value': {u'entity-type': u'item',
                                         u'numeric-id': 18145749}},
                          u'property': u'P50',
                          u'snaktype': u'value'}]},
             u'hash': u'3f4d26cf841e20630c969afc0e48e5e3ef0c5a49',
             u'snaks-order': [u'P854', u'P577', u'P813', u'P1433',
                              u'P50', u'P1476', u'P407']},
            {u'snaks': {
                u'P123': [{u'datatype': u'wikibase-item',
                           u'datavalue': {
                               u'type': u'wikibase-entityid',
                               u'value': {u'entity-type': u'item',
                                          u'numeric-id': 192621}},
                           u'property': u'P123',
                           u'snaktype': u'value'}],
                u'P1476': [{u'datatype': u'monolingualtext',
                            u'datavalue': {
                                u'type': u'monolingualtext',
                                u'value': {
                                    u'text': u"Hitch Hiker's Guide author Douglas Adams dies aged 49",
                                    u'language': u'en'}},
                            u'property': u'P1476',
                            u'snaktype': u'value'}],
                u'P407': [{u'datatype': u'wikibase-item',
                           u'datavalue': {
                               u'type': u'wikibase-entityid',
                               u'value': {u'entity-type': u'item',
                                          u'numeric-id': 1860}},
                           u'property': u'P407',
                           u'snaktype': u'value'}],
                u'P813': [{u'datatype': u'time',
                           u'datavalue': {
                               u'type': u'time',
                               u'value': {
                                   u'after': 0,
                                   u'precision': 11,
                                   u'time': u'+00000002015-01-03T00:00:00Z',
                                   u'timezone': 0,
                                   u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                   u'before': 0}},
                           u'property': u'P813',
                           u'snaktype': u'value'}],
                u'P854': [{u'datatype': u'url',
                           u'datavalue': {
                               u'type': u'string',
                               u'value': u'http://www.telegraph.co.uk/news/uknews/1330072/Hitch-Hikers-Guide-author-Douglas-Adams-dies-aged-49.html'},
                           u'property': u'P854',
                           u'snaktype': u'value'}],
                u'P577': [{u'datatype': u'time',
                           u'datavalue': {
                               u'type': u'time',
                               u'value': {
                                   u'after': 0,
                                   u'precision': 11,
                                   u'time': u'+00000002001-05-13T00:00:00Z',
                                   u'timezone': 0,
                                   u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                   u'before': 0}},
                           u'property': u'P577',
                           u'snaktype': u'value'}]},
             u'hash': u'51a934797fd7f7d3ee91d4d541356d4c5974075b',
             u'snaks-order': [u'P1476', u'P577', u'P123', u'P407',
                              u'P854', u'P813']},
            {u'snaks': {
                u'P248': [{u'datatype': u'wikibase-item',
                           u'datavalue': {
                               u'type': u'wikibase-entityid',
                               u'value': {u'entity-type': u'item',
                                          u'numeric-id': 36578}},
                           u'property': u'P248',
                           u'snaktype': u'value'}],
                u'P813': [{u'datatype': u'time',
                           u'datavalue': {
                               u'type': u'time',
                               u'value': {
                                   u'after': 0,
                                   u'precision': 11,
                                   u'time': u'+00000002015-07-07T00:00:00Z',
                                   u'timezone': 0,
                                   u'calendarmodel': u'http://www.wikidata.org/entity/Q1985727',
                                   u'before': 0}},
                           u'property': u'P813',
                           u'snaktype': u'value'}],
                u'P227': [{u'datatype': u'external-id',
                           u'datavalue': {u'type': u'string',
                                          u'value': u'119033364'},
                           u'property': u'P227',
                           u'snaktype': u'value'}]},
             u'hash': u'a02f3a77ddd343e6b88be25696b055f5131c3d64',
             u'snaks-order': [u'P248', u'P227', u'P813']}],
        u'mainsnak': {u'datatype': u'wikibase-item',
                      u'datavalue': {
                          u'type': u'wikibase-entityid',
                          u'value': {u'entity-type': u'item',
                                     u'numeric-id': 350}},
                      u'property': u'P19',
                      u'snaktype': u'value'},
        u'id': u'q42$3D284234-52BC-4DA3-83A3-7C39F84BA518',
        u'rank': u'normal'})
    # target_id = 'Q{}'.format(claim['mainsnak']['datavalue']['value']['numeric-id'])
    target = claim.target
    # target = pywikibot.ItemPage.from_entity_uri(site=DataSite('wikidata', 'wikidata'), uri=target_id)
    result = ParseItemPage.extract_literal_properties(
        entity=target, languages=['en'], literals=['labels'])
    print(result)
    assert result['labels']['en'] == 'Cambridge'
    entity_id = 'Q350'
    target = ItemPage.from_entity_uri(
        site=DataSite('wikidata', 'wikidata'),
        uri='http://www.wikidata.org/entity' + '/' + entity_id)
    print(target)
    result = ParseItemPage.extract_literal_properties(
        entity=target, languages=['en'], literals=['labels'])
    print(result)
    assert result['labels']['en'] == 'Cambridge'
def test_parseItemPage_all(self):
    try:
        entity = itempage
        import pprint
        parsed_without_attribute_labels = ParseItemPage(
            entity,
            include_literals=True,
            languages=['en', 'de', 'sv'],
            resolve_country=False,
            include_attribute_labels=False).details
        parsed_with_attribute_labels = ParseItemPage(
            entity,
            include_literals=True,
            languages=['en', 'de', 'sv'],
            resolve_country=False,
            include_attribute_labels=True).details
        assert set(parsed_with_attribute_labels.keys()) == set(
            parsed_without_attribute_labels.keys())
        assert not any(
            'labels' in val
            for val in parsed_without_attribute_labels.values())
        # assert any(('labels' in val for val in parsed_with_attribute_labels.values()))
        assert all(parsed_with_attribute_labels[literal] ==
                   parsed_without_attribute_labels[literal]
                   for literal in ('labels', 'descriptions', 'aliases'))
        pprint.pprint(parsed_with_attribute_labels)
        assert self.result_without_timestamp(
            parsed_with_attribute_labels) == self.result_without_timestamp(
                sample_output)
        for val in parsed_with_attribute_labels.values():
            if 'values' in val and 'P18' not in val['url']:
                assert all(
                    'labels' in sub_val for sub_val in val['values'])
        parsed_with_country = ParseItemPage(
            entity,
            include_literals=False,
            wd_parameters={},
            languages=['en', 'de', 'sv'],
            resolve_country=True,
            include_attribute_labels=True,
            qualifiers_of_interest=[],
            entity_type='person',
        ).details
        assert 'country' in parsed_with_country
        pprint.pprint(parsed_with_country['country'])
        pprint.pprint(parsed_with_country)
        assert parsed_with_country['country'] == {
            'url': 'https://www.wikidata.org/wiki/Property:P17',
            'values': [{
                'claim_id': u'q350$8E72D3A5-A067-47CB-AF45-C73ED7CFFF9E',
                'derived': True,
                'labels': {
                    'de': u'Vereinigtes K\xf6nigreich',
                    'en': u'United Kingdom',
                    'sv': u'Storbritannien'
                },
                'url': u'https://www.wikidata.org/wiki/Q145'
            }]
        }
    except pywikibot.exceptions.MaxlagTimeoutError:
        warnings.warn('External API unreachable')