def get_str(self, result, language):
    """Return this attribute's value from *result* as a display string.

    Reads the ``<name>s`` key from *result*.  Plain attributes are
    returned as-is; attributes that carry an URL id keep only the first
    comma-separated value and resolve it to an external URL (wikimedia
    image links are special-cased to the ``wikimedia_image`` resolver).
    """
    value = result.get(self.name + 's')
    # no URL id, or nothing usable to resolve -> hand back the raw value
    if not self.url_id or value is None or value == '':
        return value
    # several ids may be packed into one comma separated string; keep the first
    value = value.split(',')[0]
    url_id = self.url_id
    prefix = WDURLAttribute.HTTP_WIKIMEDIA_IMAGE
    if value.startswith(prefix):
        value = value[len(prefix):]
        url_id = 'wikimedia_image'
    return get_external_url(url_id, value)
def get_img_src(result):
    """Get image URL from either ``result['wikidata']`` or ``result['extratags']``.

    Lookup order: wikidata ``image`` / ``image_symbol`` / ``image_sign``,
    then ``extratags['image']``, then ``extratags['wikimedia_commons']``
    (resolved through :func:`get_external_url`).  The used ``extratags``
    entry is removed from *result* so it is not rendered twice.

    Fix: the wikidata keys are now read with ``dict.get`` — the previous
    direct indexing raised ``KeyError`` whenever the wikidata sub-dict
    lacked one of the three image keys.
    """
    img_src = None

    # wikidata: first image, then image_symbol, then image_sign
    if 'wikidata' in result:
        wikidata = result['wikidata']
        img_src = (
            wikidata.get('image')
            or wikidata.get('image_symbol')
            or wikidata.get('image_sign')
        )

    # extratags: pop() both reads and deletes, matching the old get + del
    extratags = result.get('extratags', {})
    if not img_src and extratags.get('image'):
        img_src = extratags.pop('image')
    if not img_src and extratags.get('wikimedia_commons'):
        img_src = get_external_url('wikimedia_image', extratags.pop('wikimedia_commons'))

    return img_src
def response(resp):
    """Parse a DuckDuckGo instant-answer JSON payload into result dicts.

    Produces plain results (answer, suggestions, links) plus, when a
    heading exists, either a single link result or a full infobox built
    from the payload's urls, attributes and related topics.
    """
    results = []
    json_data = json.loads(resp.text)

    # json_data.get('Entity') possible values (not exhaustive):
    # * continent / country / department / location / waterfall
    # * actor / musician / artist
    # * book / performing art / film / television / media franchise / concert tour / playwright
    # * prepared food
    # * website / software / os / programming language / file format / software engineer
    # * compagny

    content = ''
    heading = json_data.get('Heading', '')
    attributes = []
    urls = []
    infobox_id = None
    relatedTopics = []

    # add answer if there is one (calc/ip answers are skipped)
    answer = json_data.get('Answer', '')
    if answer:
        logger.debug('AnswerType="%s" Answer="%s"', json_data.get('AnswerType'), answer)
        if json_data.get('AnswerType') not in ('calc', 'ip'):
            results.append({'answer': html_to_text(answer)})

    # infobox text body
    if 'Definition' in json_data:
        content += json_data.get('Definition', '')
    if 'Abstract' in json_data:
        content += json_data.get('Abstract', '')

    # image: normalize the empty string to None
    image = json_data.get('Image') or None

    # urls: official website, Wikipedia page
    for entry in json_data.get('Results', []):
        first_url = entry.get('FirstURL')
        text = entry.get('Text')
        if first_url is not None and text is not None:
            urls.append({'title': text, 'url': first_url})
            results.append({'title': heading, 'url': first_url})

    # related topics: flat suggestions or named groups of suggestions
    for entry in json_data.get('RelatedTopics', []):
        if 'FirstURL' in entry:
            text = entry.get('Text')
            if not is_broken_text(text):
                suggestion = result_to_text(text, entry.get('Result'))
                if suggestion != heading and suggestion is not None:
                    results.append({'suggestion': suggestion})
        elif 'Topics' in entry:
            group = []
            relatedTopics.append({'name': entry.get('Name', ''), 'suggestions': group})
            for topic in entry.get('Topics', []):
                suggestion = result_to_text(topic.get('Text'), topic.get('Result'))
                if suggestion != heading and suggestion is not None:
                    group.append(suggestion)

    # abstract
    abstract_url = json_data.get('AbstractURL', '')
    if abstract_url != '':
        # add as result ? problem always in english
        infobox_id = abstract_url
        urls.append({'title': json_data.get('AbstractSource'), 'url': abstract_url, 'official': True})
        results.append({'url': abstract_url, 'title': heading})

    # definition
    definition_url = json_data.get('DefinitionURL', '')
    if definition_url != '':
        # add as result ? as answer ? problem always in english
        infobox_id = definition_url
        urls.append({'title': json_data.get('DefinitionSource'), 'url': definition_url})

    # normalize the id so it merges with wikidata's infobox
    if infobox_id:
        infobox_id = replace_http_by_https(infobox_id)

    # attributes — some entries become urls instead
    if 'Infobox' in json_data:
        infobox = json_data.get('Infobox')
        if 'content' in infobox:
            osm_zoom = 17
            coordinates = None
            for info in infobox.get('content'):
                data_type = info.get('data_type')
                data_label = info.get('label')
                data_value = info.get('value')

                # Workaround: ddg may return a double quote
                if data_value == '""':
                    continue

                # Is it an external URL ?
                # * imdb_id / facebook_profile / youtube_channel / youtube_video / twitter_profile
                # * instagram_profile / rotten_tomatoes / spotify_artist_id / itunes_artist_id / soundcloud_id
                # * netflix_id
                external_url = get_external_url(data_type, data_value)
                if external_url is not None:
                    urls.append({'title': data_label, 'url': external_url})
                elif data_type in ('instance', 'wiki_maps_trigger', 'google_play_artist_id'):
                    # ignore instance: Wikidata value from "Instance Of" (Qxxxx)
                    # ignore wiki_maps_trigger: reference to a javascript
                    # ignore google_play_artist_id: service shutdown
                    pass
                elif data_type == 'string' and data_label == 'Website':
                    # There is already an URL for the website
                    pass
                elif data_type == 'area':
                    attributes.append({'label': data_label, 'value': area_to_str(data_value), 'entity': 'P2046'})
                    osm_zoom = area_to_osm_zoom(data_value.get('amount'))
                elif data_type == 'coordinates':
                    if data_value.get('globe') == 'http://www.wikidata.org/entity/Q2':
                        # coordinate on Earth: defer, so the area can set the zoom first
                        coordinates = info
                    else:
                        # coordinate NOT on Earth
                        attributes.append({'label': data_label, 'value': data_value, 'entity': 'P625'})
                elif data_type == 'string':
                    attributes.append({'label': data_label, 'value': data_value})

            if coordinates:
                data_value = coordinates.get('value')
                latitude = data_value.get('latitude')
                longitude = data_value.get('longitude')
                url = get_earth_coordinates_url(latitude, longitude, osm_zoom)
                urls.append({'title': 'OpenStreetMap', 'url': url, 'entity': 'P625'})

    if len(heading) > 0:
        # TODO get infobox.meta.value where .label='article_title'
        bare_link = (
            image is None
            and len(attributes) == 0
            and len(urls) == 1
            and len(relatedTopics) == 0
            and len(content) == 0
        )
        if bare_link:
            results.append({'url': urls[0]['url'], 'title': heading, 'content': content})
        else:
            results.append({
                'infobox': heading,
                'id': infobox_id,
                'content': content,
                'img_src': image,
                'attributes': attributes,
                'urls': urls,
                'relatedTopics': relatedTopics,
            })

    return results
def get_wikipedia_image(raw_value):
    """Resolve *raw_value* to a wikimedia image URL, or None when empty."""
    if raw_value:
        return get_external_url('wikimedia_image', raw_value)
    return None