def get_catalog_from_artwork(entity):
    """Assemble the catalog summary for an artwork entity.

    Each catalog identifier property reported by ``find_catalog_id`` is
    resolved with ``lookup``; identifiers whose first value is ``None`` are
    left out of the detail list.  The summary is then passed to
    ``check_catalog``.  A fixed set of ``requests`` network errors raised
    during that step is deliberately ignored, so the basic summary is
    still returned.

    :param entity: artwork entity, in the form accepted by
        ``wikibase.first_datavalue``.
    :return: dict with the keys ``url``, ``detail`` and ``ids``, as left by
        ``check_catalog``.
    """
    catalog_ids = find_catalog_id(entity)

    # Lazily pair every property with its first value so that the value
    # fetch and the detail lookup remain interleaved, property by property.
    id_values = (
        (pid, wikibase.first_datavalue(entity, pid))
        for pid in sorted(catalog_ids)
    )
    # An identifier can be 'no value' (example: Q26754456); skip those.
    catalog_detail = [
        lookup(pid, value) for pid, value in id_values if value is not None
    ]

    catalog = {
        'url': wikibase.first_datavalue(entity, 'P973'),
        'detail': catalog_detail,
        'ids': catalog_ids,
    }

    try:
        check_catalog(entity, catalog)
    except (
        requests.exceptions.ReadTimeout,
        requests.exceptions.ConnectTimeout,
        requests.exceptions.ConnectionError,
        requests.exceptions.TooManyRedirects,
    ):
        # Best effort only: an unreachable or misbehaving remote catalog
        # must not prevent the summary from being returned.
        pass

    return catalog
def get_institution(entity, other):
    """Return the ``other`` entry for the institution holding an artwork.

    The location claim (P276) is tried first and the collection claim
    (P195) second.  The first of them that is present on the entity and has
    a truthy first value decides the result.

    :param entity: entity dict with a ``claims`` mapping.
    :param other: mapping keyed by the ``id`` of the claim value.
    :return: ``other[<id>]`` for the matching claim, or ``None`` when
        neither property yields a value.
    :raises KeyError: if the matched id is missing from ``other``.
    """
    claims = entity['claims']
    for property_id in ('P276', 'P195'):
        if property_id not in claims:
            continue
        holder = wikibase.first_datavalue(entity, property_id)
        if holder:
            return other[holder['id']]
    return None
def check_catalog(entity, catalog):
    """Enrich ``catalog`` in place with data from an institution's catalog.

    Sources are tried in order and the function returns as soon as one of
    them yields data:

    1. the dedicated modules for the P4704 (``saam``) and P4709
       (``barnesfoundation``) identifiers, when the entity has that claim;
    2. the institution module whose host name occurs in ``catalog['url']``;
    3. a description scraped from the page at ``catalog['url']``;
    4. a description scraped from the catalog page of each identifier in
       ``catalog['ids']`` (P350 is skipped).

    :param entity: entity dict with a ``claims`` mapping.
    :param catalog: dict with at least the keys ``url`` and ``ids``; it is
        updated in place.
    :return: ``None``; results are written into ``catalog``.

    Only ``ConnectionError`` / ``SSLError`` raised while fetching an
    identifier page are handled here; anything else raised by the helpers
    propagates to the caller.
    """
    catalog_url = catalog['url']
    catalog_ids = catalog['ids']
    claims = entity['claims']

    # Identifiers that have their own dedicated catalog module.
    for property_id, module in (('P4704', saam), ('P4709', barnesfoundation)):
        if property_id not in claims:
            continue
        cat = module.get_catalog(wikibase.first_datavalue(entity, property_id))
        if cat:
            catalog.update(cat)
            return

    institutions = [
        ('www.dia.org', dia),
        ('www.rijksmuseum.nl', rijksmuseum),
        ('www.npg.org.uk', npg),
        ('www.museodelprado.es', museodelprado),
    ]

    if catalog_url:
        for host, module in institutions:
            if host in catalog_url:
                cat = module.get_catalog(catalog_url)
                if not cat:
                    continue
                catalog.update(cat)
                return

        html = get_catalog_url(catalog_url)
        description = get_description_from_page(html)
        if description:
            # Fix: a trailing comma here used to store a 1-tuple instead of
            # the description itself.
            catalog['description'] = description
            return

    for property_id in sorted(catalog_ids):
        if property_id == 'P350':
            continue  # RKDimages ID
        value = wikibase.first_datavalue(entity, property_id)
        if value is None:
            continue  # identifier can be 'no value'; nothing to look up
        detail = lookup(property_id, value)
        try:
            html = get_catalog_page(property_id, value)
        except (requests.exceptions.ConnectionError,
                requests.exceptions.SSLError):
            continue  # ignore this error and try the next identifier
        description = get_description_from_page(html)
        if not description:
            continue
        # Fix: update the caller's dict.  Rebinding the local name
        # ``catalog`` silently discarded the result.  Stop at the first
        # description found, like every other source above.
        catalog.update({
            'institution': detail['label'],
            'description': description,
        })
        return
def add_images_to_depicts_lookup(hits):
    """Attach image details to depicts-lookup hits, modifying them in place.

    For every hit the matching entity is fetched and stored on the hit's
    ``entity`` attribute, after which the database session is committed.
    The entity's first P18 value becomes ``hit['image_filename']``, and
    ``hit['image']`` is filled in for those filenames that
    ``commons.image_detail`` returned details for.  Only the first 50
    filenames are sent to ``commons.image_detail``.

    :param hits: re-iterable sequence of hit objects.  Each hit is used
        both through item access (``hit['qid']``, ``hit.get(...)``) and
        through attribute access (``.entity``), so it must support both.
    :return: ``None``.
    """
    qid_list = [hit['qid'] for hit in hits]
    # When a qid occurs more than once, the last hit with that qid wins.
    hit_for_qid = dict(zip(qid_list, hits))

    for entity in mediawiki.get_entities_with_cache(qid_list):
        hit_for_qid[entity['id']].entity = entity
    database.session.commit()

    for hit in hits:
        matched = hit_for_qid[hit['qid']]
        if matched.entity:
            hit['image_filename'] = wikibase.first_datavalue(
                matched.entity, 'P18')

    # Cap the number of filenames requested in one call.
    wanted = [
        hit['image_filename'] for hit in hits if hit.get('image_filename')
    ][:50]
    detail = commons.image_detail(wanted, thumbwidth=200)

    for hit in hits:
        filename = hit.get('image_filename')
        if filename and filename in detail:
            hit['image'] = detail[filename]
def catalog_page():
    """Render the catalog page for the artworks matching the request filters.

    The filter parameters come from ``get_artwork_params``; the matching
    rows from ``filter_artwork`` are reduced to a set of item ids, whose
    entities are loaded and turned into template items (label, qid, item
    id, image filename, entity).  Labels for the related items collected by
    ``build_other_set`` are passed along as ``other``.

    :return: the rendered ``catalog.html`` template.
    """
    params = get_artwork_params()
    bindings = filter_artwork(params)
    page = utils.get_int_arg('page') or 1
    page_size = 45

    item_ids = {wdqs.row_id(row) for row in bindings}
    qids = [f'Q{item_id}' for item_id in sorted(item_ids)]

    # Fix: the per-id ``Item.query.get`` list that used to be built here was
    # overwritten immediately, so it only cost one database query per item.
    entities = mediawiki.get_entities_with_cache(qids)

    items = []
    other_items = set()
    for entity in entities:
        other_items.update(build_other_set(entity))
        # Fix: an unconditional ``continue`` used to sit here, which made
        # the item construction unreachable and left the page without items.
        items.append({
            'label': wikibase.get_entity_label(entity),
            'qid': entity['id'],
            'item_id': int(entity['id'][1:]),
            'image_filename': wikibase.first_datavalue(entity, 'P18'),
            'entity': entity,
        })

    other = get_labels(other_items)

    flat = '_'.join(f'{pid}={qid}' for pid, qid in params)
    thumbwidth = 400
    # FIXME cache_name can be too long for filesystem
    cache_name = f'{flat}_{page}_{page_size}_{thumbwidth}'
    detail = get_image_detail_with_cache(items, cache_name,
                                         thumbwidth=thumbwidth)

    for item in items:
        item['url'] = url_for('item_page', item_id=item['item_id'])
        # An item may have no P18 value, or no detail may have come back
        # for its file; store None rather than failing the whole page.
        # NOTE(review): assumes ``detail`` is a dict keyed by filename.
        item['image'] = detail.get(item['image_filename'])

    item_labels = get_labels(qid for pid, qid in params)
    title = ' / '.join(
        find_more_props[pid] + ': ' + item_labels[qid] for pid, qid in params
    )

    return render_template('catalog.html',
                           labels=find_more_props,
                           items=items,
                           other=other,
                           title=title)
def get_catalog_from_artwork(entity):
    """Build the catalog summary dict for an artwork entity.

    The catalog identifier properties found by ``find_catalog_id`` are
    resolved through ``lookup`` and collected under ``detail``.  The dict
    is then given to ``check_catalog``; network problems during that step
    are ignored on purpose so that the basic summary is always returned.

    :param entity: artwork entity, in the form accepted by
        ``wikibase.first_datavalue``.
    :return: dict with the keys ``url``, ``detail`` and ``ids``, as left by
        ``check_catalog``.
    """
    catalog_ids = find_catalog_id(entity)

    catalog_detail = []
    for property_id in sorted(catalog_ids):
        value = wikibase.first_datavalue(entity, property_id)
        # Fix: an identifier can be 'no value', in which case there is
        # nothing to look up and the property is skipped.
        if value is None:
            continue
        catalog_detail.append(lookup(property_id, value))

    catalog = {
        'url': wikibase.first_datavalue(entity, 'P973'),
        'detail': catalog_detail,
        'ids': catalog_ids,
    }

    try:
        check_catalog(entity, catalog)
    except (requests.exceptions.ReadTimeout,
            requests.exceptions.ConnectTimeout,
            requests.exceptions.ConnectionError,
            # Fix: a redirect loop on the remote site is just another
            # network failure and must not break the caller either.
            requests.exceptions.TooManyRedirects):
        pass  # best effort: keep the summary built so far

    return catalog
def missing_image_report():
    """Render a report of the top depicts items that have no image.

    Up to ``limit`` items (request argument, default 1000) are taken in
    descending ``count`` order.  An item is reported when its entity has no
    value for either of the properties checked (P18, P2716).

    :return: the rendered ``missing_image.html`` template.
    """
    limit = utils.get_int_arg('limit') or 1000
    query = DepictsItem.query.order_by(DepictsItem.count.desc()).limit(limit)

    # Fix: materialise the rows once.  The query object used to be iterated
    # twice; if each iteration re-runs the SQL (as with a lazy ORM query),
    # a row seen only by the second pass would raise KeyError below.
    depicts_items = list(query)

    qids = [item.qid for item in depicts_items]
    entities = mediawiki.get_entities_dict_with_cache(qids)

    image_props = ('P18', 'P2716')
    item_list = [
        depicts
        for depicts in depicts_items
        if not any(
            wikibase.first_datavalue(entities[depicts.qid], prop)
            for prop in image_props
        )
    ]

    # TODO: call wikidata search to find images that depict item

    return render_template('missing_image.html', item_list=item_list)
def next_page(item_id):
    """Render the 'next' page for one item.

    Loads the entity ``Q<item_id>``, its image (first P18 value) and its
    label.  For each property in ``find_more_props`` it builds the list of
    linked values found in the entity's claims, for display in the
    template.

    :param item_id: numeric part of the item's QID.
    :return: the rendered ``next.html`` template.
    """
    qid = f'Q{item_id}'
    entity = mediawiki.get_entity_with_cache(qid)

    image_filename = wikibase.first_datavalue(entity, 'P18')
    image = image_with_cache(qid, image_filename, 800)

    label = wikibase.get_entity_label(entity)
    other = get_other(entity)

    other_props = []
    for pid, prop_label in find_more_props.items():
        # Material used is too generic to be a useful link.
        if pid == 'P186':
            continue

        links = []
        for claim in entity['claims'].get(pid) or ():
            mainsnak = claim['mainsnak']
            if 'datavalue' not in mainsnak:
                continue
            target = mainsnak['datavalue']['value']
            target_qid = target['id']
            if target_qid == 'Q4233718':  # anonymous artist
                continue
            links.append({
                'href': url_for('find_more_page',
                                property_id=pid[1:],
                                item_id=target['numeric-id']),
                'qid': target_qid,
                'label': other.get(target_qid),
            })

        # Properties without any usable value are not shown at all.
        if not links:
            continue

        other_props.append({
            'label': prop_label,
            'image_lookup': url_for('find_more_json',
                                    pid=pid,
                                    qid=[link['qid'] for link in links]),
            'pid': pid,
            'values': links,
            'images': [],
        })

    return render_template('next.html',
                           qid=qid,
                           label=label,
                           image=image,
                           labels=find_more_props,
                           other=other,
                           entity=entity,
                           other_props=other_props)