Ejemplo n.º 1
0
def get_catalog_from_artwork(entity):
    """Build the catalog record for an artwork entity.

    The property IDs returned by ``find_catalog_id(entity)`` are walked in
    sorted order; each one that has a data value contributes the result of
    ``lookup`` to the ``'detail'`` list. The record is then handed to
    ``check_catalog`` so it can be enriched in place.

    Timeouts, connection errors and redirect loops raised by
    ``check_catalog`` are deliberately ignored, so the basic record is
    returned even when the remote catalog cannot be reached.

    Returns a dict with the keys ``'url'`` (first P973 value), ``'detail'``
    and ``'ids'``, plus whatever ``check_catalog`` added.
    """
    catalog_ids = find_catalog_id(entity)

    details = []
    for pid in sorted(catalog_ids):
        identifier = wikibase.first_datavalue(entity, pid)
        # identifier can be 'no value', example: Q26754456
        if identifier is not None:
            details.append(lookup(pid, identifier))

    catalog = {
        'url': wikibase.first_datavalue(entity, 'P973'),
        'detail': details,
        'ids': catalog_ids,
    }

    try:
        check_catalog(entity, catalog)
    except (requests.exceptions.ReadTimeout,
            requests.exceptions.ConnectTimeout,
            requests.exceptions.ConnectionError,
            requests.exceptions.TooManyRedirects):
        # best effort only: keep the record built so far
        pass

    return catalog
Ejemplo n.º 2
0
def get_institution(entity, other):
    """Return the entry of ``other`` for the artwork's location or collection.

    P276 (location) is tried first, then P195 (collection). The first of the
    two that is present in ``entity['claims']`` and yields a truthy data
    value wins; that value's ``'id'`` is used as the key into ``other``.

    Returns ``None`` when neither property provides a usable value.
    """
    claims = entity['claims']
    for pid in ('P276', 'P195'):
        if pid not in claims:
            continue
        value = wikibase.first_datavalue(entity, pid)
        if value:
            return other[value['id']]
    return None
Ejemplo n.º 3
0
def check_catalog(entity, catalog):
    """Enrich ``catalog`` in place with data from the holding institution.

    Sources are tried in this order and the first one that yields data wins:

    1. ``saam.get_catalog`` when the entity has a P4704 claim.
    2. ``barnesfoundation.get_catalog`` when the entity has a P4709 claim.
    3. An institution-specific module picked by host name in
       ``catalog['url']``.
    4. A generic description extracted from the ``catalog['url']`` page.
    5. A description extracted from the catalog page of each identifier in
       ``catalog['ids']`` (sorted, skipping P350).

    Args:
        entity: Wikidata entity dict; must contain a ``'claims'`` mapping.
        catalog: dict with at least ``'url'`` and ``'ids'``; it is mutated.

    Returns:
        None. Results are communicated by updating ``catalog``.

    Raises:
        Whatever the fetch helpers raise, except for the connection/SSL
        errors of the per-identifier page fetch, which are skipped.
    """
    catalog_url = catalog['url']
    catalog_ids = catalog['ids']
    claims = entity['claims']

    if 'P4704' in claims:
        saam_id = wikibase.first_datavalue(entity, 'P4704')
        cat = saam.get_catalog(saam_id)
        if cat:
            catalog.update(cat)
            return

    if 'P4709' in claims:
        catalog_id = wikibase.first_datavalue(entity, 'P4709')
        cat = barnesfoundation.get_catalog(catalog_id)
        if cat:
            catalog.update(cat)
            return

    if catalog_url:
        # Only needed when there is a URL to match against.
        institutions = [
            ('www.dia.org', dia),
            ('www.rijksmuseum.nl', rijksmuseum),
            ('www.npg.org.uk', npg),
            ('www.museodelprado.es', museodelprado),
        ]
        for host, module in institutions:
            if host not in catalog_url:
                continue
            cat = module.get_catalog(catalog_url)
            if cat:
                catalog.update(cat)
                return

        html = get_catalog_url(catalog_url)
        description = get_description_from_page(html)
        if description:
            # Store the string itself; a trailing comma here used to turn
            # the value into a one-element tuple.
            catalog['description'] = description
            return

    for property_id in sorted(catalog_ids):
        if property_id == 'P350':
            continue  # RKDimages ID
        value = wikibase.first_datavalue(entity, property_id)
        # identifier can be 'no value', in which case there is nothing to
        # look up or fetch
        if value is None:
            continue
        detail = lookup(property_id, value)
        try:
            html = get_catalog_page(property_id, value)
        except (requests.exceptions.ConnectionError,
                requests.exceptions.SSLError):
            continue  # ignore this error
        description = get_description_from_page(html)
        if not description:
            continue
        # Mutate the caller's dict: rebinding the local name would discard
        # the result, because this function returns nothing.
        catalog.update({
            'institution': detail['label'],
            'description': description,
        })
        return
Ejemplo n.º 4
0
def add_images_to_depicts_lookup(hits):
    """Attach image information to each hit, modifying ``hits`` in place.

    Steps:

    1. Fetch the entities for all hit QIDs and store each one on the
       matching hit as its ``entity`` attribute, then commit the database
       session.
    2. Copy the first P18 value of each entity into
       ``hit['image_filename']``.
    3. Request image detail (thumb width 200) for at most the first 50
       non-empty filenames and store it under ``hit['image']`` for every
       hit whose filename is in the returned detail.

    NOTE(review): hits are indexed like mappings but also receive an
    ``entity`` attribute; confirm the hit type supports both.
    """
    hit_for_qid = {hit['qid']: hit for hit in hits}
    entities = mediawiki.get_entities_with_cache(
        [hit['qid'] for hit in hits])

    for entity in entities:
        hit_for_qid[entity['id']].entity = entity
    database.session.commit()

    for hit in hits:
        owner = hit_for_qid[hit['qid']]
        if not owner.entity:
            continue
        hit['image_filename'] = wikibase.first_datavalue(owner.entity, 'P18')

    filenames = []
    for hit in hits:
        if hit.get('image_filename'):
            filenames.append(hit['image_filename'])

    # only the first 50 filenames are sent in the detail request
    detail = commons.image_detail(filenames[:50], thumbwidth=200)

    for hit in hits:
        filename = hit.get('image_filename')
        if filename and filename in detail:
            hit['image'] = detail[filename]
Ejemplo n.º 5
0
def catalog_page():
    """Render ``catalog.html`` for the artworks matching the request params.

    The matching item IDs come from ``filter_artwork``; their entities are
    fetched and turned into item dicts (label, qid, numeric id, P18 image
    filename, entity). Each item then gets a ``'url'`` and an ``'image'``
    entry; ``'image'`` is ``None`` when no image detail is available for it.

    Two defects of the earlier version are fixed here:

    * a stray ``continue`` made the item-building code unreachable, so the
      template always received an empty ``items`` list;
    * one ``Item.query.get`` call per item ID was issued and its result
      immediately overwritten.

    NOTE(review): ``page`` and ``page_size`` only feed the cache name; the
    result set is not sliced here.
    """
    params = get_artwork_params()
    bindings = filter_artwork(params)
    page = utils.get_int_arg('page') or 1
    page_size = 45

    item_ids = {wdqs.row_id(row) for row in bindings}
    qids = [f'Q{item_id}' for item_id in sorted(item_ids)]

    entities = mediawiki.get_entities_with_cache(qids)

    items = []
    other_items = set()
    for entity in entities:
        other_items.update(build_other_set(entity))
        items.append({
            'label': wikibase.get_entity_label(entity),
            'qid': entity['id'],
            'item_id': int(entity['id'][1:]),
            'image_filename': wikibase.first_datavalue(entity, 'P18'),
            'entity': entity,
        })

    other = get_labels(other_items)

    flat = '_'.join(f'{pid}={qid}' for pid, qid in params)
    thumbwidth = 400
    # FIXME cache_name can be too long for filesystem
    cache_name = f'{flat}_{page}_{page_size}_{thumbwidth}'
    # Entities without a P18 claim have no filename to look up.
    items_with_image = [item for item in items if item['image_filename']]
    detail = get_image_detail_with_cache(items_with_image,
                                         cache_name,
                                         thumbwidth=thumbwidth)

    for item in items:
        item['url'] = url_for('item_page', item_id=item['item_id'])
        filename = item['image_filename']
        # Guard the lookup so a missing image cannot raise KeyError.
        if filename and filename in detail:
            item['image'] = detail[filename]
        else:
            item['image'] = None

    item_labels = get_labels(qid for pid, qid in params)
    title = ' / '.join(find_more_props[pid] + ': ' + item_labels[qid]
                       for pid, qid in params)

    return render_template('catalog.html',
                           labels=find_more_props,
                           items=items,
                           other=other,
                           title=title)
Ejemplo n.º 6
0
def get_catalog_from_artwork(entity):
    """Build the catalog record for an artwork entity.

    The property IDs returned by ``find_catalog_id(entity)`` are processed
    in sorted order and the ``lookup`` result of each one is collected in
    the ``'detail'`` list. Properties whose first data value is ``None``
    (an identifier can be 'no value') are skipped instead of being passed
    to ``lookup``.

    ``check_catalog`` is then given the chance to enrich the record in
    place. Timeouts, connection errors and redirect loops raised while
    doing so are ignored on purpose, so the basic record is still returned
    when the remote catalog is unreachable.

    Returns a dict with the keys ``'url'`` (first P973 value), ``'detail'``
    and ``'ids'``, plus whatever ``check_catalog`` added.
    """
    catalog_ids = find_catalog_id(entity)
    catalog_detail = []
    for property_id in sorted(catalog_ids):
        value = wikibase.first_datavalue(entity, property_id)
        # identifier can be 'no value'; there is nothing to look up then
        if value is None:
            continue
        catalog_detail.append(lookup(property_id, value))

    catalog = {
        'url': wikibase.first_datavalue(entity, 'P973'),
        'detail': catalog_detail,
        'ids': catalog_ids,
    }

    try:
        check_catalog(entity, catalog)
    except (requests.exceptions.ReadTimeout,
            requests.exceptions.ConnectTimeout,
            requests.exceptions.ConnectionError,
            requests.exceptions.TooManyRedirects):
        # best effort only: keep the record built so far
        pass

    return catalog
Ejemplo n.º 7
0
def missing_image_report():
    """Render ``missing_image.html`` listing depicted items without an image.

    Takes the ``DepictsItem`` rows with the highest ``count`` (up to the
    ``limit`` request argument, 1000 when it is absent or zero) and keeps
    those whose entity has no truthy first value for either P18 or P2716.
    """
    limit = utils.get_int_arg('limit') or 1000
    q = DepictsItem.query.order_by(DepictsItem.count.desc()).limit(limit)

    entities = mediawiki.get_entities_dict_with_cache(
        [item.qid for item in q])

    image_props = ('P18', 'P2716')
    item_list = [
        depicts for depicts in q
        if not any(
            wikibase.first_datavalue(entities[depicts.qid], prop)
            for prop in image_props)
    ]

    # TODO: call wikidata search to find images that depict item

    return render_template('missing_image.html', item_list=item_list)
Ejemplo n.º 8
0
def next_page(item_id):
    """Render ``next.html`` for the item with the given numeric ID.

    Besides the item's label and its P18 image (requested at width 800),
    the template receives one entry per property in ``find_more_props``
    for which the entity has at least one usable claim. Each entry holds
    the property label, a ``find_more_json`` lookup URL, the property ID,
    the claim values (link, QID, label) and an empty ``'images'`` list.

    P186 is skipped as too generic, and claims without a data value or
    pointing at Q4233718 (anonymous artist) are left out.
    """
    qid = f'Q{item_id}'
    entity = mediawiki.get_entity_with_cache(qid)

    width = 800
    image_filename = wikibase.first_datavalue(entity, 'P18')
    image = image_with_cache(qid, image_filename, width)

    label = wikibase.get_entity_label(entity)
    other = get_other(entity)

    other_props = []
    for pid, prop_label in find_more_props.items():
        if pid == 'P186':  # material used: too generic
            continue
        claims = entity['claims'].get(pid)
        if not claims:
            continue

        values = []
        for claim in claims:
            mainsnak = claim['mainsnak']
            if 'datavalue' not in mainsnak:
                continue
            value = mainsnak['datavalue']['value']
            claim_qid = value['id']
            if claim_qid == 'Q4233718':  # anonymous artist
                continue
            values.append({
                'href': url_for('find_more_page',
                                property_id=pid[1:],
                                item_id=value['numeric-id']),
                'qid': claim_qid,
                'label': other.get(claim_qid),
            })

        if values:
            other_props.append({
                'label': prop_label,
                'image_lookup': url_for('find_more_json',
                                        pid=pid,
                                        qid=[v['qid'] for v in values]),
                'pid': pid,
                'values': values,
                'images': [],
            })

    return render_template('next.html',
                           qid=qid,
                           label=label,
                           image=image,
                           labels=find_more_props,
                           other=other,
                           entity=entity,
                           other_props=other_props)