def _handle_metadata(catalog, entity):
    """Look up the class QID and the URL prefix for a catalog/entity pair.

    The URL prefix is read from ``CATALOG_ENTITY_URLS`` under the key
    ``'<catalog>_<entity>'``; a missing entry is logged at debug level and
    reported to the caller as ``None``.

    :param catalog: catalog name, used to build the lookup key
    :param entity: entity name, used to build the lookup key and to pick
      the class QID
    :return: a ``(class_qid, url_prefix)`` tuple
    """
    lookup_key = f'{catalog}_{entity}'
    url_prefix = CATALOG_ENTITY_URLS.get(lookup_key)
    if url_prefix is None:
        LOGGER.debug('URL not available for %s %s', catalog, entity)

    # Human is the default when the relevant class QID
    # is not an instance-of (P31) value
    if SUPPORTED_ENTITIES.get(entity) != keys.CLASS_QUERY:
        return HUMAN_QID, url_prefix

    return target_database.get_class_qid(catalog, entity), url_prefix
def gather_qids(entity, catalog, catalog_pid):
    """Run the dataset query for a catalog/entity pair and return the QIDs.

    :param entity: entity name, looked up in ``constants.SUPPORTED_ENTITIES``
      to build the query type
    :param catalog: catalog name, used to resolve the class QID
    :param catalog_pid: passed through to ``sparql_queries.run_query``
    :return: a ``set`` built from whatever the query yields
    """
    LOGGER.info(
        'Gathering Wikidata %s items with no %s identifiers ...',
        entity,
        catalog,
    )

    # The query type is a pair: the dataset marker plus the entity's
    # supported query kind.
    query_kind = (keys.DATASET, constants.SUPPORTED_ENTITIES.get(entity))
    class_qid = target_database.get_class_qid(catalog, entity)

    results = sparql_queries.run_query(query_kind, class_qid, catalog_pid, 0)
    # Materialize into a set so that repeated results collapse
    found = set(results)

    LOGGER.info('Got %d Wikidata items', len(found))
    return found
def gather_target_ids(entity, catalog, catalog_pid, aggregated):
    """Collect catalog identifiers from Wikidata into ``aggregated``, in place.

    Runs the identifier query and, for every ``(qid, target_id)`` pair it
    yields, adds ``target_id`` to the set stored at
    ``aggregated[qid][keys.TID]``. Entries and identifier sets are created
    on demand, so a QID that already has an entry lacking ``keys.TID``
    (e.g. one filled in by another caller) is extended rather than
    triggering a ``KeyError``.

    :param entity: entity name, looked up in ``constants.SUPPORTED_ENTITIES``
      to build the query type
    :param catalog: catalog name, used to resolve the class QID
    :param catalog_pid: passed through to ``sparql_queries.run_query``
    :param aggregated: ``dict`` keyed by QID; mutated in place
    :return: ``None``
    """
    LOGGER.info(
        'Gathering Wikidata %s items with %s identifiers ...', entity, catalog
    )

    query_type = keys.IDENTIFIER, constants.SUPPORTED_ENTITIES.get(entity)

    for qid, target_id in sparql_queries.run_query(
        query_type,
        target_database.get_class_qid(catalog, entity),
        catalog_pid,
        0,
    ):
        entry = aggregated.get(qid)
        if not entry:
            # Missing or empty entry: start a fresh one
            entry = aggregated[qid] = {}
        # Create the identifier set only if this entry does not have one yet,
        # so data already stored under other keys is preserved
        entry.setdefault(keys.TID, set()).add(target_id)

    # NOTE: the count is the number of QIDs held in ``aggregated``,
    # not the number of individual identifiers added
    LOGGER.info('Got %d %s identifiers', len(aggregated), catalog)