def try_map_response_to_movement(response: Dict) -> Dict:
    """Extract the movement specific oab attributes from a wikidata entity

    Args:
        response: wikidata entity to map to an oab entity

    Returns:
        A dict with the start time, end time, country, has part and part of
        attributes of the movement entity
    """
    oab_type = MOVEMENT[SINGULAR]
    movement_dict = {}

    for time_key in (START_TIME, END_TIME):
        movement_dict[time_key] = map_wd_attribute.try_get_year_from_property_timestamp(
            response, PROPERTY_NAME_TO_PROPERTY_ID[time_key], oab_type
        )

    # Only the qid is stored here, labels to be resolved later
    movement_dict[COUNTRY] = map_wd_attribute.try_get_first_qid(
        response, PROPERTY_NAME_TO_PROPERTY_ID[COUNTRY], oab_type
    )

    for reference_key in (HAS_PART, PART_OF):
        movement_dict[reference_key] = map_wd_attribute.try_get_qid_reference_list(
            response, PROPERTY_NAME_TO_PROPERTY_ID[reference_key], oab_type
        )

    return movement_dict
def try_map_response_to_subject(
    response: Dict,
    type_name: str,
    language_keys: Optional[List[str]] = lang_keys,
) -> Optional[Dict]:
    """Maps the default attributes which every subject has:
    qid, image, label, description, classes, wikipediaLink
    (including language specific attributes)

    Args:
        response: The wikidata entity which should be mapped to an
            openArtBrowser entity
        type_name: Type name of the entity
        language_keys: All language keys which should be extracted.
            Defaults to the module level lang_keys; passing None falls
            back to that default as well

    Returns:
        A dict of an openArtBrowser entity, or None if the wikidata entity
        has no readable id (the error is logged and the item is skipped)
    """
    # The annotation allows None, so treat it as "use the configured keys"
    # instead of failing when iterating below.
    if language_keys is None:
        language_keys = lang_keys

    try:
        qid = response[ID]
    except Exception as error:
        logger.error("Error on qid, skipping item. Error: {0}".format(error))
        return None

    # How to get image url
    # https://stackoverflow.com/questions/34393884/how-to-get-image-url-property-from-wikidata-item-by-api
    try:
        image = get_image_url_by_name(
            response[CLAIMS][PROPERTY_NAME_TO_PROPERTY_ID[IMAGE]][0][MAINSNAK][
                DATAVALUE
            ][VALUE]
        )
    except Exception:
        # Best effort: an entity without a usable image claim gets an empty
        # image. Exception (not a bare except) keeps KeyboardInterrupt and
        # SystemExit propagating.
        image = ""

    label = map_wd_attribute.try_get_label_or_description(
        response, LABEL[PLURAL], EN, type_name
    )
    description = map_wd_attribute.try_get_label_or_description(
        response, DESCRIPTION[PLURAL], EN, type_name
    )
    classes = map_wd_attribute.try_get_qid_reference_list(
        response, PROPERTY_NAME_TO_PROPERTY_ID[CLASS[SINGULAR]], type_name
    )

    subject_dict = {
        ID: qid,
        CLASS[PLURAL]: classes,
        LABEL[SINGULAR]: label,
        DESCRIPTION[SINGULAR]: description,
        IMAGE: image,
    }

    # Language specific attributes are stored under "<attribute>_<langkey>"
    for langkey in language_keys:
        label_lang = map_wd_attribute.try_get_label_or_description(
            response, LABEL[PLURAL], langkey, type_name
        )
        description_lang = map_wd_attribute.try_get_label_or_description(
            response, DESCRIPTION[PLURAL], langkey, type_name
        )
        wikipedia_link_lang = map_wd_attribute.try_get_wikipedia_link(
            response, langkey, type_name
        )
        subject_dict.update(
            {
                f"{LABEL[SINGULAR]}_{langkey}": label_lang,
                f"{DESCRIPTION[SINGULAR]}_{langkey}": description_lang,
                f"{WIKIPEDIA_LINK}_{langkey}": wikipedia_link_lang,
            }
        )

    return subject_dict
def try_map_response_to_artist(response: Dict) -> Dict:
    """Extract the artist specific oab attributes from a wikidata entity

    Args:
        response: wikidata entity to map to an oab entity

    Returns:
        A dict with the gender, birth/death dates, birth/death places,
        citizenship and movements of the artist entity
    """
    oab_type = ARTIST[SINGULAR]

    artist_dict = {
        GENDER: map_wd_attribute.try_get_first_qid(
            response, PROPERTY_NAME_TO_PROPERTY_ID[GENDER], oab_type
        )
    }

    for date_key in (DATE_OF_BIRTH, DATE_OF_DEATH):
        artist_dict[date_key] = map_wd_attribute.try_get_year_from_property_timestamp(
            response, PROPERTY_NAME_TO_PROPERTY_ID[date_key], oab_type
        )

    # Only qids are stored for these attributes, labels to be resolved later
    for qid_key in (PLACE_OF_BIRTH, PLACE_OF_DEATH, CITIZENSHIP):
        artist_dict[qid_key] = map_wd_attribute.try_get_first_qid(
            response, PROPERTY_NAME_TO_PROPERTY_ID[qid_key], oab_type
        )

    artist_dict[MOVEMENT[PLURAL]] = map_wd_attribute.try_get_qid_reference_list(
        response, PROPERTY_NAME_TO_PROPERTY_ID[MOVEMENT[SINGULAR]], oab_type
    )

    return artist_dict
def try_map_response_to_location(response: Dict) -> Dict:
    """Maps the oab location attributes from the wikidata entity to the
    location entity

    Args:
        response: wikidata entity to map to an oab entity

    Returns:
        A dict of a location entity with country, website, part of, latitude
        and longitude. Latitude and longitude are empty strings when the
        coordinate claim cannot be read.
    """
    country = map_wd_attribute.try_get_first_qid(
        response, PROPERTY_NAME_TO_PROPERTY_ID[COUNTRY], LOCATION[SINGULAR]
    )
    website = map_wd_attribute.try_get_first_value(
        response, PROPERTY_NAME_TO_PROPERTY_ID[WEBSITE], LOCATION[SINGULAR]
    )
    part_of = map_wd_attribute.try_get_qid_reference_list(
        response, PROPERTY_NAME_TO_PROPERTY_ID[PART_OF], LOCATION[SINGULAR]
    )

    try:
        coordinate = response[CLAIMS][PROPERTY_NAME_TO_PROPERTY_ID[COORDINATE]][0][
            MAINSNAK
        ][DATAVALUE][VALUE]
        lat = coordinate[LATITUDE[SINGULAR]]
        lon = coordinate[LONGITUDE[SINGULAR]]
    except Exception as error:
        # Look the id up defensively: raising from inside this handler would
        # turn the best-effort fallback into a crash for entities without id.
        try:
            qid = response[ID]
        except (KeyError, TypeError):
            qid = "unknown"
        logger.info(
            "Error on item {0}, property {1}, type {2}, error {3}".format(
                qid,
                PROPERTY_NAME_TO_PROPERTY_ID[COORDINATE],
                LOCATION[SINGULAR],
                error,
            )
        )
        lat = ""
        lon = ""

    return {
        COUNTRY: country,
        WEBSITE: website,
        PART_OF: part_of,
        LATITUDE[ABBREVIATION]: lat,
        LONGITUDE[ABBREVIATION]: lon,
    }
def get_classes(
    type_name: str,
    qids: List[str],
    already_extracted_superclass_ids: Optional[Set[str]] = None,
    language_keys: Optional[List[str]] = lang_keys,
) -> List[Dict]:
    """Function to extract the classes of the extracted wikidata entities
    (meaning the 'instance of' attribute wikidata entity qids).
    Their subclasses are also extracted recursively (also called transitive
    closure)

    Args:
        type_name: oab type e. g. movement
        qids: List of qids to extract the labels from
        already_extracted_superclass_ids: A set of already extracted
            superclass ids for the recursive calls, this is also the anchor
            to stop recursion. A fresh empty set is used when omitted.
        language_keys: All language keys which should be extracted.
            Defaults to the module level lang_keys; passing None falls
            back to that default as well

    Returns:
        Returns a list of dicts with the classes from the oab entities and
        their subclasses
    """
    # A `set()` default would be created once and shared by every call that
    # omits the argument, leaking extracted ids between independent runs.
    # `is None` (not truthiness) keeps an empty set passed by a caller as the
    # very same object, so in-place updates stay visible to that caller.
    if already_extracted_superclass_ids is None:
        already_extracted_superclass_ids = set()
    if language_keys is None:
        language_keys = lang_keys

    print(datetime.datetime.now(), f"Starting with {type_name}")
    if type_name == CLASS[PLURAL]:
        print(
            f"Total {type_name} to extract (only 'instance_of' of the provided qids): {len(qids)}"
        )
    else:
        print(
            f"Total {type_name} to extract (only 'subclass_of' of the provided qids): {len(qids)}"
        )

    item_count = 0
    extract_dicts = []
    # The chunksize 50 is allowed by the wikidata api, bigger numbers need
    # special permissions
    chunk_size = 50
    classes_id_chunks = chunks(list(qids), chunk_size)
    for chunk in classes_id_chunks:
        query_result = wikidata_entity_request(chunk)
        if ENTITIES not in query_result:
            logger.error("Skipping chunk")
            continue

        for result in query_result[ENTITIES].values():
            try:
                qid = result[ID]
            except Exception as error:
                logger.error(
                    "Error on qid, skipping item. Error: {0}".format(error)
                )
                continue

            label = map_wd_attribute.try_get_label_or_description(
                result, LABEL[PLURAL], EN, type_name
            )
            description = map_wd_attribute.try_get_label_or_description(
                result, DESCRIPTION[PLURAL], EN, type_name
            )
            subclass_of = map_wd_attribute.try_get_qid_reference_list(
                result, PROPERTY_NAME_TO_PROPERTY_ID[SUBCLASS_OF], type_name
            )
            class_dict = {
                ID: qid,
                LABEL[SINGULAR]: label,
                DESCRIPTION[SINGULAR]: description,
                SUBCLASS_OF: subclass_of,
            }

            # Language specific attributes are stored under
            # "<attribute>_<langkey>"
            for langkey in language_keys:
                label_lang = map_wd_attribute.try_get_label_or_description(
                    result, LABEL[PLURAL], langkey, type_name
                )
                description_lang = map_wd_attribute.try_get_label_or_description(
                    result, DESCRIPTION[PLURAL], langkey, type_name
                )
                class_dict.update(
                    {
                        f"{LABEL[SINGULAR]}_{langkey}": label_lang,
                        f"{DESCRIPTION[SINGULAR]}_{langkey}": description_lang,
                    }
                )
            extract_dicts.append(class_dict)

        item_count += len(chunk)
        print(
            f"Status of {type_name}: {item_count}/{len(qids)}",
            end="\r",
            flush=True,
        )

    # get_classes itself is handed over as the extraction function for the
    # superclasses referenced via SUBCLASS_OF.
    return load_entities_by_attribute_with_transitive_closure(
        extract_dicts,
        SUBCLASS_OF,
        CLASS[PLURAL],
        already_extracted_superclass_ids,
        get_classes,
        [],
    )
def get_subject(
    type_name: str,
    qids: List[str],
    already_extracted_movement_ids: Optional[Set[str]] = None,
    language_keys: Optional[List[str]] = lang_keys,
) -> List[Dict]:
    """Extract subjects (in our definition everything except artworks e. g.
    movements, motifs, etc.) from wikidata

    Args:
        type_name: oab type name e. g. movements (Caution type names are
            always plural here)
        qids: A list of qids extracted from the artworks
        already_extracted_movement_ids: Set of movement ids which were
            already extracted. Every movement extracted here is added to it,
            and it is passed on to the part of / has part closure lookups.
            A fresh empty set is used when omitted.
        language_keys: All language keys which should be extracted.
            Defaults to the module level lang_keys; passing None falls
            back to that default as well

    Returns:
        A list of dicts with the subjects transformed from wikidata entities
        to oab entities
    """
    # This set is mutated below; a `set()` default would be created once and
    # shared by every call that omits the argument. `is None` (not
    # truthiness) keeps an empty set passed by a caller as the same object.
    if already_extracted_movement_ids is None:
        already_extracted_movement_ids = set()
    if language_keys is None:
        language_keys = lang_keys

    print(datetime.datetime.now(), f"Starting with {type_name}")
    print(f"Total {type_name} to extract: {len(qids)}")

    item_count = 0
    extract_dicts = []
    # The chunksize 50 is allowed by the wikidata api, bigger numbers need
    # special permissions
    chunk_size = 50
    subject_id_chunks = chunks(list(qids), chunk_size)
    for chunk in subject_id_chunks:
        query_result = wikidata_entity_request(chunk)
        if ENTITIES not in query_result:
            logger.error("Skipping chunk")
            continue

        for result in query_result[ENTITIES].values():
            # Forward the requested languages; previously the parameter was
            # accepted but silently ignored here.
            subject_dict = map_wd_response.try_map_response_to_subject(
                result, type_name, language_keys
            )
            if subject_dict is None:
                continue

            if type_name in (MOVEMENT[PLURAL], ARTIST[PLURAL]):
                influenced_by = map_wd_attribute.try_get_qid_reference_list(
                    result, PROPERTY_NAME_TO_PROPERTY_ID[INFLUENCED_BY], type_name
                )
                subject_dict.update({INFLUENCED_BY: influenced_by})
            if type_name == MOVEMENT[PLURAL]:
                subject_dict.update(
                    map_wd_response.try_map_response_to_movement(result)
                )
                already_extracted_movement_ids.add(subject_dict[ID])
            if type_name == ARTIST[PLURAL]:
                subject_dict.update(
                    map_wd_response.try_map_response_to_artist(result)
                )
            if type_name == LOCATION[PLURAL]:
                subject_dict.update(
                    map_wd_response.try_map_response_to_location(result)
                )
            extract_dicts.append(subject_dict)

        item_count += len(chunk)
        print(
            f"Status of {type_name}: {item_count}/{len(qids)}",
            end="\r",
            flush=True,
        )

    if type_name == MOVEMENT[PLURAL]:
        # Movements referenced via part of / has part are loaded as well,
        # with get_subject itself handed over as the extraction function.
        extract_dicts = load_entities_by_attribute_with_transitive_closure(
            extract_dicts,
            PART_OF,
            MOVEMENT[PLURAL],
            already_extracted_movement_ids,
            get_subject,
            [ART_MOVEMENT[ID], ART_STYLE[ID]],
        )
        extract_dicts = load_entities_by_attribute_with_transitive_closure(
            extract_dicts,
            HAS_PART,
            MOVEMENT[PLURAL],
            already_extracted_movement_ids,
            get_subject,
            [ART_MOVEMENT[ID], ART_STYLE[ID]],
        )
        # Movements return here, so the "Finished" message below is only
        # printed for the other types.
        return extract_dicts

    print(datetime.datetime.now(), f"Finished with {type_name}")
    return extract_dicts