def set_location(self):
    """
    Set the location from the 'by' column.

    The column value is used as a page title on Danish Wikipedia;
    when it contains exactly one wikilink, the link target is used
    instead of the raw text. If that page exists, its Wikidata item
    is looked up and added as location, provided at least one of the
    item's P31 values is listed in the "settlement" data file.

    Nothing is added (and nothing is reported) when the column is
    empty, the page doesn't exist, or no P31 value matches.
    """
    if not self.has_non_empty_attribute("by"):
        return

    page_title = self.by
    if utils.count_wikilinks(page_title) == 1:
        page_title = utils.get_wikilinks(page_title)[0].title

    if not utils.wp_page_exists("da", page_title):
        return

    candidate = utils.q_from_wikipedia("da", page_title)
    if not candidate:
        return

    # An item can have more than one P31; the first one that is a known
    # settlement type is enough, so any() stops checking right there.
    instance_of = utils.get_P31(candidate, self.repo)
    if any(p31 in self.data_files["settlement"] for p31 in instance_of):
        self.add_statement("location", candidate)
def set_location(self):
    """
    Set the location from the 'localidad' field.

    If the field contains exactly one wikilink, the item behind that
    link is the candidate; otherwise the raw field value is tried as
    an article title on Spanish Wikipedia. The candidate is accepted
    only if it matches exactly one entry of the "settlements" data
    file (searched by its "item" key).

    When no location could be established, the raw 'localidad' value
    is added to the report.
    """
    settlements = self.data_files["settlements"]
    location = None

    if self.has_non_empty_attribute("localidad"):
        raw_value = self.localidad
        if utils.count_wikilinks(raw_value) == 1:
            candidate = utils.q_from_first_wikilink("es", raw_value)
        else:
            candidate = utils.q_from_wikipedia("es", raw_value)

        # Both branches validate the candidate the same way.
        matches = utils.get_item_from_dict_by_key(
            dict_name=settlements,
            search_term=candidate,
            search_in="item")
        if len(matches) == 1:
            location = candidate

    if not location:
        self.add_to_report("localidad", self.localidad, "location")
        return
    self.add_statement("location", location)
def set_location(self):
    """
    Set location (P276) of object.

    Two independent sources are used, and each may add a statement:

    * 'plats': used only when it contains exactly one wikilink,
      e.g. [[Tyresta]], and the linked svwp article resolves to a
      Wikidata item. In every other case the raw value is added to
      the report. It's just a handful of items that have this field.
    * 'socken': all objects should have socken/landskap, so it is
      used as location as well. It is resolved with get_socken();
      if that yields nothing, the "socken" data file is searched for
      entries whose label starts with the socken name, and the match
      is used only when it is unambiguous (exactly one entry).
      Otherwise "socken (landskap)" is added to the report.
    """
    if self.has_non_empty_attribute("plats"):
        place_item = None
        wikilinks = utils.get_wikilinks(self.plats)
        if len(wikilinks) == 1:
            place_item = utils.q_from_wikipedia("sv", wikilinks[0].title)
        # Only add a statement when the link actually resolved to an
        # item; an unresolved link is reported like an unlinked value.
        if place_item:
            self.add_statement("location", place_item)
        else:
            self.add_to_report("plats", self.plats)

    if self.has_non_empty_attribute("socken"):
        socken_item = self.get_socken(self.socken, self.landskap)
        if not socken_item:
            # Fallback: prefix match on the label, accepted only when
            # exactly one entry matches.
            candidates = [
                row["item"]
                for row in self.data_files["socken"]
                if row["itemLabel"].startswith(self.socken)
            ]
            if len(candidates) == 1:
                socken_item = candidates[0]

        if socken_item:
            self.add_statement("location", socken_item)
        else:
            raw_socken = "{} ({})".format(self.socken, self.landskap)
            self.add_to_report("socken", raw_socken)
def set_parts(self):
    """
    Add a 'has_part' statement for every article linked in 'enlace'.

    Each wikilink found in the field is looked up on Spanish
    Wikipedia. Links that resolve to a Wikidata item are added as
    'has_part'; for each link that does not, the raw field value is
    added to the report.
    """
    if not self.has_non_empty_attribute("enlace"):
        return

    for wikilink in utils.get_wikilinks(self.enlace):
        item = utils.q_from_wikipedia("es", wikilink.title)
        if not item:
            self.add_to_report("enlace", self.enlace, "has_part")
            continue
        self.add_statement("has_part", item)
def process_wp_reserves():
    """
    Match a Petscan-generated list of svwp nature reserve articles
    against the source file.

    Articles whose title starts with "Lista" or contains
    "nationalpark" are skipped. Every other article is looked up in
    the source data and filed into one of three reports, depending on
    how reliable the match is: exactly one match, several matches, or
    no match at all.

    The affected report is written to disk in full right after each
    lookup, not once at the end of the run, so that intermediate
    results can be inspected while processing is still going on.
    """
    exact_path = "svwp_to_nature_id_exact.json"
    none_path = "svwp_to_nature_id_none.json"
    multiple_path = "svwp_to_nature_id_multiple.json"
    exact_matches = []
    no_matches = []
    multiple_matches = []

    wp_articles = read_wp_nr_list()
    total = len(wp_articles)
    source_rows = read_reserve_csv()
    print("Processing {} svwp articles.".format(total))

    processed = 0
    for title in wp_articles:
        # Lists and national parks are not nature reserves.
        if title.startswith("Lista") or "nationalpark" in title.lower():
            continue

        processed += 1
        if processed % 10 == 0:
            print("Processed {}/{}...".format(processed, total))

        candidates = find_wp_reserve_in_data_file(title, source_rows)
        if len(candidates) == 1:
            match = candidates[0]
            exact_matches.append({
                "wp_article": title,
                "source_name": match["name"],
                "nature_id": match["nature_id"],
                "item": utils.q_from_wikipedia("sv", title),
            })
            utils.json_to_file(exact_path, exact_matches)
        elif len(candidates) > 1:
            multiple_matches.append({
                "wp_article": title,
                "nature_id": [row["nature_id"] for row in candidates],
            })
            utils.json_to_file(multiple_path, multiple_matches)
        else:
            no_matches.append({"wp_article": title, "nature_id": ""})
            utils.json_to_file(none_path, no_matches)
def exists_with_monument_article(self, language,
                                 article_keyword="monument_article"):
    """
    Get the wd item connected to monument_article (or equivalent).

    :param language: language code of the Wikipedia to look in.
    :param article_keyword: name of the attribute that holds the
        article title.
    :return: whatever utils.q_from_wikipedia gives for the title, or
        None if the attribute is empty or the title contains '#',
        which indicates a link to a section rather than an article.
    """
    if not self.has_non_empty_attribute(article_keyword):
        return None

    title = getattr(self, article_keyword)
    if "#" in title:
        return None
    return utils.q_from_wikipedia(language, title)
def set_architect(self):
    """
    Set the architect.

    Only wikilinked names in 'arkitekt' are considered, and there can
    be more than one. Each link is looked up on Swedish Wikipedia and
    added as 'architect' only if the resulting item passes the P31
    whitelist check for Q5 (human).

    For every link that cannot be used, whether because no item was
    found or because the item is not a human, the raw field value is
    added to the report, so no linked architect is dropped silently.
    """
    if not self.has_non_empty_attribute("arkitekt"):
        return

    for wikilink in utils.get_wikilinks(self.arkitekt):
        item = utils.q_from_wikipedia("sv", wikilink.title)
        # The whitelist check is only made when an item was found.
        if item and utils.is_whitelisted_P31(item, self.repo, ["Q5"]):
            self.add_statement("architect", item)
        else:
            self.add_to_report("arkitekt", self.arkitekt)
def set_location(self):
    """
    Set Location property from article linked in localitate.

    Run this after set_adm_location.

    localitate can contain several links (we take the first, which
    seems to be the most granular one) and a mix of administrative
    types. The item behind that link is added as location unless it
    equals the first 'located_adm' value already set on the object,
    so that the two statements are not the same. If there is no
    'located_adm' value to compare with, the item is added as is.

    When the field has no wikilink, or the first link does not
    resolve to an item, the raw value is added to the report.
    """
    if not self.has_non_empty_attribute("localitate"):
        return

    loc_item = None
    if utils.count_wikilinks(self.localitate) > 0:
        first_link = utils.get_wikilinks(self.localitate)[0]
        loc_item = utils.q_from_wikipedia("ro", first_link.title)

    if not loc_item:
        self.add_to_report("localitate", self.localitate, "location")
        return

    # There may be no administrative location statement at all (e.g.
    # it could not be resolved); don't index into an empty result.
    adm_items = self.get_statement_values("located_adm") or []
    if not adm_items or loc_item != adm_items[0]:
        self.add_statement("location", loc_item)
def set_adm_location(self):
    """
    Set the administrative location.

    The raw 'gemeinde' value is first tried as an article title on
    German Wikipedia. If that gives no item (None), fall back to the
    "councils" data file: the entry whose "iso" value matches the
    object's region_iso is used, provided exactly one entry matches.

    If neither approach yields an item, the raw 'gemeinde' value is
    added to the report.
    """
    municipality = self.gemeinde
    adm_item = utils.q_from_wikipedia("de", municipality)

    if adm_item is None:
        iso_matches = utils.get_item_from_dict_by_key(
            dict_name=self.data_files["councils"],
            search_term=self.region_iso,
            search_in="iso")
        if len(iso_matches) == 1:
            adm_item = iso_matches[0]

    if not adm_item:
        self.add_to_report("gemeinde", self.gemeinde, "located_adm")
        return
    self.add_statement("located_adm", adm_item)