def parse():
    for feature in coords["features"]:
        pid = feature["properties"]["id"]
        coord = feature["geometry"]["coordinates"]
        # GeoJSON coordinates are [lon, lat]; flip to [lat, lon]
        coord.reverse()

        plantage = get_plantage(pid)
        qid = plantage["qid"]
        name = plantage["name"]

        if not qid:
            continue

        print()
        print("---" * 20)
        print(pid, coord, name, qid)

        item = WikidataItem(qid)
        claims = item.get_claims()

        if Props.COORDINATES in claims:
            print("Already has coordinates, skipping!")
            continue

        item.add_coordinate(Props.COORDINATES, coord,
                            references=get_refs(item, pid))
def add_image_alias(item):
    qid = item["item"]
    img = item["Afbeelding"]
    alias = item["alias"]

    item = WikidataItem(qid)
    claims = item.get_claims()
    aliases = item.get_aliases("nl")

    if img != "" and Props.IMAGE not in claims:
        item.add_commonsmedia(
            Props.IMAGE,
            img,
            references=[
                item.get_item_claim(Props.IMPORTED_FROM, Items.WIKIPEDIA_NL),
                item.get_url_claim(Props.WM_IMPORT_URL, WP_PERMALINK)
            ]
        )
    else:
        print("has an image already")

    if (alias != "") and (not aliases):
        print(f"Setting alias: {alias}")
        item.edit_aliases({
            "nl": [alias]
        })
    else:
        print("Already has aliases")
def main():
    # Mariniersbrug
    qid = "Q2461755"
    item = WikidataItem(qid)
    claims = item.get_claims()
    monument_id = claims[Props.RIJKSMONUMENT_ID]
    print(f"monument_id: {monument_id}")
def add_sites():
    PATH = str(Path(__file__).parent)
    sites = Knead(PATH + "/data/zomergasten/guest-sites.csv").data()

    for site in sites:
        qid = site["qid"]
        url = site["url"]
        name = site["guest"]

        print()
        print(f"Now handling {qid} / {name}")

        item = WikidataItem(qid)
        claims = item.get_claims()

        if Props.OFFICIAL_WEBSITE in claims:
            print("Already got a site, skip")
            continue

        item.add_url_claim(
            Props.OFFICIAL_WEBSITE,
            url,
            qualifiers=[item.get_item_claim(Props.LANGUAGE_WORK, Items.DUTCH)],
            references=[
                item.get_claim(Props.RETRIEVED, wbtime_now()),
                item.get_url_claim(
                    Props.REF_URL,
                    "https://www.vpro.nl/programmas/zomergasten/a-z.html"),
                item.get_item_claim(Props.LANGUAGE_WORK, Items.DUTCH)
            ])
def main(): items = Knead(PATH + "/data/uds/monuments-with-qids.csv").data() skiplist = Skiplist("projects/skiplists/uds.txt") for index, item in enumerate(items): print(item) qid = item["qid"] bag = item["bag_ok"] url = item["url"] print() print(f"#{index} / #{len(items)}") print(f"Handling {qid} / {bag} / {url}") if skiplist.has(qid): print(f"{qid} in skiplist, skipping") continue wd_item = WikidataItem(qid) claims = wd_item.get_claims() if Props.BAG_BUILDING in claims: print("This item already has a BAG building ID, skipping") continue wd_item.add_string_claim( Props.BAG_BUILDING, bag, references=[ wd_item.get_item_claim(Props.STATED_IN, Items.UDS_DOC), wd_item.get_url_claim(Props.REF_URL, url), wd_item.get_item_claim(Props.LANGUAGE_WORK, Items.DUTCH) ]) skiplist.add(qid)
class BotJob:
    def __init__(self, data, item=None, dry_run=False):
        self.is_aborted = False
        self.data = data
        self.dry_run = dry_run
        self.item = item

    def abort(self, message):
        print(f"Abort: {message}")
        self.is_aborted = True

    # Lifted from https://github.com/multichill/toollabs/blob/master/bot/wikidata/artdatabot.py
    def archive_url(self, url):
        """
        Links to paintings are subject to link rot. When creating a new item,
        have the Wayback Machine make a snapshot. That way we always have a
        copy of the page we used to source a bunch of statements.

        See also https://www.wikidata.org/wiki/Wikidata:WikiProject_sum_of_all_paintings/Link_rot
        """
        print(f"Backing up to the Wayback Machine: {url}")
        wayback_url = f"https://web.archive.org/save/{url}"

        try:
            requests.post(wayback_url)
        except requests.exceptions.RequestException:
            print("Wayback Machine save failed")

    def create_item(self, summary, labels, descriptions=None, aliases=None):
        if self.item:
            raise Exception("Job already has an item")

        print("Creating new item")
        dd({
            "summary": summary,
            "labels": labels,
            "descriptions": descriptions,
            "aliases": aliases
        })

        try:
            self.item = WikidataItem(summary=summary,
                                     labels=labels,
                                     descriptions=descriptions,
                                     aliases=aliases)
        except Exception as e:
            print("Got an exception while creating item", e)
            # Re-raise, keeping the original traceback
            raise

        print("Okay, created a new item")

    def has_prop(self, pid):
        claims = self.item.get_claims()
        return pid in claims

    def set_item_by_qid(self, qid):
        self.item = WikidataItem(qid)
def main(): items = Knead("projects/data/churches/import.json").data() skiplist = Skiplist("projects/skiplists/churches.txt") for index, item in enumerate(items): qid = item["qid"] title = item["title"] print() print(f"#{index} / #{len(items)}") print(f"Handling {qid} / {title}") if not any([item["inception"], item["demolished"], item["restored"]]): print("No inception, demolished, restored, skipping") continue print(item) if skiplist.has(qid): print(f"{qid} in skiplist, skipping") continue wd_item = WikidataItem(qid) claims = wd_item.get_claims() permalink = get_permalink("nl", title) if item["inception"] and Props.INCEPTION not in claims: set_year_claim(wd_item, Props.INCEPTION, item["inception"], permalink) if item["demolished"] and Props.DISSOLVED not in claims: set_year_claim(wd_item, Props.DISSOLVED, item["demolished"], permalink) if item["restored"] and Props.SIG_EVENT not in claims: set_sig_claim(wd_item, Items.RECONSTRUCTION, item["restored"], permalink) if item["expanded"] and Props.SIG_EVENT not in claims: set_sig_claim(wd_item, Items.BUILDING_EXPANSION, item["expanded"], permalink) skiplist.add(qid)
def add_party_data(row):
    print("----" * 20)
    print()
    print(row)

    title = row["title"]
    qid = row["qid"]

    if skiplist.has(qid):
        print("In skiplist, skipping")
        return

    item = WikidataItem(qid)

    if Props.NR_OF_SEATS in item.get_claims():
        print("Got seats already, skipping party")
        return

    for key, val in row.items():
        # Year columns are the only all-digit keys in the row
        if not key.isdigit():
            continue

        year = int(key)

        if val == "":
            continue

        seats = int(val)
        print(f"{title} ({qid}) had {seats} seats in {year}")

        item.add_quantity_claim(
            Props.NR_OF_SEATS,
            seats,
            qualifiers=[
                item.get_item_claim(Props.LEGISLATIVE_BODY, Items.NL_LOWER_HOUSE),
                item.get_claim(Props.START_TIME, WbTime(year=year))
            ],
            references=[
                item.get_item_claim(Props.IMPORTED_FROM, Items.WIKIPEDIA_NL),
                item.get_url_claim(Props.WM_IMPORT_URL, WP_PERMALINK)
            ])

    skiplist.add(qid)
def iterate(self):
    for index, item in enumerate(self.data):
        if self.qid_key not in item or self.empty_check(item[self.qid_key]):
            print(f"This item has no QID, skipping, {item}")
            continue

        qid = item[self.qid_key]

        print()
        print(f"#{index + 1}/{len(self.data)} / {qid}")
        print(f"Data: {item}")
        print()

        if self.skiplist.has(qid):
            print(f"{qid} in skiplist, skipping")
            continue

        # This is just a hook for doing a sanity check before fetching the data
        if not self.precheck_data(item):
            print("This item did not pass precheck, skipping")
            continue

        try:
            wd_item = WikidataItem(qid)
        except Exception as e:
            print(f"Exception, not yielding this job: {e}")
            continue

        job = BotJob(data=item, item=wd_item)
        yield job
        self.skiplist.add(qid)

        if self.run_once:
            print("Only running once...")
            sys.exit()

    send_im_message(f"Bot finished running: {self.id}")
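# A minimal sketch of how the iterate() generator and BotJob above could be
# wired together by a calling script. The generator class name (BotGenerator),
# its constructor arguments and the CSV path are assumptions for illustration;
# only iterate(), BotJob.data, BotJob.item, has_prop() and abort() are taken
# from the code above.
def run_example_bot():
    bot = BotGenerator(  # hypothetical wrapper owning data, skiplist and qid_key
        id="example-bot",
        data=Knead("projects/data/example/items.csv").data(),
        qid_key="qid")

    for job in bot.iterate():
        # Skip items that already carry the statement we want to add
        if job.has_prop(Props.INVENTORY_NR):
            job.abort("Already has an inventory number")
            continue

        job.item.add_string_claim(Props.INVENTORY_NR, job.data["id"])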
def main(): items = Knead("projects/data/churchseats/seats-qids.csv").data() skiplist = Skiplist("projects/skiplists/churchseats.txt") permalink = "https://nl.wikipedia.org/w/index.php?title=Lijst_van_grootste_Nederlandse_kerkgebouwen_naar_zitplaatsen&oldid=56777124" for index, item in enumerate(items): qid = item["qid"] title = item["name"] seats = item["seats"] print() print(f"#{index} / #{len(items)}") print(f"Handling {qid} / {title} / {seats} seats") print(item) if skiplist.has(qid): print(f"{qid} in skiplist, skipping") continue wd_item = WikidataItem(qid) claims = wd_item.get_claims() if Props.CAPACITY in claims: print("This item already has capacity, skipping") continue wd_item.add_quantity_claim( Props.CAPACITY, seats, references=[ wd_item.get_item_claim(Props.IMPORTED_FROM, Items.WIKIPEDIA_NL), wd_item.get_url_claim(Props.WM_IMPORT_URL, permalink) ]) skiplist.add(qid)
def add_inventory(item):
    qid = item["item"]
    url = item["url"]

    if "https://www.nijmegen.nl/kos/kunstwerk" not in url:
        return

    kid = url.replace("https://www.nijmegen.nl/kos/kunstwerk.aspx?id=", "")

    item = WikidataItem(qid)

    if Props.INVENTORY_NR in item.get_claims():
        print("has inventory!")
        return

    item.add_string_claim(
        Props.INVENTORY_NR,
        kid,
        qualifiers=[
            item.get_item_claim(Props.COLLECTION, Items.PUBLIC_ART_IN_NIJMEGEN)
        ],
        references=get_refs(item, url)
    )
def create_new():
    items = Knead(PATH + "/data/reliwiki/new-churches.csv", has_header=True).data()
    CITY = "Amsterdam"

    for church in items:
        print()
        print("Creating new church", church)
        pageid = church["pageid"]

        # Last, final check that this church doesn't already exist
        if claim_exists(Props.RELIWIKI, f'"{pageid}"'):
            print("This Reliwiki ID exists, skipping")
            continue

        name = church["name"]
        item = WikidataItem(
            summary=f"Creating new item for Dutch church with name {name}",
            labels={
                "en": name,
                "nl": name
            },
            descriptions={
                "de": f"Kirche in {CITY} (Niederlande)",
                "en": f"church in {CITY}, the Netherlands",
                "es": f"iglesia en {CITY} (Holanda)",
                "fr": f"Église d'{CITY} (Pays-Bas)",
                "nl": f"kerk in {CITY}"
            })

        item.add_item_claim(Props.INSTANCE_OF, Items.CHURCH_BUILDING)
        item.add_string_claim(Props.RELIWIKI, pageid)
        item.add_item_claim(Props.COUNTRY, Items.NETHERLANDS)
        item.add_item_claim(Props.LOCATED_IN, church["admin_qid"])

        if church["sonneveld"] != "":
            item.add_string_claim(Props.SONNEVELD, church["sonneveld"],
                                  references=get_refs(item, pageid))

        if church["coordinates"] != "":
            coord = church["coordinates"].split(",")
            item.add_coordinate(Props.COORDINATES, coord,
                                references=get_refs(item, pageid))

        if church["zipcode"] != "":
            item.add_string_claim(Props.ZIP, church["zipcode"],
                                  references=get_refs(item, pageid))

        if church["address"] != "":
            item.add_monoling_claim(Props.STREET_ADDRESS, church["address"], "nl",
                                    references=get_refs(item, pageid))

        if church["denomination_qid"] != "":
            item.add_item_claim(Props.RELIGION, church["denomination_qid"],
                                references=get_refs(item, pageid))

        if church["year_use"] != "":
            if "s" in church["year_use"]:
                # E.g. "1930s" becomes a decade-precision date
                decade = int(church["year_use"].replace("s", ""))
                time = WbTime(year=decade, precision="decade")
            else:
                time = WbTime(year=int(church["year_use"]))

            item.add_time_claim(Props.INCEPTION, time,
                                references=get_refs(item, pageid))

        print()
        break  # stop after the first new church
def add_item(data, qid=None):
    name = data["name"]
    location = data["location"]

    desc = {
        "label_en": f"Stolperstein dedicated to {name}",
        "label_nl": f"Stolperstein ter herinnering aan {name}",
        "description_en": f"stumbling stone in {location}, the Netherlands",
        "description_nl": f"struikelsteen in {location}",
        "aliases_nl": [
            f"struikelsteen ter herinnering aan {name}",
        ]
    }
    print(desc)

    if qid:
        print("HAS QID")
        item = WikidataItem(qid)
    else:
        item = WikidataItem(
            summary=f"Creating new item for a Stolperstein for {name}",
            labels={
                "en": desc["label_en"],
                "nl": desc["label_nl"]
            },
            descriptions={
                "en": desc["description_en"],
                "nl": desc["description_nl"]
            },
            aliases={"nl": desc["aliases_nl"]})

    # These are the same for all stolpersteine
    # item.add_item_claim(Props.INSTANCE_OF, Items.STOLPERSTEIN)
    # item.add_item_claim(Props.PART_OF, Items.STOLPERSTEINE_PROJECT)
    # item.add_item_claim(Props.COUNTRY, Items.NETHERLANDS)
    # item.add_item_claim(Props.CREATOR, Items.GUNTER_DEMNIG)
    # item.add_item_claim(Props.MATERIAL_USED, Items.BRASS)

    # These are not
    # item.add_commonsmedia(Props.IMAGE, data["image"])
    # item.add_item_claim(Props.LOCATED_IN, data["location_qid"])
    # item.add_coordinate(Props.COORDINATES, [data["lat"], data["lon"]])
    # item.add_monoling_claim(Props.INSCRIPTION, data["inscription"], "nl")

    # Hack: only build a reference block when we actually have a source URL
    def get_ref():
        if data["url"]:
            return [
                item.get_claim(Props.RETRIEVED, wbtime_now()),
                item.get_url_claim(Props.REF_URL, data["url"]),
                item.get_item_claim(Props.LANGUAGE_WORK, Items.DUTCH)
            ]
        else:
            return None

    # Inception and opening
    date = partial_date_to_wbtime(data["inception"])
    item.add_time_claim(Props.INCEPTION, date, references=get_ref())
    item.add_item_claim(Props.SIG_EVENT, Items.OPENING_CEREMONY,
                        qualifiers=[item.get_claim(Props.POINT_IN_TIME, date)],
                        references=get_ref())

    # Street as item
    item.add_item_claim(Props.LOCATED_ON_STREET, data["street_qid"],
                        qualifiers=[
                            item.get_string_claim(Props.STREET_NUMBER, data["street_nr"])
                        ])

    # Street as address
    address = data["street"] + " " + data["street_nr"] + ", " + data["location"]
    item.add_monoling_claim(Props.STREET_ADDRESS, address, "nl")

    item.add_item_claim(
        Props.COMMEMORATES, "somevalue",
        qualifiers=[item.get_monoling_claim(Props.NAME, name, "nl")])
def match_seasons():
    PATH = str(Path(__file__).parent)
    seasons = Knead(PATH + "/data/zomergasten/seasons.csv").data()
    episodes = Knead(PATH + "/data/zomergasten/episodes.csv").data()
    skiplist = Skiplist(PATH + "/skiplists/zomergasten-seasons.txt")

    def get_season_by_year(year):
        for season in seasons:
            if season["year"] == year:
                return season

        return None

    prev_ep = None
    next_ep = None
    cur_year = "1988"
    ep_index = 1

    for index, episode in enumerate(episodes):
        ep_qid = episode["item"]
        ep_year = episode["year"]
        ep_title = episode["itemLabel"]

        season = get_season_by_year(ep_year)
        season_qid = season["item"]
        season_title = season["itemLabel"]

        if skiplist.has(ep_qid):
            print(f"{ep_qid} ({ep_title}) in skiplist, skipping")

            # Keep the episode counter and previous episode in sync anyway
            if season["year"] != cur_year:
                print("reset")
                ep_index = 1
                cur_year = season["year"]

            prev_ep = episode
            ep_index += 1
            continue

        if season["year"] != cur_year:
            ep_index = 1
            cur_year = season["year"]

        try:
            next_ep = episodes[index + 1]
        except IndexError:
            next_ep = None

        print("---" * 20)
        print(f"{ep_qid} - {ep_title} / #{ep_index} {season_qid} {season_title}")
        print(f"{prev_ep} / {next_ep}")
        print("---" * 20)
        print()

        item = WikidataItem(ep_qid)
        item.add_item_claim(Props.SEASON, season_qid,
                            qualifiers=[
                                item.get_string_claim(Props.SERIES_ORDINAL, str(ep_index))
                            ])

        if prev_ep:
            item.add_item_claim(Props.FOLLOWS, prev_ep["item"])

        if next_ep:
            item.add_item_claim(Props.FOLLOWED_BY, next_ep["item"])

        skiplist.add(ep_qid)
        prev_ep = episode
        ep_index += 1
def create_episodes():
    PATH = str(Path(__file__).parent)
    seasons = Knead(PATH + "/data/zomergasten/zomergasten-2021.json").data()

    # Sort seasons by season_nr
    seasons.sort(key=lambda i: i["season_nr"])

    episode_nr = 176  # last episode of 2020

    for season in seasons:
        print()
        print(f"Handling season #{season['season_nr']}")
        year = season["year"]
        presenter_name = season["presenter"]["title"]
        presenter_qid = season["presenter"]["qid"]

        for guest in season["guests"]:
            episode_nr += 1
            guest_name = guest["guest"]["text"]
            guest_qid = guest["guest"]["qid"]

            print("----" * 20)
            print()
            print(f"Handling episode #{episode_nr}, guest {guest_name}")

            date = parse_isodate(guest["date_parsed"])

            if episode_nr < 8:
                print("Already handled, skipping")
                continue

            desc = {
                "label_en": f"Zomergasten with {guest_name} ({year})",
                "label_nl": f"Zomergasten met {guest_name} ({year})",
                "description_en": f"episode {episode_nr} of the Dutch talk show 'Zomergasten', as broadcasted by VPRO on {date['en']}",
                "description_nl": f"aflevering {episode_nr} van het VPRO-televisieprogramma 'Zomergasten', uitgezonden op {date['nl']}",
                "aliases_en": [
                    f"{presenter_name} with {guest_name}",
                    f"Zomergasten episode {episode_nr}"
                ],
                "aliases_nl": [
                    f"{presenter_name} met {guest_name}",
                    f"Zomergasten aflevering {episode_nr}"
                ]
            }

            if "qid" in guest:
                print(f"Getting a qid: {guest['qid']}")
                item = WikidataItem(guest["qid"])
            else:
                item = WikidataItem(
                    summary=f"Creating new item for the Zomergasten episode with {guest_name}",
                    labels={
                        "en": desc["label_en"],
                        "nl": desc["label_nl"]
                    },
                    descriptions={
                        "en": desc["description_en"],
                        "nl": desc["description_nl"]
                    },
                    aliases={
                        "en": desc["aliases_en"],
                        "nl": desc["aliases_nl"]
                    })

            item.add_item_claim(Props.INSTANCE_OF, Items.TV_SERIES_EPISODE)
            item.add_item_claim(Props.PART_OF_SERIES, Items.ZOMERGASTEN,
                                qualifiers=[
                                    item.get_string_claim(Props.SERIES_ORDINAL,
                                                          str(episode_nr))
                                ])
            item.add_item_claim(Props.PRESENTER, presenter_qid,
                                references=get_ref(item))
            item.add_time_claim(Props.PUB_DATE,
                                pywikibot.WbTime(year=date["year"],
                                                 month=date["month"],
                                                 day=date["day"]),
                                references=get_ref(item))
            item.add_item_claim(Props.TALK_SHOW_GUEST, guest_qid,
                                references=get_ref(item))
            item.add_item_claim(Props.GENRE, Items.TALK_SHOW)
            item.add_item_claim(Props.ORIGINAL_BROADCASTER, Items.VPRO)
            item.add_item_claim(Props.COUNTRY_OF_ORIGIN, Items.NETHERLANDS)
            item.add_item_claim(Props.LANGUAGE_SHOW, Items.DUTCH)
            item.add_item_claim(Props.DISTRIBUTED_BY, Items.NPO)
def add_item(data):
    print("----" * 20)
    print()
    print(data)

    title = data["title"]
    url = data["url"]
    kid = data["id"]

    # if kid != "50":
    #     return

    print(f"Handling {title}")

    if skiplist.has(kid):
        print("In skiplist, skipping")
        return

    creator = data["creator"]

    if creator == "":
        desc_nl = "kunstwerk in de openbare ruimte te Nijmegen"
        desc_en = "public artwork in Nijmegen, the Netherlands"
    elif creator == "Onbekend":
        desc_nl = "kunstwerk in de openbare ruimte van een onbekende maker te Nijmegen"
        desc_en = "public artwork by an unknown artist in Nijmegen, the Netherlands"
    else:
        desc_nl = f"kunstwerk van {creator} in de openbare ruimte te Nijmegen"
        desc_en = f"public artwork by {creator} in Nijmegen, the Netherlands"

    item = WikidataItem(
        summary=f"public artwork '{title}' in Nijmegen, the Netherlands",
        labels={"nl": title}
    )

    item.edit_descriptions({
        "de": "Kunst im öffentlichen Raum in Nijmegen (Niederlande)",
        "en": desc_en,
        "es": "arte público en Nijmegen (Holanda)",
        "fr": "art public à Nimègue (Pays-Bas)",
        "nl": desc_nl
    })

    item.edit_aliases({"en": title})

    # Basics
    item.add_item_claim(Props.INSTANCE_OF, Items.SCULPTURE)
    item.add_item_claim(Props.COUNTRY, Items.NETHERLANDS)
    item.add_item_claim(Props.LOCATED_IN_ADMIN, Items.NIJMEGEN_MUNIP)
    item.add_item_claim(Props.GENRE, Items.PUBLIC_ART)

    # Actual data
    coord = [data["lat"], data["lon"]]
    item.add_coordinate(Props.COORDINATES, coord, references=get_refs(item, url))

    item.add_string_claim(
        Props.INVENTORY_NR,
        kid,
        qualifiers=[
            item.get_item_claim(Props.COLLECTION, Items.PUBLIC_ART_IN_NIJMEGEN)
        ],
        references=get_refs(item, url)
    )

    item.add_string_claim(
        Props.DESCRIBED_AT_URL,
        url,
        qualifiers=[
            item.get_item_claim(Props.LANGUAGE_WORK, Items.DUTCH)
        ]
    )

    if data["year"] != "":
        year = int(data["year"])
        item.add_time_claim(Props.INCEPTION, WbTime(year=year),
                            references=get_refs(item, url))

    if data["creator_qid"] != "":
        item.add_item_claim(Props.CREATOR, data["creator_qid"],
                            references=get_refs(item, url))
    elif data["creator"] == "Onbekend":
        # "Onbekend" means unknown, so record the creator as an unknown value
        item.add_item_claim(Props.CREATOR, "somevalue",
                            references=get_refs(item, url))

    if data["owner"] == "gemeente":
        item.add_item_claim(Props.COLLECTION, Items.NIJMEGEN_MUNIP,
                            references=get_refs(item, url))
    elif data["owner"] == "particulier":
        item.add_item_claim(Props.COLLECTION, Items.PRIVATE_COLLECTION,
                            references=get_refs(item, url))

    if data["location_clean"] != "":
        item.add_monoling_claim(Props.STREET_ADDRESS, data["location_clean"], "nl",
                                references=get_refs(item, url))

    skiplist.add(kid)
def create_seasons():
    PATH = str(Path(__file__).parent)
    seasons = Knead(PATH + "/data/zomergasten/zomergasten.json").data()

    # Sort seasons by season_nr
    seasons.sort(key=lambda i: i["season_nr"])

    for season in seasons:
        season_nr = season["season_nr"]

        print("----" * 20)
        print()
        print(f"Handling season #{season_nr}")

        year = season["year"]
        presenter_name = season["presenter"]["title"]
        presenter_qid = season["presenter"]["qid"]
        episodes_count = len(season["guests"])

        if season_nr < 4:
            print("Existing season, skipping")
            continue

        desc = {
            "label_en": f"Zomergasten season {season_nr} ({year})",
            "label_nl": f"Zomergasten seizoen {season_nr} ({year})",
            "description_en": f"Season {season_nr} of the Dutch talk show 'Zomergasten', as broadcasted by VPRO in {year}",
            "description_nl": f"Seizoen {season_nr} van het VPRO-televisieprogramma 'Zomergasten', uitgezonden in {year}",
            "aliases_en": [f"Zomergasten {year}", f"Zomergasten season {season_nr}"],
            "aliases_nl": [f"Zomergasten {year}", f"Zomergasten seizoen {season_nr}"]
        }

        item = WikidataItem(
            summary=f"Creating new item for the Zomergasten season {season_nr}",
            labels={
                "en": desc["label_en"],
                "nl": desc["label_nl"]
            },
            descriptions={
                "en": desc["description_en"],
                "nl": desc["description_nl"]
            },
            aliases={
                "en": desc["aliases_en"],
                "nl": desc["aliases_nl"]
            })

        item.add_item_claim(Props.INSTANCE_OF, Items.TV_SERIES_SEASON)
        item.add_item_claim(Props.PART_OF_SERIES, Items.ZOMERGASTEN,
                            qualifiers=[
                                item.get_string_claim(Props.SERIES_ORDINAL, str(season_nr))
                            ])
        item.add_item_claim(Props.PRESENTER, presenter_qid, references=get_ref(item))
        item.add_time_claim(Props.PUB_DATE, pywikibot.WbTime(year=year),
                            references=get_ref(item))
        item.add_item_claim(Props.GENRE, Items.TALK_SHOW)
        item.add_item_claim(Props.ORIGINAL_BROADCASTER, Items.VPRO)
        item.add_item_claim(Props.COUNTRY_OF_ORIGIN, Items.NETHERLANDS)
        item.add_item_claim(Props.LANGUAGE_SHOW, Items.DUTCH)
        item.add_item_claim(Props.DISTRIBUTED_BY, Items.NPO)
        item.add_quantity_claim(Props.NR_OF_EPISODES, episodes_count,
                                references=get_ref(item))