def main():
    # Mariniersbrug
    qid = "Q2461755"
    item = WikidataItem(qid)
    claims = item.get_claims()
    monument_id = claims[Props.RIJKSMONUMENT_ID]
    print(f"monument_id: {monument_id}")
def parse():
    for feature in coords["features"]:
        pid = feature["properties"]["id"]
        coord = feature["geometry"]["coordinates"]
        # GeoJSON stores coordinates as [lon, lat]; the coordinate claim wants [lat, lon]
        coord.reverse()

        plantage = get_plantage(pid)
        qid = plantage["qid"]
        name = plantage["name"]

        if not qid:
            continue

        print()
        print("---" * 20)
        print(pid, coord, name, qid)

        item = WikidataItem(qid)
        claims = item.get_claims()

        if Props.COORDINATES in claims:
            print("Already has coordinates, skipping!")
            continue

        item.add_coordinate(Props.COORDINATES, coord, references=get_refs(item, pid))
def add_image_alias(item):
    qid = item["item"]
    img = item["Afbeelding"]
    alias = item["alias"]

    item = WikidataItem(qid)
    claims = item.get_claims()
    aliases = item.get_aliases("nl")

    if img != "" and Props.IMAGE not in claims:
        item.add_commonsmedia(
            Props.IMAGE,
            img,
            references = [
                item.get_item_claim(Props.IMPORTED_FROM, Items.WIKIPEDIA_NL),
                item.get_url_claim(Props.WM_IMPORT_URL, WP_PERMALINK)
            ]
        )
    else:
        print("has an image already")

    if (alias != "") and (not aliases):
        print(f"Setting alias: {alias}")
        item.edit_aliases({
            "nl" : [ alias ]
        })
    else:
        print("Already has aliases")
def add_sites():
    PATH = str(Path(__file__).parent)
    sites = Knead(PATH + "/data/zomergasten/guest-sites.csv").data()

    for site in sites:
        qid = site["qid"]
        url = site["url"]
        name = site["guest"]

        print()
        print(f"Now handling {qid} / {name}")

        item = WikidataItem(qid)
        claims = item.get_claims()

        if Props.OFFICIAL_WEBSITE in claims:
            print("Already got a site, skip")
            continue

        item.add_url_claim(
            Props.OFFICIAL_WEBSITE,
            url,
            qualifiers=[item.get_item_claim(Props.LANGUAGE_WORK, Items.DUTCH)],
            references=[
                item.get_claim(Props.RETRIEVED, wbtime_now()),
                item.get_url_claim(
                    Props.REF_URL,
                    "https://www.vpro.nl/programmas/zomergasten/a-z.html"),
                item.get_item_claim(Props.LANGUAGE_WORK, Items.DUTCH)
            ])
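# The "retrieved" reference above uses a wbtime_now() helper that isn't shown in
# this snippet. A minimal sketch of what it could look like, assuming the wrapper
# is built on pywikibot and the retrieved date wants day precision (an assumption,
# not the project's actual helper):
from datetime import date
from pywikibot import WbTime

def wbtime_now():
    today = date.today()
    return WbTime(year=today.year, month=today.month, day=today.day)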
def main():
    items = Knead(PATH + "/data/uds/monuments-with-qids.csv").data()
    skiplist = Skiplist("projects/skiplists/uds.txt")

    for index, item in enumerate(items):
        print(item)
        qid = item["qid"]
        bag = item["bag_ok"]  # BAG: the Dutch national register of buildings and addresses
        url = item["url"]

        print()
        print(f"#{index} / #{len(items)}")
        print(f"Handling {qid} / {bag} / {url}")

        if skiplist.has(qid):
            print(f"{qid} in skiplist, skipping")
            continue

        wd_item = WikidataItem(qid)
        claims = wd_item.get_claims()

        if Props.BAG_BUILDING in claims:
            print("This item already has a BAG building ID, skipping")
            continue

        wd_item.add_string_claim(
            Props.BAG_BUILDING,
            bag,
            references=[
                wd_item.get_item_claim(Props.STATED_IN, Items.UDS_DOC),
                wd_item.get_url_claim(Props.REF_URL, url),
                wd_item.get_item_claim(Props.LANGUAGE_WORK, Items.DUTCH)
            ])

        skiplist.add(qid)
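# Skiplist is used throughout these scripts to make imports resumable: once an item
# has been handled, its QID is persisted and skipped on later runs. A minimal sketch
# of that interface, assuming one QID per line in a text file (an assumption, not the
# project's actual implementation):
from pathlib import Path

class Skiplist:
    def __init__(self, path):
        self.path = Path(path)
        # Load previously handled QIDs, if the file exists already
        self.items = set(self.path.read_text().splitlines()) if self.path.exists() else set()

    def has(self, qid):
        return qid in self.items

    def add(self, qid):
        self.items.add(qid)
        with self.path.open("a") as f:
            f.write(qid + "\n")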
class BotJob:
    def __init__(self, data, item=None, dry_run=False):
        self.is_aborted = False
        self.data = data
        self.dry_run = dry_run
        self.item = item

    def abort(self, message):
        print(f"Abort: {message}")
        self.is_aborted = True

    # Lifted from https://github.com/multichill/toollabs/blob/master/bot/wikidata/artdatabot.py
    def archive_url(self, url):
        """
        Links to paintings are subject to link rot. When creating a new item,
        have the Wayback Machine make a snapshot, so we always have a copy of
        the page we used to source a bunch of statements.
        See also https://www.wikidata.org/wiki/Wikidata:WikiProject_sum_of_all_paintings/Link_rot
        """
        print(f"Backing up to the Wayback Machine: {url}")
        wayback_url = f"https://web.archive.org/save/{url}"

        try:
            requests.post(wayback_url)
        except requests.exceptions.RequestException:
            print("Wayback Machine save failed")

    def create_item(self, summary, labels, descriptions=None, aliases=None):
        if self.item:
            raise Exception("Job already has an item")

        print("Creating new item")
        dd({
            "summary": summary,
            "labels": labels,
            "descriptions": descriptions,
            "aliases": aliases
        })

        try:
            self.item = WikidataItem(
                summary=summary,
                labels=labels,
                descriptions=descriptions,
                aliases=aliases
            )
        except Exception as e:
            print("Got an exception while creating item", e)
            # Re-raise
            raise

        print("Okay, created a new item")

    def has_prop(self, pid):
        claims = self.item.get_claims()
        return pid in claims

    def set_item_by_qid(self, qid):
        self.item = WikidataItem(qid)
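# A hedged usage sketch of BotJob: point the job at an existing item, check a
# property and archive the source page before editing. The QID, URL and the
# Props constant here are placeholders, not values from the original scripts.
job = BotJob(data={"source": "example"}, dry_run=True)
job.set_item_by_qid("Q42")

if not job.has_prop(Props.OFFICIAL_WEBSITE):
    job.archive_url("https://example.org/source-page")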
def add_party_data(row):
    print("----" * 20)
    print()
    print(row)

    title = row["title"]
    qid = row["qid"]

    if skiplist.has(qid):
        print("In skiplist, skipping")
        return

    item = WikidataItem(qid)

    if Props.NR_OF_SEATS in item.get_claims():
        print("Got seats already, skipping party")
        return

    for key, val in row.items():
        if not key.isdigit():
            continue

        year = int(key)

        if val == "":
            continue

        seats = int(val)
        print(f"{title} ({qid}) had {seats} seats in {year}")

        item.add_quantity_claim(
            Props.NR_OF_SEATS,
            seats,
            qualifiers=[
                item.get_item_claim(Props.LEGISLATIVE_BODY, Items.NL_LOWER_HOUSE),
                item.get_claim(Props.START_TIME, WbTime(year=year))
            ],
            references=[
                item.get_item_claim(Props.IMPORTED_FROM, Items.WIKIPEDIA_NL),
                item.get_url_claim(Props.WM_IMPORT_URL, WP_PERMALINK)
            ])

    skiplist.add(qid)
def main(): items = Knead("projects/data/churches/import.json").data() skiplist = Skiplist("projects/skiplists/churches.txt") for index, item in enumerate(items): qid = item["qid"] title = item["title"] print() print(f"#{index} / #{len(items)}") print(f"Handling {qid} / {title}") if not any([item["inception"], item["demolished"], item["restored"]]): print("No inception, demolished, restored, skipping") continue print(item) if skiplist.has(qid): print(f"{qid} in skiplist, skipping") continue wd_item = WikidataItem(qid) claims = wd_item.get_claims() permalink = get_permalink("nl", title) if item["inception"] and Props.INCEPTION not in claims: set_year_claim(wd_item, Props.INCEPTION, item["inception"], permalink) if item["demolished"] and Props.DISSOLVED not in claims: set_year_claim(wd_item, Props.DISSOLVED, item["demolished"], permalink) if item["restored"] and Props.SIG_EVENT not in claims: set_sig_claim(wd_item, Items.RECONSTRUCTION, item["restored"], permalink) if item["expanded"] and Props.SIG_EVENT not in claims: set_sig_claim(wd_item, Items.BUILDING_EXPANSION, item["expanded"], permalink) skiplist.add(qid)
def add_inventory(item):
    qid = item["item"]
    url = item["url"]

    if "https://www.nijmegen.nl/kos/kunstwerk" not in url:
        return

    kid = url.replace("https://www.nijmegen.nl/kos/kunstwerk.aspx?id=", "")

    item = WikidataItem(qid)

    if Props.INVENTORY_NR in item.get_claims():
        print("has inventory!")
        return

    item.add_string_claim(
        Props.INVENTORY_NR,
        kid,
        qualifiers = [
            item.get_item_claim(Props.COLLECTION, Items.PUBLIC_ART_IN_NIJMEGEN)
        ],
        references = get_refs(item, url)
    )
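# The kunstwerk ID above is extracted by stripping a fixed prefix, which silently
# yields the whole URL if the query string is formatted differently. An alternative
# sketch that reads the "id" query parameter instead (not the original approach,
# just a more defensive option):
from urllib.parse import urlparse, parse_qs

def extract_kos_id(url):
    query = parse_qs(urlparse(url).query)
    return query.get("id", [None])[0]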
def main(): items = Knead("projects/data/churchseats/seats-qids.csv").data() skiplist = Skiplist("projects/skiplists/churchseats.txt") permalink = "https://nl.wikipedia.org/w/index.php?title=Lijst_van_grootste_Nederlandse_kerkgebouwen_naar_zitplaatsen&oldid=56777124" for index, item in enumerate(items): qid = item["qid"] title = item["name"] seats = item["seats"] print() print(f"#{index} / #{len(items)}") print(f"Handling {qid} / {title} / {seats} seats") print(item) if skiplist.has(qid): print(f"{qid} in skiplist, skipping") continue wd_item = WikidataItem(qid) claims = wd_item.get_claims() if Props.CAPACITY in claims: print("This item already has capacity, skipping") continue wd_item.add_quantity_claim( Props.CAPACITY, seats, references=[ wd_item.get_item_claim(Props.IMPORTED_FROM, Items.WIKIPEDIA_NL), wd_item.get_url_claim(Props.WM_IMPORT_URL, permalink) ]) skiplist.add(qid)