def main():
    """Add a BAG building ID (with references) to each monument item.

    Reads monument rows from the UDS CSV, skips rows already handled
    (skiplist) or that already carry a BAG building claim, and writes
    the claim with "stated in" / reference-URL / language references.
    """
    rows = Knead(PATH + "/data/uds/monuments-with-qids.csv").data()
    skiplist = Skiplist("projects/skiplists/uds.txt")
    total = len(rows)

    for idx, row in enumerate(rows):
        print(row)
        row_qid = row["qid"]
        bag_id = row["bag_ok"]
        ref_url = row["url"]

        print()
        print(f"#{idx} / #{total}")
        print(f"Handling {row_qid} / {bag_id} / {ref_url}")

        # Already processed in an earlier run?
        if skiplist.has(row_qid):
            print(f"{row_qid} in skiplist, skipping")
            continue

        wd_item = WikidataItem(row_qid)
        if Props.BAG_BUILDING in wd_item.get_claims():
            print("This item already has a BAG building ID, skipping")
            continue

        # Write the claim with its three references, then remember the QID
        # so a restarted run does not touch this item again.
        references = [
            wd_item.get_item_claim(Props.STATED_IN, Items.UDS_DOC),
            wd_item.get_url_claim(Props.REF_URL, ref_url),
            wd_item.get_item_claim(Props.LANGUAGE_WORK, Items.DUTCH),
        ]
        wd_item.add_string_claim(Props.BAG_BUILDING, bag_id,
                                 references=references)
        skiplist.add(row_qid)
def main():
    """Import construction-history dates for church items.

    For each row, adds (when present and not already claimed):
    inception, dissolved (demolished), and "significant event"
    claims for restoration and expansion, each referenced with a
    Wikipedia permalink.
    """
    items = Knead("projects/data/churches/import.json").data()
    skiplist = Skiplist("projects/skiplists/churches.txt")

    for index, item in enumerate(items):
        qid = item["qid"]
        title = item["title"]

        print()
        print(f"#{index} / #{len(items)}")
        print(f"Handling {qid} / {title}")

        # FIX: "expanded" was used further down but missing from this guard,
        # so items with only an expansion date were skipped before reaching
        # the set_sig_claim branch below.
        if not any([item["inception"], item["demolished"], item["restored"],
                    item["expanded"]]):
            print("No inception, demolished, restored, expanded, skipping")
            continue

        print(item)

        if skiplist.has(qid):
            print(f"{qid} in skiplist, skipping")
            continue

        wd_item = WikidataItem(qid)
        claims = wd_item.get_claims()
        permalink = get_permalink("nl", title)

        if item["inception"] and Props.INCEPTION not in claims:
            set_year_claim(wd_item, Props.INCEPTION, item["inception"],
                           permalink)

        if item["demolished"] and Props.DISSOLVED not in claims:
            set_year_claim(wd_item, Props.DISSOLVED, item["demolished"],
                           permalink)

        # NOTE(review): `claims` is a snapshot from before any writes, so if
        # the "restored" branch adds a SIG_EVENT the "expanded" branch still
        # sees SIG_EVENT as absent and also writes — this preserves the
        # original behavior (both events can be added in one run).
        if item["restored"] and Props.SIG_EVENT not in claims:
            set_sig_claim(wd_item, Items.RECONSTRUCTION, item["restored"],
                          permalink)

        if item["expanded"] and Props.SIG_EVENT not in claims:
            set_sig_claim(wd_item, Items.BUILDING_EXPANSION, item["expanded"],
                          permalink)

        skiplist.add(qid)
def main():
    """Add a seating-capacity claim to church items from a CSV list.

    Skips rows already in the skiplist or whose item already has a
    capacity claim; each written claim carries "imported from" and
    "Wikimedia import URL" references pointing at a fixed permalink.
    """
    rows = Knead("projects/data/churchseats/seats-qids.csv").data()
    skiplist = Skiplist("projects/skiplists/churchseats.txt")
    permalink = "https://nl.wikipedia.org/w/index.php?title=Lijst_van_grootste_Nederlandse_kerkgebouwen_naar_zitplaatsen&oldid=56777124"
    total = len(rows)

    for idx, row in enumerate(rows):
        row_qid = row["qid"]
        church_name = row["name"]
        seat_count = row["seats"]

        print()
        print(f"#{idx} / #{total}")
        print(f"Handling {row_qid} / {church_name} / {seat_count} seats")
        print(row)

        # Skip anything a previous run already handled.
        if skiplist.has(row_qid):
            print(f"{row_qid} in skiplist, skipping")
            continue

        wd_item = WikidataItem(row_qid)
        if Props.CAPACITY in wd_item.get_claims():
            print("This item already has capacity, skipping")
            continue

        references = [
            wd_item.get_item_claim(Props.IMPORTED_FROM, Items.WIKIPEDIA_NL),
            wd_item.get_url_claim(Props.WM_IMPORT_URL, permalink),
        ]
        wd_item.add_quantity_claim(Props.CAPACITY, seat_count,
                                   references=references)
        skiplist.add(row_qid)
def match_seasons():
    """Link Zomergasten episodes to their seasons on Wikidata.

    For every episode (ordered by broadcast), adds a season claim with a
    series-ordinal qualifier plus follows/followed-by claims to the
    previous and next episode. The episode counter resets whenever the
    season year changes.
    """
    PATH = str(Path(__file__).parent)
    seasons = Knead(PATH + "/data/zomergasten/seasons.csv").data()
    episodes = Knead(PATH + "/data/zomergasten/episodes.csv").data()
    skiplist = Skiplist(PATH + "/skiplists/zomergasten-seasons.txt")

    def get_season_by_year(year):
        # Linear scan; the seasons list is small so this is fine.
        for season in seasons:
            if season["year"] == year:
                return season
        return None

    prev_ep = None
    next_ep = None
    cur_year = "1988"
    ep_index = 1  # ordinal of the episode within the current season

    for index, episode in enumerate(episodes):
        ep_qid = episode["item"]
        ep_year = episode["year"]
        ep_title = episode["itemLabel"]

        # NOTE(review): assumes every episode year has a matching season
        # row; if not, get_season_by_year returns None and the next line
        # raises TypeError — confirm the CSVs are in sync.
        season = get_season_by_year(ep_year)
        season_qid = season["item"]
        season_title = season["itemLabel"]

        if skiplist.has(ep_qid):
            print(f"{ep_qid} ({ep_title}) in skiplist, skipping")
            # Even when skipping we must keep the ordinal counter and
            # prev_ep bookkeeping in step with the episode list.
            if season["year"] != cur_year:
                print("reset")
                ep_index = 1
                cur_year = season["year"]
            prev_ep = episode
            ep_index += 1
            continue

        if season["year"] != cur_year:
            ep_index = 1
            cur_year = season["year"]

        # FIX: was a bare `except:` which also swallowed SystemExit and
        # KeyboardInterrupt; only IndexError (last episode) is expected.
        try:
            next_ep = episodes[index + 1]
        except IndexError:
            next_ep = None

        print("---" * 20)
        print(
            f"{ep_qid} - {ep_title} / #{ep_index} {season_qid} {season_title}")
        print(f"{prev_ep} / {next_ep}")
        print("---" * 20)
        print()

        item = WikidataItem(ep_qid)
        item.add_item_claim(Props.SEASON, season_qid, qualifiers=[
            item.get_string_claim(Props.SERIES_ORDINAL, str(ep_index))
        ])
        if prev_ep:
            item.add_item_claim(Props.FOLLOWS, prev_ep["item"])
        if next_ep:
            item.add_item_claim(Props.FOLLOWED_BY, next_ep["item"])

        skiplist.add(ep_qid)
        prev_ep = episode
        ep_index += 1
class CreateBot:
    """Bot that creates new Wikidata items from rows of a data file.

    Safe-by-default: unless overridden on the command line, it runs a
    single row (`-run-all` disables) and performs a dry run (`-run-live`
    disables). Processed rows are remembered in a per-bot skiplist so
    restarted runs do not duplicate work.
    """

    # FIX: `required_fields=[]` was a shared mutable default argument;
    # use None and normalize. Also `x == None` -> `x is None`.
    def __init__(self, botid, datapath, key="id", required_fields=None,
                 empty_check=lambda x: x is None):
        print(f"Setting up new bot '{botid}'")
        print(f"Data path: {datapath}")

        # Parse command line arguments and play it safe, assume
        # run_once and dry_run by default, except when they're
        # disabled
        args = pywikibot.handle_args()
        run_once = "-run-all" not in args
        dry_run = "-run-live" not in args
        print(f"Running once? {run_once}")
        print(f"Dry run? {dry_run}")

        self.id = botid
        self.run_once = run_once
        self.dry_run = dry_run
        self.skiplist = Skiplist(f"projects/skiplists/{self.id}.txt")
        self.key = key
        self.current_job = None
        self.data = Knead(datapath).data()
        # Normalize the default so every instance gets its own list.
        self.required_fields = [] if required_fields is None else required_fields
        self.empty_check = empty_check

    def has_required_fields(self, item):
        """Return True iff none of the required fields is empty.

        Emptiness is decided by the configured empty_check callable;
        logs the first offending field and stops checking.
        """
        for field in self.required_fields:
            if self.empty_check(item[field]):
                print(f"'{field}' is empty, aborting")
                return False
        return True

    def iterate(self):
        """Yield a BotJob per usable data row; caller creates the item.

        Rows without a key, in the skiplist, or missing required fields
        are skipped. On dry runs nothing is yielded. After the caller
        returns control, the row is added to the skiplist unless the job
        was aborted; a job left without an item raises.
        """
        for index, item in enumerate(self.data):
            if self.key not in item or item[self.key] == "":
                print(f"This item has no key, skipping, {item}")
                continue

            item_id = item[self.key]
            print()
            print(f"#{index + 1} / {len(self.data)} / id:{item_id}")

            if self.skiplist.has(item_id):
                print(f"{item_id} in skiplist, skipping")
                continue

            dd(item)
            print()

            if not self.has_required_fields(item):
                continue

            if self.dry_run:
                print("Dry run, skip the actual creating")
                continue

            job = BotJob(data=item)
            self.current_job = job
            yield job

            if job.is_aborted:
                continue

            if not job.item:
                raise Exception("Still no item for this job, aborting")

            self.skiplist.add(item_id)

            if self.run_once:
                print("Only running once...")
                sys.exit()

        print("Bot is done")
        send_im_message(f"CreateBot finished running: {self.id}")
class Bot:
    """Bot that edits existing Wikidata items, fed by a file or a SPARQL query.

    Each data row must carry a QID (under `qid_key`); rows are skipped
    when the QID is empty, already in the per-bot skiplist, or when the
    `precheck_data` hook rejects them. Safe-by-default: runs a single
    row unless `-run-all` is passed on the command line.
    """

    def __init__(self, botid, datapath=None, sparql=None, run_once=False,
                 qid_key="qid",
                 empty_check=lambda x: x is None or x == "",
                 precheck_data=lambda x: True):
        print(f"Setting up new bot '{botid}'")
        if (not datapath) and (not sparql):
            # FIX: original raised undefined name `Error` (a NameError at
            # runtime); ValueError is the appropriate bad-arguments signal.
            raise ValueError("No datapath and no sparql")

        # Parse command line arguments and play it safe, assume
        # run_once by default, except when they're
        # disabled
        # NOTE(review): the run_once *parameter* is overwritten here by the
        # CLI flag — it is kept in the signature only for compatibility.
        args = pywikibot.handle_args()
        run_once = "-run-all" not in args
        print(f"Running once? {run_once}")

        self.id = botid
        self.run_once = run_once
        self.qid_key = qid_key
        self.empty_check = empty_check
        self.precheck_data = precheck_data
        self.skiplist = Skiplist(f"projects/skiplists/{self.id}.txt")

        if datapath:
            self.data = Knead(datapath, has_header=True).data()
        elif sparql:
            query = Query(sparql)
            self.data = list(query.iter_results())

    def iterate(self):
        """Yield a BotJob (row data + fetched WikidataItem) per usable row.

        After the caller finishes with a job, the QID is added to the
        skiplist; exits the process after the first job when run_once.
        """
        for index, item in enumerate(self.data):
            if self.qid_key not in item or self.empty_check(
                    item[self.qid_key]):
                print(f"This item has no QID, skipping, {item}")
                continue

            qid = item[self.qid_key]
            print()
            print(f"#{index + 1}/{len(self.data)} / {qid}")
            print(f"Data: {item}")
            print()

            if self.skiplist.has(qid):
                print(f"{qid} in skiplist, skipping")
                continue

            # This is just a hook for doing a sanity check before fetching the data
            if not self.precheck_data(item):
                print(f"This item did not pass precheck, skipping")
                continue

            # Fetching the item can fail (network, deleted item); log and
            # move on rather than killing the whole run.
            try:
                wd_item = WikidataItem(qid)
            except Exception as e:
                print(f"Exception, not yielding this job: {e}")
                continue

            job = BotJob(data=item, item=wd_item)
            yield job
            self.skiplist.add(qid)

            if self.run_once:
                print("Only running once...")
                sys.exit()

        send_im_message(f"Bot finished running: {self.id}")