def main(arguments): """Process the arguments and fetch data according to them""" arguments = vars(arguments) current_time = utils.get_current_timestamp() wikidata_site = utils.create_site_instance("wikidata", "wikidata") existing_areas = get_wd_items_using_prop("P3613") area_data = load_nature_area_file(arguments["dataset"]) data_files = load_mapping_files() if arguments["offset"]: print("Using offset: {}.".format(str(arguments["offset"]))) area_data = area_data[arguments["offset"]:] if arguments["limit"]: print("Using limit: {}.".format(str(arguments["limit"]))) area_data = area_data[:arguments["limit"]] for area in area_data: reserve = NatureArea(area, wikidata_site, data_files, existing_areas) if arguments["table"]: filename = "{}_{}.txt".format(arguments["dataset"], current_time) preview = PreviewTable(reserve) utils.append_line_to_file(preview.make_table(), filename) if arguments["upload"]: live = True if arguments["upload"] == "live" else False if arguments["dataset"] == "nr": edit_summary = edit_summary_reserves elif arguments["dataset"] == "np": edit_summary = edit_summary_nationalparks uploader = Uploader(reserve, repo=wikidata_site, live=live, edit_summary=edit_summary) uploader.upload()
def main(arguments):
    """Process the arguments and fetch data according to them."""
    arguments = vars(arguments)
    wikidata_site = utils.create_site_instance("wikidata", "wikidata")
    existing_people = get_wd_items_using_prop("P4357")
    auth_data = load_auth_file()
    data_files = load_mapping_files()
    if arguments["offset"]:
        print("Using offset: {}.".format(arguments["offset"]))
        auth_data = auth_data[arguments["offset"]:]
    if arguments["limit"]:
        print("Using limit: {}.".format(arguments["limit"]))
        auth_data = auth_data[:arguments["limit"]]
    for p in auth_data:
        # Each entry is a single-key dict; the value holds the person data.
        p_data = p[list(p.keys())[0]]
        person = Person(p_data, wikidata_site, data_files, existing_people)
        if arguments["upload"]:
            live = arguments["upload"] == "live"
            uploader = Uploader(
                person,
                repo=wikidata_site,
                live=live,
                edit_summary="importing #Musikverket authority file")
            try:
                uploader.upload()
            except pywikibot.data.api.APIError:
                # Skip entries that fail to upload and continue with the rest.
                continue
def main(arguments): """Process the arguments and fetch data according to them.""" arguments = vars(arguments) filenames = generate_filenames("Bosnia", utils.get_current_timestamp()) repo = utils.create_site_instance("wikidata", "wikidata") source_data = load_source_data(arguments["datadir"]) data_files = load_mapping_files() mapping = data_files["_static"]["ba_(bs)"] existing = get_wd_items_using_prop( data_files["_static"]["ba_(bs)"]["unique"]["property"]) if arguments["offset"]: print("Using offset: {}.".format(str(arguments["offset"]))) source_data = source_data[arguments["offset"]:] if arguments["short"]: print("Using limit: {}.".format(str(arguments["short"]))) source_data = source_data[:arguments["short"]] for row in source_data: monument = BosniaMonument(row, mapping, data_files, existing, repo) if arguments["table"]: raw_data = "<pre>" + str(row) + "</pre>\n" monument_table = monument.print_wd_to_table() utils.append_line_to_file(raw_data, filenames['examples']) utils.append_line_to_file(monument_table, filenames['examples']) if arguments["upload"]: live = True if arguments["upload"] == "live" else False uploader = Uploader(monument, repo=repo, live=live, tablename="ba") uploader.upload()
def main(arguments):
    """Get arguments and process data."""
    wikidata_site = utils.create_site_instance("wikidata", "wikidata")
    data_files = load_mapping_files()
    cache = {}
    existing_editions = utils.get_wd_items_using_prop(
        data_files["properties"]["libris_uri"])
    if arguments.get("uri"):
        mode = "uri"
        data = get_from_uri(arguments.get("uri"))
        edition = Edition(data, wikidata_site, data_files,
                          existing_editions, cache, mode)
        problem_report = edition.get_report()
        if arguments.get("upload"):
            live = arguments["upload"] == "live"
            uploader = Uploader(edition, repo=wikidata_site,
                                live=live, edit_summary=EDIT_SUMMARY)
            if "Q" in problem_report and problem_report["Q"] == "":
                # The Uploader has created a new item for this edition;
                # insert its Qid into the problem report.
                problem_report["Q"] = uploader.wd_item_q
            try:
                uploader.upload()
            except pywikibot.data.api.APIError as e:
                print(e)
    elif arguments.get("dir") and arguments.get("libris_list"):
        mode = "local"
        available_files = list_available_files(arguments.get("dir"))
        libris_list = get_lines_from_file(arguments["libris_list"])
        for fname in available_files:
            data = utils.load_json(fname)
            selibr = get_libris_id(data)
            if selibr and selibr in libris_list:
                edition = Edition(data, wikidata_site, data_files,
                                  existing_editions, cache, mode)
                problem_report = edition.get_report()
                if arguments.get("upload"):
                    live = arguments["upload"] == "live"
                    uploader = Uploader(edition, repo=wikidata_site,
                                        live=live, edit_summary=EDIT_SUMMARY)
                    if "Q" in problem_report and problem_report["Q"] == "":
                        problem_report["Q"] = uploader.wd_item_q
                    try:
                        uploader.upload()
                    except pywikibot.data.api.APIError as e:
                        print(e)
def main(arguments): """Get arguments and process data.""" libris_files = list_available_files(arguments.get("dir"), arguments.get("limit"), arguments.get("uri")) filenames = make_filenames(utils.get_current_timestamp()) wikidata_site = utils.create_site_instance("wikidata", "wikidata") data_files = load_mapping_files() existing_people = utils.get_wd_items_using_prop( data_files["properties"]["libris_uri"]) problem_reports = [] for fname in libris_files: data = utils.load_json(fname) cache = load_caches(["surname", "first_name"]) if is_person(data): person = Person(data, wikidata_site, data_files, existing_people, cache) dump_caches(person.get_caches()) problem_report = person.get_report() if arguments.get("upload"): live = True if arguments["upload"] == "live" else False uploader = Uploader(person, repo=wikidata_site, live=live, edit_summary=EDIT_SUMMARY) if "Q" in problem_report and problem_report["Q"] == "": """ If the Person didn't have an associated Qid, this means the Uploader has now created a new Item for it -- insert that id into the problem report. """ problem_report["Q"] = uploader.wd_item_q try: uploader.upload() except pywikibot.data.api.APIError as e: print(e) if problem_report: problem_reports.append(problem_report) utils.json_to_file( filenames['reports'], problem_reports, silent=True) if problem_reports: print("SAVED PROBLEM REPORTS TO {}".format(filenames['reports']))
def get_items(connection, dataset, upload, short=False, offset=None,
              table=False, list_matches=False):
    """
    Retrieve data from database and process it.

    :param connection: Connection used to access the database.
    :param dataset: The Database instance to work on.
    :param upload: Whether to upload the processed items.
    :param short: Optional number of randomly selected rows to process.
    :param offset: Optional offset to retrieve rows.
    :param table: Whether to save the results as a wikitable.
    :param list_matches: Whether to save a list of matched items and their
        P31 values for copy/pasting to Wikidata.
    """
    filenames = make_filenames(dataset.table_name,
                               utils.get_current_timestamp())
    if upload:
        logger = Logger()
    if not utils.table_exists(connection, dataset.table_name):
        print("Table does not exist.")
        return
    mapping = Mapping(dataset)
    unique_prop = mapping.get_unique_prop()
    if unique_prop is not None:
        existing = get_wd_items_using_prop(unique_prop)
    else:
        existing = None
    query = make_query(dataset.table_name, offset)
    print_row_count(dataset.table_name, connection)
    database_rows = select_query(query, connection)
    if short:
        database_rows = utils.get_random_list_sample(database_rows, short)
        print("USING RANDOM SAMPLE OF " + str(short))
    matched_item_p31s = {}
    problem_reports = []
    skipped_uploads = []
    wikidata_site = utils.create_site_instance("wikidata", "wikidata")
    data_files = load_data(dataset)
    counter = 0
    for row in database_rows:
        if not upload and counter % 100 == 0:
            # visual feedback needed for preview runs
            print(".", end="", flush=True)
        counter += 1
        monument = dataset.monument_class(row, mapping, data_files,
                                          existing, wikidata_site)
        problem_report = monument.get_report()
        if not monument.upload:
            skipped_uploads.append(monument)
        if table:
            raw_data = "<pre>" + str(row) + "</pre>\n"
            monument_table = monument.print_wd_to_table()
            utils.append_line_to_file(raw_data, filenames['examples'])
            utils.append_line_to_file(monument_table, filenames['examples'])
        if upload:
            live = upload == "live"
            uploader = Uploader(monument, repo=wikidata_site, log=logger,
                                tablename=dataset.country, live=live)
            if "Q" in problem_report and problem_report["Q"] == "":
                # If the Monument didn't have an associated Qid, the
                # Uploader has now created a new item for it --
                # insert that id into the problem report.
                problem_report["Q"] = uploader.wd_item_q
            uploader.upload()
            print("--------------------------------------------------")
        if list_matches:
            match_info = monument.get_matched_item_p31s()
            if match_info:
                for p31 in match_info[0]:
                    if p31 not in matched_item_p31s:
                        matched_item_p31s[p31] = []
                    matched_item_p31s[p31].append(
                        (match_info[1], match_info[2]))
        if problem_report:  # dictionary is not empty
            problem_reports.append(problem_report)
            utils.json_to_file(filenames['reports'],
                               problem_reports, silent=True)
    if not upload:
        print("\n")  # linebreak needed in case of visual feedback dots
    if problem_reports:
        print("SAVED PROBLEM REPORTS TO {}".format(filenames['reports']))
    if skipped_uploads:
        skipped_items_output = "\n".join(format_skipped_items(skipped_uploads))
        utils.save_to_file(filenames['skipped'],
                           skipped_items_output, silent=True)
        print("SAVED {0} SKIPPED UPLOADS TO {1}".format(
            len(skipped_uploads), filenames['skipped']))
    if table:
        print("SAVED TEST RESULTS TO {}".format(filenames['examples']))
    if list_matches:
        matched_items_output = (
            '{| class="wikitable sortable"\n'
            "! matched item {{P|P31}} !! frequency !! wlm-id(s) [max 10] \n")
        matched_items_output += "\n".join(
            format_matched_p31s_rows(matched_item_p31s))
        matched_items_output += "\n|}"
        utils.save_to_file(filenames['matches'], matched_items_output)
def make_image_item(self, filename):
    """Create a FilePage object for a file hosted on Wikimedia Commons."""
    commonssite = utils.create_site_instance("commons", "commons")
    imagelink = pywikibot.Link(filename, source=commonssite,
                               defaultNamespace=6)
    return pywikibot.FilePage(imagelink)
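# Hypothetical usage sketch, not part of this module: attach the FilePage
# returned by make_image_item() as the target of an image (P18) claim.
# ``uploader``, ``repo`` and ``item`` are illustrative assumptions; only
# make_image_item() itself comes from the code above.
#
#     file_page = uploader.make_image_item("Example.jpg")
#     claim = pywikibot.Claim(repo, "P18")  # P18 = image
#     claim.setTarget(file_page)
#     item.addClaim(claim, summary="adding image")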