Exemple #1
0
def main(arguments):
    """Process the arguments and fetch data according to them.

    Loads the nature-area dataset, optionally narrows it with an offset
    and a limit, builds a ``NatureArea`` for every entry and, depending
    on the arguments, appends a preview table to a file and/or uploads
    the item.

    :param arguments: Parsed command-line arguments. They are converted
        with ``vars()``, so presumably an ``argparse.Namespace``. The keys
        read are ``dataset``, ``offset``, ``limit``, ``table`` and
        ``upload``.
    :raises ValueError: If an upload is requested for a dataset that has
        no edit summary defined (anything other than ``"nr"``/``"np"``).
    """
    arguments = vars(arguments)
    dataset = arguments["dataset"]
    upload_mode = arguments["upload"]

    # Resolve the edit summary once, before any work is done. Previously
    # an unknown dataset left the name unbound and the upload crashed
    # with UnboundLocalError in the middle of the loop.
    edit_summary = None
    if upload_mode:
        if dataset == "nr":
            edit_summary = edit_summary_reserves
        elif dataset == "np":
            edit_summary = edit_summary_nationalparks
        else:
            raise ValueError(
                "No edit summary defined for dataset: {!r}".format(dataset))
    live = upload_mode == "live"

    current_time = utils.get_current_timestamp()
    wikidata_site = utils.create_site_instance("wikidata", "wikidata")
    existing_areas = get_wd_items_using_prop("P3613")
    area_data = load_nature_area_file(dataset)
    data_files = load_mapping_files()

    if arguments["offset"]:
        print("Using offset: {}.".format(str(arguments["offset"])))
        area_data = area_data[arguments["offset"]:]
    if arguments["limit"]:
        print("Using limit: {}.".format(str(arguments["limit"])))
        area_data = area_data[:arguments["limit"]]

    # The preview file name does not depend on the individual area.
    filename = "{}_{}.txt".format(dataset, current_time)

    for area in area_data:
        reserve = NatureArea(area, wikidata_site, data_files, existing_areas)
        if arguments["table"]:
            preview = PreviewTable(reserve)
            utils.append_line_to_file(preview.make_table(), filename)
        if upload_mode:
            uploader = Uploader(reserve,
                                repo=wikidata_site,
                                live=live,
                                edit_summary=edit_summary)
            uploader.upload()
def main(arguments):
    """Process the authority file and optionally upload each person.

    Loads the authority data, optionally narrows it with an offset and a
    limit, builds a ``Person`` for every entry and uploads it when
    requested. An API error during one upload is reported and the run
    continues with the next entry.

    :param arguments: Parsed command-line arguments. They are converted
        with ``vars()``, so presumably an ``argparse.Namespace``. The keys
        read are ``offset``, ``limit`` and ``upload``.
    """
    arguments = vars(arguments)
    wikidata_site = utils.create_site_instance("wikidata", "wikidata")
    existing_people = get_wd_items_using_prop("P4357")
    auth_data = load_auth_file()
    data_files = load_mapping_files()

    if arguments["offset"]:
        print("Using offset: {}.".format(str(arguments["offset"])))
        auth_data = auth_data[arguments["offset"]:]
    if arguments["limit"]:
        print("Using limit: {}.".format(str(arguments["limit"])))
        auth_data = auth_data[:arguments["limit"]]

    upload_mode = arguments["upload"]
    live = upload_mode == "live"

    for p in auth_data:
        # Each entry is a mapping; the payload is the value stored
        # under its first key.
        p_data = p[list(p.keys())[0]]
        person = Person(p_data, wikidata_site, data_files, existing_people)
        if upload_mode:
            uploader = Uploader(
                person,
                repo=wikidata_site,
                live=live,
                edit_summary="importing #Musikverket authority file")
            try:
                uploader.upload()
            except pywikibot.data.api.APIError as e:
                # Keep going with the next person, but do not hide the
                # failure: report it instead of silently dropping it.
                print(e)
def main(arguments):
    """Process the arguments and fetch data according to them.

    Loads the Bosnian source data, optionally narrows it with an offset
    and a row limit, builds a ``BosniaMonument`` per row and, depending
    on the arguments, appends a preview to the examples file and/or
    uploads the item.

    :param arguments: Parsed command-line arguments. They are converted
        with ``vars()``, so presumably an ``argparse.Namespace``. The keys
        read are ``datadir``, ``offset``, ``short``, ``table`` and
        ``upload``.
    """
    options = vars(arguments)
    filenames = generate_filenames("Bosnia", utils.get_current_timestamp())
    repo = utils.create_site_instance("wikidata", "wikidata")
    rows = load_source_data(options["datadir"])
    data_files = load_mapping_files()
    mapping = data_files["_static"]["ba_(bs)"]
    unique_property = mapping["unique"]["property"]
    existing = get_wd_items_using_prop(unique_property)

    start = options["offset"]
    if start:
        print("Using offset: {}.".format(str(start)))
        rows = rows[start:]

    max_rows = options["short"]
    if max_rows:
        print("Using limit: {}.".format(str(max_rows)))
        rows = rows[:max_rows]

    for row in rows:
        monument = BosniaMonument(row, mapping, data_files, existing, repo)

        if options["table"]:
            examples_file = filenames['examples']
            # Raw row first, then the rendered table for the same row.
            utils.append_line_to_file(
                "<pre>" + str(row) + "</pre>\n", examples_file)
            utils.append_line_to_file(
                monument.print_wd_to_table(), examples_file)

        if options["upload"]:
            is_live = options["upload"] == "live"
            Uploader(monument, repo=repo, live=is_live,
                     tablename="ba").upload()
Exemple #4
0
def main(arguments):
    """Build Libris editions and optionally upload them.

    Two modes are supported. If ``uri`` is given, a single edition is
    fetched from that URI. Otherwise, if both ``dir`` and ``libris_list``
    are given, every available file in the directory whose Libris id
    appears in the list is processed. If neither applies, nothing is
    processed.

    :param arguments: Mapping of options, accessed with ``.get()`` and
        indexing. The keys read are ``uri``, ``dir``, ``libris_list``
        and ``upload``.
    """
    wikidata_site = utils.create_site_instance("wikidata", "wikidata")
    data_files = load_mapping_files()
    cache = {}
    existing_editions = utils.get_wd_items_using_prop(
        data_files["properties"]["libris_uri"])

    def handle_edition(data, mode):
        """Create one Edition from *data* and upload it if requested."""
        edition = Edition(data, wikidata_site, data_files,
                          existing_editions, cache, mode)
        report = edition.get_report()
        if not arguments.get("upload"):
            return
        uploader = Uploader(edition,
                            repo=wikidata_site,
                            live=arguments["upload"] == "live",
                            edit_summary=EDIT_SUMMARY)
        # An empty "Q" entry in the report is filled in with the
        # uploader's item id.
        if "Q" in report and report["Q"] == "":
            report["Q"] = uploader.wd_item_q
        try:
            uploader.upload()
        except pywikibot.data.api.APIError as e:
            print(e)

    uri = arguments.get("uri")
    if uri:
        handle_edition(get_from_uri(uri), "uri")
        return

    if not (arguments.get("dir") and arguments.get("libris_list")):
        return

    available_files = list_available_files(arguments.get("dir"))
    wanted_ids = get_lines_from_file(arguments["libris_list"])
    for fname in available_files:
        data = utils.load_json(fname)
        selibr = get_libris_id(data)
        if not selibr or selibr not in wanted_ids:
            continue
        handle_edition(data, "local")
def main(arguments):
    """Get arguments and process data.

    Goes through the available Libris files, builds a ``Person`` for
    every file that describes a person, optionally uploads it, and
    collects the non-empty problem reports into a JSON file.

    :param arguments: Mapping of options, accessed with ``.get()`` and
        indexing. The keys read are ``dir``, ``limit``, ``uri`` and
        ``upload``.
    """
    libris_files = list_available_files(
        arguments.get("dir"), arguments.get("limit"), arguments.get("uri"))
    filenames = make_filenames(utils.get_current_timestamp())

    wikidata_site = utils.create_site_instance("wikidata", "wikidata")
    data_files = load_mapping_files()
    libris_uri_prop = data_files["properties"]["libris_uri"]
    existing_people = utils.get_wd_items_using_prop(libris_uri_prop)
    collected_reports = []

    for fname in libris_files:
        data = utils.load_json(fname)
        # Caches are reloaded for every file and dumped again after
        # each Person has been built.
        cache = load_caches(["surname", "first_name"])
        if not is_person(data):
            continue

        person = Person(
            data, wikidata_site, data_files, existing_people, cache)
        dump_caches(person.get_caches())
        report = person.get_report()

        if arguments.get("upload"):
            uploader = Uploader(person,
                                repo=wikidata_site,
                                live=arguments["upload"] == "live",
                                edit_summary=EDIT_SUMMARY)
            if "Q" in report and report["Q"] == "":
                # If the Person didn't have an associated Qid,
                # this means the Uploader has now created a new Item
                # for it -- insert that id into the problem report.
                report["Q"] = uploader.wd_item_q
            try:
                uploader.upload()
            except pywikibot.data.api.APIError as e:
                print(e)

        if report:
            collected_reports.append(report)
            # The whole list is written out again after every addition.
            utils.json_to_file(
                filenames['reports'], collected_reports, silent=True)

    if collected_reports:
        print("SAVED PROBLEM REPORTS TO {}".format(filenames['reports']))
Exemple #6
0
def get_items(connection,
              dataset,
              upload,
              short=False,
              offset=None,
              table=False,
              list_matches=False):
    """
    Retrieve data from database and process it.

    Every retrieved row is turned into an instance of the dataset's
    monument class. Depending on the flags the item is previewed as a
    table, uploaded, and/or inspected for matched P31 values. Problem
    reports and skipped uploads are saved to files along the way.

    :param connection: Connection used to access the database.
    :param dataset: The Database instance to work on.
    :param upload: Whether to upload the processed items; the value
        "live" makes the Uploader run in live mode.
    :param short: Optional number of randomly selected rows to process.
    :param offset: Optional offset to retrieve rows.
    :param table: Whether to save the results as a wikitable.
    :param list_matches: Whether to save a list of matched items and their
        P31 values for copy/pasting to Wikidata.
    """
    table_name = dataset.table_name
    filenames = make_filenames(table_name, utils.get_current_timestamp())
    if upload:
        logger = Logger()
    if not utils.table_exists(connection, table_name):
        print("Table does not exist.")
        return

    mapping = Mapping(dataset)
    unique_prop = mapping.get_unique_prop()
    existing = (get_wd_items_using_prop(unique_prop)
                if unique_prop is not None else None)
    query = make_query(table_name, offset)

    print_row_count(table_name, connection)
    rows = select_query(query, connection)
    if short:
        rows = utils.get_random_list_sample(rows, short)
        print("USING RANDOM SAMPLE OF " + str(short))

    p31_matches = {}
    reports = []
    skipped = []

    wikidata_site = utils.create_site_instance("wikidata", "wikidata")
    data_files = load_data(dataset)

    for index, row in enumerate(rows):
        if not upload and index % 100 == 0:
            # visual feedback needed for preview runs
            print(".", end="", flush=True)

        monument = dataset.monument_class(row, mapping, data_files, existing,
                                          wikidata_site)
        report = monument.get_report()
        if not monument.upload:
            skipped.append(monument)

        if table:
            examples_file = filenames['examples']
            utils.append_line_to_file(
                "<pre>" + str(row) + "</pre>\n", examples_file)
            utils.append_line_to_file(
                monument.print_wd_to_table(), examples_file)

        if upload:
            uploader = Uploader(monument,
                                repo=wikidata_site,
                                log=logger,
                                tablename=dataset.country,
                                live=upload == "live")
            if "Q" in report and report["Q"] == "":
                # If the Monument didn't have an associated Qid,
                # this means the Uploader has now created a new Item
                # for it -- insert that id into the problem report.
                report["Q"] = uploader.wd_item_q

            uploader.upload()
            print("--------------------------------------------------")

        if list_matches:
            match_info = monument.get_matched_item_p31s()
            if match_info:
                # match_info[0] holds the P31 values; the other two
                # entries are stored together under each of them.
                pair = (match_info[1], match_info[2])
                for p31 in match_info[0]:
                    p31_matches.setdefault(p31, []).append(pair)

        if report:  # dictionary is not empty
            reports.append(report)
            utils.json_to_file(filenames['reports'], reports, silent=True)

    if not upload:
        print("\n")  # linebreak needed in case of visual feedback dots
    if reports:
        print("SAVED PROBLEM REPORTS TO {}".format(filenames['reports']))
    if skipped:
        utils.save_to_file(filenames['skipped'],
                           "\n".join(format_skipped_items(skipped)),
                           silent=True)
        print("SAVED {0} SKIPPED UPLOADS TO {1}".format(
            len(skipped), filenames['skipped']))
    if table:
        print("SAVED TEST RESULTS TO {}".format(filenames['examples']))
    if list_matches:
        header = (
            '{| class="wikitable sortable"\n'
            "! matched item {{P|P31}} !! frequency !! wlm-id(s) [max 10] \n")
        body = "\n".join(format_matched_p31s_rows(p31_matches))
        utils.save_to_file(filenames['matches'], header + body + "\n|}")
Exemple #7
0
 def make_image_item(self, filename):
     """
     Create a pywikibot FilePage for a file name on the "commons" site.

     :param filename: Title of the file to link to.
     :return: ``pywikibot.FilePage`` built from a link to the file.
     """
     site = utils.create_site_instance("commons", "commons")
     # Default namespace 6 is presumably the File namespace -- confirm.
     link = pywikibot.Link(filename, source=site, defaultNamespace=6)
     page = pywikibot.FilePage(link)
     return page