Code Example #1
def main():
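    """Remove the ead_location URL from a batch of ArchivesSpace resources.

    Assumed context: the surrounding script imports csv, json, and sys and
    provides the asf ArchivesSpace API helper module. Reads BIBIDs from a
    CSV, looks up each repo/asid pair, saves a copy of the existing resource
    JSON, deletes the ead_location field, posts the updated record back, and
    saves the new JSON alongside the old one.
    """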
    # Main code goes here.

    asf.setServer("Prod")

    output_folder = "output/resource_remove_links"
    the_lookup_csv = "id_lookup_prod.csv"
    bibid_file = "/Users/dwh2128/Documents/ACFA/TEST/ACFA-161-remove-links/acfa-161-remove-links.txt"

    # Read a list of bibids (csv)
    the_bibids = []
    with open(bibid_file) as ids:
        for row in csv.reader(ids):
            the_bibids.append(row[0])

    for b in the_bibids:

        try:
            repo, asid = asf.lookupByBibID(b, the_lookup_csv)
            print("Processing " + str(b) + "...")

            out_path_old = (output_folder + "/" + str(repo) + "_" + str(asid) +
                            "_old.json")
            out_path_new = (output_folder + "/" + str(repo) + "_" + str(asid) +
                            "_new.json")

            x = asf.getResource(repo, asid)

            # Save copy of existing object
            print("Saving data to " + out_path_old + "....")
            with open(out_path_old, "w+") as f:
                f.write(x)

            x_dict = json.loads(x)
            if "ead_location" in x_dict:
                print(x_dict["ead_location"])
                del x_dict["ead_location"]
            else:
                print("No URL to delete!")

            y = json.dumps(x_dict)
            # print(y)

            post = asf.postResource(repo, asid, y)
            print(post)

            # Save copy of new object
            print("Saving data to " + out_path_new + "....")

            with open(out_path_new, "w+") as f:
                f.write(y)

        except Exception:
            print("Error: Could not process " + str(b))
            print(sys.exc_info())
            # raise

    quit()
Code Example #2
def main():
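    """Write OCLC control numbers into user_defined string_2/string_3.

    Assumed context: csv and json imports plus the asf ArchivesSpace API
    helper module. Reads pipe-delimited rows of bibid|string_2|string_3,
    looks up each resource, merges the values into its user_defined record,
    and posts the updated resource back.
    """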
    # Main code goes here.

    asf.setServer("Prod")

    lookup_csv = "id_lookup_prod.csv"
    id_file = "/Users/dwh2128/Documents/ACFA/TEST/ACFA-226-oclc/035s_20200915.txt"

    # Read a list of bibids and oclc strings
    the_data = []
    with open(id_file) as ids:
        for row in csv.reader(ids, delimiter="|"):
            the_data.append([row[0], row[1], row[2]])

    for a_row in the_data:
        bibid = a_row[0]
        print(bibid)
        str_2 = a_row[1]
        str_3 = a_row[2]
        try:
            repo, asid = asf.lookupByBibID(bibid, lookup_csv)

            x = asf.getResource(repo, asid)
            y = json.loads(x)

            user_defined = y.get("user_defined", {})
            user_defined["string_2"] = str_2
            user_defined["string_3"] = str_3

            print(user_defined)

            y["user_defined"] = user_defined

            z = json.dumps(y)
            post = asf.postResource(repo, asid, z)
            print(post)

        except Exception as e:
            print(str(e) + ": Could not lookup " + str(bibid))
Code Example #3
def main():
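    """Populate the collecting-area field (user_defined/enum_4) on resources.

    Assumed context: os and json imports, the asf ArchivesSpace API helper
    module, and the dataSheet Google Sheets wrapper. Reads repo/asid rows
    from a sheet, saves each resource before and after editing, sets enum_4
    from the sheet's collecting-area column when it is not already present,
    posts the change, and writes the updated rows back to the sheet.
    """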
    # Main code goes here.

    my_name = __file__

    # This makes sure the script can be run from any working directory and still find related files.
    my_path = os.path.dirname(__file__)

    asf.setServer("Prod")

    the_sheet = dataSheet("1UQm7ffd1Kq4zqlzHZajd9YkwW1_nmOJFS1W7nI-c_Vk",
                          "new-batch!A:Z")
    output_folder = os.path.join(my_path, "output/resource_collecting_area")

    the_rows = the_sheet.getData()
    the_new_rows = []

    the_heads = the_rows.pop(0)

    the_new_rows.append(the_heads)

    coll_area_index = 8  # the column of collecting area

    for a_row in the_rows:
        the_new_row = a_row
        # print(a_row)
        coll = ""
        repo, asid = a_row[0], a_row[1]
        if len(a_row) > coll_area_index:
            # if there is a collecting location to add
            coll = a_row[coll_area_index]

            the_resource = asf.getResource(repo, asid)

            out_path_old = (output_folder + "/" + str(repo) + "_" + str(asid) +
                            "_old.json")
            out_path_new = (output_folder + "/" + str(repo) + "_" + str(asid) +
                            "_new.json")

            # Save copy of existing object
            print("Saving data to " + out_path_old + "....")
            with open(out_path_old, "w+") as f:
                f.write(the_resource)

            the_data = json.loads(the_resource)

            fix = False
            if "user_defined" in the_data:
                the_user_defined = the_data["user_defined"]
                if "enum_4" in the_user_defined:
                    print("Already has enum_4! Skipping.")
                else:
                    fix = True
                    the_user_defined["enum_4"] = coll
                    the_data["user_defined"] = the_user_defined
                    the_new_resource = json.dumps(the_data)

                    # Save copy of new object
                    print("Saving data to " + out_path_new + "....")
                    with open(out_path_new, "w+") as f:
                        f.write(the_new_resource)

                if fix:

                    try:
                        post = "[NONE]"
                        post = asf.postResource(repo, asid, the_new_resource)
                        print(post)
                    except Exception:
                        print("Error: There was a problem posting resource " +
                              str(repo) + ":" + str(asid) + "!")

                    the_new_row.append(coll)
            else:
                print("ERROR: No user_defined data in " + str(repo) + ":" +
                      str(asid))

        the_new_rows.append(the_new_row)

    the_sheet.clear()
    the_sheet.appendData(the_new_rows)

    # print(the_new_rows)

    quit()
Code Example #4
def main():
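    """Build the daily ArchivesSpace resource report.

    Assumed context: imports for os, json, datetime/date, dpath, and shutil's
    make_archive/move/rmtree, plus the asf ArchivesSpace API helper, the
    dataSheet Google Sheets wrapper, and local util, digester, and
    truncate_str helpers. Pulls collection management records and all
    resources from each repo, extracts selected fields (plus truncated scope
    and bioghist notes), archives the raw JSON to a dated zip, and writes the
    results and a log entry to a Google Sheet.
    """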

    # set to True to use test sheet and test json folder location.
    debug = False

    asf.setServer("Prod")

    my_name = __file__
    script_name = os.path.basename(my_name)

    # This makes sure the script can be run from any working directory and still find related files.
    my_path = os.path.dirname(__file__)

    now1 = datetime.now()
    start_time = str(now1)
    end_time = ""  # set later
    today_str = str((date.today()).strftime("%Y%m%d"))

    if debug:
        print("[Running script in debug mode...]")
        parent_folder = "/cul/cul0/ldpd/archivesspace/test/resources"  # test folder
        sheet_id = "1wFyLN_Ea7ExCZSMuksB8MTrS9DjsUkwsmaPBujL7x0U"  # test sheet
        the_repos = [4]  # to test
    else:
        parent_folder = "/cul/cul0/ldpd/archivesspace/resources"
        sheet_id = "1T3EpIZmnh3Gk-VAIGtvavTQUIpS7AluyKQ8-sJsS8vg"
        the_repos = [2, 3, 4, 5, 6]

    output_folder = parent_folder + "/" + today_str

    the_sheets = {
        "resources": dataSheet(sheet_id, "Resources!A:Z"),
        "cm": dataSheet(sheet_id, "Collection Management!A:Z"),
        "log": dataSheet(sheet_id, "log!A:Z"),
    }

    # Number of characters at which to truncate the scope and bioghist notes.
    trunc_len = 400

    # List of fields to extract, expressed as dpaths.
    the_fields = [
        ["bibid", "/id_0"],
        ["title", "/title"],
        ["published", "/publish"],
        ["create_time", "/create_time"],
        ["system_mtime", "/system_mtime"],
        ["created_by", "/created_by"],
        ["last_modified_by", "/last_modified_by"],
        ["ead_location", "/ead_location"],
        ["ext_number", "/extents/0/number"],
        ["ext_portion", "/extents/0/portion"],
        ["ext_type", "/extents/0/extent_type"],
        # ["integer_1", "/user_defined/integer_1"],
        # ["integer_2", "/user_defined/integer_2"],
        # ["integer_3", "/user_defined/integer_3"],
        ["local call no.", "/user_defined/string_1"],
        ["other ctrl no. 1", "/user_defined/string_2"],
        ["other ctrl no. 2", "/user_defined/string_3"],
        ["other ctrl no. 3", "/user_defined/string_4"],
        # ["enum_1", "/user_defined/enum_1"],
        # ["enum_2", "/user_defined/enum_2"],
        ["description status", "/user_defined/enum_3"],
        ["collecting area", "/user_defined/enum_4"],
        ["level", "level"]
        # (Scope and bioghist notes are added in separately below.)
    ]

    # Get the collection management records for use in report.

    the_cms = []

    fields = [
        "id",
        "parent_id",
        "title",
        "system_mtime",
        "processing_priority",
        "processing_status",
    ]

    print(" ")
    print("*** Retrieve Collection Management Data ***")
    print(" ")

    for r in the_repos:
        print("Getting collection management records for repo: " + str(r) +
              "...")
        cm = asf.getCollectionManagements(r, filter="resource", fields=fields)
        for c in cm:
            row = [c[f] for f in fields]
            the_cms.append(row)

    # A data set of collection management records to post to the sheet below.
    the_cms.insert(0, fields)

    print(" ")
    print("*** Retrieve Resource Data ***")
    print(" ")

    # Get the list of resources for each repo and add to the_ids
    the_ids = []
    for r in the_repos:
        print("Getting ids for repo: " + str(r) + "...")
        asids = json.loads(
            asf.getResponse("/repositories/" + str(r) +
                            "/resources?all_ids=true"))

        print(str(len(asids)) + " records found in repo " + str(r) + ".")
        for i in asids:
            the_ids.append([r, i])

    # Construct the head row
    the_heads = [x[0] for x in the_fields]
    the_heads.insert(0, "asid")
    the_heads.insert(0, "repo")
    the_heads.append("scope note")
    the_heads.append("scopenote length")

    the_heads.append("bioghist note")
    the_heads.append("biognote length")

    the_output = [the_heads]

    # Fetch the resources from the ids
    print("Downloading resources...")

    if not os.path.exists(output_folder):
        print("Creating directory " + output_folder + "...")
        os.makedirs(output_folder)

    for repo, asid in the_ids:
        # print("Processsing " + str(repo) + ":" + str(asid) + "...")
        the_row = [repo, asid]
        res_json = asf.getResource(repo, asid)
        res_dict = json.loads(res_json)

        out_path = output_folder + "/" + str(repo) + "_" + str(asid) + ".json"

        # Write the JSON to file.
        with open(out_path, "w+") as f:
            f.write(res_json)

        # Use dpath to extract values from dict and compose into rows.
        for af in the_fields:
            try:
                d = str(dpath.util.get(res_dict, af[1]))
            except Exception:
                d = ""
            the_row.append(d)

        # Process scope and bioghist notes

        the_notes = dpath.util.values(res_dict, "notes/*", afilter=None)

        the_scope_notes = []
        the_biog_notes = []

        for a_note in the_notes:
            try:
                if a_note["type"] == "scopecontent":
                    the_scope_notes.append(a_note)
            except (KeyError, TypeError):
                pass
            try:
                if a_note["type"] == "bioghist":
                    the_biog_notes.append(a_note)
            except (KeyError, TypeError):
                pass

        if the_scope_notes:
            # If there are scope notes, concatenate all their text and record the total length in characters.
            scope_note_texts = [
                s["subnotes"][0]["content"] for s in the_scope_notes
            ]
            the_scope_text = " ".join(scope_note_texts)
            scope_note_len = len(the_scope_text)

            scope_note_short = truncate_str(the_scope_text, length=trunc_len)
        else:
            scope_note_short = ""
            scope_note_len = 0

        if the_biog_notes:
            # If there are bioghist notes, concatenate all their text and record the total length in characters.
            biog_note_texts = [
                s["subnotes"][0]["content"] for s in the_biog_notes
            ]
            the_biog_text = " ".join(biog_note_texts)
            biog_note_len = len(the_biog_text)

            biog_note_short = truncate_str(the_biog_text, length=trunc_len)
        else:
            biog_note_short = ""
            biog_note_len = 0

        the_row.append(scope_note_short)
        the_row.append(str(scope_note_len))
        the_row.append(biog_note_short)
        the_row.append(str(biog_note_len))

        the_output.append(the_row)

    # Zip up the JSON files for storage.
    zip_out = make_archive(today_str,
                           "zip",
                           root_dir=parent_folder,
                           base_dir=today_str)

    print(zip_out)

    # Zip is saved in working dir; move to correct location.
    print("Saving zip file " + str(today_str) + ".zip to " + parent_folder)

    # Test if file already exists.
    if os.path.exists(parent_folder + "/" + str(today_str) + ".zip"):
        print("File " + parent_folder + "/" + str(today_str) +
              ".zip exists already. Replacing with new zip file...")

        os.remove(parent_folder + "/" + str(today_str) + ".zip")

    move(zip_out, parent_folder)

    # Remove the json folder once zip is in place.
    rmtree(parent_folder + "/" + today_str)

    util.file_cleanup(parent_folder, 60)

    # Write output to Google sheet.

    print(" ")
    print("*** Writing Data to Report ***")
    print(" ")

    the_sheets["cm"].clear()
    the_sheets["cm"].appendData(the_cms)
    digester.post_digest(
        script_name,
        "Total collection management records: " + str(len(the_cms) - 1))

    the_sheets["resources"].clear()
    the_sheets["resources"].appendData(the_output)
    digester.post_digest(
        script_name,
        "Total number of resource records: " + str(len(the_output) - 1))

    ########################
    ### FINISH UP ###
    ########################

    # Generate log string.
    now2 = datetime.now()
    end_time = str(now2)
    my_duration = str(now2 - now1)

    the_log = ("Data imported by " + my_name + ". Start: " + start_time +
               ". Finished: " + end_time + " (duration: " + my_duration + ").")

    the_sheets["log"].appendData([[the_log]])

    print(" ")

    print(the_log)

    print(" ")

    exit_msg = "Script done. Updated data is available at " + \
        the_sheets["resources"].url
    print(exit_msg)
    digester.post_digest(script_name, exit_msg)
Code Example #5
File: pypypy.py  Project: cul/dcps-utils
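# Fragment of a smoke-test script. Assumed context: earlier lines import os,
# the asf ArchivesSpace API helper, and the local util module, and define
# my_path and the_sheet (a dataSheet instance) used below.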
x = the_sheet.matchingRows([['BIBID', '4079432'], ['Title', '.*Humph.*']])

print(x)

print(' ')

x = the_sheet.lookup('4079432', 0, 1)

print(x)

print(' ')

print('testing archivesspace api...')

x = asf.getResource(2, 5907)

print(x)

print(' ')

print("testing saxon ...")

saxon_path = os.path.join(my_path, '../../resources/saxon-9.8.0.12-he.jar')
source_dir = '/cul/cul0/ldpd/archivesspace/oai'
in_file = os.path.join(source_dir, '20201111.asClean.xml')
xsl_file = os.path.join(my_path, '../xslt/extract-bibids.xsl')
params = 'filename=' + in_file
x = util.saxon_process(in_file, xsl_file, None, theParams=params)
print(x)
Code Example #6
def main():
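    """Append an on-site or off-site access restriction note to resources.

    Assumed context: csv and json imports plus the asf ArchivesSpace API
    helper module. Reads BIBIDs from a CSV, looks up each resource, saves a
    before/after copy of the JSON, appends the selected accessrestrict note,
    and posts the updated record back.
    """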
    # Main code goes here.

    asf.setServer("Prod")

    # Set to True to add the on-site note, or False to add the off-site note.
    # See the_access_note definitions below.
    on_site = False

    output_folder = "output/resource_on-site_access"

    lookup_csv = "id_lookup_prod.csv"

    # bibid_file = (
    #     "/Users/dwh2128/Documents/ACFA/TEST/ACFA-224-onsite-notes/acfa-224-list_3.csv"
    # )
    bibid_file = (
        "/Users/dwh2128/Documents/ACFA/TEST/ACFA-243-off-site/acfa-243_off-site.csv"
    )

    # Read a list of bibids (csv)
    the_bibids = []
    with open(bibid_file) as ids:
        for row in csv.reader(ids):
            the_bibids.append(row[0])

    if on_site:
        the_access_note = {
            "jsonmodel_type": "note_multipart",
            "label": "Restrictions on Access",
            "type": "accessrestrict",
            "rights_restriction": {"local_access_restriction_type": []},
            "subnotes": [
                {
                    "jsonmodel_type": "note_text",
                    "content": "This collection is located on-site.",
                    "publish": True,
                }
            ],
            "publish": True,
        }
    else:
        the_access_note = {
            "jsonmodel_type": "note_multipart",
            "label": "Restrictions on Access",
            "type": "accessrestrict",
            "rights_restriction": {"local_access_restriction_type": []},
            "subnotes": [
                {
                    "jsonmodel_type": "note_text",
                    "content": "This collection is located off-site. You will need to request this material at least three business days in advance to use the collection in the Rare Book and Manuscript Library reading room.",
                    "publish": True,
                }
            ],
            "publish": True,
        }

    for bib in the_bibids:

        try:
            repo, asid = asf.lookupByBibID(bib, lookup_csv)
        except Exception:
            print("Error: No record found for " + str(bib) + ". Skipping...")
            continue

        out_path_old = output_folder + "/" + str(repo) + "_" + str(asid) + "_old.json"
        out_path_new = output_folder + "/" + str(repo) + "_" + str(asid) + "_new.json"

        the_resource = asf.getResource(repo, asid)

        # Save copy of existing object
        print("Saving data to " + out_path_old + "....")

        with open(out_path_old, "w+") as f:
            f.write(the_resource)

        the_data = json.loads(the_resource)

        # Test if there is already an access restriction note.
        has_note = False
        for a_note in the_data["notes"]:
            try:
                if a_note["type"] == "accessrestrict":
                    has_note = True
            except KeyError:
                print("Note has no type -- skipping.")

        if has_note:
            print(str(bib) + " - Warning: Already has access note.")
        # else:
        the_data["notes"].append(the_access_note)

        the_new_resource = json.dumps(the_data)

        # Save copy of new object
        print("Saving data to " + out_path_new + "....")

        with open(out_path_new, "w+") as f:
            f.write(the_new_resource)

        try:
            post = asf.postResource(repo, asid, the_new_resource)
            print(post)
        except Exception:
            print(
                "Error: There was a problem posting resource "
                + str(repo)
                + ":"
                + str(asid)
                + "!"
            )

    quit()
Code Example #7
def main():
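    """Add encoded language records to resources from a spreadsheet.

    Assumed context: json import, the asf ArchivesSpace API helper module,
    the dataSheet Google Sheets wrapper, and the local helpers
    language_lookup, diff, and make_language_note. For each sheet row,
    compares the languages parsed from the free-text language column against
    those already encoded in lang_materials, appends any missing entries,
    posts the update, and records the additions back to the sheet.
    """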

    asf.setServer("Prod")

    # the_lookup_csv = "id_lookup_TEST.csv"  # test
    the_lookup_csv = "id_lookup_prod.csv"  # prod

    output_folder = "output/resource_language_encode"

    the_sheet = dataSheet("1eTPY7AbDvjDU-lzK2VQruvZAvlGkAJZglh2JrruPvdg", "Test6!A:Z")

    the_data = the_sheet.getData()

    the_new_data = []
    the_new_data.append(the_data.pop(0))

    counter = 0

    for a_row in the_data:

        counter += 1
        print(" ")
        print(counter)

        the_new_row = a_row
        the_bibid = a_row[0]
        the_041 = a_row[1]
        the_string = a_row[3]

        res_info = asf.lookupByBibID(the_bibid, the_lookup_csv)

        if res_info:
            out_path_old = (
                output_folder
                + "/"
                + str(res_info[0])
                + "_"
                + str(res_info[1])
                + "_old.json"
            )
            out_path_new = (
                output_folder
                + "/"
                + str(res_info[0])
                + "_"
                + str(res_info[1])
                + "_new.json"
            )

            # pull down the resource
            the_resource = asf.getResource(res_info[0], res_info[1])

            # Save copy of existing object
            print("Saving data to " + out_path_old + "....")

            with open(out_path_old, "w+") as f:
                f.write(the_resource)

            res_dict = json.loads(the_resource)

            langmaterials = res_dict["lang_materials"]

            # Collect encoded languages already present. There should be just one but not guaranteed, so make a list.
            primary_langs = []
            for n in langmaterials:
                try:
                    if n["language_and_script"]:
                        # print("YES")
                        primary_langs.append(n["language_and_script"]["language"])
                except (KeyError, TypeError):
                    print("Exception!")

            print("old:")
            print(primary_langs)

            print("new:")
            langs_parsed = language_lookup(the_string)
            print(langs_parsed)

            print("to add: ")
            langs_diff = diff(langs_parsed, primary_langs)
            print(langs_diff)

            if len(langs_diff) > 0:

                for lang in langs_diff:
                    res_dict["lang_materials"].append(make_language_note(lang))

                new_resource = json.dumps(res_dict)
                # Save new object
                print("Saving data to " + out_path_new + "....")

                with open(out_path_new, "w+") as f:
                    f.write(new_resource)

                # Post new resource back to API

                print("Posting data for " + str(res_info[0]) + " : " + str(res_info[1]))
                try:
                    post = asf.postResource(res_info[0], res_info[1], new_resource)
                    print(post)
                except Exception:
                    print(
                        "Error: There was a problem posting resource "
                        + str(res_info[0])
                        + ":"
                        + str(res_info[1])
                        + "!"
                    )
                    langs_diff.append("[ERROR]")

            else:
                print("No new languages to add. Skipping.")

            the_new_row.append(",".join(langs_diff))
            the_new_data.append(the_new_row)

    the_sheet.clear()
    the_sheet.appendData(the_new_data)
Code Example #8
File: test_asfunctions.py  Project: cul/dcps-utils
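# Assumed context: a pytest-style test module that imports json and the asf
# ArchivesSpace API helper, with the API server pointed at the Dev instance.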
def test_get_resource_dev():
    x = json.loads(asf.getResource(2, 5907))
    assert x['id_0'] == '4078601', \
        "Dev: BIBID for resource 2:5907 should be 4078601"
Code Example #9
def main():
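    """Search-and-replace a value in user_defined/string_2 on resources.

    Assumed context: csv, json, and re imports, the asf ArchivesSpace API
    helper module, and the dataSheet Google Sheets wrapper. Runs against the
    Test server, rewrites the search pattern ('NCC') to the replacement
    ('NNC') in string_2 for each listed repo/asid pair, posts the change,
    and reports before/after values to a Google Sheet.
    """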

    asf.setServer('Test')

    # Google sheet used for reporting changes.

    the_report_sheet = dataSheet(
        '1wNO0t2j5G9U0hUmb7E-jLd4T5skTs1aRxN7HrlyZwEI', 'resources!A:Z')

    id_file = 'resource_replacements.csv'
    output_folder = 'output/resource_replacements'

    # Read a list of repo and object ids (csv)
    the_ids = []
    with open(id_file) as ids:
        for row in csv.reader(ids):
            the_ids.append([row[0], row[1]])

    # Search/replace patterns
    the_search_pattern = 'NCC'
    the_replace_pattern = 'NNC'

    the_before_afters = []

    the_heads = ['repo', 'asid', 'before', 'after']

    the_before_afters.append(the_heads)

    for an_obj in the_ids:

        out_path = output_folder + '/' + an_obj[0] + '_' + an_obj[
            1] + '_old.json'

        # read from API
        x = asf.getResource(an_obj[0], an_obj[1])

        # Save copy of existing object
        print('Saving data to ' + out_path + '....')

        with open(out_path, "w+") as f:
            f.write(x)

        x = json.loads(x)

        the_old_field_data = x['user_defined']['string_2']

        y = x  # Note: an alias of x, not a copy; the original value is already saved in the_old_field_data.

        y['user_defined']['string_2'] = re.sub(the_search_pattern,
                                               the_replace_pattern,
                                               x['user_defined']['string_2'])

        if y['user_defined']['string_2'] == the_old_field_data:
            the_new_field_data = "[no change]"
        else:
            the_new_field_data = y['user_defined']['string_2']

        the_before_afters.append([
            an_obj[0], an_obj[1], '{string_2} ' + the_old_field_data,
            '{string_2} ' + the_new_field_data
        ])

        # convert dict back to json for posting.
        z = json.dumps(y)

        # Post the fixed object back to API.
        post = asf.postResource(an_obj[0], an_obj[1], z)
        print(post)

    # Report changes to Google Sheet

    print('Writing before/after info to sheet...')
    the_report_sheet.clear()
    the_report_sheet.appendData(the_before_afters)