Example 1
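These examples are excerpted from larger ArchivesSpace batch-maintenance scripts, so each omits its imports and the local helper modules it calls. A minimal shared preamble, inferred from the calls below (the import paths for the local helpers are assumptions):

import csv
import json
import os
import sys

import asf  # local ArchivesSpace API wrapper (setServer, lookupByBibID, getResource, ...)
from sheetFeeder import dataSheet  # Google Sheets helper used in Examples 5 and 7; path assumed
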
def main():
    # Main code goes here.

    asf.setServer("Prod")

    output_folder = "output/resource_remove_links"
    the_lookup_csv = "id_lookup_prod.csv"
    bibid_file = "/Users/dwh2128/Documents/ACFA/TEST/ACFA-161-remove-links/acfa-161-remove-links.txt"

    # Read a list of bibids (csv)
    the_bibids = []
    with open(bibid_file) as ids:
        for row in csv.reader(ids):
            the_bibids.append(row[0])

    for b in the_bibids:

        try:
            repo, asid = asf.lookupByBibID(b, the_lookup_csv)
            print("Processing " + str(b) + "...")

            out_path_old = (output_folder + "/" + str(repo) + "_" + str(asid) +
                            "_old.json")
            out_path_new = (output_folder + "/" + str(repo) + "_" + str(asid) +
                            "_new.json")

            x = asf.getResource(repo, asid)

            # Save copy of existing object
            print("Saving data to " + out_path_old + "....")
            with open(out_path_old, "w+") as f:
                f.write(x)

            x_dict = json.loads(x)
            if "ead_location" in x_dict:
                print(x_dict["ead_location"])
                del x_dict["ead_location"]
            else:
                print("No URL to delete!")

            y = json.dumps(x_dict)
            # print(y)

            post = asf.postResource(repo, asid, y)
            print(post)

            # Save copy of new object
            print("Saving data to " + out_path_new + "....")

            with open(out_path_new, "w+") as f:
                f.write(y)

        except Exception:
            print("Error: Could not process " + str(b))
            print(sys.exc_info())
            # raise

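asf.lookupByBibID itself is not shown in these excerpts. From the call sites, it maps a BIBID to a (repo, asid) pair using the lookup CSV; a minimal sketch under that assumption (the column order in the CSV is also assumed):

def lookup_by_bibid(bibid, lookup_csv):
    # Hypothetical stand-in for asf.lookupByBibID: scan a CSV of
    # bibid,repo,asid rows and return (repo, asid) for the first match.
    with open(lookup_csv) as f:
        for row in csv.reader(f):
            if row[0] == str(bibid):
                return int(row[1]), int(row[2])
    raise ValueError("BIBID " + str(bibid) + " not found in " + lookup_csv)
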
Example 2
def main():

    # set to Prod | Dev | Test
    asf.setServer('Prod')

    bibid_file = "ead_bibids_20190520.txt"
    lookup_file = "id_lookup_prod_20190522.csv"
    outfile_loc = "ead_as_qc_reports/ead_as_qc_xml_PROD1"

    with open(bibid_file) as f:
        the_bibids = [line.rstrip('\n') for line in f]

    the_errors = []
    the_processed = []

    for a_bibid in the_bibids:
        print('Processing bibid: ' + a_bibid)
        if a_bibid:
            try:
                the_repo, the_asid = asf.lookupByBibID(a_bibid, lookup_file)
                the_processed.append(a_bibid)
            except Exception:
                # Can't find in lookup
                the_repo = 0
                the_asid = 0
                the_errors.append(a_bibid)

        if a_bibid and the_asid != 0:
            the_ead = asf.getEAD(the_repo, the_asid)

            the_filepath = outfile_loc + '/' + a_bibid + '_ead.xml'

            with open(the_filepath, "w") as myfile:
                myfile.write(the_ead)

    # Report results
    print('Processed ' + str(len(the_processed)) + ' records.')
    if len(the_errors) > 0:
        print('*** Warning: ' + str(len(the_errors)) +
              ' errors. Could not process ids ' + ', '.join(the_errors) +
              ' ***')
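
Each of the main() examples here is intended to run as a standalone script; the usual entry point, omitted from these excerpts, would be:

if __name__ == "__main__":
    main()
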
Example 3
def harvestBatchEAD(ids_file, lookup_file, out_folder):

    with open(ids_file) as f:
        the_bibids = [line.rstrip('\n') for line in f]

    the_errors = []
    the_processed = []

    for a_bibid in the_bibids:
        print('Processing bibid: ' + a_bibid)
        if a_bibid:
            try:
                the_repo, the_asid = asf.lookupByBibID(a_bibid, lookup_file)
                the_processed.append(a_bibid)
            except Exception:
                # Can't find in lookup
                the_repo = 0
                the_asid = 0
                the_errors.append(a_bibid)

        # print(the_repo)
        # print(the_asid)

        if a_bibid and the_asid != 0:
            the_ead = getSingleEAD(the_repo, the_asid)

            the_filepath = out_folder + '/' + a_bibid + '_ead.xml'
            with open(the_filepath, "w") as myfile:
                myfile.write(the_ead)

    # Report results
    print('Processed ' + str(len(the_processed)) + ' records.')
    if len(the_errors) > 0:
        print('*** Warning: ' + str(len(the_errors)) +
              ' errors. Could not process ids ' + ', '.join(the_errors) +
              ' ***')
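
Because harvestBatchEAD takes its file locations as parameters, it can be reused directly; a hypothetical invocation (all three paths are placeholders):

harvestBatchEAD("ead_bibids.txt", "id_lookup_prod.csv", "output/ead_xml")
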
Example 4
def main():
    # Main code goes here.

    asf.setServer("Prod")

    lookup_csv = "id_lookup_prod.csv"
    id_file = "/Users/dwh2128/Documents/ACFA/TEST/ACFA-226-oclc/035s_20200915.txt"

    # Read a list of bibids and oclc strings
    the_data = []
    with open(id_file) as ids:
        for row in csv.reader(ids, delimiter="|"):
            the_data.append([row[0], row[1], row[2]])

    for a_row in the_data:
        bibid = a_row[0]
        print(bibid)
        str_2 = a_row[1]
        str_3 = a_row[2]
        try:
            repo, asid = asf.lookupByBibID(bibid, lookup_csv)

            x = asf.getResource(repo, asid)
            y = json.loads(x)

            user_defined = y.get("user_defined", {})
            user_defined["string_2"] = str_2
            user_defined["string_3"] = str_3

            print(user_defined)

            y["user_defined"] = user_defined

            z = json.dumps(y)
            post = asf.postResource(repo, asid, z)
            print(post)

        except Exception as e:
            print(str(e) + ": Could not lookup " + str(bibid))
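
The input file here is pipe-delimited, three columns per line: the BIBID followed by the values destined for string_2 and string_3. A sample line (values are hypothetical):

4078569|(OCoLC)12345678|(OCoLC)ocm12345678
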
Example 5
def main():

    my_name = __file__

    # This makes sure the script can be run from any working directory and still find related files.
    my_path = os.path.dirname(__file__)

    sheet_id = '13OakaS0KHtxcaV9HGWDP9Zfnz9TVJR_9zGUnKrb90jk'  # test
    # sheet_id = '1tYOXSDFlkbX_revB_ULvhmCdvKkyzpipBTkYqYXcM38'
    # sheet_id = '1e43qKYvqGQFOMxA70U59yPKPs18y-k3ohRNdU-qrTH0'  # test
    # sheet_id = '1OhgJ4g-SWbmnms4b3ppe_0rBT7hz9jfQp6P8mADcatk'  # batch template doc

    container_sheet = dataSheet(sheet_id, 'containers!A:Z')

    marc_sheet = dataSheet(sheet_id, 'marc!A:Z')

    # Get a list of bibids from the Marc tab.
    # the_bibids = marc_sheet.getDataColumns()[0]
    the_bibids = marc_sheet.getDataColumns()[1]
    the_bibids.pop(0)
    the_bibids = list(set(the_bibids))
    print(the_bibids)

    #### TOP CONTAINERS ####

    the_heads = [
        'bibid', 'resource', 'uri', 'type', 'display_string', 'concat'
    ]
    the_rows = [the_heads]

    lookup_csv = os.path.join(my_path, 'id_lookup_prod.csv')
    for abib in the_bibids:
        print(abib)
        # Get repo and asid from bibid
        repo, asid = asf.lookupByBibID(abib, lookup_csv)

        print('Getting top containers for ' + str(repo) + ':' + str(asid))

        the_query = ('/repositories/' + str(repo) + '/resources/' +
                     str(asid) + '/top_containers')

        # list of top containers
        the_refs = json.loads(asf.getResponse(the_query))
        print(the_refs)
        cnt = 0
        for r in the_refs:
            cnt += 1
            print(cnt)
            try:
                tc = json.loads(asf.getResponse(r['ref']))
                # print(tc)

                # Default to empty strings for any fields missing from the record.
                the_collection = (tc.get('collection') or [{}])[0]
                bibid = the_collection.get('identifier', '')
                resource = the_collection.get('ref', '')
                uri = tc.get('uri', '')
                the_type = tc.get('type', '')
                display_string = tc.get('display_string', '')
                try:
                    concat_str = display_string + ' (' + uri.split('/')[4] + ')'
                except IndexError:
                    concat_str = 'x'

                a_row = [
                    bibid, resource, uri, the_type, display_string, concat_str
                ]
                # print(a_row)
                the_rows.append(a_row)
            except Exception:
                print(r)

    # Write results to google sheet
    container_sheet.clear()
    z = container_sheet.appendData(the_rows)
    print(z)
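
asf.getResponse is another helper not shown here; judging by its use, it performs an authenticated GET against the ArchivesSpace backend and returns the response body as JSON text. A minimal sketch with requests, assuming a session token passed in the standard X-ArchivesSpace-Session header (the base URL is a placeholder):

import requests

API_BASE = "https://aspace.example.edu/api"  # hypothetical

def get_response(query, session_token):
    # Hypothetical stand-in for asf.getResponse: GET a backend path and
    # return the raw JSON text of the response.
    resp = requests.get(
        API_BASE + query,
        headers={"X-ArchivesSpace-Session": session_token},
    )
    resp.raise_for_status()
    return resp.text
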
Example 6
def main():
    # Main code goes here.

    asf.setServer("Prod")

    # Set to True to get the on-site note, False to get the off-site note.
    # See the_access_note below.
    on_site = False

    output_folder = "output/resource_on-site_access"

    lookup_csv = "id_lookup_prod.csv"

    # bibid_file = (
    #     "/Users/dwh2128/Documents/ACFA/TEST/ACFA-224-onsite-notes/acfa-224-list_3.csv"
    # )
    bibid_file = (
        "/Users/dwh2128/Documents/ACFA/TEST/ACFA-243-off-site/acfa-243_off-site.csv"
    )

    # Read a list of bibids (csv)
    the_bibids = []
    with open(bibid_file) as ids:
        for row in csv.reader(ids):
            the_bibids.append(row[0])

    if on_site:
        the_access_note = {
            "jsonmodel_type": "note_multipart",
            "label": "Restrictions on Access",
            "type": "accessrestrict",
            "rights_restriction": {"local_access_restriction_type": []},
            "subnotes": [
                {
                    "jsonmodel_type": "note_text",
                    "content": "This collection is located on-site.",
                    "publish": True,
                }
            ],
            "publish": True,
        }
    else:
        the_access_note = {
            "jsonmodel_type": "note_multipart",
            "label": "Restrictions on Access",
            "type": "accessrestrict",
            "rights_restriction": {"local_access_restriction_type": []},
            "subnotes": [
                {
                    "jsonmodel_type": "note_text",
                    "content": "This collection is located off-site. You will need to request this material at least three business days in advance to use the collection in the Rare Book and Manuscript Library reading room.",
                    "publish": True,
                }
            ],
            "publish": True,
        }

    for bib in the_bibids:

        try:
            repo, asid = asf.lookupByBibID(bib, lookup_csv)
        except Exception:
            print("Error: No record found for " + str(bib) + ". Skipping...")
            continue

        out_path_old = output_folder + "/" + str(repo) + "_" + str(asid) + "_old.json"
        out_path_new = output_folder + "/" + str(repo) + "_" + str(asid) + "_new.json"

        the_resource = asf.getResource(repo, asid)

        # Save copy of existing object
        print("Saving data to " + out_path_old + "....")

        with open(out_path_old, "w+") as f:
            f.write(the_resource)

        the_data = json.loads(the_resource)

        # Test if there is already an access restriction note.
        has_note = False
        for a_note in the_data["notes"]:
            try:
                if a_note["type"] == "accessrestrict":
                    has_note = True
            except KeyError:
                print("Note has no type -- skipping.")

        if has_note:
            print(str(bib) + " - Warning: Already has access note.")
        # Append the note regardless; the check above only warns.
        the_data["notes"].append(the_access_note)

        the_new_resource = json.dumps(the_data)

        # Save copy of new object
        print("Saving data to " + out_path_new + "....")

        with open(out_path_new, "w+") as f:
            f.write(the_new_resource)

        try:
            post = asf.postResource(repo, asid, the_new_resource)
            print(post)
        except Exception:
            print(
                "Error: There was a problem posting resource "
                + str(repo)
                + ":"
                + str(asid)
                + "!"
            )

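The two access-note dicts above differ only in their subnote text, so they could be produced by one small builder; a sketch (the function name is an invention):

def build_access_note(content):
    # Wrap restriction text in a published multipart accessrestrict note,
    # matching the structure used in Example 6.
    return {
        "jsonmodel_type": "note_multipart",
        "label": "Restrictions on Access",
        "type": "accessrestrict",
        "rights_restriction": {"local_access_restriction_type": []},
        "subnotes": [
            {"jsonmodel_type": "note_text", "content": content, "publish": True}
        ],
        "publish": True,
    }
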
Example 7
def main():

    asf.setServer("Prod")

    # the_lookup_csv = "id_lookup_TEST.csv"  # test
    the_lookup_csv = "id_lookup_prod.csv"  # prod

    output_folder = "output/resource_language_encode"

    the_sheet = dataSheet("1eTPY7AbDvjDU-lzK2VQruvZAvlGkAJZglh2JrruPvdg", "Test6!A:Z")

    the_data = the_sheet.getData()

    the_new_data = []
    the_new_data.append(the_data.pop(0))

    counter = 0

    for a_row in the_data:

        counter += 1
        print(" ")
        print(counter)

        the_new_row = a_row
        the_bibid = a_row[0]
        the_041 = a_row[1]
        the_string = a_row[3]

        res_info = asf.lookupByBibID(the_bibid, the_lookup_csv)

        if res_info:
            out_path_old = (
                output_folder
                + "/"
                + str(res_info[0])
                + "_"
                + str(res_info[1])
                + "_old.json"
            )
            out_path_new = (
                output_folder
                + "/"
                + str(res_info[0])
                + "_"
                + str(res_info[1])
                + "_new.json"
            )

            # pull down the resource
            the_resource = asf.getResource(res_info[0], res_info[1])

            # Save copy of existing object
            print("Saving data to " + out_path_old + "....")

            with open(out_path_old, "w+") as f:
                f.write(the_resource)

            res_dict = json.loads(the_resource)

            langmaterials = res_dict["lang_materials"]

            # Collect encoded languages already present. There should be just one but not guaranteed, so make a list.
            primary_langs = []
            for n in langmaterials:
                try:
                    if n["language_and_script"]:
                        primary_langs.append(n["language_and_script"]["language"])
                except KeyError:
                    print("No language_and_script data -- skipping.")

            print("old:")
            print(primary_langs)

            print("new:")
            langs_parsed = language_lookup(the_string)
            print(langs_parsed)

            print("to add: ")
            langs_diff = diff(langs_parsed, primary_langs)
            print(langs_diff)

            if len(langs_diff) > 0:

                for lang in langs_diff:
                    res_dict["lang_materials"].append(make_language_note(lang))

                new_resource = json.dumps(res_dict)
                # Save new object
                print("Saving data to " + out_path_new + "....")

                with open(out_path_new, "w+") as f:
                    f.write(new_resource)

                # Post new resource back to API

                print("Posting data for " + str(res_info[0]) + " : " + str(res_info[1]))
                try:
                    post = asf.postResource(res_info[0], res_info[1], new_resource)
                    print(post)
                except Exception:
                    print(
                        "Error: There was a problem posting resource "
                        + str(res_info[0])
                        + ":"
                        + str(res_info[1])
                        + "!"
                    )
                    langs_diff.append("[ERROR]")

            else:
                print("No new languages to add. Skipping.")

            the_new_row.append(",".join(langs_diff))
            the_new_data.append(the_new_row)

    the_sheet.clear()
    the_sheet.appendData(the_new_data)
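
Example 7 leans on three helpers defined elsewhere in its script: language_lookup (which parses the free-text language statement into language codes), diff, and make_language_note. Sketches of the latter two, inferred from the call sites (the lang_materials structure follows the ArchivesSpace resource schema, but treat the details as assumptions):

def diff(first, second):
    # Items of first that are missing from second, preserving order.
    seen = set(second)
    return [item for item in first if item not in seen]

def make_language_note(lang_code):
    # Hypothetical: wrap a language code in a lang_materials entry.
    return {
        "jsonmodel_type": "lang_material",
        "language_and_script": {
            "jsonmodel_type": "language_and_script",
            "language": lang_code,
        },
    }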