def main(): # Main code goes here. asf.setServer("Prod") output_folder = "output/resource_remove_links" the_lookup_csv = "id_lookup_prod.csv" bibid_file = "/Users/dwh2128/Documents/ACFA/TEST/ACFA-161-remove-links/acfa-161-remove-links.txt" # Read a list of bibids (csv) the_bibids = [] with open(bibid_file) as ids: for row in csv.reader(ids): the_bibids.append(row[0]) for b in the_bibids: try: repo, asid = asf.lookupByBibID(b, the_lookup_csv) print("Processing " + str(b) + "...") out_path_old = (output_folder + "/" + str(repo) + "_" + str(asid) + "_old.json") out_path_new = (output_folder + "/" + str(repo) + "_" + str(asid) + "_new.json") x = asf.getResource(repo, asid) # Save copy of existing object print("Saving data to " + out_path_old + "....") with open(out_path_old, "w+") as f: f.write(x) x_dict = json.loads(x) print(x_dict["ead_location"]) if "ead_location" in x_dict: del x_dict["ead_location"] else: print("No URL to delete!") y = json.dumps(x_dict) # print(y) post = asf.postResource(repo, asid, y) print(post) # Save copy of new object print("Saving data to " + out_path_new + "....") with open(out_path_new, "w+") as f: f.write(y) except: print("Error: Could not process " + str(b)) print(sys.exc_info()) # raise quit()
def main():
    # set to Prod | Dev | Test
    asf.setServer('Prod')

    bibid_file = "ead_bibids_20190520.txt"
    lookup_file = "id_lookup_prod_20190522.csv"
    outfile_loc = "ead_as_qc_reports/ead_as_qc_xml_PROD1"

    with open(bibid_file) as f:
        the_bibids = [line.rstrip('\n') for line in f]

    the_errors = []
    the_processed = []

    for a_bibid in the_bibids:
        print('Processing bibid: ' + a_bibid)
        if a_bibid:
            try:
                the_lookup = asf.lookupByBibID(a_bibid, lookup_file)
                the_repo = the_lookup[0]
                the_asid = the_lookup[1]
                the_processed.append(a_bibid)
            except Exception:
                # Can't find in lookup
                the_repo = 0
                the_asid = 0
                the_errors.append(a_bibid)

        if (a_bibid and the_asid != 0):
            the_ead = asf.getEAD(the_repo, the_asid)
            the_filepath = outfile_loc + '/' + a_bibid + '_ead.xml'
            with open(the_filepath, "w") as myfile:
                myfile.write(the_ead)

    # Report results
    print('Processed ' + str(len(the_processed)) + ' records.')
    if len(the_errors) > 0:
        print('*** Warning: ' + str(len(the_errors)) + ' errors. Could not process id '
              + ', '.join(the_errors) + ' ***')
def harvestBatchEAD(ids_file, lookup_file, out_folder):
    bibidFile = ids_file
    lookupFile = lookup_file
    outFolder = out_folder

    with open(bibidFile) as f:
        the_bibids = [line.rstrip('\n') for line in f]

    the_errors = []
    the_processed = []

    for a_bibid in the_bibids:
        print('Processing bibid: ' + a_bibid)
        if a_bibid:
            try:
                the_lookup = asf.lookupByBibID(a_bibid, lookupFile)
                the_repo = the_lookup[0]
                the_asid = the_lookup[1]
                the_processed.append(a_bibid)
            except Exception:
                # Can't find in lookup
                the_repo = 0
                the_asid = 0
                the_errors.append(a_bibid)

        # print(the_repo)
        # print(the_asid)

        if (a_bibid and the_asid != 0):
            the_ead = getSingleEAD(the_repo, the_asid)
            the_filepath = outFolder + '/' + a_bibid + '_ead.xml'
            with open(the_filepath, "w") as myfile:
                myfile.write(the_ead)

    # Report results
    print('Processed ' + str(len(the_processed)) + ' records.')
    if len(the_errors) > 0:
        print('*** Warning: ' + str(len(the_errors)) + ' errors. Could not process id '
              + ', '.join(the_errors) + ' ***')
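# Example usage of harvestBatchEAD (a hypothetical invocation, not part of the original
# script): getSingleEAD and the asf helper module must be in scope, and the output folder
# must already exist. The file names below follow the batch-EAD script above and are
# assumptions, not fixed values.
if __name__ == '__main__':
    asf.setServer('Prod')  # Prod | Dev | Test
    harvestBatchEAD('ead_bibids_20190520.txt',
                    'id_lookup_prod_20190522.csv',
                    'ead_as_qc_reports/ead_as_qc_xml_PROD1')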
def main(): # Main code goes here. asf.setServer("Prod") lookup_csv = "id_lookup_prod.csv" id_file = "/Users/dwh2128/Documents/ACFA/TEST/ACFA-226-oclc/035s_20200915.txt" # Read a list of bibids and oclc strings the_data = [] with open(id_file) as ids: for row in csv.reader(ids, delimiter="|"): the_data.append([row[0], row[1], row[2]]) for a_row in the_data: bibid = a_row[0] print(bibid) str_2 = a_row[1] str_3 = a_row[2] try: repo, asid = asf.lookupByBibID(bibid, lookup_csv) x = asf.getResource(repo, asid) y = json.loads(x) user_defnd = y["user_defined"] if "user_defined" in y else {} user_defnd["string_2"] = str_2 user_defnd["string_3"] = str_3 print(user_defnd) y["user_defined"] = user_defnd z = json.dumps(y) post = asf.postResource(repo, asid, z) print(post) except Exception as e: print(e + ": Could not lookup " + str(bibid))
def main():
    my_name = __file__

    # This makes sure the script can be run from any working directory and still find related files.
    my_path = os.path.dirname(__file__)

    sheet_id = '13OakaS0KHtxcaV9HGWDP9Zfnz9TVJR_9zGUnKrb90jk'  # test
    # sheet_id = '1tYOXSDFlkbX_revB_ULvhmCdvKkyzpipBTkYqYXcM38'
    # sheet_id = '1e43qKYvqGQFOMxA70U59yPKPs18y-k3ohRNdU-qrTH0'  # test
    # sheet_id = '1OhgJ4g-SWbmnms4b3ppe_0rBT7hz9jfQp6P8mADcatk'  # batch template doc

    container_sheet = dataSheet(sheet_id, 'containers!A:Z')
    marc_sheet = dataSheet(sheet_id, 'marc!A:Z')

    # Get a list of bibids from the Marc tab.
    # the_bibids = marc_sheet.getDataColumns()[0]
    the_bibids = marc_sheet.getDataColumns()[1]
    the_bibids.pop(0)
    the_bibids = list(set(the_bibids))
    print(the_bibids)

    #### TOP CONTAINERS ####

    the_heads = ['bibid', 'resource', 'uri', 'type', 'display_string', 'concat']
    the_rows = [the_heads]

    lookup_csv = os.path.join(my_path, 'id_lookup_prod.csv')

    for abib in the_bibids:
        print(abib)

        # Get repo and asid from bibid
        repo, asid = asf.lookupByBibID(abib, lookup_csv)
        print('Getting top containers for ' + str(repo) + ':' + str(asid))

        the_query = ('/repositories/' + str(repo) + '/resources/'
                     + str(asid) + '/top_containers')

        # list of top containers
        the_refs = json.loads(asf.getResponse(the_query))
        print(the_refs)

        cnt = 0
        for r in the_refs:
            cnt += 1
            print(cnt)
            try:
                tc = json.loads(asf.getResponse(r['ref']))
                # print(tc)
                try:
                    bibid = tc['collection'][0]['identifier']
                except Exception:
                    bibid = ''
                try:
                    resource = tc['collection'][0]['ref']
                except Exception:
                    resource = ''
                try:
                    uri = tc['uri']
                except Exception:
                    uri = ''
                try:
                    the_type = tc['type']
                except Exception:
                    the_type = ''
                try:
                    display_string = tc['display_string']
                except Exception:
                    display_string = ''
                try:
                    concat_str = tc['display_string'] + ' (' + uri.split('/')[4] + ')'
                except Exception:
                    concat_str = 'x'

                a_row = [bibid, resource, uri, the_type, display_string, concat_str]
                # print(a_row)
                the_rows.append(a_row)

            except Exception:
                print(r)

    # Write results to google sheet
    container_sheet.clear()
    z = container_sheet.appendData(the_rows)
    print(z)
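# Note: dataSheet is the project's Google Sheets wrapper class, imported elsewhere in the
# original script. Based on its use in this section, getDataColumns() returns a tab's
# columns as lists, getData() returns its rows, clear() empties the tab, and appendData()
# writes a list of rows back to it.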
def main(): # Main code goes here. asf.setServer("Prod") on_site = False # set to True to get on-site note, False to get off-site note. See the_access_note var below. output_folder = "output/resource_on-site_access" lookup_csv = "id_lookup_prod.csv" # bibid_file = ( # "/Users/dwh2128/Documents/ACFA/TEST/ACFA-224-onsite-notes/acfa-224-list_3.csv" # ) bibid_file = ( "/Users/dwh2128/Documents/ACFA/TEST/ACFA-243-off-site/acfa-243_off-site.csv" ) # Read a list of bibids (csv) the_bibids = [] with open(bibid_file) as ids: for row in csv.reader(ids): the_bibids.append(row[0]) if on_site == True: the_access_note = { "jsonmodel_type": "note_multipart", "label": "Restrictions on Access", "type": "accessrestrict", "rights_restriction": {"local_access_restriction_type": []}, "subnotes": [ { "jsonmodel_type": "note_text", "content": "This collection is located on-site.", "publish": True, } ], "publish": True, } else: the_access_note = { "jsonmodel_type": "note_multipart", "label": "Restrictions on Access", "type": "accessrestrict", "rights_restriction": {"local_access_restriction_type": []}, "subnotes": [ { "jsonmodel_type": "note_text", "content": "This collection is located off-site. You will need to request this material at least three business days in advance to use the collection in the Rare Book and Manuscript Library reading room.", "publish": True, } ], "publish": True, } for bib in the_bibids: try: repo, asid = asf.lookupByBibID(bib, lookup_csv) except: print("Error: No record found for " + str(bib) + ". Skipping...") continue out_path_old = output_folder + "/" + str(repo) + "_" + str(asid) + "_old.json" out_path_new = output_folder + "/" + str(repo) + "_" + str(asid) + "_new.json" the_resource = asf.getResource(repo, asid) # Save copy of existing object print("Saving data to " + out_path_old + "....") with open(out_path_old, "w+") as f: f.write(the_resource) the_data = json.loads(the_resource) # Test if there is already an access restriction note. has_note = False for a_note in the_data["notes"]: try: if a_note["type"] == "accessrestrict": has_note = True except KeyError: print("Note has no type -- skipping.") if has_note == True: print(str(bib) + " - Warning: Already has access note.") # else: the_data["notes"].append(the_access_note) the_new_resource = json.dumps(the_data) # Save copy of new object print("Saving data to " + out_path_new + "....") with open(out_path_new, "w+") as f: f.write(the_new_resource) try: post = asf.postResource(repo, asid, the_new_resource) print(post) except: print( "Error: There was a problem posting resource " + str(repo) + ":" + str(asid) + "!" ) quit()
def main(): asf.setServer("Prod") # the_lookup_csv = "id_lookup_TEST.csv" # test the_lookup_csv = "id_lookup_prod.csv" # test output_folder = "output/resource_language_encode" the_sheet = dataSheet("1eTPY7AbDvjDU-lzK2VQruvZAvlGkAJZglh2JrruPvdg", "Test6!A:Z") the_data = the_sheet.getData() the_new_data = [] the_new_data.append(the_data.pop(0)) counter = 0 for a_row in the_data: counter += 1 print(" ") print(counter) the_new_row = a_row the_bibid = a_row[0] the_041 = a_row[1] the_string = a_row[3] res_info = asf.lookupByBibID(the_bibid, the_lookup_csv) if res_info: out_path_old = ( output_folder + "/" + str(res_info[0]) + "_" + str(res_info[1]) + "_old.json" ) out_path_new = ( output_folder + "/" + str(res_info[0]) + "_" + str(res_info[1]) + "_new.json" ) # pull down the resource the_resource = asf.getResource(res_info[0], res_info[1]) # Save copy of existing object print("Saving data to " + out_path_old + "....") with open(out_path_old, "w+") as f: f.write(the_resource) res_dict = json.loads(the_resource) langmaterials = res_dict["lang_materials"] # Collect encoded languages already present. There should be just one but not guaranteed, so make a list. primary_langs = [] for n in langmaterials: try: if n["language_and_script"]: # print("YES") primary_langs.append(n["language_and_script"]["language"]) except: print("Exception!") print("old:") print(primary_langs) print("new:") langs_parsed = language_lookup(the_string) print(langs_parsed) print("to add: ") langs_diff = diff(langs_parsed, primary_langs) print(langs_diff) if len(langs_diff) > 0: for l in langs_diff: res_dict["lang_materials"].append(make_language_note(l)) new_resource = json.dumps(res_dict) # Save new object print("Saving data to " + out_path_new + "....") with open(out_path_new, "w+") as f: f.write(new_resource) # Post new resource back to API print("Posting data for " + str(res_info[0]) + " : " + str(res_info[1])) try: post = asf.postResource(res_info[0], res_info[1], new_resource) print(post) except: print( "Error: There was a problem posting resource " + str(res_info[0]) + ":" + str(res_info[1]) + "!" ) langs_diff.append("[ERROR]") else: print("No new languages to add. Skipping.") the_new_row.append(",".join(langs_diff)) the_new_data.append(the_new_row) the_sheet.clear() the_sheet.appendData(the_new_data)