def main():
    # Test functions here.
    from pprint import pprint

    server = 'Test'
    asf.setServer(server)

    # The resource to scan
    the_resource = (4, 6288)

    # A place to put output of saved json objects (optional)
    output_folder = 'output/replace_extrefs'

    # Retrieve all archival objects under a given resource
    x = asf.getResponse('/repositories/' + str(the_resource[0]) +
                        '/resources/' + str(the_resource[1]) +
                        '/ordered_records')
    y = json.loads(x)['uris']

    # Select only the ones that are items or files, and add to a list
    the_refs = [r['ref'] for r in y if r['level'] in ['item', 'file']]

    cnt = 0
    for a_ref in the_refs:
        ref_decomposed = a_ref.split('/')
        repo, asid = ref_decomposed[2], ref_decomposed[4]
        ref_json = asf.getArchivalObject(repo, asid)
        out_path = output_folder + '/' + str(repo) + '_' + str(asid) + '.json'
        data_old = ref_json

        # The regex substitution
        repl = re.subn(r'<extref\s+type=\\"simple\\"\s+href=',
                       r'<extref xlink:type=\"simple\" xlink:href=',
                       ref_json,
                       flags=re.DOTALL)

        if repl[1] > 0:  # [1] is the count of replacements from subn
            # There is a change: save a copy of the existing object first.
            print('Saving data to ' + out_path + '....')
            with open(out_path, "w+") as f:
                f.write(data_old)

            data_new = repl[0]
            cnt += 1
            print('Posting ' + str(repo) + '_' + str(asid) + ' to ' + server)
            z = asf.postArchivalObject(repo, asid, data_new)
            print(z)
            print(' ')

    print('Total replacements: ' + str(cnt))
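
# A minimal, self-contained illustration of the re.subn call above, run on a
# hypothetical JSON-escaped EAD fragment (sample data only, not from the
# repository). It shows why the pattern matches the backslash-escaped quotes
# that json-serialized note content contains.
def _demo_extref_substitution():
    import re
    sample = '<p><extref type=\\"simple\\" href=\\"http://example.com\\">link</extref></p>'
    new_text, count = re.subn(
        r'<extref\s+type=\\"simple\\"\s+href=',
        r'<extref xlink:type=\"simple\" xlink:href=',
        sample,
        flags=re.DOTALL)
    print(count)     # 1 replacement
    print(new_text)  # the attributes are now in the xlink namespace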
def main():
    SERVER = "Prod"  # test
    # SERVER = "Prod"
    asf.setServer(SERVER)

    sheet_id = '1Jbdhda0HbmHKJ7COOJ3CBzdMwpSeIbYHyXzr179ETpI'
    read_sheet = dataSheet(sheet_id, 'TEST!A:Z')  # Test
    write_sheet = dataSheet(sheet_id, 'Output!A:Z')

    the_data = read_sheet.getData()
    the_data.pop(0)

    # print(the_refs)

    the_output = []

    for r in the_data:
        repo = r[1]
        ref = r[2]
        extref_old = r[3]
        extref_new = r[5]

        the_ao = json.loads(asf.getArchivalObjectByRef(repo, ref))
        asid = the_ao['uri'].split('/')[4]
        print("asid: " + str(asid))

        the_notes = json.dumps(the_ao['notes'])

        # fix problem of leading space in href
        the_new_notes = the_notes.replace('xlink:href=\\" http',
                                          'xlink:href=\\"http')
        # replace old url with new one
        the_new_notes = the_new_notes.replace(extref_old, extref_new)

        print(the_new_notes)

        the_ao['notes'] = json.loads(the_new_notes)
        pprint(the_ao)

        x = asf.postArchivalObject(repo, asid, json.dumps(the_ao))

        out_row = [SERVER, repo, asid, ref, extref_old, extref_new, str(x)]
        print(out_row)
        the_output.append(out_row)

    # write_sheet.clear()
    write_sheet.appendData(the_output)

    quit()
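
# A small sketch (hypothetical sample data) of the leading-space repair above:
# because the notes are serialized with json.dumps, quotes inside the EAD
# markup appear as \" in the serialized string, which is why the replace
# targets 'xlink:href=\\" http'.
def _demo_leading_space_fix():
    import json
    notes = [{'content': '<extref xlink:href=" http://example.com">link</extref>'}]
    serialized = json.dumps(notes)
    fixed = serialized.replace('xlink:href=\\" http', 'xlink:href=\\"http')
    print(json.loads(fixed)[0]['content'])
    # -> <extref xlink:href="http://example.com">link</extref>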
def main():
    # SERVER = "Test"  # test
    SERVER = "Prod"
    asf.setServer(SERVER)

    sheet_id = '1OABHEJF1jqA1vlbW5yTENry5W7YqKlag5nJDJ9ouCzg'
    # read_sheet = dataSheet(sheet_id, 'Test!A:Z')  # Test
    read_sheet = dataSheet(sheet_id, 'Prod!A:Z')  # Test
    write_sheet = dataSheet(sheet_id, 'output!A:Z')

    the_refs = read_sheet.getDataColumns()[0]
    # print(the_refs)

    the_output = []

    for r in the_refs:
        the_ao = json.loads(asf.getArchivalObjectByRef(2, r))
        asid = the_ao['uri'].split('/')[4]
        old_date = str(the_ao['dates'][0]['begin'])

        new_ao = fix_begin_date(2, the_ao)
        new_date = str(new_ao['dates'][0]['begin'])

        print("asid: " + str(asid))

        x = asf.postArchivalObject(2, asid, json.dumps(new_ao))

        out_row = [SERVER, r, asid, old_date, new_date, str(x)]
        # print(out_row)
        the_output.append(out_row)

    write_sheet.clear()
    write_sheet.appendData(the_output)

    quit()

    # Unreachable test calls below (after quit), retained for reference.
    x = fix_begin_date(2, 'b2ec9ce511e4212ebb145fb909ca85bd')
    print(x)

    pprint(
        json.loads(
            asf.getArchivalObjectByRef(2, 'b2ec9ce511e4212ebb145fb909ca85bd')))

    quit()
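
# fix_begin_date() is defined elsewhere in this repo. Below is a hypothetical
# stand-in, assuming it normalizes a malformed 'begin' value on the first date
# record by zero-padding month/day segments; the real implementation may differ.
def fix_begin_date(repo, archival_object):
    # repo is accepted to match the call above; it is unused in this sketch.
    dates = archival_object.get('dates', [])
    if dates and 'begin' in dates[0]:
        parts = str(dates[0]['begin']).split('-')
        # Zero-pad month and day (e.g., '1957-7-3' -> '1957-07-03').
        dates[0]['begin'] = '-'.join(
            p.zfill(2) if i > 0 else p for i, p in enumerate(parts))
    return archival_object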
def main():
    asf.setServer('Prod')

    id_file = '/Users/dwh2128/Documents/ACFA/TEST/ACFA-147-hrw-access-restrictions/acfa-147-aos_UNVETTED.csv'
    output_folder = 'output/archival_objects_accessrestrict'

    # Read a list of repo and object ids (csv)
    the_ids = []
    ids = open(id_file)
    for row in csv.reader(ids):
        the_ids.append([row[0], row[1]])
    ids.close()

    access_types = {
        'unvetted': {
            'vocab': 'TEMPORARILY UNAVAILABLE',
            'text': '[Unvetted]'
        },
        'vetted': {
            'vocab': 'AVAILABLE',
            'text': '[Vetted, open]'
        }
    }

    # Set to 'vetted' or 'unvetted'
    the_type = 'unvetted'

    for an_obj in the_ids:
        out_path = output_folder + '/' + an_obj[0] + '_' + an_obj[1] + '_old.json'

        # read from API
        x = asf.getArchivalObjectByRef(an_obj[0], an_obj[1])

        # Save copy of existing object
        print('Saving data to ' + out_path + '....')
        with open(out_path, "w+") as f:
            f.write(x)

        y = json.loads(x)

        asid = str(y['uri'].split('/')[-1])  # get the asid from the uri string.
        repo = str(an_obj[0])

        print('Processing ' + str(repo) + ' - ' + str(asid) + '...')

        the_notes = y['notes']

        # Test if there is already an accessrestrict
        has_accrestrict = False
        for an_item in the_notes:
            if an_item['type'] == 'accessrestrict':
                has_accrestrict = True

        if not has_accrestrict:
            print('Adding access restrict note ...')
            the_access_note = {
                'jsonmodel_type': 'note_multipart',
                'publish': True,
                'rights_restriction': {
                    'local_access_restriction_type':
                    [access_types[the_type]['vocab']]
                },
                'subnotes': [{
                    'content': access_types[the_type]['text'],
                    'jsonmodel_type': 'note_text',
                    'publish': True
                }],
                'type': 'accessrestrict'
            }

            y['notes'].append(the_access_note)
            # the_notes = y['notes']

            z = json.dumps(y)
            # print(z)

            post = asf.postArchivalObject(repo, asid, z)
            print(post)
        else:
            print('Already has access restrict note. Skipping!')

    print("Done!")
def main():
    # Set value to switch to: publish (True) or unpublish (False)
    publish_value = False
    # Report changes to a spreadsheet?
    report_results = True

    asf.setServer('Prod')

    # A GSheet to post report to
    the_report_sheet = dataSheet('1wNO0t2j5G9U0hUmb7E-jLd4T5skTs1aRxN7HrlyZwEI',
                                 'aos_unpub3!A:Z')

    # A CSV of format <repo>,<refid>
    id_file = '/Users/dwh2128/Documents/ACFA/TEST/ACFA-141-unpublish-archival-objects/unpublish_aos_series_IIIA_PROD_p7.csv'

    # A folder to put json objects for auditing purposes
    output_folder = 'output/unpubs3'

    # Read a list of repo and object ids (csv)
    the_ids = []
    ids = open(id_file)
    for row in csv.reader(ids):
        the_ids.append([row[0], row[1]])
    ids.close()

    the_before_afters = []
    the_heads = ['repo', 'asid', 'uid', 'title', 'before', 'after']
    the_before_afters.append(the_heads)

    for an_obj in the_ids:
        out_path = output_folder + '/' + an_obj[0] + '_' + an_obj[1] + '_old.json'

        # read from API
        x = asf.getArchivalObjectByRef(an_obj[0], an_obj[1])

        # Save copy of existing object
        print('Saving data to ' + out_path + '....')
        f = open(out_path, "w+")
        f.write(x)
        f.close()

        x = json.loads(x)

        asid = str(x['uri'].split('/')[-1])  # get the asid from the uri string.
        repo = str(an_obj[0])
        title = x['title']

        y = x

        old_value = x['publish']
        y['publish'] = publish_value
        new_value = y['publish']

        if new_value == old_value:
            new_value = '[no change]'

        the_before_afters.append(
            [repo, asid, an_obj[1], title, old_value, new_value])

        # convert dict back to json for posting.
        z = json.dumps(y)

        if new_value != "[no change]":
            resp = asf.postArchivalObject(repo, asid, z)
            print(resp)
        else:
            print('No update: skipping record.')

    # Report changes to Google Sheet
    if report_results == True:
        print('Writing before/after info to sheet...')
        the_report_sheet.clear()
        the_report_sheet.appendData(the_before_afters)

    print("Done!")
def main():
    # Set to Test | Dev | Prod
    asf.setServer('Prod')

    the_report_sheet = dataSheet('1wNO0t2j5G9U0hUmb7E-jLd4T5skTs1aRxN7HrlyZwEI',
                                 'notes!A:Z')

    id_file = 'replace_notes.csv'
    output_folder = 'output/notes'

    # Read a list of repo and object ids (csv)
    the_ids = []
    ids = open(id_file)
    for row in csv.reader(ids):
        the_ids.append([row[0], row[1]])
    ids.close()

    the_before_afters = []
    the_heads = [
        'repo', 'asid', 'uid', 'title', 'note_cnt1', 'note_cnt2', 'status'
    ]
    the_before_afters.append(the_heads)

    for an_obj in the_ids:
        out_path = output_folder + '/' + an_obj[0] + '_' + an_obj[1] + '_old.json'

        # read from API
        print('getting data for ' + str(an_obj[0]) + ', ' + str(an_obj[1]))
        try:
            x = asf.getArchivalObjectByRef(an_obj[0], an_obj[1])

            # Save copy of existing object
            print('Saving data to ' + out_path + '....')
            f = open(out_path, "w+")
            f.write(x)
            f.close()

            x = json.loads(x)

            asid = str(x['uri'].split('/')[-1])  # get the asid from the uri string.
            title = x['title']
            repo = str(an_obj[0])

            y = x

            my_notes_init = y['notes']
            my_notes_new = []

            # Keep every note except those whose first subnote contains an extref.
            for a_note in my_notes_init:
                if ('subnotes' in a_note
                        and 'extref' in a_note['subnotes'][0].get('content', '')):
                    pass
                else:
                    my_notes_new.append(a_note)

            if len(my_notes_new) == len(my_notes_init):
                the_status = "[no change]"
            else:
                the_status = "[deleted note]"
                y['notes'] = my_notes_new

            note_cnt1 = len(my_notes_init)
            note_cnt2 = len(y['notes'])

            the_before_afters.append([
                an_obj[0], asid, an_obj[1], title, note_cnt1, note_cnt2,
                the_status
            ])

            # convert dict back to json for posting.
            z = json.dumps(y)

            # Post the fixed object back to API.
            # (Comment these out for testing.)
            resp = asf.postArchivalObject(repo, asid, z)
            print(resp)

        except:
            print('Could not retrieve record ' + str(an_obj[1]))

    # Report changes to Google Sheet
    print('Writing before/after info to sheet...')
    the_report_sheet.clear()
    the_report_sheet.appendData(the_before_afters)

    print("Done!")

    quit()
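
# A quick illustration (hypothetical note data) of the filter above: only the
# note whose first subnote contains an extref is dropped; other notes survive.
def _demo_extref_note_filter():
    sample_notes = [
        {'type': 'scopecontent',
         'subnotes': [{'jsonmodel_type': 'note_text',
                       'content': 'General description of the series.'}]},
        {'type': 'otherfindaid',
         'subnotes': [{'jsonmodel_type': 'note_text',
                       'content': '<extref xlink:href="http://example.com">old finding aid</extref>'}]},
    ]
    kept = [n for n in sample_notes
            if not ('subnotes' in n
                    and 'extref' in n['subnotes'][0].get('content', ''))]
    print([n['type'] for n in kept])  # -> ['scopecontent']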
def main():
    asf.setServer('Prod')

    the_report_sheet = dataSheet('1wNO0t2j5G9U0hUmb7E-jLd4T5skTs1aRxN7HrlyZwEI',
                                 'ampersands!A:Z')

    id_file = 'archival_objects.csv'
    output_folder = 'output/archival_objects'

    # Read a list of repo and object ids (csv)
    the_ids = []
    ids = open(id_file)
    for row in csv.reader(ids):
        the_ids.append([row[0], row[1]])
    ids.close()

    # Search/replace patterns
    the_search_pattern = '&amp;'
    the_replace_pattern = '&'

    the_before_afters = []

    # the fields to perform regex replace on.
    the_fields = ['title', 'display_string']

    the_heads = ['repo', 'asid', 'uid', 'before', 'after']
    the_before_afters.append(the_heads)

    for an_obj in the_ids:
        out_path = output_folder + '/' + an_obj[0] + '_' + an_obj[1] + '_old.json'

        # read from API
        x = asf.getArchivalObjectByRef(an_obj[0], an_obj[1])

        # Save copy of existing object
        print('Saving data to ' + out_path + '....')
        f = open(out_path, "w+")
        f.write(x)
        f.close()

        x = json.loads(x)

        asid = str(x['uri'].split('/')[-1])  # get the asid from the uri string.
        repo = str(an_obj[0])

        the_initial_values = [
            str('{' + f + '_old:} ' + x[f]) for f in the_fields
        ]
        the_initial_values = "\n".join(the_initial_values)
        # print(the_initial_values)

        # TODO: function modifies x as well as y. Harmless but messy.
        y = regex_dict(x, the_fields, the_search_pattern, the_replace_pattern)

        the_new_values = [
            str('{' + f + '_new:} ' + y[f] + ' ') for f in the_fields
        ]
        the_new_values = "\n".join(the_new_values)

        the_before_afters.append(
            [repo, asid, an_obj[1], the_initial_values, the_new_values])

        # convert dict back to json for posting.
        z = json.dumps(y)

        # Post the fixed object back to API.
        # (Comment out these lines to test output without replacing.)
        post = asf.postArchivalObject(repo, asid, z)
        print(post)

    # Report changes to Google Sheet
    print('Writing before/after info to sheet...')
    the_report_sheet.clear()
    the_report_sheet.appendData(the_before_afters)

    print("Done!")
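
# regex_dict() is assumed to be defined elsewhere in this repo. Below is a
# hypothetical sketch of what it might look like, given how it is called
# above: apply a regex replacement to the named string fields of a record
# dict. As the TODO notes, it mutates the dict in place and also returns it.
import re

def regex_dict(the_dict, fields, search_pattern, replace_pattern):
    for field in fields:
        if field in the_dict and isinstance(the_dict[field], str):
            the_dict[field] = re.sub(search_pattern, replace_pattern,
                                     the_dict[field])
    return the_dict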