import json
import os
import pprint
import re
import subprocess

import requests

# 'admin' and 'schema' are assumed to be this project's local helper modules, matching how
# they are called throughout (admin.login(), admin.opencsv(), schema.parse_schema(), etc.).
import admin
import schema


def update_sc_components(*field):
    '''Updates sub_container fields on a record's instances. The first argument names the CSV
    column holding the record URI; the remaining arguments name the sub_container fields to update.'''
    values = admin.login()
    csvfile = admin.opencsvdict()
    fields = field
    for row in csvfile:
        record_uri = row[fields[0]]
        fieldlist = fields[1:]
        print(fieldlist)
        print(record_uri)
        record_json = requests.get(values[0] + record_uri, headers=values[1]).json()
        if 'position' in fieldlist:
            #a 'position' column selects which instance to modify
            for fieldname in fieldlist:
                for key, value in row.items():
                    if fieldname == key:
                        print(value)
                        position = int(row['position'])
                        record_json['instances'][position]['sub_container'][key] = value
        else:
            #no position given: update the first instance
            for fieldname in fieldlist:
                for key, value in row.items():
                    if fieldname == key:
                        print(value)
                        record_json['instances'][0]['sub_container'][key] = value
        record_data = json.dumps(record_json)
        record_update = requests.post(values[0] + record_uri, headers=values[1],
                                      data=record_data).json()
        print(record_update)

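# A usage sketch (illustrative, not from the original source): with a CSV whose header row
# includes a 'uri' column plus 'type_2' and 'indicator_2' columns, a call might look like
#
#   update_sc_components('uri', 'type_2', 'indicator_2')
#
# Add a 'position' column and pass 'position' as an argument to target an instance other than
# the first one.
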
def create_container_profiles():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        name = row[0]
        extent_dimension = row[1]
        height = row[2]
        width = row[3]
        depth = row[4]
        dimension_units = row[5]
        #takes data from spreadsheet and builds JSON
        new_container_profile = {'jsonmodel_type': 'container_profile',
                                 'name': name,
                                 'extent_dimension': extent_dimension,
                                 'height': height,
                                 'width': width,
                                 'depth': depth,
                                 'dimension_units': dimension_units}
        container_profile_data = json.dumps(new_container_profile)
        #Posts JSON to ArchivesSpace
        create_profile = requests.post(values[0] + '/container_profiles', headers=values[1],
                                       data=container_profile_data).json()
        #Prints what is happening to IDLE window - will add an error log as well
        print(create_profile)

def delete_do_instance():
    values = admin.login()
    csvfile = admin.opencsv()
    txtfile = admin.opentxt()
    for row in csvfile:
        archival_object_uri = row[0]
        digital_object_uri = row[1]
        #may or may not need this...could just loop through instances and check for DOs, then delete
        #OR could find a specific digital object instance I want to delete
        try:
            archival_object_json = requests.get(values[0] + archival_object_uri,
                                                headers=values[1]).json()
            instance_list = list(archival_object_json['instances'])
            if digital_object_uri == '':
                #no digital object URI given: remove every digital object instance
                for instance in instance_list:
                    if instance['instance_type'] == 'digital_object':
                        archival_object_json['instances'].remove(instance)
                archival_object_data = json.dumps(archival_object_json)
                archival_object_update = requests.post(values[0] + archival_object_uri,
                                                       headers=values[1],
                                                       data=archival_object_data).json()
                admin.writetxt(txtfile, archival_object_update)
                print(archival_object_update)
            else:
                #a digital object URI was given: remove only the instance that links to it
                for instance in instance_list:
                    if 'digital_object' in instance:
                        if instance['digital_object'] == {'ref': digital_object_uri}:
                            archival_object_json['instances'].remove(instance)
                            archival_object_data = json.dumps(archival_object_json)
                            archival_object_update = requests.post(values[0] + archival_object_uri,
                                                                   headers=values[1],
                                                                   data=archival_object_data).json()
                            admin.writetxt(txtfile, archival_object_update)
                            print(archival_object_update)
        except Exception:
            txtfile.write('error, could not update ' + str(archival_object_uri) + '\n')
            continue

def create_records(jsonmodel_type, *subrecords):
    values = admin.login()
    csvfile = admin.opencsvdict()
    data_dict = schema.parse_schema()
    #Loops through each row in the CSV
    for row in csvfile:
        new_record = {'jsonmodel_type': jsonmodel_type}
        for k, v in row.items():
            if k in data_dict[jsonmodel_type]:
                new_record[k] = v
        if 'publish' in row.keys():
            if row['publish'] == 'True':
                new_record['publish'] = True
            elif row['publish'] == 'False':
                new_record['publish'] = False
        newdict = dict.fromkeys(subrecords)
        for subrecord in subrecords:
            newdict[subrecord] = [{}]
            if subrecord in data_dict[jsonmodel_type]:
                print(data_dict[jsonmodel_type][subrecord])
                for key in data_dict[jsonmodel_type][subrecord].keys():
                    if key in row.keys():
                        for member in newdict[subrecord]:
                            member.update({key: row[key]})
        new_record.update(newdict)
        pprint.pprint(new_record)
        record_data = json.dumps(new_record)
        record_create = requests.post(values[0] + row['repo_uri'] + '/' + jsonmodel_type + 's',
                                      headers=values[1], data=record_data).json()
        print(record_create)

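# A usage sketch (the arguments are illustrative): build accession records from a CSV whose
# column names match fields in the accession schema, attaching a 'dates' subrecord from any
# matching date columns. The CSV is assumed to include a 'repo_uri' column, since the POST URL
# is built from row['repo_uri'].
#
#   create_records('accession', 'dates')
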
def get_global_ids(record_type):
    values = admin.login()
    output = admin.opentxt()
    get_ids = requests.get(values[0] + '/' + str(record_type) + '?all_ids=true',
                           headers=values[1]).json()
    #the endpoint returns a list of integer ids, so convert to a string before writing
    output.write(str(get_ids))

def create_resources():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        repo_uri = row[0]
        identifier = row[1]
        title = row[2]
        language = row[3]
        level = row[4]
        begin_date = row[5]
        end_date = row[6]
        date_type = row[7]
        date_label = row[8]
        extent_type = row[9]
        extent_portion = row[10]
        extent_number = row[11]
        container_summary = row[12]
        new_resource = {'id_0': identifier,
                        'title': title,
                        'language': language,
                        'level': level,
                        'dates': [{'begin': begin_date,
                                   'end': end_date,
                                   'date_type': date_type,
                                   'label': date_label,
                                   'jsonmodel_type': 'date'}],
                        'extents': [{'extent_type': extent_type,
                                     'portion': extent_portion,
                                     'number': extent_number,
                                     'container_summary': container_summary,
                                     'jsonmodel_type': 'extent'}],
                        'repository': {'ref': repo_uri},
                        'jsonmodel_type': 'resource'}
        resource_data = json.dumps(new_resource)
        print(resource_data)
        resource_create = requests.post(values[0] + repo_uri + '/resources', headers=values[1],
                                        data=resource_data).json()
        print(resource_create)

def etv_ead():
    '''Exports EAD files from ArchivesSpace, transforms them with the Yale Best Practices
    stylesheet, and validates the output against the EAD 2002 and Schematron schemas.'''
    values = admin.login()
    inputfile = admin.opentxtin()
    outputfile = admin.opentxt()
    dirpath = admin.setdirectory()
    print('Downloading EAD files to directory')
    for ead_uri in inputfile:
        print('Retrieving ' + str(ead_uri).rstrip())
        get_ead = requests.get(values[0] + ead_uri + '.xml?include_unpublished=true',
                               headers=values[1], stream=True).text
        #Finds URLs with 2-digit repo ids
        if re.search(r'[0-9]', ead_uri[15]):
            outfile = admin.openxml(dirpath, ead_uri[39:].rstrip())
            outfile.write(str(get_ead).rstrip())
        #Others - assumes it's a 1-digit repo id. How many institutions will have more than 99 repositories?
        else:
            outfile = admin.openxml(dirpath, ead_uri[38:].rstrip())
            outfile.write(str(get_ead).rstrip())
    '''the subprocess call cannot take the EAD from AS directly as input. First need to save
    the file, and then run the transformation over each file'''
    print('Done!')
    print('Transforming EAD files to Yale Best Practices guidelines')
    filelist = os.listdir(dirpath)
    os.makedirs(dirpath + '/outfiles')
    for file in filelist:
        #finds all the EAD files in the working directory
        if file[-3:] == 'xml':
            #haven't changed the hard coding of the command or the xsl file yet
            subprocess.run([
                "java", "-cp", "/usr/local/Cellar/saxon/9.8.0.4/libexec/saxon9he.jar",
                "net.sf.saxon.Transform",
                "-s:" + dirpath + '/' + file,
                "-xsl:" + dirpath + "/transformations/yale.aspace_v112_to_yalebpgs.xsl",
                "-o:" + dirpath + '/outfiles/' + file[:-4] + "_out.xml"
            ])
    '''next we need to validate each output file against the EAD 2002 schema and the local
    Yale schematron'''
    print('Done!')
    print('Validating transformations against EAD 2002 and Schematron schemas')
    newfilelist = os.listdir(dirpath + '/outfiles')
    for outfile in newfilelist:
        subprocess.Popen([
            "/Users/aliciadetelich/git/crux/target/crux-1.3-SNAPSHOT-all.jar",
            "-s", dirpath + "/transformations/yale.aspace.ead2002.sch",
            dirpath + '/outfiles/' + outfile
        ], stdout=outputfile, stderr=subprocess.PIPE, encoding='utf-8')
        subprocess.Popen([
            "/Users/aliciadetelich/git/crux/target/crux-1.3-SNAPSHOT-all.jar",
            dirpath + '/outfiles/' + outfile
        ], stdout=outputfile, stderr=subprocess.PIPE, encoding='utf-8')
    print('All Done! Check outfile for validation report')

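# Input sketch (an assumption based on the code above): the file opened by admin.opentxtin()
# is expected to hold one EAD export URI per line, e.g.
#
#   /repositories/12/resource_descriptions/1250
#
# The Saxon and Crux paths and the Yale XSL/Schematron files are hard coded above and would
# need to be adjusted for another environment.
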
def create_instances():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        archival_object_uri = row[0]
        top_container_uri = row[1]
        child_type = row[2]
        child_indicator = row[3]
        grandchild_type = row[4]
        grandchild_indicator = row[5]
        instance_type = row[6]
        archival_object_json = requests.get(values[0] + archival_object_uri,
                                            headers=values[1]).json()
        if grandchild_type != '':
            new_instance = {"instance_type": instance_type,
                            "jsonmodel_type": "instance",
                            "sub_container": {"jsonmodel_type": "sub_container",
                                              "indicator_2": child_indicator,
                                              "type_2": child_type,
                                              "indicator_3": grandchild_indicator,
                                              "type_3": grandchild_type,
                                              "top_container": {"ref": top_container_uri}}}
        elif child_type != '':
            new_instance = {"instance_type": instance_type,
                            "jsonmodel_type": "instance",
                            "sub_container": {"jsonmodel_type": "sub_container",
                                              "indicator_2": child_indicator,
                                              "type_2": child_type,
                                              "top_container": {"ref": top_container_uri}}}
        else:
            new_instance = {"instance_type": instance_type,
                            "jsonmodel_type": "instance",
                            "sub_container": {"jsonmodel_type": "sub_container",
                                              "top_container": {"ref": top_container_uri}}}
        archival_object_json["instances"].append(new_instance)
        archival_object_data = json.dumps(archival_object_json)
        archival_object_update = requests.post(values[0] + archival_object_uri,
                                               headers=values[1],
                                               data=archival_object_data).json()
        print(archival_object_update)

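# Example CSV layout assumed by create_instances() above (column order taken from the code;
# the header names themselves are illustrative):
#
#   archival_object_uri, top_container_uri, child_type, child_indicator,
#   grandchild_type, grandchild_indicator, instance_type
#
# Leave the grandchild (or child) columns blank to create a simpler instance that links only
# to the top container.
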
def get_enumerations():
    values = admin.login()
    output = admin.opentxt()
    enumerations = requests.get(values[0] + '/config/enumerations?all_ids=true',
                                headers=values[1]).json()
    json.dump(enumerations, output, indent=4, separators=(',', ':'))
    output.close()

def create_repositories():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        repo_name = row[0]
        record_json = {'repo_name': repo_name}
        record_data = json.dumps(record_json)
        #posts the new repository record
        record_update = requests.post(values[0] + '/repositories', headers=values[1],
                                      data=record_data).json()
        print(record_update)

def delete_subrecord_components():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        record_uri = row[0]
        record_json = requests.get(values[0] + record_uri, headers=values[1]).json()
        record_data = json.dumps(record_json)
        record_update = requests.delete(values[0] + record_uri, headers=values[1],
                                        data=record_data).json()
        print(record_update)

def create_note_bibliography():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        record_uri = row[0]
        record_json = requests.get(values[0] + record_uri, headers=values[1]).json()
        #note-building sketch: assumes (hypothetically) that the second CSV column holds the
        #bibliography text to append to the record's notes
        note_text = row[1]
        new_note = {'jsonmodel_type': 'note_bibliography',
                    'content': [note_text],
                    'items': []}
        record_json['notes'].append(new_note)
        record_data = json.dumps(record_json)
        record_update = requests.post(values[0] + record_uri, headers=values[1],
                                      data=record_data).json()
        print(record_update)

def export_ead(repo_id, resource):
    values = admin.login()
    output = admin.opentxt()
    get_ead = requests.get(values[0] + '/repositories/' + str(repo_id) +
                           '/resource_descriptions/' + str(resource) +
                           '.xml?include_unpublished=true',
                           headers=values[1], stream=True).text
    output.write(str(get_ead))
    output.close()

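# A usage sketch (the identifiers are illustrative): export resource 1250 from repository 12
# as EAD, including unpublished components.
#
#   export_ead(12, 1250)
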
def create_top_containers():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        barcode = row[0]
        indicator = row[1]
        container_profile_uri = row[2]
        locations = row[3]
        start_date = row[4]
        repo_num = row[5]
        if barcode != '':
            create_tc = {'barcode': barcode,
                         'container_profile': {'ref': container_profile_uri},
                         'indicator': indicator,
                         'container_locations': [{'jsonmodel_type': 'container_location',
                                                  'status': 'current',
                                                  'start_date': start_date,
                                                  'ref': locations}],
                         'jsonmodel_type': 'top_container',
                         'repository': {'ref': repo_num}}
        else:
            create_tc = {'container_profile': {'ref': container_profile_uri},
                         'indicator': indicator,
                         'container_locations': [{'jsonmodel_type': 'container_location',
                                                  'status': 'current',
                                                  'start_date': start_date,
                                                  'ref': locations}],
                         'jsonmodel_type': 'top_container',
                         'repository': {'ref': repo_num}}
        tcdata = json.dumps(create_tc)
        tcupdate = requests.post(values[0] + repo_num + '/top_containers', headers=values[1],
                                 data=tcdata).json()
        print(tcupdate)

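# Example CSV layout assumed by create_top_containers() above (column order taken from the
# code; the header names are illustrative):
#
#   barcode, indicator, container_profile_uri, location_uri, start_date, repo_uri
#
# Leave the barcode column blank to create a top container without a barcode.
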
def get_enums():
    values = admin.login()
    #scheme = parse_schema()
    enumerations = requests.get(values[0] + '/config/enumerations?all_ids=true',
                                headers=values[1]).json()
    for enumeration in enumerations:
        #want to match the schema keys (and subkeys) with the names
        pprint.pprint(enumeration['name'])
        #if there is a match want to append the whole valuelist as a value of the schema key/subkeys
        pprint.pprint(enumeration['values'])

def link_agent():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        agent_uri = row[0]
        description_uri = row[1]
        description_json = requests.get(values[0] + description_uri, headers=values[1]).json()
        description_json['linked_agents'].append({'ref': agent_uri})
        description_data = json.dumps(description_json)
        description_post = requests.post(values[0] + description_uri, headers=values[1],
                                         data=description_data).json()
        print(description_post)

def create_external_documents():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        record_uri = row[0]
        #assumes (hypothetically) that the second and third CSV columns hold the document
        #title and location, since external_document subrecords require both fields
        title = row[1]
        location = row[2]
        record_json = requests.get(values[0] + record_uri, headers=values[1]).json()
        new_ext_doc = {'jsonmodel_type': 'external_document',
                       'title': title,
                       'location': location}
        record_json['external_documents'].append(new_ext_doc)
        record_data = json.dumps(record_json)
        record_update = requests.post(values[0] + record_uri, headers=values[1],
                                      data=record_data).json()
        print(record_update)

def update_subject_component():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        #the CSV is expected to hold the full subject URI (e.g. /subjects/154)
        subject_uri = row[0]
        component_to_update = row[1]
        updated_text = row[2]
        subject_json = requests.get(values[0] + subject_uri, headers=values[1]).json()
        subject_json[component_to_update] = updated_text
        subject_data = json.dumps(subject_json)
        subject_update = requests.post(values[0] + subject_uri, headers=values[1],
                                       data=subject_data).json()
        print(subject_update)

def link_records():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        classification = row[0]
        record = row[1]
        new_rec_link = requests.get(values[0] + classification, headers=values[1]).json()
        new_rec_link['linked_records'].append({'ref': record})
        print(new_rec_link)
        new_link_json = json.dumps(new_rec_link)
        new_link_post = requests.post(values[0] + classification, headers=values[1],
                                      data=new_link_json).json()
        print(new_link_post)

def create_file_versions():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        record_uri = row[0]
        file_uri = row[1]
        record_json = requests.get(values[0] + record_uri, headers=values[1]).json()
        new_file_version = {'file_uri': file_uri, 'jsonmodel_type': 'file_version'}
        record_json['file_versions'].append(new_file_version)
        record_data = json.dumps(record_json)
        record_update = requests.post(values[0] + record_uri, headers=values[1],
                                      data=record_data).json()
        print(record_update)

def update_record_component():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        record_uri = row[0]
        component_to_update = row[1]
        updated_text = row[2]
        record_json = requests.get(values[0] + record_uri, headers=values[1]).json()
        record_json[component_to_update] = updated_text
        record_data = json.dumps(record_json)
        record_update = requests.post(values[0] + record_uri, headers=values[1],
                                      data=record_data).json()
        print(record_update)

def create_digital_objects():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        repo_uri = row[0]
        digital_object_id = row[1]
        title = row[2]
        new_do = {'digital_object_id': digital_object_id,
                  'jsonmodel_type': 'digital_object',
                  'title': title,
                  'repository': {'ref': repo_uri}}
        do_data = json.dumps(new_do)
        do_create = requests.post(values[0] + repo_uri + '/digital_objects', headers=values[1],
                                  data=do_data).json()
        print(do_create)

def create_top_level():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        identifier = row[0]
        title = row[1]
        description = row[2]
        repository = row[3]
        new_class_term = {"identifier": identifier,
                          "title": title,
                          "description": description,
                          "publish": True,
                          "repository": {'ref': repository}}
        new_class_json = json.dumps(new_class_term)
        #posts the classification to the repository given in the CSV
        new_class_post = requests.post(values[0] + repository + '/classifications',
                                       headers=values[1], data=new_class_json).json()
        print(new_class_post)

def create_dig_object_components():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        repo_uri = row[0]
        parent_uri = row[1]
        component_id = row[2]
        title = row[3]
        new_doc = {'component_id': component_id,
                   'title': title,
                   'parent': {'ref': parent_uri},
                   'repository': {'ref': repo_uri},
                   'jsonmodel_type': 'digital_object_component'}
        doc_data = json.dumps(new_doc)
        doc_create = requests.post(values[0] + repo_uri + '/digital_object_components',
                                   headers=values[1], data=doc_data).json()
        print(doc_create)

def create_accessions():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        repo_uri = row[0]
        identifier = row[1]
        title = row[2]
        accession_date = row[3]
        new_accession = {'id_0': identifier,
                         'title': title,
                         'accession_date': accession_date,
                         'repository': {'ref': repo_uri},
                         'jsonmodel_type': 'accession'}
        accession_data = json.dumps(new_accession)
        accession_create = requests.post(values[0] + repo_uri + '/accessions', headers=values[1],
                                         data=accession_data).json()
        print(accession_create)

def export_eads(repo_id):
    values = admin.login()
    infile = admin.readtxt()
    dirpath = admin.setdirectory()
    for resource in infile:
        #strip any trailing newline so the URL and output filename are clean
        resource = str(resource).rstrip()
        get_ead = requests.get(values[0] + '/repositories/' + str(repo_id) +
                               '/resource_descriptions/' + resource +
                               '.xml?include_unpublished=true',
                               headers=values[1], stream=True).text
        outfile = admin.openxml(dirpath, resource)
        outfile.write(str(get_ead))
        outfile.close()
    infile.close()

def update_tc_component():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        top_container_uri = row[0]
        component_to_update = row[1]
        update_text = row[2]
        tc_json = requests.get(values[0] + top_container_uri, headers=values[1]).json()
        tc_json[component_to_update] = update_text
        tc_data = json.dumps(tc_json)
        tc_update = requests.post(values[0] + top_container_uri, headers=values[1],
                                  data=tc_data).json()
        print(tc_update)

def update_record_pub_status():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        record_uri = row[0]
        #1 for publish, 0 for unpublish
        updated_status = row[1]
        record_json = requests.get(values[0] + record_uri, headers=values[1]).json()
        if updated_status == '1':
            record_json['publish'] = True
        elif updated_status == '0':
            record_json['publish'] = False
        record_data = json.dumps(record_json)
        record_update = requests.post(values[0] + record_uri, headers=values[1],
                                      data=record_data).json()
        print(record_update)

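# Example CSV rows assumed by update_record_pub_status() above (the URIs are illustrative):
#
#   /repositories/2/archival_objects/5534,1
#   /repositories/2/archival_objects/5535,0
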
def delete_records():
    values = admin.login()
    csvfile = admin.opencsv()
    txtfile = admin.opentxt()
    for row in csvfile:
        record_uri = row[0]
        try:
            record_json = requests.get(values[0] + record_uri, headers=values[1]).json()
            record_data = json.dumps(record_json)
            delete = requests.delete(values[0] + record_uri, headers=values[1],
                                     data=record_data).json()
            admin.writetxt(txtfile, delete)
            print(delete)
        except Exception:
            txtfile.write('error, could not delete ' + str(record_uri) + '\n')
            continue

def get_global_json(record_type):
    values = admin.login()
    output = admin.opentxt()
    get_ids = requests.get(values[0] + '/' + str(record_type) + '?all_ids=true',
                           headers=values[1]).json()
    # print(get_ids)
    x = 0
    for i in get_ids:
        x = x + 1
        resource = requests.get(values[0] + '/' + str(record_type) + '/' + str(i),
                                headers=values[1]).json()
        json.dump(resource, output, indent=4, separators=(',', ':'))
        print('Dumping ' + str(x) + ' of ' + str(len(get_ids)))
    output.close()

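# A usage sketch (the record type is illustrative): dump the full JSON of every subject record
# to the chosen output file.
#
#   get_global_json('subjects')
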