class ASpace():
    """Top-level entry point to the ArchivesSpace API.

    Wraps an authorized ASnakeClient and exposes API routes as attributes
    and properties that return *Relation objects.
    """

    # this happens when you call ASpace()
    def __init__(self, **config):
        # Connect to ASpace using .archivessnake.yml
        self.client = ASnakeClient(**config)
        self.client.authorize()
        # Parse the server version out of the 'version' endpoint's text.
        # NOTE(review): the capture group includes the closing paren
        # (r'\(v?(.+\))') — presumably intentional upstream, but verify the
        # captured value includes ')' as expected by callers.
        m = re.match(r'\(v?(.+\))', self.client.get('version').text)
        if m:
            self.version = m[1]
        else:
            self.version = 'unknown version'

    def __getattr__(self, attr):
        '''returns the JSONModelRelation representing the route with the same name as the attribute requested.'''
        # Underscore-prefixed attributes fall through (implicitly returns
        # None rather than raising AttributeError).
        if not attr.startswith('_'):
            return JSONModelRelation("/{}".format(attr),
                                     params={"all_ids": True},
                                     client=self.client)

    @property
    def resources(self):
        '''return all resources from every repo.'''
        return ResourceRelation({}, self.client)

    @property
    def agents(self):
        '''returns an AgentRelation.'''
        return AgentRelation("/agents", {}, self.client)

    @property
    def users(self):
        '''returns a UserRelation.'''
        return UserRelation("/users", {}, self.client)

    def by_external_id(self, external_id, record_types=None):
        '''return any resources fetched from the 'by-external-id' route.

        Note: while the route will return differently depending on how many
        records are returned, this method deliberately flattens that out -
        it will _always_ return a generator, even if only one record is found.

        :param external_id: external id value to search for (sent as "eid")
        :param record_types: optional list of record types (sent as "type[]")
        :raises ASnakeBadReturnCode: on any status other than 200/300/404
        '''
        params = {"eid": external_id}
        if record_types:
            params['type[]'] = record_types

        res = self.client.get('by-external-id', params=params)
        if res.status_code == 404:
            # Because this function contains yield, this `return []` just
            # terminates the generator (the [] is discarded) — callers
            # still get an empty iterable, as documented above.
            return []
        elif res.status_code == 300:  # multiple returns, bare list of uris
            yield from (wrap_json_object({"ref": uri}, self.client)
                        for uri in IndexedSet(res.json()))
        elif res.status_code == 200:  # single obj, redirects to obj with 303->200
            yield wrap_json_object(res.json(), self.client)
        else:
            raise ASnakeBadReturnCode(
                "by-external-id call returned '{}'".format(res.status_code))

    def from_uri(self, uri):
        '''returns a JSONModelObject representing the URI passed in'''
        return wrap_json_object(self.client.get(uri).json(), self.client)
def main():
    """Normalize extent_type values on every resource in repository 2.

    For each resource, rewrites known variant spellings of an extent type
    (e.g. 'Linear Feet', 'linear ft.') to the canonical controlled value
    (e.g. 'linear_feet') and POSTs the record back only if something
    actually changed. Prints a summary when done.
    """
    client = ASnakeClient(baseurl='XXXX', username='******', password='******')
    client.authorize()
    # canonical extent_type -> variant spellings to be normalized to it
    changes = {
        'linear_feet': ['Linear Feet', 'linear ft.', 'Linear Foot'],
        'cubic_feet': ['Cubic Feet'],
        'gigabytes': ['Gigabytes']
    }
    res_records = client.get('repositories/2/resources',
                             params={'all_ids': True}).json()
    found_records = set()
    for record in tqdm(res_records):
        rec_uri = 'repositories/2/resources/{0}'.format(record)
        res_record = client.get(rec_uri).json()
        updated_record = deepcopy(res_record)
        # FIX: was a bare `except: pass` around the whole body, which
        # silently swallowed POST/logging errors too. Only the missing
        # 'extents' key is an expected condition worth skipping.
        try:
            extents = res_record['extents']
        except KeyError:
            continue  # resource has no extents; nothing to normalize
        for ext_index, extent in enumerate(extents):
            for key, variants in changes.items():
                if extent.get('extent_type') in variants:
                    updated_record['extents'][ext_index]['extent_type'] = key
                    break
        if res_record['extents'] != updated_record['extents']:
            response = client.post(rec_uri, json=updated_record)
            if response.status_code == 200:
                logger.info('Extent change successfully pushed',
                            rec=record, response=response)
                found_records.add(record)
            else:
                logger.info('Extent change failed',
                            rec=record, response=response)
    print('{0} resource records checked; {1} records updated.'.format(
        len(res_records), len(found_records)))
def test_authorize():
    """Authorization should return a 64-char lowercase-hex session token
    and install it on the client's session headers."""
    client = ASnakeClient()  # relies on default config, see ASnakeConfig class
    token = client.authorize()
    assert isinstance(token, str)
    assert len(token) == 64
    assert all(ch in '0123456789abcdef' for ch in token)
    assert client.session.headers['X-ArchivesSpace-Session'] == token
    # Fetching admin user info only succeeds when authed as admin
    assert client.get('users/1').status_code == 200
import csv, json
from asnake.client import ASnakeClient

# Module-level client, authorized once at import time.
client = ASnakeClient()
client.authorize()


def startCSV(CSV):
    '''Creates the CSV with field names and writes header'''
    fieldnames = [
        'lock_version', 'indicator', 'uri', 'collection_identifier',
        'series_identifier'
    ]
    with open(CSV, 'w', newline='') as outputCSV:
        writer = csv.DictWriter(outputCSV, fieldnames=fieldnames)
        writer.writeheader()


def addCSV(CSV, lock, ind, uri, coll_id, ser_id):
    '''Opens CSV, appends row'''
    fieldnames = [
        'lock_version', 'indicator', 'uri', 'collection_identifier',
        'series_identifier'
    ]
    with open(CSV, 'a', newline='') as outputCSV:
        writer = csv.DictWriter(outputCSV, fieldnames=fieldnames)
        writer.writerow({
            'lock_version': lock,
            'indicator': ind,
            'uri': uri,
            'collection_identifier': coll_id,
            # NOTE(review): chunk is truncated here — the remaining
            # 'series_identifier': ser_id entry and closing brackets are
            # outside this view; do not treat this function as complete.
from asnake.aspace import ASpace
import asnake.logging as logging

# Script setup for removing "fake wrapper" components from resources.
logging.setup_logging(level='DEBUG', filename="remove_fake_wrapper.log",
                      filemode="a")

aspace = ASpace(baseurl="[ASPACE API URL]", username="******",
                password="******")

#Log Into ASpace and set repo to RL
# NOTE(review): ASnakeClient is not imported in this chunk — presumably
# imported elsewhere in the file; verify before running standalone.
aspace_client = ASnakeClient(baseurl="[ASPACE API URL]", username="******",
                             password="******")
aspace_client.authorize()
#Set target repo
repo = aspace_client.get("repositories/2").json()
print("Logged into: " + repo['name'])
rl_repo = aspace.repositories(2)

#input is output of SQL query above
input_csv = input("Path to CSV Input: ")
#output will be input CSV plus some extra columns for reporting on actions taken, errors, etc.
updated_resources_csv = input("Path to CSV Output: ")


#Test if more than one direct child of Resource Object
#Why? Don't want to assign all children to Resource if there are other sibling Components of the fake wrapper component
def only_one_direct_child_of_resource_test(resource_object):
    # NOTE(review): function body is truncated in this chunk; the
    # implementation lies outside this view.
class ArchivesSpaceClient(object):
    """Client to get and receive data from ArchivesSpace.

    Wraps an ASnakeClient bound to one repository and maps friendly type
    names to (jsonmodel_type, endpoint) pairs via TYPE_LIST.
    """

    def __init__(self, baseurl, username, password, repo_id):
        self.client = ASnakeClient(baseurl=baseurl,
                                   username=username,
                                   password=password)
        self.repo_id = repo_id
        if not self.client.authorize():
            raise ArchivesSpaceClientError(
                "Couldn't authenticate user credentials for ArchivesSpace")
        # friendly name -> [jsonmodel_type, API endpoint]
        self.TYPE_LIST = {
            "family": ["agent_family", "agents/families"],
            "organization": ["agent_corporate_entity",
                             "agents/corporate_entities"],
            "person": ["agent_person", "agents/people"],
            "component": [
                "archival_object",
                "repositories/{repo_id}/archival_objects".format(
                    repo_id=self.repo_id)
            ],
            "accession": [
                "accession",
                "repositories/{repo_id}/accessions".format(
                    repo_id=self.repo_id)
            ],
            "digital object": [
                "digital_objects",
                "repositories/{repo_id}/digital_objects".format(
                    repo_id=self.repo_id)
            ]
        }

    def send_request(self, method, url, data=None, **kwargs):
        """Base method for sending requests to ArchivesSpace.

        Returns the decoded JSON body on HTTP 200; raises
        ArchivesSpaceClientError otherwise — except when the error payload
        flags "id_0" (duplicate accession identifier), in which case the
        accession number is bumped and the create is retried recursively.
        """
        r = getattr(self.client, method)(url, data=json.dumps(data), **kwargs)
        if r.status_code == 200:
            return r.json()
        else:
            if r.json()["error"].get("id_0"):
                """Account for indexing delays by bumping up to the next accession number."""
                # NOTE(review): this assumes data["id_1"] exists and is
                # numeric, and recursion only terminates once a free
                # number is found — confirm against caller usage.
                id_1 = int(data["id_1"])
                id_1 += 1
                data["id_1"] = str(id_1).zfill(3)
                return self.create(data, "accession")
            raise ArchivesSpaceClientError(
                "Error sending {} request to {}: {}".format(
                    method, url, r.json()["error"]))

    def retrieve(self, url, **kwargs):
        # GET convenience wrapper around send_request.
        return self.send_request("get", url, **kwargs)

    def create(self, data, type, **kwargs):
        # POST to the endpoint registered for the given friendly type.
        return self.send_request("post", self.TYPE_LIST[type][1], data,
                                 **kwargs)

    def update(self, uri, data, **kwargs):
        # POST updated JSON back to an existing object's URI.
        return self.send_request("post", uri, data, **kwargs)

    def get_or_create(self, type, field, value, last_updated, consumer_data):
        """Attempts to find and return an object in ArchivesSpace.

        If the object is not found, creates and returns a new object.
        Search order: indexed search first; on zero hits, a linear scan of
        objects modified since (last_updated - 120s) to account for
        indexing lag; finally a create. Returns the object's URI string.
        """
        model_type = self.TYPE_LIST[type][0]
        endpoint = self.TYPE_LIST[type][1]
        query = json.dumps({
            "query": {
                "field": field,
                "value": value,
                "jsonmodel_type": "field_query"
            }
        })
        try:
            r = self.client.get("repositories/{}/search".format(self.repo_id),
                                params={
                                    "page": 1,
                                    "type[]": model_type,
                                    "aq": query
                                }).json()
            if len(r["results"]) == 0:
                r = self.client.get(endpoint,
                                    params={
                                        "all_ids": True,
                                        "modified_since": last_updated - 120
                                    }).json()
                for ref in r:
                    # NOTE: `r` is rebound inside the loop; the outer id
                    # list is not needed afterwards, so this is safe here.
                    r = self.client.get("{}/{}".format(endpoint, ref)).json()
                    if r[field] == str(value):
                        return r["uri"]
                return self.create(consumer_data, type).get("uri")
            return r["results"][0]["uri"]
        except Exception as e:
            raise ArchivesSpaceClientError(
                "Error finding or creating object in ArchivesSpace: {}".format(
                    e))

    def next_accession_number(self):
        """
        Finds the next available accession number by searching for accession
        numbers with the current year, and then incrementing.

        Assumes that accession numbers are in the format YYYY NNN, where
        YYYY is the current year and NNN is a zero-padded integer.

        NOTE(review): the search identifier is split on "-" but the result
        is joined with ":" — confirm which separator downstream consumers
        expect. `number` starts as "1" and is zfill(3)-ed on return, so the
        double zfill on the incremented branch is harmless.
        """
        current_year = str(date.today().year)
        try:
            query = json.dumps({
                "query": {
                    "field": "four_part_id",
                    "value": current_year,
                    "jsonmodel_type": "field_query"
                }
            })
            r = self.client.get("repositories/{}/search".format(self.repo_id),
                                params={
                                    "page": 1,
                                    "type[]": "accession",
                                    "sort": "identifier desc",
                                    "aq": query
                                }).json()
            number = "1"
            if r.get("total_hits") >= 1:
                if r["results"][0]["identifier"].split("-")[0] == current_year:
                    id_1 = int(r["results"][0]["identifier"].split("-")[1])
                    id_1 += 1
                    number = str(id_1).zfill(3)
            return ":".join([current_year, number.zfill(3)])
        except Exception as e:
            raise ArchivesSpaceClientError(
                "Error retrieving next accession number from ArchivesSpace: {}"
                .format(e))
# Build one spreadsheet row per file path listed in `filelist`
# (filelist / makeRow / getAo are defined elsewhere in this file).
total = len(filelist)
count = 0
for f in filelist:
    f = f.replace('\n', '')  # strip trailing newline from each read line
    count += 1
    makeRow(getAo(f), f)
    print('Row added! - ' + str(count) + "/" + str(total))

# enter aspace login info
config = configparser.ConfigParser()
config.read('local_settings.cfg')
baseurl= config.get('ArchivesSpace', 'baseURL')
# NOTE(review): the line below is not valid Python — the credential
# prompts appear to have been mangled by secret redaction ('******'
# replaced code). It was presumably two statements: an input() for the
# username and a getpass() for the password. Reconstruct from the
# original script before running.
user = input('ArchivesSpace username: '******'ArchivesSpace password:'******'\a')
import json, time, runtime
from asnake.client import ASnakeClient
from asnake.client.web_client import ASnakeAuthError

# Create a client
client = ASnakeClient()
client.authorize()  # login, using default values

# print instructions
print("This script will add the container_profiles included in a separate json file to ArchivesSpace.")
input("Press Enter to continue...")

# post container_profiles
print("The following container profiles have been added to ArchivesSpace:")
# FIX: the file handle was previously opened, rebound to the parsed JSON,
# and never closed; use a context manager and keep the two names distinct.
with open("containerProfiles.json") as jsonfile:
    container_profiles = json.load(jsonfile)
for container_profile in container_profiles:
    # POST each profile and echo the server's JSON response
    post = client.post("/container_profiles", json=container_profile).json()
    print(post)
print("You've just completed your first API POST. Congratulations!")
def main(ID, path=None, accession=None):
    """Build a Submission Information Package (SIP) for a collection.

    :param ID: collection identifier (matched against resource id_0)
    :param path: directory to package; defaults to defaultPath/ID
    :param accession: optional accession number; when given, accession
        metadata is pulled from ArchivesSpace into the bag-info and the
        accession record is updated with disposition/extents/dates.
    :returns: the created SubmissionInformationPackage
    :raises Exception: on missing paths or unresolvable/invalid accessions
    """
    if path == None:
        if not os.path.isdir(defaultPath):
            raise Exception("ERROR: default path " + defaultPath +
                            " does not exist.")
        path = os.path.join(defaultPath, ID)
        if not os.path.isdir(path):
            raise Exception("ERROR: no " + ID +
                            " directory exists for ingest in " + defaultPath)
    else:
        if not os.path.isdir(path):
            raise Exception("ERROR: " + str(path) + " is not a valid path.")
    print("Reading " + path)

    if accession == None:
        # No accession: plain packaging run.
        print("Building SIP...")
        SIP = SubmissionInformationPackage()
        SIP.create(ID)
        SIP.package(path)
        print("SIP " + SIP.bagID + " created.")
    else:
        # Accession given: look it up in ArchivesSpace and enrich bag-info.
        print("Reading accession " + accession)
        import asnake.logging as logging
        from asnake.client import ASnakeClient
        client = ASnakeClient()
        client.authorize()
        logging.setup_logging(stream=sys.stdout, level='INFO')
        # Search accessions by identifier via an advanced query.
        call = "repositories/2/search?page=1&aq={\"query\":{\"field\":\"identifier\", \"value\":\"" + accession + "\", \"jsonmodel_type\":\"field_query\"}}"
        accessionResponse = client.get(call).json()
        if len(accessionResponse["results"]) < 1:
            raise Exception("ERROR: Could not find accession with ID: " +
                            accession)
        else:
            # The search result embeds the full record as a JSON string.
            accessionObject = json.loads(
                accessionResponse["results"][0]["json"])
            if "id_1" in accessionObject.keys():
                accessionID = accessionObject["id_0"] + "-" + accessionObject[
                    "id_1"]
                # Guard against fuzzy search matches.
                if accession != accessionID:
                    raise Exception(
                        "ERROR: Could not find exact accession with ID: " +
                        accession)
            # NOTE(review): if "id_1" is absent, accessionID is never
            # assigned and the error messages below would raise NameError —
            # presumably accessions always have id_1 here; confirm.
            if not "content_description" in accessionObject.keys():
                raise Exception("ERROR: no content description in " +
                                accessionID + " accession, " +
                                accessionObject["uri"])
            if len(accessionObject["related_resources"]) < 1:
                raise Exception("ERROR: no related resource for " +
                                accessionID + " accession, " +
                                accessionObject["uri"])
            else:
                resource = client.get(
                    accessionObject["related_resources"][0]["ref"]).json()
                creator = resource["title"]
                if not ID.lower() == resource["id_0"].lower():
                    raise Exception("ERROR: accession " + accessionID +
                                    " does not link to collection ID " + ID +
                                    ". Instead linked to " + resource["id_0"])
                description = accessionObject["content_description"]
                print("Building SIP...")
                SIP = SubmissionInformationPackage()
                SIP.create(ID)
                SIP.package(path)
                print("SIP " + SIP.bagID + " created.")
                # Populate bag-info from the accession record.
                SIP.bag.info["Accession-Identifier"] = accessionID
                SIP.bag.info["ArchivesSpace-URI"] = accessionObject["uri"]
                SIP.bag.info["Records-Creator"] = creator
                SIP.bag.info["Content-Description"] = description
                if "condition_description" in accessionObject.keys():
                    SIP.bag.info["Condition-Description"] = accessionObject[
                        "condition_description"]
                if "provenance" in accessionObject.keys():
                    SIP.bag.info["Provenance"] = accessionObject["provenance"]
                if "general_note" in accessionObject.keys():
                    SIP.bag.info["General-Note"] = accessionObject[
                        "general_note"]
                SIP.bag.info["Source-Location"] = path
                SIP.bag.info[
                    "Transfer-Method"] = "https://github.com/UAlbanyArchives/ingest-processing-workflow/ingest.py"

    print("Writing checksums...")
    SIP.bag.save(manifests=True)
    print("SIP Saved!")

    # List files in txt for processing
    print("(not) Listing files for processing...")
    #listFiles(ID)

    if accession == None:
        SIP.extentLog(
            "/media/SPE/DigitizationExtentTracker/DigitizationExtentTracker.xlsx"
        )
        print("Logged ingest to DigitizationExtentTracker.")
    else:
        # Record the SIP in the accession's disposition, extents and dates,
        # then POST the updated accession back to ArchivesSpace.
        print("Updating accession " + accessionID)
        if "disposition" in accessionObject.keys():
            accessionObject["disposition"] = accessionObject[
                "disposition"] + "\n" + str(SIP.bagID)
        else:
            accessionObject["disposition"] = str(SIP.bagID)
        totalSize = SIP.size()        # (number, unit, file_count) — per use below
        inclusiveDates = SIP.dates()  # (begin, end)
        extent = {
            "jsonmodel_type": "extent",
            "portion": "whole",
            "number": str(totalSize[0]),
            "extent_type": str(totalSize[1])
        }
        extentFiles = {
            "jsonmodel_type": "extent",
            "portion": "whole",
            "number": str(totalSize[2]),
            "extent_type": "Digital Files"
        }
        if inclusiveDates[0] == inclusiveDates[1]:
            # Single-year span: use an expression instead of begin/end.
            date = {
                "jsonmodel_type": "date",
                "date_type": "inclusive",
                "label": "creation",
                "begin": inclusiveDates[0],
                "expression": inclusiveDates[0]
            }
        else:
            date = {
                "jsonmodel_type": "date",
                "date_type": "inclusive",
                "label": "creation",
                "begin": inclusiveDates[0],
                "end": inclusiveDates[1]
            }
        if "extents" in accessionObject.keys():
            accessionObject["extents"].append(extent)
            accessionObject["extents"].append(extentFiles)
        else:
            accessionObject["extents"] = [extent, extentFiles]
        accessionObject["dates"].append(date)
        updateAccession = client.post(accessionObject["uri"],
                                      json=accessionObject)
        if updateAccession.status_code == 200:
            print("\tSuccessfully updated accession " + accessionID)
        else:
            print(updateAccession.text)
            print("\tERROR " + str(updateAccession.status_code) +
                  "! Failed to update accession: " + accessionID)
    return SIP
def main():
    """Tally total extents (linear/cubic feet, gigabytes) for MS and UA
    resources in repository 2 and dump a dated JSON report.

    Interactive: on a non-numeric extent the user is prompted to 'stop'
    (quit) or 'continue' (skip the entry).
    """
    client = ASnakeClient(baseurl='XXXX', username='******', password='******')
    client.authorize()
    # unit family -> accepted extent_type spellings
    catalog = {
        'linear': ['linear_feet', 'Linear Feet', 'linear ft.', 'Linear Foot'],
        'cubic': ['cubic_feet', 'Cubic Feet'],
        'gb': ['gigabytes', 'Gigabytes']
    }
    res_records = (client.get('repositories/2/resources',
                              params={'all_ids': True})).json()
    data_list = []
    print('Compiling resource records from API...')
    for record in tqdm(res_records):
        res_record = client.get(
            'repositories/2/resources/{0}'.format(record)).json()
        # NOTE(review): bare except below silently skips resources without
        # an 'extents' key (and hides any other error) — best-effort by design.
        try:
            extents = res_record['extents']
            for x in extents:
                if x['extent_type'] == 'megabytes':
                    # Normalize megabytes to gigabytes up front.
                    data_list.append({
                        'id': res_record['id_0'],
                        'amount': str(float(x['number']) / 1000),
                        'units': 'gigabytes'
                    })
                else:
                    data_list.append({
                        'id': res_record['id_0'],
                        'amount': x['number'],
                        'units': x['extent_type']
                    })
        except:
            pass
    # Accumulators split by collection prefix (MS vs UA).
    linear_ms = 0
    linear_ua = 0
    gb_ms = 0
    gb_ua = 0
    cubic_ms = 0
    cubic_ua = 0
    print('Analyzing extents in resource data...')
    for entry in data_list:
        try:
            if entry['id'].startswith(
                    'MS') and entry['units'] in catalog['linear']:
                linear_ms += float(entry['amount'])
            elif entry['id'].startswith(
                    'UA') and entry['units'] in catalog['linear']:
                linear_ua += float(entry['amount'])
            elif entry['id'].startswith(
                    'MS') and entry['units'] in catalog['gb']:
                gb_ms += float(entry['amount'])
            elif entry['id'].startswith(
                    'UA') and entry['units'] in catalog['gb']:
                gb_ua += float(entry['amount'])
            elif entry['id'].startswith(
                    'MS') and entry['units'] in catalog['cubic']:
                cubic_ms += float(entry['amount'])
            elif entry['id'].startswith(
                    'UA') and entry['units'] in catalog['cubic']:
                cubic_ua += float(entry['amount'])
            else:
                pass
        except:
            # Most likely float() failed on a non-numeric extent amount;
            # ask the user whether to abort or skip this entry.
            exception = input(
                'Uh oh, looks like the analysis ran into a snag; most likely, '
                'a unit of extent for {0} ({1}) is not a pure number. Enter '
                '\'stop\' to kill the process so you can fix the record. Alternatively, '
                'you can enter \'continue\' to skip this entry and keep the analysis '
                'going.'.format(entry['id'], entry['amount']))
            if (exception.lower()).strip() == 'continue':
                pass
            elif (exception.lower()).strip() == 'stop':
                quit()
    report = {
        'MS Linear feet': round(linear_ms, 2),
        'UA Linear feet': round(linear_ua, 2),
        'Total linear feet': round((linear_ua + linear_ms), 2),
        'MS GB': round(gb_ms, 2),
        'UA GB': round(gb_ua, 2),
        'Total GB': round((gb_ms + gb_ua), 2),
        'MS Cubic feet': round(cubic_ms, 2),
        'UA Cubic feet': round(cubic_ua, 2),
        'Total Cubic feet': round((cubic_ua + cubic_ms), 2)
    }
    print('Generating report as JSON...')
    with open(('extent_calculator_' +
               (datetime.datetime.today().strftime('%Y-%m-%d')) + '.json'),
              'w') as f:
        json.dump(report, f)
def buildSelections(colID, refID=None, filter=None, date=False, verbose=False):
    """Harvest digital-object metadata for a collection (or one component)
    from the archives.albany.edu catalog and write two JSON files: a
    description dump and a sorted list of item objects.

    :param colID: collection number, e.g. "apap101"
    :param refID: optional component ref_id to restrict the harvest
    :param filter: optional extra query string appended to the catalog URL
    :param date: optional year ("YYYY") or range ("YYYY-YYYY") filter
    :param verbose: accepted but unused in this view
    """
    client = ASnakeClient()
    client.authorize()
    collection = []
    page = 1
    outDir = "/media/SPE/uploads"
    if refID:
        url = "https://archives.albany.edu/catalog?f[record_parent_sim][]=" + refID + "&format=json&per_page=100"
        outFile = os.path.join(outDir, refID + ".json")
        descriptionURL = "https://archives.albany.edu/description/catalog/" + colID.replace(".", "-") + "aspace_" + refID
        outDesc = os.path.join(outDir, "desc_" + refID + ".json")
    else:
        url = "https://archives.albany.edu/catalog?f[collection_number_sim][]=" + colID + "&format=json&per_page=100"
        outFile = os.path.join(outDir, colID.replace(".", "-") + ".json")
        descriptionURL = "https://archives.albany.edu/description/catalog/" + colID.replace(".", "-")
        outDesc = os.path.join(outDir, "desc_" + colID.replace(".", "-") + ".json")
    if filter:
        url = url + "&" + filter

    print (descriptionURL + "?format=json")
    # NOTE(review): verify=False disables TLS certificate checking —
    # flagged; presumably a workaround for the site's cert, confirm.
    r = requests.get(descriptionURL + "?format=json", verify=False)
    print (r.status_code)
    with open(outDesc, 'w', encoding='utf-8', newline='') as f:
        json.dump(r.json()["response"], f, ensure_ascii=True, indent=4)

    def getPage(page, collection, url):
        # Fetch one results page, append matching items to `collection`,
        # then recurse until the catalog reports the last page.
        r = requests.get(url + "&page=" + str(page), verify=False)
        print (r.status_code)
        for item in r.json()["response"]["docs"]:
            obj = {}
            obj["title"] = item["title_tesim"][0]
            obj["date"] = item["date_created_tesim"][0]
            #print (item)
            ref_id = item["archivesspace_record_tesim"][0]
            obj["thumb"] = "https://archives.albany.edu" + item["thumbnail_path_ss"]
            obj["url"] = "https://archives.albany.edu/concern/" + item["has_model_ssim"][0].lower() + "s/" + item["id"]
            # Resolve the catalog ref_id back to the ArchivesSpace record.
            record = client.get("repositories/2/find_by_id/archival_objects?ref_id[]=" + ref_id).json()
            ao = client.get(record["archival_objects"][0]["ref"]).json()
            print (ao["ref_id"])
            dateNormal = ao["dates"][0]["begin"]
            if "end" in ao["dates"][0].keys():
                dateNormal = dateNormal + "/" + ao["dates"][0]["end"]
            if "undated" in ao["dates"][0]["expression"].lower():
                # Sort undated items last.
                obj["date_normal"] = "9999"
            else:
                obj["date_normal"] = dateNormal
            if date:
                # Apply the optional year / year-range filter using the
                # human-entered display date (not date_normal).
                if not obj["date"].lower() == "undated":
                    if obj["date"].lower().startswith("ca."):
                        objDate = obj["date"].split(" ")[1]
                    else:
                        if "-" in obj["date"]:
                            objDate = obj["date"].split("-")[0]
                        else:
                            objDate = obj["date"].split(" ")[0]
                    print (objDate)
                    try:
                        if "-" in date:
                            if int(objDate) >= int(date.split("-")[0]) and int(objDate) <= int(date.split("-")[1]):
                                collection.append(obj)
                        else:
                            if int(objDate) < int(date):
                                collection.append(obj)
                    except:
                        # Display date not parseable as a year; skip item.
                        print ("Date Error: " + objDate)
            else:
                collection.append(obj)
        if r.json()["response"]["pages"]["last_page?"] == False:
            getPage(page + 1, collection, url)

    getPage(page, collection, url)
    #print (collection)
    # Sort by title first, then (stable sort) by normalized date.
    sortedTitle = sorted(collection, key = lambda i: i['title'].split(" ")[0])
    sortedCollection = sorted(sortedTitle, key = lambda i: i['date_normal'].split(" ")[0])
    print (len(sortedCollection))
    with open(outFile, 'w', encoding='utf-8', newline='') as f:
        json.dump(sortedCollection, f, ensure_ascii=True, indent=4)
class ASTemps():
    """Generate blank record templates (JSON and CSV) from the live
    ArchivesSpace /schemas and /config/enumerations endpoints.

    NOTE(review): the author's own comments mark several known hazards
    (infinite recursion between parse_jsonmodel/parse_schema, behavior
    that depends on `is` vs `==` string comparisons). Treat this class as
    fragile; do not "clean up" comparisons without re-testing.
    """

    def __init__(self):
        self.client = ASnakeClient()
        self.auth = self.client.authorize()
        self.all_schemas = self.get_schemas()
        #a list of all enumerations
        #COULD ALSO DO /config/enumerations/names/:enum_name
        self.all_enums = self.get_dynamic_enums()
        #gets the list of schema names
        self.schema_list = [key for key in self.all_schemas.keys()]
        #gets the type list
        self.type_list = list(
            set([
                k for value in self.all_schemas.values()
                for k, v in value.items()
            ]))
        # Matches schema type strings like "JSONModel(:accession) uri".
        self.jsonmodel_pattern = re.compile(
            '(JSONModel)(\(:.*?\)\s)(uri|object|uri_or_object)')

    def get_schemas(self):
        # GET all schema definitions.
        schemas = self.client.get('/schemas').json()
        return (schemas)

    def get_schema(self, schema):
        # GET a single schema definition by name.
        schema = self.client.get('/schemas/' + schema).json()
        return (schema)

    def get_dynamic_enums(self):
        # GET all controlled-value (dynamic enumeration) lists.
        enums = self.client.get('/config/enumerations').json()
        return (enums)

    def parse_jsonmodel(self, obj_value):
        """Resolve a 'JSONModel(:name) uri|object' type string into either
        a {'ref': uri} dict or a recursively parsed sub-template."""
        #reg ex to capture all jsonmodel references in schema
        #jsonmodel = re.compile('(JSONModel)(\(:.*?\)\s)(uri|object|uri_or_object)')
        logging.debug('starting jsonmodel')
        if self.jsonmodel_pattern.match(obj_value):
            logging.debug('match with ' + str(obj_value))
            #gets the name of the schema
            stripped_string = obj_value[obj_value.find("(") +
                                        1:obj_value.find(")")][1:]
            if stripped_string != 'repository':
                logging.debug('Getting schema for: ' + stripped_string)
                jsonmodel_schema = self.all_schemas[stripped_string]
                #wondering if this is where the problem is??? I know this works in some cases
                if 'uri' in obj_value:
                    logging.debug('uri in obj_value')
                    parsed_json = {'ref': jsonmodel_schema['uri']}
                    logging.debug(str(parsed_json))
                #LOL this also gets digital objects
                if 'object' in obj_value:
                    if 'digital_object' not in obj_value:
                        logging.debug('object in obj_value')
                        #workaround for testing - infinite recursion - but only fixes part of it...
                        if stripped_string == 'note_outline_level':
                            parsed_json = None
                        else:
                            #THIS IS BROKEN!!!! INFINITE RECURSION
                            logging.debug("obj_value " + str(obj_value))
                            logging.debug('running parse_schema on ' +
                                          str(obj_value))
                            parsed_json = self.parse_schema(
                                stripped_string, jsonmodel_schema)
            #saves lots of memory, likely will not change.
            if stripped_string == 'repository':
                parsed_json = {'ref': '/repositories/:repo_id'}
            # NOTE(review): if neither 'uri' nor 'object' branch assigns,
            # parsed_json would be unbound here — presumably the regex
            # guarantees one of them matched; confirm.
            return parsed_json

    #still more to do with the other ref properties
    def parse_refs(self, schema_name, obj_name, obj_value):
        """Resolve 'ref' sub-properties of a schema property into parsed
        JSONModel references (a single ref or a list of them)."""
        logging.debug('starting parse_refs on ' + str(schema_name) + ' ' +
                      str(obj_name))
        #go through the properties of the refs
        if 'properties' in obj_value:
            logging.debug('properties in ' + str(obj_value))
            if 'ref' in obj_value['properties']:
                logging.debug('ref in properties')
                if type(obj_value['properties']['ref']['type']) is list:
                    logging.debug('Type of ref is list')
                    logging.debug("obj_value['properties']['ref']['type']: " +
                                  str(obj_value['properties']['ref']['type']))
                    ref_list = []
                    for ref in obj_value['properties']['ref']['type']:
                        logging.debug('Looping through ref list')
                        logging.debug(obj_value['properties']['ref']['type'])
                        logging.debug(ref['type'])
                        #FIX THIS
                        parsed_ref = self.parse_jsonmodel(ref['type'])
                        logging.debug('parsed ref ' + str(parsed_ref))
                        ref_list.append(parsed_ref)
                    logging.debug('ref_list: ' + str(ref_list))
                    return ref_list
                else:
                    logging.debug('Type of ref is not list')
                    if self.jsonmodel_pattern.match(
                            obj_value['properties']['ref']['type']):
                        logging.debug(
                            'RE match ' +
                            str(obj_value['properties']['ref']['type']))
                        logging.debug('calling parse_jsonmodel')
                        parsed_ref = self.parse_jsonmodel(
                            obj_value['properties']['ref']['type'])
                        return parsed_ref
        else:
            logging.debug('properties not in ' + str(obj_name) +
                          'value dictionary')
            logging.debug(str(obj_value['ref']['type']))
            if self.jsonmodel_pattern.match(obj_value['ref']['type']):
                logging.debug(
                    str(obj_value['ref']['type']) +
                    ' matches jsonmodel pattern')
                logging.debug('Calling parse_jsonmodel on ' +
                              str(obj_value['ref']['type']))
                parsed_ref = self.parse_jsonmodel(obj_value['ref']['type'])
                return parsed_ref

    def parse_enums(self, enum_name):
        """Return the list of values for the named dynamic enumeration."""
        enum_list = []
        for enum in self.all_enums:
            if enum['name'] == enum_name:
                for ev in enum['enumeration_values']:
                    enum_list.append(ev['value'])
        return enum_list

    def parse_schema(self, schema_name, schema_def):
        """Build a template dict for one schema: property name -> None,
        enum value list, or nested parsed JSONModel structure.

        Always returns a dict (the finally block runs even after an
        exception) with at least 'jsonmodel_type' set.
        """
        try:
            logging.debug("Working on schema: " + str(schema_name))
            template_dict = {}
            #Fixes infinite recursion for now
            exclusions = [
                'collection_management', 'rights_statement',
                'rights_statement_act', 'note_rights_statement',
                'note_rights_statement_act', 'children', 'deaccessions',
                '_inherited', 'rights_statements', 'external_id'
            ]
            for prop_name, prop_value in schema_def['properties'].items():
                logging.debug("Working on prop: " + str(prop_name))
                if schema_name in exclusions:
                    print(schema_name + ' in exclusion list')
                    continue
                elif prop_name in exclusions:
                    print(str(prop_name) + ' in exclusion list')
                    continue
                #If there is more than one type it will be stored in a list.
                elif type(prop_value['type']) is list:
                    '''
                    INTEGER/STRING This is always (and only? )the lock version.
                    Don't need to do anything with it, but will keep in the
                    check in case the schema changes.
                    '''
                    #WHAT WOULD HAPPEN IF I JUST SKIPPED ALTOGETHER - NOTHING STILL F****D!!
                    # if prop_value['type'] == ['integer', 'string']:
                    # if prop_name == 'lock_version':
                    # logging.debug(schema_name, prop_name, prop_value)
                    # continue
                    # if prop_name != 'lock_version':
                    # template_dict[prop_name] = None
                    '''
                    What is this doing???
                    '''
                    if 'query' in prop_value['type'][0]:
                        continue
                    #logging.debug(schema_name, prop_name, prop_value)
                    '''
                    What is this doing???
                    '''
                    if type(prop_value['type'][0]) is dict:
                        continue
                    #if 'agent' in prop_value['type'][0]['type']:
                    #logging.debug(schema_name, prop_name, prop_value)
                #If there is only one type it won't be in a list.
                else:
                    '''
                    JSONMODEL TYPES Can be either an object or URI. Refers to
                    another schema or a reference to another object. i.e.
                    date subrecords, location URIs
                    '''
                    if self.jsonmodel_pattern.match(prop_value['type']):
                        logging.debug('Regex match, ' + str(prop_value['type']))
                        #Don't add read-only fields to the template. Might want to change this
                        #in the case of URIs or IDs...but don't worry about it for now.
                        if 'readonly' in prop_value:
                            logging.debug('Property value is readonly')
                            if 'subtype' in prop_value:
                                logging.debug('Subtype in property value')
                                if prop_value['subtype'] == 'ref':
                                    # NOTE(review): 'tyoe' below is a typo for
                                    # 'type' — the KeyError it raises is caught
                                    # by the except KeyError handler, so this
                                    # debug line silently aborts the schema.
                                    logging.debug(
                                        'Subtype of ' + str(prop_name) +
                                        'is ref, calling parse_jsonmodel on ' +
                                        str(prop_value['tyoe']))
                                    template_dict[
                                        prop_name] = self.parse_jsonmodel(
                                            prop_value['type'])
                        else:
                            logging.debug(
                                'readonly not in property value dict, calling parse_jsonmodel on '
                                + str(prop_value['type']))
                            template_dict[prop_name] = self.parse_jsonmodel(
                                prop_value['type'])
                    elif prop_value['type'] == 'array':
                        logging.debug('Prop value type is array')
                        #this will always be the case I think? Check
                        if 'items' in prop_value:
                            #no need to have readonly fields in template???
                            #if there is more than one type
                            if type(prop_value['items']['type']) is list:
                                logging.debug('Type of array items is list')
                                template_dict[prop_name] = []
                                #this might always be object??? check and see
                                for prop_type in prop_value['items']['type']:
                                    if self.jsonmodel_pattern.match(
                                            prop_type['type']):
                                        parsed_json = self.parse_jsonmodel(
                                            prop_type['type'])
                                        template_dict[prop_name].append(
                                            parsed_json)
                                    # NOTE(review): `is 'object'` is an
                                    # identity check on a literal — author
                                    # warns changing it alters behavior.
                                    if prop_type['type'] is 'object':
                                        logging.debug(schema_name, prop_name,
                                                      prop_value)
                            #If there is only one type...
                            else:
                                logging.debug('Type of array items is object')
                                if prop_value['items']['type'] is 'object':
                                    if 'subtype' in prop_value['items']:
                                        #these usually have properties
                                        if 'properties' in prop_value['items']:
                                            template_dict[
                                                prop_name] = self.parse_refs(
                                                    schema_name, prop_name,
                                                    prop_value)
                                    else:
                                        if 'properties' in prop_value['items']:
                                            logging.debug(
                                                schema_name, schema_name,
                                                prop_name, prop_value)
                                if prop_value['items']['type'] == 'string':
                                    if 'enum' in prop_value['items']:
                                        template_dict[prop_name] = prop_value[
                                            'items']['enum']
                                #if it matches the object pattern
                                if self.jsonmodel_pattern.match(
                                        prop_value['items']['type']):
                                    logging.debug(prop_name)
                                    logging.debug(
                                        str(prop_value['items']['type']))
                                    parsed_json = self.parse_jsonmodel(
                                        prop_value['items']['type'])
                                    template_dict[prop_name] = [parsed_json]
                    #Changing this from 'is' to '==' causes infinite recursion. Interestingly changing it above causes many
                    #fields to be removed from the templates - 2 other instances of is/== 'object'
                    elif prop_value['type'] == 'object':
                        logging.debug('Prop value type is object')
                        if 'properties' in prop_value:
                            if 'subtype' in prop_value:
                                logging.debug(
                                    'subtype in prop value, calling parse_refs on '
                                    + str(schema_name) + ' ' + str(prop_name))
                                #these are all refs I think
                                template_dict[prop_name] = self.parse_refs(
                                    schema_name, prop_name, prop_value)
                            else:
                                logging.debug('subtype not in prop_value: ')
                                # NOTE(review): multi-arg logging.debug calls
                                # like this treat extra args as %-format
                                # params — likely not what was intended.
                                logging.debug(schema_name, prop_name,
                                              prop_value)
                    elif prop_value['type'] == 'string':
                        logging.debug('Prop value is string')
                        #enums are always strings
                        if 'readonly' not in prop_value:
                            logging.debug(
                                'readonly not in prop value dictionary')
                            if 'enum' in prop_value:
                                template_dict[prop_name] = prop_value['enum']
                            if 'dynamic_enum' in prop_value:
                                template_dict[prop_name] = self.parse_enums(
                                    prop_value['dynamic_enum'])
                            else:
                                # NOTE(review): this else pairs with the
                                # 'dynamic_enum' check, so it overwrites a
                                # static 'enum' value with None — confirm
                                # whether that is intended.
                                template_dict[prop_name] = None
                    elif prop_value['type'] in [
                            'integer', 'boolean', 'date', 'date-time', 'number'
                    ]:
                        logging.debug(
                            'Prop value is type int, bool, date, date-time, number'
                        )
                        #make sure this is correct, as in not missing something that should be there
                        if 'readonly' not in prop_value:
                            logging.debug(
                                'readonly not in prop value dictionary')
                            template_dict[prop_name] = None
                    else:
                        logging.debug('Value not of a recognized type')
        except KeyError:
            logging.debug('KeyError: ' + schema_name + ' ' + prop_name)
        except Exception as exc:
            logging.debug('Error: ' + schema_name + ' ' + prop_name)
            logging.debug(traceback.format_exc())
        finally:
            # NOTE(review): returning from finally swallows any in-flight
            # exception; template_dict may be partial after an error.
            template_dict['jsonmodel_type'] = schema_name
            return template_dict

    #QUESTION - SHOULD I CREATE LITTLE FUNCTIONS FOR EACH TYPE - i.e if whatever is 'object',
    #then do function stuff...might help with the nesting
    #want to go through each schema and create a sample dictionary template
    #need to be able to handle just one schema
    def parse_schemas(self, schemas):
        """Run parse_schema over every schema; returns name -> template."""
        template_dict = {}
        for schema_name, schema_def in schemas.items():
            #check for a parent - but one that isn't "abstract" because those fields are the same
            #WHAT TO DO WITH THIS????
            # if 'parent' in schema_def:
            # pass
            temp = self.parse_schema(schema_name, schema_def)
            template_dict[schema_name] = temp
        return template_dict

    def create_csv_template(self, jsontemplatedict):
        '''
        Goal is to create the JSON templates, and then convert those to CSV
        file that can be used to create either full finding aids/top level
        records, or to update subrecords in bulk
        '''
        # Appends a single header row of flattened field names to
        # <jsonmodel_type>.csv.
        fileob = open(jsontemplatedict['jsonmodel_type'] + '.csv',
                      'a',
                      encoding='utf-8',
                      newline='')
        csvout = csv.writer(fileob)
        subfield_list = []
        for key, value in jsontemplatedict.items():
            if type(value) is list:
                #should I just check the first one instead of looping through all?
                if type(value[0]) is dict:
                    for item in value:
                        for k in item.keys():
                            subfield_list.append(
                                jsontemplatedict['jsonmodel_type'] + '_' +
                                key + '_' + k)
                #only two options for lists, correct?
                if type(value[0]) is not dict:
                    #this means that it's just a list of enums probably - right?? No other list formats
                    #do I need the check now that I removed the loop?
                    check = jsontemplatedict['jsonmodel_type'] + '_' + key
                    if check not in subfield_list:
                        subfield_list.append(
                            jsontemplatedict['jsonmodel_type'] + '_' + key)
            else:
                subfield_list.append(jsontemplatedict['jsonmodel_type'] +
                                     '_' + key)
        csvout.writerow(subfield_list)
        fileob.close()
        return subfield_list

    #Wrapper loop to create all templates
    def create_csv_templates(self, jsontemplates):
        for template_key, template_value in jsontemplates.items():
            self.create_csv_template(template_value)

    def download_templates(self, jsontemplates):
        # Write each template out as pretty-printed <name>.json.
        # NOTE(review): file handle is not closed explicitly here.
        for template_key, template_value in jsontemplates.items():
            outfile = open(str(template_key) + '.json', 'w', encoding='utf-8')
            json.dump(template_value, outfile, sort_keys=True, indent=4)
# NOTE(review): this chunk begins mid-way through export_eads(); the
# function's def line and loop header are outside this view. Indentation
# below is reconstructed and should be checked against the full source.
        combined_aspace_id_clean = id_combined_regex.sub('', combined_id)
        if resource.json()["publish"] is True:
            if resource.status_code == 200:
                # Export the published resource as EAD 2002 XML with DAOs
                # and numbered components, excluding unpublished content.
                export_ead = client.get(
                    "repositories/{}/resource_descriptions/{}.xml".format(
                        repo_id, resource_id),
                    params={
                        "include_unpublished": False,
                        "include_daos": True,
                        "numbered_cs": True,
                        "print_pdf": False,
                        "ead3": False
                    })
                filepath = str(Path(source_path,
                                    combined_aspace_id_clean)) + ".xml"
                with open(filepath, "wb") as local_file:
                    local_file.write(export_ead.content)
                    # NOTE(review): close() is redundant inside `with`.
                    local_file.close()
                print("Exported: {}".format(combined_id))
            else:
                print(
                    "\nThe following errors were found when exporting {}:\n{}: {}\n"
                    .format(combined_id, resource, resource.text))
        print("-" * 100)


# Script entry: prompt for the output folder, authorize, and export.
sourcepath = input("Enter folder path for exported EADs: ")
asp_client = ASnakeClient(baseurl=as_api, username=as_un, password=as_pw)
asp_client.authorize()
export_eads(asp_client, sourcepath)
def get_aspace_log(defaults):
    """
    Gets a user's ArchivesSpace credentials.

    There are 3 components to it, the setup code, correct_creds while loop, and
    the window_asplog_active while loop. It uses ASnake.client to authenticate
    and stay connected to ArchivesSpace. Documentation for ASnake can be found
    here: https://archivesspace-labs.github.io/ArchivesSnake/html/index.html

    Args:
        defaults (UserSetting class): contains the data from defaults.json file,
            all data the user has specified as default

    Returns:
        close_program (bool): if a user exits the popup, this will return true
            and end run_gui()
        connect_client (ASnake.client object): the ArchivesSpace ASnake client
            for accessing and connecting to the API
        repositories (dict): repository display name -> numeric repository id,
            built from the /repositories endpoint
    """
    connect_client = None
    repositories = {}
    save_button_asp = " Save and Continue "
    window_asplog_active = True
    correct_creds = False
    close_program = False
    while correct_creds is False:
        # FIX(review): the font keyword on the first two labels was corrupted
        # in the source ("******" where ', font=(' belonged); reconstructed
        # from the intact third label below.
        asplog_col1 = [
            [psg.Text("ArchivesSpace username:", font=("Roboto", 11))],
            [psg.Text("ArchivesSpace password:", font=("Roboto", 11))],
            [psg.Text("ArchivesSpace API URL:", font=("Roboto", 11))]
        ]
        asplog_col2 = [
            [psg.InputText(focus=True, key="_ASPACE_UNAME_")],
            [psg.InputText(password_char='*', key="_ASPACE_PWORD_")],
            [psg.InputText(defaults["as_api"], key="_ASPACE_API_")]
        ]
        layout_asplog = [
            [psg.Column(asplog_col1, key="_ASPLOG_COL1_", visible=True),
             psg.Column(asplog_col2, key="_ASPLOG_COL2_", visible=True)],
            [psg.Button(save_button_asp, bind_return_key=True,
                        key="_SAVE_CLOSE_LOGIN_")]
        ]
        window_login = psg.Window("ArchivesSpace Login Credentials",
                                  layout_asplog)
        while window_asplog_active is True:
            event_log, values_log = window_login.Read()
            if event_log == "_SAVE_CLOSE_LOGIN_":
                try:
                    connect_client = ASnakeClient(
                        baseurl=values_log["_ASPACE_API_"],
                        username=values_log["_ASPACE_UNAME_"],
                        password=values_log["_ASPACE_PWORD_"])
                    connect_client.authorize()
                    # Remember the last API URL the user logged in with.
                    defaults["as_api"] = values_log["_ASPACE_API_"]
                    repo_results = connect_client.get('/repositories')
                    repo_results_dec = json.loads(
                        repo_results.content.decode())
                    for result in repo_results_dec:
                        # uri looks like /repositories/<id>; keep the id.
                        uri_components = result["uri"].split("/")
                        repositories[result["name"]] = int(uri_components[-1])
                    window_asplog_active = False
                    correct_creds = True
                except Exception as e:
                    # Put each colon-separated part of the error on its own
                    # line so the popup stays readable.
                    error_message = ""
                    if ":" in str(e):
                        for line in str(e).split(":"):
                            error_message += line + "\n"
                    else:
                        error_message = str(e)
                    psg.Popup(
                        "Your username and/or password were entered incorrectly. Please try again.\n\n"
                        + error_message)
            if event_log is None or event_log == 'Cancel':
                # User closed or cancelled the window: stop both loops and
                # tell the caller to shut the program down.
                window_login.close()
                window_asplog_active = False
                correct_creds = True
                close_program = True
                break
        window_login.close()
    return close_program, connect_client, repositories
class ASpace():
    """Convenience wrapper around ASnakeClient pinned to one repository.

    All keyword config is passed straight to ASnakeClient (which also reads
    .archivessnake.yml); an optional ``repository`` key selects the repo id
    used by the record helpers (defaults to '2').
    """

    # this happens when you call ASpace()
    def __init__(self, **config):
        # Repository will default to 2 if not provided
        self.repository = config.pop('repository', '2')
        # Connect to ASpace using .archivessnake.yml
        self.__client = ASnakeClient(**config)
        self.__client.authorize()
        m = re.match(r'\(v?(.+\))', self.__client.get('version').text)
        if m:
            self.version = m[1]
        else:
            self.version = 'unknown version'

    # this automatically sets attributes to ASpace(), so you can
    # ASpace().resources, etc.
    def __getattr__(self, attr):
        '''Treat an unknown plural attribute as an all_ids listing call.

        Raises:
            AttributeError: for private names and non-plural unknown names.
        '''
        # FIX(review): the original implicitly returned None for private or
        # non-plural names, which breaks hasattr(), copy and pickle; the
        # __getattr__ contract is to raise AttributeError instead.
        if attr.startswith('_'):
            raise AttributeError(attr)
        # This sets plural attributes, like resources and archival_objects
        # Not sure if this is safe
        if attr.lower().endswith("s"):
            # Endpoints that are NOT nested under /repositories/<id>
            short_calls = [
                "repositories", "locations", "subjects", "users",
                "vocabularies", "location_profiles", "container_profiles"
            ]
            if attr in short_calls:
                path = "/" + str(attr)
            else:
                path = "/repositories/" + str(self.repository) + "/" + str(attr)
            return jsonmodel_muliple_object(
                self.__client.get(path, params={"all_ids": True}).json(),
                self.__client, self.repository, attr)
        raise AttributeError(attr)

    def resources(self):
        '''return all resources from every repo'''
        repo_uris = [
            r['uri'] for r in self.__client.get('repositories').json()
        ]
        # Lazily walk every repo's paged resource listing.
        yield from chain.from_iterable(
            paged_result('{}/resources'.format(uri), self.__client)
            for uri in repo_uris)

    # not sure if theres a way to pass a variable to implement this with
    # __getattr__
    def resource(self, id):
        '''Fetch a single resource in the default repository by numeric id.'''
        return jsonmodel_single_object(
            self.__client.get("repositories/" + self.repository +
                              "/resources/" + str(id)).json(), self.__client)

    #this doesn't work yet
    def resourceID(self, id):
        '''Look up a resource by its identifier field via the search API.'''
        # NOTE(review): the aq JSON is embedded in the URL without URL
        # encoding, which is likely why this fails — consider passing it via
        # params with json.dumps instead; confirm against the search API.
        result = self.__client.get(
            "/repositories/" + self.repository +
            "/search?page=1&aq={\"query\":{\"field\":\"identifier\", \"value\":\""
            + str(id) + "\", \"jsonmodel_type\":\"field_query\"}}").json()
        resourceURI = result["results"][0]["uri"]
        return jsonmodel_single_object(
            self.__client.get(resourceURI).json(), self.__client)

    def archival_object(self, id):
        '''Fetch an archival object by 32-char ref_id or by numeric uri id.'''
        if isinstance(id, str) and len(id) == 32:
            # its a ref_id
            refList = self.__client.get(
                "repositories/" + self.repository +
                "/find_by_id/archival_objects?page=1&ref_id[]=" +
                str(id)).json()
            return jsonmodel_single_object(
                self.__client.get(
                    refList["archival_objects"][0]["ref"]).json(),
                self.__client)
        #its a uri number
        # FIX(review): removed an unused `params` local and flattened the
        # branching so non-string ids always reach this path.
        return jsonmodel_single_object(
            self.__client.get("repositories/" + self.repository +
                              "/archival_objects/" + str(id)).json(),
            self.__client)

    def agents(self, type, id=None):
        '''List all agents of a type, or fetch one agent by id.'''
        if id is None:  # FIX(review): was `== None`; identity test for None
            return jsonmodel_muliple_object(
                self.__client.get("/agents/" + str(type) +
                                  "?all_ids=true").json(), self.__client,
                self.repository, type)
        return jsonmodel_single_object(
            self.__client.get("/agents/" + str(type) + "/" +
                              str(id)).json(), self.__client)