class ASpace():
    '''High-level entry point to an ArchivesSpace instance.

    Wraps an :class:`ASnakeClient` and exposes API routes as attributes
    (e.g. ``ASpace().repositories``) plus a few convenience properties.'''

    # this happens when you call ASpace()
    def __init__(self, **config):
        '''Connect to ArchivesSpace using .archivessnake.yml, with any
        keyword arguments passed through to :class:`ASnakeClient` as
        configuration overrides.'''
        self.client = ASnakeClient(**config)
        self.client.authorize()
        # The version endpoint's text contains something like "(v2.8.1)".
        # BUG FIX: the original pattern r'\(v?(.+\))' put the closing
        # literal ')' inside the capture group, so the stored version
        # would end with a stray ')'. Capture only the number.
        m = re.match(r'\(v?(.+)\)', self.client.get('version').text)
        if m:
            self.version = m[1]
        else:
            self.version = 'unknown version'

    def __getattr__(self, attr):
        '''returns the JSONModelRelation representing the route with the
        same name as the attribute requested.'''
        if not attr.startswith('_'):
            return JSONModelRelation("/{}".format(attr),
                                     params={"all_ids": True},
                                     client=self.client)
        # BUG FIX: the original fell through and implicitly returned None
        # for underscore-prefixed names. __getattr__ must raise
        # AttributeError for missing attributes, otherwise protocols that
        # probe dunders (copy, deepcopy, pickle, ...) get None back and
        # misbehave.
        raise AttributeError(attr)

    @property
    def resources(self):
        '''return all resources from every repo.'''
        repo_uris = [r['uri'] for r in self.client.get('repositories').json()]
        for resource in chain(*[self.client.get_paged('{}/resources'.format(uri))
                                for uri in repo_uris]):
            yield wrap_json_object(resource, self.client)

    @property
    def agents(self):
        '''returns an AgentRelation.'''
        return AgentRelation("/agents", {}, self.client)

    def by_external_id(self, external_id, record_types=None):
        '''return any resources fetched from the 'by-external-id' route.

        Note: while the route will return differently depending on how many
        records are returned, this method deliberately flattens that out - it
        will _always_ return a generator, even if only one record is found.'''
        params = {"eid": external_id}
        if record_types:
            params['type[]'] = record_types

        res = self.client.get('by-external-id', params=params)
        if res.status_code == 404:
            # Nothing found: end the generator (an empty iteration).
            # The original spelled this `return []`, which inside a
            # generator is just `return` with a misleading value.
            return
        elif res.status_code == 300:  # multiple returns, bare list of uris
            yield from (wrap_json_object({"ref": uri}, self.client)
                        for uri in IndexedSet(res.json()))
        elif res.status_code == 200:  # single obj, redirects to obj with 303->200
            yield wrap_json_object(res.json(), self.client)
        else:
            raise ASnakeBadReturnCode(
                "by-external-id call returned '{}'".format(res.status_code))
elif isinstance(repo, dict): # For pulling all repos from ASpace repo_code = repo['repo_code'] repo_uri = repo['uri'] repo_no = repo_uri.split('/')[-1] else: sys.exit('List of repositories is not valid') # For each repository search the API for the terms in the file list for search_term in search_terms: term = search_term['term'] regex = search_term['regex'] results = client.get_paged(f'repositories/{repo_no}/search', params={ "q": f"primary_type:{primary_types} \ # NOT types:pui \ AND (title:/{regex}/ OR notes:/{regex}/)" }) search_results = list(results) print(repo_code, repo_uri, term, len(search_results)) ## Process the search results for each term for result in search_results: json_data = json.loads(result.get('json')) matches = [] json_data = json.loads(result.get('json')) ## Process title title = json_data.get('title')
"op": "AND", "subqueries": [{ "jsonmodel_type": "field_query", "field": "primary_type", "value": "agent_person", "literal": True }, { "jsonmodel_type": "field_query", "field": "source", "value": "viaf", "literal": True }] } }) ASoutput = list(client.get_paged("/search", params={"filter": query})) print('Found ' + str(len(ASoutput)) + ' agents.') # grab uri out of agent for person in ASoutput: uri = person['uri'] personRecord = client.get(uri).json() lockVersion = str(personRecord['lock_version']) primary_name = personRecord['names'][0]['primary_name'] try: secondary_name = personRecord['names'][0]['rest_of_name'] except: secondary_name = '' try: dates = personRecord['names'][0]['dates'] except:
"value": ref_ID, "literal":True }, { "jsonmodel_type":"field_query", "field":"types", "value":"pui", "literal":True } ] } }) # it can take some time for the posted DOs to be indexed, so... showed_up_yet = None while not showed_up_yet: aoSearch = list(client.get_paged('search', params={"filter": AOQuery})) if any(aoSearch): showed_up_yet = True else: print("DOs not present in search yet, waiting a second for the indexer to catch up") sleep(1) linked_ao_uri = aoSearch[0]['uri'] # Get and store archival objects from above search aoRecord = client.get(linked_ao_uri).json() # Find existing instances and create new ones from new digital objects exising_instance = aoRecord['instances'][0] new_instance = {"instance_type": "digital_object", "digital_object": {"ref": uri}} # Merge old and new instances instances_new = [] instances_new.append(exising_instance)
# Interactive script section: link every top_container in one collection
# (chosen by resource ID) to a single container_profile (chosen by
# container_profile ID).

# provide instructions
print('This script is used to link all top_containers in a single collection (identified by the ArchivesSpace resource ID number) to a single container_profile (identified by the ArchivesSpace container_profile ID number).')
input('Press Enter to continue...')

# have user enter resource id
resource_id = input('Enter resource ID (in this case, you should enter 1): ')

# search for top_containers linked to entered resource id
# NOTE(review): repository 2 is hard-coded here and in the resource URI —
# confirm this script is only ever run against that repository.
endpoint = '/repositories/2/top_containers/search'
advanced_query = json.dumps({
    "filter_term": {
        "field": "collection_uri_u_sstr",
        "value": "/repositories/2/resources/" + resource_id,
        "jsonmodel_type": "field_query"}
})
results = list(client.get_paged(endpoint, params={'aq': advanced_query}))

# collect the ids of each top_container in the search results
# (idiom: build the list directly instead of an append loop)
top_containers = list(gen_dict_extract('id', results))

# GET each top_container listed in top_containers and keep its full JSON record
records = [client.get(top_container).json() for top_container in top_containers]

# have user enter container profile id
profile_id = input('Enter container profile ID (I am going to enter 9. You can select another value, as long that ID is in your instance of ArchivesSpace.): ')
# search AS for archival_object's with level "Web archive"
# get_paged returns an iterator, so wrap in list since we use it multiple times
warchives = list(client.get_paged(
    'search',  # the query URL
    params={
        # use json.dumps to serialize the query JSON into a string -
        # remember that the query is passed as a GET param in the URL
        "filter": json.dumps({
            "query": {
                "jsonmodel_type": "boolean_query",
                "op": "AND",
                "subqueries": [
                    {"jsonmodel_type": "field_query", "field": "primary_type",
                     "value": "archival_object", "literal": True},
                    {"jsonmodel_type": "field_query", "field": "level",
                     "value": "Web archive", "literal": True},
                    # NOTE(review): as written this subquery *restricts*
                    # results to records whose types include "pui"; confirm
                    # whether it was meant to be negated ("negated": True)
                    # to exclude PUI records instead.
                    {"jsonmodel_type": "field_query", "field": "types",
                     "value": "pui", "literal": True}
                ]
            }  # end query
        })  # end json.dumps
    }  # end params
))  # end list and client.get_paged

# idiom: f-string instead of '+'-concatenation (output text unchanged;
# the file already uses f-strings elsewhere)
print(f'Found {len(warchives)} archival objects with the instance type "Web archive."')