def __init__(self, baseurl, username, password, repo_id):
    """Log in to ArchivesSpace and build the record-type lookup table.

    Stores the ASnakeClient on ``self.client`` and the repository id on
    ``self.repo_id``. ``self.TYPE_LIST`` maps a record-type label to a
    two-item list: the model type name and the endpoint path for that type.

    Raises:
        ArchivesSpaceClientError: if ``authorize()`` returns a falsy value.
    """
    self.client = ASnakeClient(
        baseurl=baseurl, username=username, password=password)
    self.repo_id = repo_id

    authorized = self.client.authorize()
    if not authorized:
        raise ArchivesSpaceClientError(
            "Couldn't authenticate user credentials for ArchivesSpace")

    # Every repository-scoped endpoint shares this prefix.
    repo_prefix = "repositories/{repo_id}".format(repo_id=self.repo_id)
    type_list = {}
    type_list["family"] = ["agent_family", "agents/families"]
    type_list["organization"] = [
        "agent_corporate_entity", "agents/corporate_entities"]
    type_list["person"] = ["agent_person", "agents/people"]
    type_list["component"] = [
        "archival_object", repo_prefix + "/archival_objects"]
    type_list["accession"] = ["accession", repo_prefix + "/accessions"]
    type_list["digital object"] = [
        "digital_objects", repo_prefix + "/digital_objects"]
    self.TYPE_LIST = type_list
def __init__(self, **config):
    """Connect to ArchivesSpace and record the backend version string.

    Args:
        **config: keyword arguments forwarded unchanged to ASnakeClient.

    Sets ``self.client`` to the authorized client and ``self.version`` to the
    text found inside the first parenthesised group of the ``version``
    response (a leading ``v`` is dropped), or ``'unknown version'`` when no
    such group exists.
    """
    # Connect to ASpace using .archivessnake.yml
    self.client = ASnakeClient(**config)
    self.client.authorize()

    # re.search rather than re.match: the parenthesised version does not have
    # to sit at the very start of the response text. The group is non-greedy
    # and excludes the closing parenthesis, which the old pattern captured.
    m = re.search(r'\(v?(.+?)\)', self.client.get('version').text)
    self.version = m[1] if m else 'unknown version'
def test_authorize():
    """authorize() returns a 64-char lowercase-hex token and installs it on the session."""
    client = ASnakeClient()  # relies on default config, see ASnakeConfig class
    token = client.authorize()
    hex_digits = '0123456789abcdef'

    assert isinstance(token, str)
    assert len(token) == 64
    assert all(ch in hex_digits for ch in token)
    assert client.session.headers['X-ArchivesSpace-Session'] == token

    # Try to get admin user info, should only work if we're authed as admin
    admin_response = client.get('users/1')
    assert admin_response.status_code == 200
class ASpace():
    """Attribute-style entry point to an ArchivesSpace backend.

    Holds an authorized ASnakeClient on ``self.client`` and exposes API routes
    as relation objects, either through explicit properties or dynamically
    through ``__getattr__``.
    """

    # this happens when you call ASpace()
    def __init__(self, **config):
        """Create and authorize the client, then record the backend version.

        Args:
            **config: keyword arguments forwarded unchanged to ASnakeClient.
        """
        # Connect to ASpace using .archivessnake.yml
        self.client = ASnakeClient(**config)
        self.client.authorize()
        self.version = self._parse_version(self.client.get('version').text)

    @staticmethod
    def _parse_version(text):
        """Return the version found in the first ``(...)`` group of *text*.

        A leading ``v`` inside the parentheses is dropped. Returns
        ``'unknown version'`` when *text* has no parenthesised group.
        """
        # re.search (not re.match) so the group may appear anywhere in the
        # text; non-greedy capture so the closing parenthesis is excluded.
        m = re.search(r'\(v?(.+?)\)', text)
        return m[1] if m else 'unknown version'

    def __getattr__(self, attr):
        '''returns the JSONModelRelation representing the route with the same name as the attribute requested.

        Names starting with an underscore are never treated as routes; they
        raise AttributeError so that hasattr() and getattr() defaults behave
        normally instead of silently receiving None.'''
        if attr.startswith('_'):
            raise AttributeError(attr)
        return JSONModelRelation("/{}".format(attr), params={"all_ids": True}, client=self.client)

    @property
    def resources(self):
        '''return all resources from every repo.'''
        return ResourceRelation({}, self.client)

    @property
    def agents(self):
        '''returns an AgentRelation.'''
        return AgentRelation("/agents", {}, self.client)

    @property
    def users(self):
        '''returns a UserRelation.'''
        return UserRelation("/users", {}, self.client)

    def by_external_id(self, external_id, record_types=None):
        '''return any resources fetched from the 'by-external-id' route.

        Note: while the route will return differently depending on how many records are returned,
        this method deliberately flattens that out - it will _always_ return a generator, even if only
        one record is found.

        Raises ASnakeBadReturnCode for any status other than 404, 300 or 200.'''
        params = {"eid": external_id}
        if record_types:
            params['type[]'] = record_types

        res = self.client.get('by-external-id', params=params)
        if res.status_code == 404:
            # Nothing found: the generator simply yields nothing.
            return
        elif res.status_code == 300:  # multiple returns, bare list of uris
            yield from (wrap_json_object({"ref": uri}, self.client) for uri in IndexedSet(res.json()))
        elif res.status_code == 200:  # single obj, redirects to obj with 303->200
            yield wrap_json_object(res.json(), self.client)
        else:
            raise ASnakeBadReturnCode("by-external-id call returned '{}'".format(res.status_code))

    def from_uri(self, uri):
        '''returns a JSONModelObject representing the URI passed in'''
        return wrap_json_object(self.client.get(uri).json(), self.client)
def __init__(self, **config):
    """Connect to ArchivesSpace, remembering which repository to work in.

    Args:
        **config: ``repository`` (optional, defaults to ``'2'``) is removed
            and stored on ``self.repository``; every remaining keyword is
            forwarded to ASnakeClient.

    Sets ``self.version`` to the text found inside the first parenthesised
    group of the ``version`` response (a leading ``v`` is dropped), or
    ``'unknown version'`` when no such group exists.
    """
    # Repository will default to 2 if not provided
    self.repository = config.pop('repository', '2')

    # Connect to ASpace using .archivessnake.yml
    self.__client = ASnakeClient(**config)
    self.__client.authorize()

    # re.search rather than re.match so the parenthesised version may appear
    # anywhere in the text; the non-greedy group excludes the closing
    # parenthesis, which the previous pattern captured.
    m = re.search(r'\(v?(.+?)\)', self.__client.get('version').text)
    self.version = m[1] if m else 'unknown version'
def __init__(self):
    """Authorize a default ASnakeClient and cache schema and enumeration data.

    Attributes set:
        client: the ASnakeClient instance.
        auth: the value returned by ``client.authorize()``.
        all_schemas: result of ``self.get_schemas()``.
        all_enums: result of ``self.get_dynamic_enums()``.
        schema_list: the keys of ``all_schemas``.
        type_list: the distinct keys found across all values of
            ``all_schemas`` (order is not defined).
        jsonmodel_pattern: compiled regex for ``JSONModel(:...) <kind>`` text.
    """
    self.client = ASnakeClient()
    self.auth = self.client.authorize()
    self.all_schemas = self.get_schemas()
    #a list of all enumerations
    #COULD ALSO DO /config/enumerations/names/:enum_name
    self.all_enums = self.get_dynamic_enums()
    #gets the list of schema names
    self.schema_list = list(self.all_schemas)
    #gets the type list
    self.type_list = list(
        {k for value in self.all_schemas.values() for k in value})
    # Raw string: '\(' and '\s' are invalid escapes in a normal literal.
    # NOTE(review): 'uri' is tried before 'uri_or_object', so the third group
    # captures only 'uri' for 'uri_or_object' text - confirm that is intended.
    self.jsonmodel_pattern = re.compile(
        r'(JSONModel)(\(:.*?\)\s)(uri|object|uri_or_object)')
def main():
    """Normalise free-text extent types on every resource in repository 2.

    Each resource is fetched, any extent whose ``extent_type`` is one of the
    known variants is rewritten to its canonical key, and the record is posted
    back only when something changed. A summary line is printed at the end.
    """
    client = ASnakeClient(baseurl='XXXX',
                          username='******',
                          password='******')
    client.authorize()

    changes = {
        'linear_feet': ['Linear Feet', 'linear ft.', 'Linear Foot'],
        'cubic_feet': ['Cubic Feet'],
        'gigabytes': ['Gigabytes']
    }
    # Invert the table once so each extent needs a single lookup; setdefault
    # keeps the first canonical key for a variant, as the nested loop did.
    canonical_for = {}
    for canonical, variants in changes.items():
        for variant in variants:
            canonical_for.setdefault(variant, canonical)

    res_records = client.get('repositories/2/resources',
                             params={'all_ids': True}).json()
    found_records = set()

    for record in tqdm(res_records):
        rec_uri = 'repositories/2/resources/{0}'.format(record)
        res_record = client.get(rec_uri).json()
        updated_record = deepcopy(res_record)
        try:
            extents = res_record.get('extents') or []
            for ext_index, extent in enumerate(extents):
                canonical = canonical_for.get(extent.get('extent_type'))
                if canonical is not None:
                    updated_record['extents'][ext_index][
                        'extent_type'] = canonical

            if extents and extents != updated_record['extents']:
                response = client.post(rec_uri, json=updated_record)
                if response.status_code == 200:
                    logger.info('Extent change successfully pushed',
                                rec=record,
                                response=response)
                    found_records.add(record)
                else:
                    logger.info('Extent change failed',
                                rec=record,
                                response=response)
        except Exception as exc:
            # Still best-effort (one bad record must not stop the run), but
            # the failure is now logged instead of silently discarded.
            logger.info('Extent change raised an exception',
                        rec=record,
                        error=str(exc))

    print('{0} resource records checked; {1} records updated.'.format(
        len(res_records), len(found_records)))
import json, time, runtime
from asnake.client import ASnakeClient
from asnake.client.web_client import ASnakeAuthError

# Create a client
client = ASnakeClient()
client.authorize()  # login, using default values

# print instructions
print ("This script will add the container_profiles included in a separate json file to ArchivesSpace.")
input("Press Enter to continue...")

# post container_profiles
print ("The following container profiles have been added to ArchivesSpace:")

# Read the whole file inside a context manager so the handle is closed
# before any network requests are made.
with open("containerProfiles.json") as jsonfile:
    container_profiles = json.load(jsonfile)

for container_profile in container_profiles:
    post = client.post("/container_profiles", json=container_profile).json()
    print (post)

print ("You've just completed your first API POST. Congratulations!")
#/usr/bin/python3 #~/anaconda3/bin/python from asnake.client import ASnakeClient import asnake.logging as logging logging.setup_logging(filename="date_update.log", filemode="a") logger = logging.get_logger("date_updating") #Log Into ASpace and set repo to RL aspace_client = ASnakeClient(baseurl="[backendURL]", username="******", password="******") aspace_client.authorize() repo = aspace_client.get("repositories/2").json() print("Logged into: " + repo['name']) print("Getting list of resources...") resources_list = aspace_client.get( "repositories/2/resources?all_ids=true").json() resources_sorted = sorted(resources_list, reverse=True) for resource in resources_sorted: try: resource_json = aspace_client.get("repositories/2/resources/" + str(resource)).json() #print (resource_json) resource_uri = resource_json['uri'] print("updating: " + resource_uri) resource_update = aspace_client.post(resource_json['uri'], json=resource_json)
from secrets import * from asnake.aspace import ASpace from asnake.client import ASnakeClient aspace = ASpace(baseurl=as_api, username=as_un, password=as_pw) client = ASnakeClient(baseurl=as_api, username=as_un, password=as_pw) client.authorize() resource_ids = ["/repositories/4/resources/4103", "/repositories/4/resources/4064", "/repositories/4/resources/2798", "/repositories/4/resources/1001", "/repositories/4/resources/4048", "/repositories/2/resources/633", "/repositories/2/resources/723", "/repositories/2/resources/748", "/repositories/2/resources/414"] # "/repositories/5/resources/5071" - UA collection - Steve to check with Kat for resource_id in resource_ids: unknown_count = 0 uri_breakup = resource_id.split("/") res_id = uri_breakup[4] repo_id = uri_breakup[2] try: rl_repo = aspace.repositories(repo_id) resource_record = rl_repo.resources(res_id).tree resource_tree = resource_record.walk print(rl_repo.resources(res_id).json()["title"]) for node in resource_tree: ao_json = client.get(node.uri).json() for instance in ao_json["instances"]: if "sub_container" in instance.keys(): indicators = [] types = [] for key, value in instance["sub_container"].items(): if "indicator_" in key:
import json, csv, runtime
from asnake.client import ASnakeClient

# print instructions
print(
    'This script takes viafCorporateResults.csv and posts the organizations as corporate_entities to ArchivesSpace.'
)
input('Press Enter to continue...')

# This is where we connect to ArchivesSpace.
client = ASnakeClient()
client.authorize()  # login, using default values

targetFile = 'viafCorporateResults.csv'

# The reader gets its own name (the csv module is no longer shadowed) and the
# file is closed automatically when the loop finishes.
with open(targetFile, newline='') as csvFile:
    for row in csv.DictReader(csvFile):
        orgRecord = {}
        # .get() so a row without an 'lc' value cannot raise KeyError below.
        # NOTE(review): an empty cell is '' (not None) and therefore still
        # counts as 'naf' - confirm that is intended.
        lcId = row.get('lc')
        # changed this since ASpace doesn't come with 'viaf' as an option for source out of the box.
        source = 'naf' if lcId is not None else 'local'
        orgRecord['names'] = [{
            'primary_name': row['result'],
            'sort_name': row['result'],
            'source': source,
            'authority_id': lcId
        }]

        post = client.post('/agents/corporate_entities', json=orgRecord).json()
        print(post, '\n')
#Then, checks to see if REsources have finding_aid_status = 'published' in ASpace #If so, exports EADs to specified location using EADID as filename import io import csv from asnake.client import ASnakeClient from asnake.aspace import ASpace aspace = ASpace(baseurl="[ASPACE BACKEND URL]", username="******", password="******") #Log Into ASpace and set repo to RL aspace_client = ASnakeClient(baseurl="[ASPACE BACKEND URL]", username="******", password="******") aspace_client.authorize() repo = aspace_client.get("repositories/2").json() print("Logged into: " + repo['name']) destination = 'C:/users/nh48/desktop/as_exports_temp/' input_csv = input("Path to CSV Input: ") #output will be input CSV plus some extra columns for reporting on actions taken, errors, etc. updated_records_csv = input("Path to CSV Output: ") #If Resource finding aid status = published, export the EAD for the resource, save to folder def if_published_export_EAD(resource_uri):
import requests, csv, json, time from urllib.parse import quote from asnake.client import ASnakeClient viafURL = 'http://viaf.org/viaf/search?query=local.personalNames+%3D+%22' # # print instructions print( 'This script queries existing person agent records in ArchivesSpace with the source of "viaf" and updates them with the proper/updated name form from VIAF (if one exists) and appends the VIAF URI to the existing records. Please note: This is a PROOF OF CONCEPT script, and should not be used in production settings without thinking this through!' ) input('Press Enter to continue...') # This is where we connect to ArchivesSpace. See authenticate.py client = ASnakeClient() client.authorize() # login, using default values # search AS for person agents with source "viaf" query = json.dumps({ "query": { "jsonmodel_type": "boolean_query", "op": "AND", "subqueries": [{ "jsonmodel_type": "field_query", "field": "primary_type", "value": "agent_person", "literal": True }, { "jsonmodel_type": "field_query", "field": "source",
class ArchivesSpaceClient(object):
    """Client to get and receive data from ArchivesSpace."""

    def __init__(self, baseurl, username, password, repo_id):
        """Authorize against ArchivesSpace and build the record-type table.

        ``TYPE_LIST`` maps a record-type label to ``[model type, endpoint]``.

        Raises:
            ArchivesSpaceClientError: if ``authorize()`` returns a falsy value.
        """
        self.client = ASnakeClient(baseurl=baseurl,
                                   username=username,
                                   password=password)
        self.repo_id = repo_id
        if not self.client.authorize():
            raise ArchivesSpaceClientError(
                "Couldn't authenticate user credentials for ArchivesSpace")
        self.TYPE_LIST = {
            "family": ["agent_family", "agents/families"],
            "organization":
            ["agent_corporate_entity", "agents/corporate_entities"],
            "person": ["agent_person", "agents/people"],
            "component": [
                "archival_object",
                "repositories/{repo_id}/archival_objects".format(
                    repo_id=self.repo_id)
            ],
            "accession": [
                "accession",
                "repositories/{repo_id}/accessions".format(
                    repo_id=self.repo_id)
            ],
            # NOTE(review): every other model type here is singular; confirm
            # "digital_objects" is really the value the search expects.
            "digital object": [
                "digital_objects",
                "repositories/{repo_id}/digital_objects".format(
                    repo_id=self.repo_id)
            ]
        }

    def send_request(self, method, url, data=None, **kwargs):
        """Base method for sending requests to ArchivesSpace.

        Args:
            method: name of the client method to call (e.g. "get", "post").
            url: endpoint or URI to request.
            data: optional payload; JSON-encoded and sent as the body.
            **kwargs: passed through to the client method.

        Returns:
            The decoded JSON body of a 200 response.

        Raises:
            ArchivesSpaceClientError: for any non-200 response that is not
                the accession-number collision handled below.
        """
        # Only attach a body when there is one; previously a GET without data
        # was sent with the literal body "null".
        if data is not None:
            kwargs["data"] = json.dumps(data)
        r = getattr(self.client, method)(url, **kwargs)
        if r.status_code == 200:
            return r.json()

        error = r.json().get("error")
        # Retry only when the error really is a per-field dict that flags
        # id_0 and the payload carries an id_1 to bump. The old code called
        # .get() on string errors and indexed into a None payload.
        if (isinstance(error, dict) and error.get("id_0")
                and isinstance(data, dict) and "id_1" in data):
            # Account for indexing delays by bumping up to the next
            # accession number.
            data["id_1"] = str(int(data["id_1"]) + 1).zfill(3)
            return self.create(data, "accession")
        raise ArchivesSpaceClientError(
            "Error sending {} request to {}: {}".format(method, url, error))

    def retrieve(self, url, **kwargs):
        """GET *url* and return the decoded JSON body."""
        return self.send_request("get", url, **kwargs)

    def create(self, data, type, **kwargs):
        """POST *data* to the endpoint registered for *type* in TYPE_LIST."""
        return self.send_request("post", self.TYPE_LIST[type][1], data,
                                 **kwargs)

    def update(self, uri, data, **kwargs):
        """POST *data* to an existing record's *uri*."""
        return self.send_request("post", uri, data, **kwargs)

    def get_or_create(self, type, field, value, last_updated, consumer_data):
        """
        Attempts to find and return an object in ArchivesSpace.
        If the object is not found, creates and returns a new object.

        The search is tried first. When it has no results, every record of
        this type modified since ``last_updated - 120`` is fetched and
        compared on *field*, and only then is a new record created from
        *consumer_data*. Returns the URI of the matching or created record.

        Raises:
            ArchivesSpaceClientError: wrapping any exception raised on the way.
        """
        model_type = self.TYPE_LIST[type][0]
        endpoint = self.TYPE_LIST[type][1]
        query = json.dumps({
            "query": {
                "field": field,
                "value": value,
                "jsonmodel_type": "field_query"
            }
        })
        try:
            search = self.client.get(
                "repositories/{}/search".format(self.repo_id),
                params={
                    "page": 1,
                    "type[]": model_type,
                    "aq": query
                }).json()
            if search["results"]:
                return search["results"][0]["uri"]

            recent_ids = self.client.get(endpoint,
                                         params={
                                             "all_ids": True,
                                             "modified_since":
                                             last_updated - 120
                                         }).json()
            for ref in recent_ids:
                candidate = self.client.get("{}/{}".format(endpoint,
                                                           ref)).json()
                if candidate[field] == str(value):
                    return candidate["uri"]
            return self.create(consumer_data, type).get("uri")
        except Exception as e:
            raise ArchivesSpaceClientError(
                "Error finding or creating object in ArchivesSpace: {}".format(
                    e)) from e

    def next_accession_number(self):
        """
        Finds the next available accession number by searching for accession
        numbers with the current year, and then incrementing.

        The newest search result's ``identifier`` is split on "-"; when its
        first part equals the current year, the second part is incremented.
        The value returned is "YYYY:NNN" (colon-joined), where NNN is
        zero-padded to three digits and starts at 001.

        Raises:
            ArchivesSpaceClientError: wrapping any exception raised on the way.
        """
        current_year = str(date.today().year)
        try:
            query = json.dumps({
                "query": {
                    "field": "four_part_id",
                    "value": current_year,
                    "jsonmodel_type": "field_query"
                }
            })
            r = self.client.get("repositories/{}/search".format(self.repo_id),
                                params={
                                    "page": 1,
                                    "type[]": "accession",
                                    "sort": "identifier desc",
                                    "aq": query
                                }).json()
            number = 1
            if r.get("total_hits") >= 1:
                parts = r["results"][0]["identifier"].split("-")
                if parts[0] == current_year:
                    number = int(parts[1]) + 1
            return ":".join([current_year, str(number).zfill(3)])
        except Exception as e:
            raise ArchivesSpaceClientError(
                "Error retrieving next accession number from ArchivesSpace: {}"
                .format(e)) from e
from datetime import datetime from asnake.client import ASnakeClient from asnake.aspace import ASpace import asnake.logging as logging logging.setup_logging(level='DEBUG', filename="remove_fake_wrapper.log", filemode="a") aspace = ASpace(baseurl="[ASPACE API URL]", username="******", password="******") #Log Into ASpace and set repo to RL aspace_client = ASnakeClient(baseurl="[ASPACE API URL]", username="******", password="******") aspace_client.authorize() #Set target repo repo = aspace_client.get("repositories/2").json() print("Logged into: " + repo['name']) rl_repo = aspace.repositories(2) #input is output of SQL query above input_csv = input("Path to CSV Input: ") #output will be input CSV plus some extra columns for reporting on actions taken, errors, etc. updated_resources_csv = input("Path to CSV Output: ") #Test if more than one direct child of Resource Object
import re
from asnake.aspace import ASpace
from asnake.client import ASnakeClient

# Report the inclusive dates of one resource whose expression is a bare
# four-digit year.
client = ASnakeClient()
client.authorize()

aspace = ASpace()
repo = aspace.repositories(2)
collection = repo.resources(189)

four_digit_year = re.compile(r'^\d{4}$')

for date in collection.dates:
    # Defaults are kept for any field the KeyError handler skips over.
    date_type = date_expression = date_begin = date_end = ''
    try:
        date_type = date.date_type
        date_expression = date.expression
        date_begin = date.begin
        date_end = date.end
    except KeyError:
        pass

    is_year_only = four_digit_year.match(date_expression)
    if not (is_year_only and date_type == 'inclusive'):
        continue

    print('====================')
    print(collection.id_0 + '\t' + collection.title + '\t' + collection.uri)
    print(date_expression + '\t' + date_type + '\t' + date_begin + '\t' +
          date_end)
+ colID.replace(".", "-") + "aspace_" + refID + "?format=json", verify=False) if arclight.status_code == 200: parentList = [] itemData = arclight.json() for parent in itemData["response"][ "document"]["parent_ssm"][1:]: parentList.append( parent.split("_")[1]) parents = "|".join(parentList) else: #for new objects not yet indexed in ArcLight if tree is None: from asnake.client import ASnakeClient client = ASnakeClient() client.authorize() ref = client.get( "repositories/2/find_by_id/archival_objects?ref_id[]=" + refID).json() item = client.get( ref["archival_objects"][0] ["ref"]).json() resource = client.get( item["resource"] ["ref"]).json() tree = client.get(resource["tree"] ["ref"]).json() else: ref = client.get(
prefix = '...' if position[1] + char_length > len(text): end = len(text) postfix = '' else: end = position[1] + char_length postfix = '...' context.append(prefix + text[start:end] + postfix) context = ' | '.join(context) return term_count, context client = ASnakeClient() primary_types = '/(resource|archival_object|accession|digital_object)/' results_file = 'term_audit_results.csv' # Repo list can either be a command line argument or prompted if len(sys.argv) == 2: repos = sys.argv[1] elif len(sys.argv) < 2: repos = input('Enter repository number (e.g., 1): ') else: sys.exit('Run script again with valid repo number(s)') if repos: repos = re.split(r'\D+', repos) repos = list(filter(None, repos))
#!/usr/bin/env python from asnake.client import ASnakeClient import pandas as pd import datetime from tqdm import tqdm client = ASnakeClient(baseurl='XXX', username='******', password='******') client.authorize() accession_records = client.get('repositories/2/accessions', params={ 'all_ids': True }).json() unit_column = [] extent_column = [] collection_no_column = [] created_column = [] start = datetime.datetime.strptime('2017-07-01', '%Y-%m-%d') end = datetime.datetime.strptime('2018-07-31', '%Y-%m-%d') for record in tqdm(accession_records): accession_uri = client.get('repositories/2/accessions/' + str(record)).json() create_date = accession_uri['create_time'][0:10] date_parsed = datetime.datetime.strptime(create_date, '%Y-%m-%d') if start <= date_parsed <= end: coll_num = accession_uri['id_0'] extents = accession_uri['extents']
import dacs import time import csv import shutil import csv import requests import json from asnake.client import ASnakeClient import asnake.logging as logging print ("\tConnecting to ArchivesSpace") client = ASnakeClient(baseurl="http://localhost:8092", username="******", password="******") client.authorize() logging.setup_logging(stream=sys.stdout, level='INFO') with open('items_output.csv', mode='r') as csv_file: csv_reader = csv.DictReader(csv_file) line_count = 0 for row in csv_reader: if line_count == 0: line_count += 1 title=str(row['Title']) identifier=str(row['Identifier'])
total = len(filelist) count = 0 for f in filelist: f = f.replace('\n', '') count += 1 makeRow(getAo(f),f) print('Row added! - ' + str(count) + "/" + str(total)) # enter aspace login info config = configparser.ConfigParser() config.read('local_settings.cfg') baseurl= config.get('ArchivesSpace', 'baseURL') user = input('ArchivesSpace username: '******'ArchivesSpace password:'******'\a')
def default_client(cls):
    '''Return the ASnakeClient cached on the class, creating and caching one on first use.'''
    cached = cls.__default_client
    if cached:
        return cached

    # Imported lazily, only when a client actually has to be built.
    from asnake.client import ASnakeClient
    cls.__default_client = ASnakeClient()
    return cls.__default_client
from asnake.client import ASnakeClient import re import logging from secrets import * id_field_regex = re.compile(r"(^id_+\d)") logging.basicConfig(filename="unpublish.log", level=logging.INFO) as_username = input("Enter your ArchivesSpace username: "******"Enter your ArchivesSpace password: "******"repositories").json() for repo in repos: print(repo["name"]) repo_id = repo["uri"].split("/")[2] resources = client.get("repositories/{}/resources".format(repo_id), params={"all_ids": True}).json() for resource_id in resources: resource = client.get("repositories/{}/resources/{}".format(repo_id, resource_id)).json() combined_id = "" for field, value in resource.items(): id_match = id_field_regex.match(field) if id_match: combined_id += value + "-" combined_id = combined_id[:-1] if "[CLOSED]" in combined_id: logging.info("Unpublishing {} from {}".format(combined_id, repo["name"])) print(combined_id) all_uris = client.get("repositories/{}/resources/{}/ordered_records".format(repo_id,
import sys import dacs import time import csv import shutil from git import Repo from datetime import datetime from subprocess import Popen, PIPE, STDOUT import asnake.logging as logging from asnake.client import ASnakeClient #from asnake.aspace import ASpace print (str(datetime.now()) + " Exporting Records from ArchivesSpace") print ("\tConnecting to ArchivesSpace") client = ASnakeClient() client.authorize() logging.setup_logging(stream=sys.stdout, level='INFO') #repo = ASpace().repositories(2) __location__ = os.path.dirname(os.path.realpath(__file__)) lastExportTime = time.time() try: timePath = os.path.join(__location__, "lastExport.txt") with open(timePath, 'r') as timeFile: startTime = int(timeFile.read().replace('\n', '')) timeFile.close() except: startTime = 0
def buildSelections(colID, refID=None, filter=None, date=False, verbose=False):
    """Export catalog records for a collection (or one component) to JSON files.

    Writes two files under ``/media/SPE/uploads``: the ``response`` part of
    the description record (``desc_<id>.json``) and the sorted list of
    matching catalog items (``<id>.json``).

    Args:
        colID: collection identifier; dots are replaced by dashes in URLs and
            file names.
        refID: optional component ref id; when given, items are selected by
            parent record instead of by collection number.
        filter: optional extra query-string fragment appended to the catalog
            URL.
        date: falsy to keep every item; otherwise a string, either "YYYY"
            (keep items dated before that year) or "YYYY-YYYY" (keep items
            within the inclusive range). Undated items are dropped whenever a
            date filter is active.
        verbose: accepted but not used by this function.
    """
    client = ASnakeClient()
    client.authorize()

    collection = []
    page = 1

    outDir = "/media/SPE/uploads"
    if refID:
        url = "https://archives.albany.edu/catalog?f[record_parent_sim][]=" + refID + "&format=json&per_page=100"
        outFile = os.path.join(outDir, refID + ".json")
        descriptionURL = "https://archives.albany.edu/description/catalog/" + colID.replace(".", "-") + "aspace_" + refID
        outDesc = os.path.join(outDir, "desc_" + refID + ".json")
    else:
        url = "https://archives.albany.edu/catalog?f[collection_number_sim][]=" + colID + "&format=json&per_page=100"
        outFile = os.path.join(outDir, colID.replace(".", "-") + ".json")
        descriptionURL = "https://archives.albany.edu/description/catalog/" + colID.replace(".", "-")
        outDesc = os.path.join(outDir, "desc_" + colID.replace(".", "-") + ".json")
    if filter:
        url = url + "&" + filter

    print (descriptionURL + "?format=json")
    # NOTE(review): verify=False disables TLS certificate checking here and in
    # getPage below; kept as-is, but confirm it is still required.
    r = requests.get(descriptionURL + "?format=json", verify=False)
    print (r.status_code)
    with open(outDesc, 'w', encoding='utf-8', newline='') as f:
        json.dump(r.json()["response"], f, ensure_ascii=True, indent=4)

    def getPage(page, collection, url):
        """Append items from *page* and every following page to *collection*."""
        # Iterative paging: the previous recursive form added one stack frame
        # per page and could hit the recursion limit on large result sets.
        while True:
            r = requests.get(url + "&page=" + str(page), verify=False)
            print (r.status_code)
            response = r.json()["response"]  # parse the body once per page
            for item in response["docs"]:
                obj = {}
                obj["title"] = item["title_tesim"][0]
                obj["date"] = item["date_created_tesim"][0]
                #print (item)
                ref_id = item["archivesspace_record_tesim"][0]
                obj["thumb"] = "https://archives.albany.edu" + item["thumbnail_path_ss"]
                obj["url"] = "https://archives.albany.edu/concern/" + item["has_model_ssim"][0].lower() + "s/" + item["id"]

                # Look up the archival object to get a normalised date.
                record = client.get("repositories/2/find_by_id/archival_objects?ref_id[]=" + ref_id).json()
                ao = client.get(record["archival_objects"][0]["ref"]).json()
                print (ao["ref_id"])
                firstDate = ao["dates"][0]
                dateNormal = firstDate["begin"]
                if "end" in firstDate.keys():
                    dateNormal = dateNormal + "/" + firstDate["end"]
                if "undated" in firstDate["expression"].lower():
                    # Sentinel for undated material.
                    obj["date_normal"] = "9999"
                else:
                    obj["date_normal"] = dateNormal

                if not date:
                    collection.append(obj)
                    continue
                if obj["date"].lower() == "undated":
                    continue

                # Reduce the display date to its leading year token.
                if obj["date"].lower().startswith("ca."):
                    objDate = obj["date"].split(" ")[1]
                elif "-" in obj["date"]:
                    objDate = obj["date"].split("-")[0]
                else:
                    objDate = obj["date"].split(" ")[0]
                print (objDate)
                try:
                    if "-" in date:
                        if int(objDate) >= int(date.split("-")[0]) and int(objDate) <= int(date.split("-")[1]):
                            collection.append(obj)
                    else:
                        if int(objDate) < int(date):
                            collection.append(obj)
                except (TypeError, ValueError):
                    # Non-numeric year token or non-string filter value.
                    print ("Date Error: " + objDate)

            # Continue only on an explicit False, as the original test did.
            if not (response["pages"]["last_page?"] == False):
                return
            page += 1

    getPage(page, collection, url)

    #print (collection)
    # Two stable sorts: the date key is primary and the title's first word
    # breaks ties.
    sortedTitle = sorted(collection, key = lambda i: i['title'].split(" ")[0])
    sortedCollection = sorted(sortedTitle, key = lambda i: i['date_normal'].split(" ")[0])
    print (len(sortedCollection))

    with open(outFile, 'w', encoding='utf-8', newline='') as f:
        json.dump(sortedCollection, f, ensure_ascii=True, indent=4)
#!/usr/bin/env python
from asnake.client import ASnakeClient
from tqdm import tqdm
import pandas as pd
import datetime
import re

client = ASnakeClient(baseurl='XXX', username='******', password='******')
client.authorize()

# Compiled once at import time instead of on every pattern_matcher() call.
_ANGLE_BRACKET_PATTERN = re.compile(r'^.*\>$|^\<.*$|^.*\<.*$|\>')


def pattern_matcher(x):
    """Match a resource title that has a stray angle bracket.

    Returns a match object when the title contains '<', ends with '>' or
    starts with '>'; otherwise returns None. The pattern is applied with
    ``match``, so it is anchored at the start of the title.
    """
    return _ANGLE_BRACKET_PATTERN.match(x)


def data_framer(rec_ids, rec_index, rec_titles):
    """Create a DataFrame from generated lists.

    The result has the columns 'Resource_no' (from *rec_index*) and
    'Collection_Title' (from *rec_titles*), indexed by 'Identifier'
    (from *rec_ids*).
    """
    rec_df = pd.DataFrame()
    rec_df['Resource_no'] = rec_index
    rec_df['Identifier'] = rec_ids
    rec_df['Collection_Title'] = rec_titles
    indexed_rec_df = rec_df.set_index(['Identifier'])
    return indexed_rec_df
import json, csv, runtime
from asnake.client import ASnakeClient

# print instructions
print(
    'This script replaces existing fauxcodes with real barcodes (linked in a separate csv file) in ArchivesSpace.'
)
input('Press Enter to connect to ArchivesSpace and post those barcodes...')

# This is where we connect to ArchivesSpace. See authenticate.py
client = ASnakeClient()
client.authorize()

# open csv and generate dict; the context manager closes the file even if a
# request fails part-way through.
with open('barcodes.csv', newline='') as barcodeFile:
    reader = csv.DictReader(barcodeFile)

    # GET each top_container listed in top_containers and add to records
    print('The following barcodes have been updated in ArchivesSpace:')
    for row in reader:
        uri = row['uri']
        container = client.get(uri).json()
        container['barcode'] = row['real']
        post = client.post(uri, json=container).json()
        print(post)
# create formatter and add it to the handlers formatter = logging.Formatter( '%(asctime)s - %(name)s - %(levelname)s - %(message)s') fh.setFormatter(formatter) ch.setFormatter(formatter) # add the handlers to the logger logger.addHandler(fh) logger.addHandler(ch) config = configparser.ConfigParser() config.read('settings.ini') args.config = config try: client = ASnakeClient( baseurl=config['aspace_credentials']['api_host'], username=config['aspace_credentials']['username'], password=config['aspace_credentials']['password']) except KeyError as e: logger.error('settings.ini does not exist or is invalid') raise e # Simple sanity check to make sure client is setup try: resp = client.get('/') if not resp.ok: resp.raise_for_status() except: logger.error('Unable to contact ArchivesSpace instance at %s' % config['aspace_credentials']['api_host']) raise APIContactError( 'Unable to contact ArchivesSpace instance at %s' %
def main():
    """Total resource extents by collection prefix and unit and write a JSON report.

    Every resource in repository 2 is fetched and its extents are collected
    (megabytes are converted to gigabytes by dividing by 1000). Amounts are
    then summed per identifier prefix ('MS' or 'UA') and per unit family
    (linear feet, gigabytes, cubic feet). The rounded totals are written to
    ``extent_calculator_<YYYY-MM-DD>.json``.
    """
    client = ASnakeClient(baseurl='XXXX',
                          username='******',
                          password='******')
    client.authorize()

    catalog = {
        'linear': ['linear_feet', 'Linear Feet', 'linear ft.', 'Linear Foot'],
        'cubic': ['cubic_feet', 'Cubic Feet'],
        'gb': ['gigabytes', 'Gigabytes']
    }

    res_records = (client.get('repositories/2/resources',
                              params={'all_ids': True})).json()

    data_list = []
    print('Compiling resource records from API...')
    for record in tqdm(res_records):
        res_record = client.get(
            'repositories/2/resources/{0}'.format(record)).json()
        try:
            for x in res_record['extents']:
                if x['extent_type'] == 'megabytes':
                    data_list.append({
                        'id': res_record['id_0'],
                        'amount': str(float(x['number']) / 1000),
                        'units': 'gigabytes'
                    })
                else:
                    data_list.append({
                        'id': res_record['id_0'],
                        'amount': x['number'],
                        'units': x['extent_type']
                    })
        except (KeyError, TypeError, ValueError):
            # Best-effort: a record with missing keys or a non-numeric
            # megabyte amount is skipped from this point on.
            pass

    # One running total per (prefix, unit family) replaces six separate
    # counters and the six-way elif chain.
    prefixes = ('MS', 'UA')
    totals = {(prefix, unit): 0 for prefix in prefixes for unit in catalog}

    print('Analyzing extents in resource data...')
    for entry in data_list:
        try:
            prefix = next(
                (p for p in prefixes if entry['id'].startswith(p)), None)
            unit = next((name for name, variants in catalog.items()
                         if entry['units'] in variants), None)
            if prefix is not None and unit is not None:
                totals[(prefix, unit)] += float(entry['amount'])
        except (AttributeError, KeyError, TypeError, ValueError):
            exception = input(
                'Uh oh, looks like the analysis ran into a snag; most likely, '
                'a unit of extent for {0} ({1}) is not a pure number. Enter '
                '\'stop\' to kill the process so you can fix the record. Alternatively, '
                'you can enter \'continue\' to skip this entry and keep the analysis '
                'going.'.format(entry['id'], entry['amount']))
            # 'continue', or any other answer, skips the entry.
            if (exception.lower()).strip() == 'stop':
                # Same effect as quit(), without relying on the site module.
                raise SystemExit

    linear_ms = totals[('MS', 'linear')]
    linear_ua = totals[('UA', 'linear')]
    gb_ms = totals[('MS', 'gb')]
    gb_ua = totals[('UA', 'gb')]
    cubic_ms = totals[('MS', 'cubic')]
    cubic_ua = totals[('UA', 'cubic')]

    report = {
        'MS Linear feet': round(linear_ms, 2),
        'UA Linear feet': round(linear_ua, 2),
        'Total linear feet': round((linear_ua + linear_ms), 2),
        'MS GB': round(gb_ms, 2),
        'UA GB': round(gb_ua, 2),
        'Total GB': round((gb_ms + gb_ua), 2),
        'MS Cubic feet': round(cubic_ms, 2),
        'UA Cubic feet': round(cubic_ua, 2),
        'Total Cubic feet': round((cubic_ua + cubic_ms), 2)
    }

    print('Generating report as JSON...')
    with open(('extent_calculator_' +
               (datetime.datetime.today().strftime('%Y-%m-%d')) + '.json'),
              'w') as f:
        json.dump(report, f)
import csv, json from asnake.client import ASnakeClient client = ASnakeClient() client.authorize() def startCSV(CSV): '''Creates the CSV with field names and writes header''' fieldnames = [ 'lock_version', 'indicator', 'uri', 'collection_identifier', 'series_identifier' ] with open(CSV, 'w', newline='') as outputCSV: writer = csv.DictWriter(outputCSV, fieldnames=fieldnames) writer.writeheader() def addCSV(CSV, lock, ind, uri, coll_id, ser_id): '''Opens CSV, appends row''' fieldnames = [ 'lock_version', 'indicator', 'uri', 'collection_identifier', 'series_identifier' ] with open(CSV, 'a', newline='') as outputCSV: writer = csv.DictWriter(outputCSV, fieldnames=fieldnames) writer.writerow({ 'lock_version': lock, 'indicator': ind, 'uri': uri, 'collection_identifier': coll_id,
def main(ID, path=None, accession=None):
    """Package a directory as a SIP and optionally link it to an accession.

    Args:
        ID: collection identifier; also the sub-directory of ``defaultPath``
            used when *path* is not given.
        path: directory to ingest; defaults to ``defaultPath/ID``.
        accession: optional accession identifier. When given, the accession
            is looked up in ArchivesSpace, its metadata is copied into the
            bag info, and the accession is updated with the bag id, extents
            and dates afterwards. When omitted, the ingest is logged to the
            digitization extent tracker instead.

    Returns:
        The SubmissionInformationPackage that was created.

    Raises:
        Exception: for an invalid path, or when the accession cannot be
            found, is incomplete, or does not link to collection *ID*.
    """
    if path is None:
        if not os.path.isdir(defaultPath):
            raise Exception("ERROR: default path " + defaultPath +
                            " does not exist.")
        path = os.path.join(defaultPath, ID)
        if not os.path.isdir(path):
            raise Exception("ERROR: no " + ID +
                            " directory exists for ingest in " + defaultPath)
    else:
        if not os.path.isdir(path):
            raise Exception("ERROR: " + str(path) + " is not a valid path.")
    print("Reading " + path)

    if accession is None:
        print("Building SIP...")
        SIP = SubmissionInformationPackage()
        SIP.create(ID)
        SIP.package(path)
        print("SIP " + SIP.bagID + " created.")

    else:
        print("Reading accession " + accession)
        import asnake.logging as logging
        from asnake.client import ASnakeClient
        client = ASnakeClient()
        client.authorize()
        logging.setup_logging(stream=sys.stdout, level='INFO')

        # Build the advanced query with json.dumps and send it as a request
        # parameter, so quotes or other special characters in the accession
        # identifier cannot corrupt the query or the URL.
        query = json.dumps({
            "query": {
                "field": "identifier",
                "value": accession,
                "jsonmodel_type": "field_query"
            }
        })
        accessionResponse = client.get("repositories/2/search",
                                       params={
                                           "page": 1,
                                           "aq": query
                                       }).json()
        if len(accessionResponse["results"]) < 1:
            raise Exception("ERROR: Could not find accession with ID: " +
                            accession)

        accessionObject = json.loads(accessionResponse["results"][0]["json"])
        # Start from id_0 so accessionID is always bound; previously an
        # accession without id_1 failed with an unbound-variable error.
        accessionID = accessionObject["id_0"]
        if "id_1" in accessionObject:
            accessionID = accessionID + "-" + accessionObject["id_1"]
        if accession != accessionID:
            raise Exception("ERROR: Could not find exact accession with ID: " +
                            accession)
        if "content_description" not in accessionObject:
            raise Exception("ERROR: no content description in " + accessionID +
                            " accession, " + accessionObject["uri"])
        if len(accessionObject["related_resources"]) < 1:
            raise Exception("ERROR: no related resource for " + accessionID +
                            " accession, " + accessionObject["uri"])

        resource = client.get(
            accessionObject["related_resources"][0]["ref"]).json()
        creator = resource["title"]
        if not ID.lower() == resource["id_0"].lower():
            raise Exception("ERROR: accession " + accessionID +
                            " does not link to collection ID " + ID +
                            ". Instead linked to " + resource["id_0"])
        description = accessionObject["content_description"]

        print("Building SIP...")
        SIP = SubmissionInformationPackage()
        SIP.create(ID)
        SIP.package(path)
        print("SIP " + SIP.bagID + " created.")

        SIP.bag.info["Accession-Identifier"] = accessionID
        SIP.bag.info["ArchivesSpace-URI"] = accessionObject["uri"]
        SIP.bag.info["Records-Creator"] = creator
        SIP.bag.info["Content-Description"] = description
        if "condition_description" in accessionObject:
            SIP.bag.info["Condition-Description"] = accessionObject[
                "condition_description"]
        if "provenance" in accessionObject:
            SIP.bag.info["Provenance"] = accessionObject["provenance"]
        if "general_note" in accessionObject:
            SIP.bag.info["General-Note"] = accessionObject["general_note"]

    SIP.bag.info["Source-Location"] = path
    SIP.bag.info[
        "Transfer-Method"] = "https://github.com/UAlbanyArchives/ingest-processing-workflow/ingest.py"

    print("Writing checksums...")
    SIP.bag.save(manifests=True)
    print("SIP Saved!")

    # List files in txt for processing
    print("(not) Listing files for processing...")
    #listFiles(ID)

    if accession is None:
        SIP.extentLog(
            "/media/SPE/DigitizationExtentTracker/DigitizationExtentTracker.xlsx"
        )
        print("Logged ingest to DigitizationExtentTracker.")
    else:
        print("Updating accession " + accessionID)
        # Record the bag id in the disposition note, keeping any prior text.
        if "disposition" in accessionObject:
            accessionObject["disposition"] = accessionObject[
                "disposition"] + "\n" + str(SIP.bagID)
        else:
            accessionObject["disposition"] = str(SIP.bagID)

        totalSize = SIP.size()
        inclusiveDates = SIP.dates()
        extent = {
            "jsonmodel_type": "extent",
            "portion": "whole",
            "number": str(totalSize[0]),
            "extent_type": str(totalSize[1])
        }
        extentFiles = {
            "jsonmodel_type": "extent",
            "portion": "whole",
            "number": str(totalSize[2]),
            "extent_type": "Digital Files"
        }
        if inclusiveDates[0] == inclusiveDates[1]:
            # Single-day span: store it as begin plus expression, no end.
            date = {
                "jsonmodel_type": "date",
                "date_type": "inclusive",
                "label": "creation",
                "begin": inclusiveDates[0],
                "expression": inclusiveDates[0]
            }
        else:
            date = {
                "jsonmodel_type": "date",
                "date_type": "inclusive",
                "label": "creation",
                "begin": inclusiveDates[0],
                "end": inclusiveDates[1]
            }
        if "extents" in accessionObject:
            accessionObject["extents"].append(extent)
            accessionObject["extents"].append(extentFiles)
        else:
            accessionObject["extents"] = [extent, extentFiles]
        accessionObject["dates"].append(date)

        updateAccession = client.post(accessionObject["uri"],
                                      json=accessionObject)
        if updateAccession.status_code == 200:
            print("\tSuccessfully updated accession " + accessionID)
        else:
            print(updateAccession.text)
            print("\tERROR " + str(updateAccession.status_code) +
                  "! Failed to update accession: " + accessionID)

    return SIP