Example #1
0
 def __init__(self, baseurl, username, password, repo_id):
     """Open an authenticated ArchivesSpace session and build the type table.

     Raises ArchivesSpaceClientError when the credentials are rejected.
     """
     self.repo_id = repo_id
     self.client = ASnakeClient(baseurl=baseurl,
                                username=username,
                                password=password)
     # authorize() is falsy on failed login
     if not self.client.authorize():
         raise ArchivesSpaceClientError(
             "Couldn't authenticate user credentials for ArchivesSpace")
     repo_prefix = "repositories/{0}".format(self.repo_id)
     # Maps a friendly record-type label to [jsonmodel type, API endpoint].
     # NOTE(review): "digital_objects" (plural) differs from the singular
     # jsonmodel names above — confirm intended.
     self.TYPE_LIST = {
         "family": ["agent_family", "agents/families"],
         "organization": ["agent_corporate_entity",
                          "agents/corporate_entities"],
         "person": ["agent_person", "agents/people"],
         "component": ["archival_object", repo_prefix + "/archival_objects"],
         "accession": ["accession", repo_prefix + "/accessions"],
         "digital object": ["digital_objects",
                            repo_prefix + "/digital_objects"],
     }
Example #2
0
 def __init__(self, **config):
     """Create an authorized ASnakeClient and record the server version."""
     # Connect to ASpace using .archivessnake.yml
     self.client = ASnakeClient(**config)
     self.client.authorize()
     version_text = self.client.get('version').text
     found = re.match(r'\(v?(.+\))', version_text)
     # NOTE(review): the pattern is anchored at the start of the response text
     # and the capture keeps the trailing ')' — confirm against the /version
     # endpoint's actual output.
     self.version = found[1] if found else 'unknown version'
Example #3
0
def test_authorize():
    """Authorizing with the default config should yield a 64-char hex token."""
    client = ASnakeClient()  # relies on default config, see ASnakeConfig class
    token = client.authorize()
    assert isinstance(token, str)
    assert len(token) == 64
    assert all(ch in '0123456789abcdef' for ch in token)
    assert client.session.headers['X-ArchivesSpace-Session'] == token
    # Admin-only endpoint: confirms the session is authed as admin
    assert client.get('users/1').status_code == 200
Example #4
0
    def __init__(self, **config):
        """Connect to ArchivesSpace and detect the server version.

        'repository' may be supplied in config; it defaults to '2'.
        """
        self.repository = config.pop('repository', '2')

        # Remaining config is handed straight to ASnakeClient
        # (falls back to .archivessnake.yml).
        self.__client = ASnakeClient(**config)
        self.__client.authorize()
        version_text = self.__client.get('version').text
        found = re.match(r'\(v?(.+\))', version_text)
        self.version = found[1] if found else 'unknown version'
def main():
    """Normalize legacy extent_type values on every resource record.

    Rewrites each extent_type listed in ``changes`` to its canonical key,
    POSTs any modified record back to the API, and prints a summary count.
    Relies on module-level ``tqdm``, ``logger`` and ``deepcopy``.
    """
    client = ASnakeClient(baseurl='XXXX', username='******', password='******')
    client.authorize()

    # canonical extent_type -> legacy spellings that should be replaced by it
    changes = {
        'linear_feet': ['Linear Feet', 'linear ft.', 'Linear Foot'],
        'cubic_feet': ['Cubic Feet'],
        'gigabytes': ['Gigabytes']
    }

    res_records = (client.get('repositories/2/resources',
                              params={'all_ids': True})).json()
    found_records = set()

    for record in tqdm(res_records):
        rec_uri = 'repositories/2/resources/{0}'.format(record)
        res_record = client.get(rec_uri).json()
        updated_record = deepcopy(res_record)
        try:
            extents = res_record['extents']
            for ext_index, extent in enumerate(extents):
                for key, value in changes.items():
                    if extent['extent_type'] in value:
                        updated_record['extents'][ext_index][
                            'extent_type'] = key
                        break
            if res_record['extents'] != updated_record['extents']:
                response = client.post(rec_uri, json=updated_record)
                if response.status_code == 200:
                    logger.info('Extent change successfully pushed',
                                rec=record,
                                response=response)
                    found_records.add(record)
                else:
                    logger.info('Extent change failed',
                                rec=record,
                                response=response)
        except KeyError:
            # Record lacks 'extents' (or an extent lacks 'extent_type');
            # skip it. Was a bare except that also hid HTTP/logging bugs.
            pass

    print('{0} resource records checked; {1} records updated.'.format(
        len(res_records), len(found_records)))
 def __init__(self):
     """Connect to ArchivesSpace and cache schema/enumeration metadata."""
     self.client = ASnakeClient()
     self.auth = self.client.authorize()
     self.all_schemas = self.get_schemas()
     # a list of all enumerations
     # COULD ALSO DO /config/enumerations/names/:enum_name
     self.all_enums = self.get_dynamic_enums()
     # the list of schema names (iterating a dict yields its keys)
     self.schema_list = list(self.all_schemas)
     # every distinct key that appears in any schema definition
     self.type_list = list({
         k for value in self.all_schemas.values() for k in value
     })
     # raw string: \( \) \s are regex escapes, not Python string escapes
     # (the non-raw original triggers SyntaxWarning on Python 3.12+)
     self.jsonmodel_pattern = re.compile(
         r'(JSONModel)(\(:.*?\)\s)(uri|object|uri_or_object)')
Example #7
0
#!/usr/bin/env python

from asnake.client import ASnakeClient
from tqdm import tqdm
import pandas as pd
import datetime
import re

# Open an ArchivesSpace API session (credentials redacted in this example).
client = ASnakeClient(baseurl='XXX',
                      username='******',
                      password='******')
client.authorize()


def pattern_matcher(x):
    """Flag a title containing '<', or beginning or ending with '>'.

    The original docstring claimed this matched titles ending with a comma,
    but the pattern actually flags stray angle brackets: any '<' anywhere,
    or a '>' at the start or end of the string (a lone interior '>' does
    not match, because ``re.match`` anchors at the start).

    Returns:
        The ``re.Match`` object on a hit, otherwise ``None``.
    """
    pattern_match = re.compile(r'^.*\>$|^\<.*$|^.*\<.*$|\>')
    result = pattern_match.match(x)
    return result


def data_framer(rec_ids, rec_index, rec_titles):
    """Assemble a report DataFrame indexed by resource identifier.

    rec_ids become the 'Identifier' index; rec_index and rec_titles fill
    the 'Resource_no' and 'Collection_Title' columns.
    """
    frame = pd.DataFrame({
        'Resource_no': rec_index,
        'Identifier': rec_ids,
        'Collection_Title': rec_titles,
    })
    return frame.set_index(['Identifier'])

Example #8
0
#Python3.7
import io
import json
import time

from asnake.client import ASnakeClient
from asnake.aspace import ASpace

#BaseURL should point to backend (e.g. https://archivesspace.duke.edu/api or https://localhost:8089)
# High-level ASpace wrapper (used for object traversal elsewhere in the script).
aspace = ASpace(baseurl="[baseurl]",
                username="******",
                password="******")

#Log Into ASpace and set repo to RL
# Low-level client used for direct REST calls below.
aspace_client = ASnakeClient(baseurl="[baseurl]",
                             username="******",
                             password="******")
aspace_client.authorize()

#set target repo by id
repo = aspace_client.get("repositories/2").json()
print("Logged into: " + repo['name'])

# Prompt for input, a comma separated list of EADID values (e.g. johndoepapers, janedoepapers, johnandjanedoepapers)
eadids = input("List of EADIDs:  ")
# Split comma separated list
eadids_list = eadids.split(",")

# NOTE(review): hard-coded, user-specific Windows path — parameterize before reuse.
destination = 'C:/users/nh48/desktop/as_exports_temp/'

#set EAD export options: number components and include DAOs
Example #9
0
 def default_client(cls):
     '''Return the shared ASnakeClient, creating and caching it on first use.'''
     client = cls.__default_client
     if not client:
         # Imported lazily so merely importing this module never builds a client.
         from asnake.client import ASnakeClient
         client = ASnakeClient()
         cls.__default_client = client
     return client
#!/usr/bin/env python

import re
from tqdm import tqdm
import pandas as pd
import datetime
import asnake.logging as logging
# Timestamped log file, e.g. comma_end_logfile_funct_2020-01-01.log
today_date = datetime.datetime.today().strftime('%Y-%m-%d')
logging.setup_logging(filename='comma_end_logfile_funct_' + str(today_date) +
                      '.log')
logger = logging.get_logger('comma_end_changes_log')
from asnake.client import ASnakeClient

# Open an authorized API session (credentials redacted in this example).
client = ASnakeClient(baseurl='xxx', username='******', password='******')
client.authorize()


def pattern_matcher(x):
    """Return a match object if ``x`` ends with a comma, else ``None``."""
    comma_end = re.compile(r'^.*\,$')
    return comma_end.match(x)


def extract_resources(y):
    """Look for ArchivesSpace resources that match pattern_matcher, then save them in a list and generate a CSV report."""
    if y == 'resources':
        obj_type = 'resource_records'
        all_records = client.get('repositories/2/resources',
                                 params={
                                     'all_ids': True
#/usr/bin/python3
#~/anaconda3/bin/python
from asnake.client import ASnakeClient
import asnake.logging as logging

# Append to a shared logfile so repeated runs accumulate history.
logging.setup_logging(filename="date_update.log", filemode="a")
logger = logging.get_logger("date_updating")

#Log Into ASpace and set repo to RL
aspace_client = ASnakeClient(baseurl="[backendURL]",
                             username="******",
                             password="******")
aspace_client.authorize()
repo = aspace_client.get("repositories/2").json()
print("Logged into: " + repo['name'])

print("Getting list of resources...")
resources_list = aspace_client.get(
    "repositories/2/resources?all_ids=true").json()
# Highest (newest) resource ids first.
resources_sorted = sorted(resources_list, reverse=True)

for resource in resources_sorted:

    try:
        resource_json = aspace_client.get("repositories/2/resources/" +
                                          str(resource)).json()
        #print (resource_json)
        resource_uri = resource_json['uri']
        print("updating: " + resource_uri)
        resource_update = aspace_client.post(resource_json['uri'],
                                             json=resource_json)
def main(ID, path=None, accession=None):
    """Package a directory into a SIP and optionally link/update an accession.

    Args:
        ID: collection identifier; also the default ingest folder name.
        path: source directory (defaults to defaultPath/ID).
        accession: accession identifier to look up in ArchivesSpace; when
            given, its metadata is copied into the bag and the accession
            record is updated with extents, dates, and disposition.

    Returns:
        The saved SubmissionInformationPackage.

    Raises:
        Exception: for missing paths or unresolvable/invalid accessions.
    """
    if path is None:
        if not os.path.isdir(defaultPath):
            raise Exception("ERROR: default path " + defaultPath +
                            " does not exist.")
        path = os.path.join(defaultPath, ID)
        if not os.path.isdir(path):
            raise Exception("ERROR: no " + ID +
                            " directory exists for ingest in " + defaultPath)
    else:
        if not os.path.isdir(path):
            raise Exception("ERROR: " + str(path) + " is not a valid path.")
    print("Reading " + path)

    if accession is None:
        print("Building SIP...")
        SIP = SubmissionInformationPackage()
        SIP.create(ID)
        SIP.package(path)
        print("SIP " + SIP.bagID + " created.")

    else:
        print("Reading accession " + accession)
        import asnake.logging as logging
        from asnake.client import ASnakeClient
        client = ASnakeClient()
        client.authorize()

        logging.setup_logging(stream=sys.stdout, level='INFO')

        # Field query against the accession identifier index.
        call = "repositories/2/search?page=1&aq={\"query\":{\"field\":\"identifier\", \"value\":\"" + accession + "\", \"jsonmodel_type\":\"field_query\"}}"
        accessionResponse = client.get(call).json()
        if len(accessionResponse["results"]) < 1:
            raise Exception("ERROR: Could not find accession with ID: " +
                            accession)
        else:
            accessionObject = json.loads(
                accessionResponse["results"][0]["json"])
            # Build the display identifier "id_0[-id_1]". Previously
            # accessionID was only assigned when "id_1" existed, raising
            # NameError on single-part identifiers.
            accessionID = accessionObject["id_0"]
            if "id_1" in accessionObject.keys():
                accessionID = accessionID + "-" + accessionObject["id_1"]
            if accession != accessionID:
                raise Exception(
                    "ERROR: Could not find exact accession with ID: " +
                    accession)
            if "content_description" not in accessionObject.keys():
                raise Exception("ERROR: no content description in " +
                                accessionID + " accession, " +
                                accessionObject["uri"])
            if len(accessionObject["related_resources"]) < 1:
                raise Exception("ERROR: no related resource for " +
                                accessionID + " accession, " +
                                accessionObject["uri"])
            else:
                resource = client.get(
                    accessionObject["related_resources"][0]["ref"]).json()
                creator = resource["title"]
                if not ID.lower() == resource["id_0"].lower():
                    raise Exception("ERROR: accession " + accessionID +
                                    " does not link to collection ID " + ID +
                                    ". Instead linked to " + resource["id_0"])
                description = accessionObject["content_description"]

                print("Building SIP...")
                SIP = SubmissionInformationPackage()
                SIP.create(ID)
                SIP.package(path)
                print("SIP " + SIP.bagID + " created.")

                # Carry accession metadata into the bag's info tags.
                SIP.bag.info["Accession-Identifier"] = accessionID
                SIP.bag.info["ArchivesSpace-URI"] = accessionObject["uri"]
                SIP.bag.info["Records-Creator"] = creator
                SIP.bag.info["Content-Description"] = description
                if "condition_description" in accessionObject.keys():
                    SIP.bag.info["Condition-Description"] = accessionObject[
                        "condition_description"]
                if "provenance" in accessionObject.keys():
                    SIP.bag.info["Provenance"] = accessionObject["provenance"]
                if "general_note" in accessionObject.keys():
                    SIP.bag.info["General-Note"] = accessionObject[
                        "general_note"]
                SIP.bag.info["Source-Location"] = path
                SIP.bag.info[
                    "Transfer-Method"] = "https://github.com/UAlbanyArchives/ingest-processing-workflow/ingest.py"

    print("Writing checksums...")
    SIP.bag.save(manifests=True)
    print("SIP Saved!")

    # List files in txt for processing
    print("(not) Listing files for processing...")
    #listFiles(ID)

    if accession is None:
        SIP.extentLog(
            "/media/SPE/DigitizationExtentTracker/DigitizationExtentTracker.xlsx"
        )
        print("Logged ingest to DigitizationExtentTracker.")
    else:
        print("Updating accession " + accessionID)
        if "disposition" in accessionObject.keys():
            accessionObject["disposition"] = accessionObject[
                "disposition"] + "\n" + str(SIP.bagID)
        else:
            accessionObject["disposition"] = str(SIP.bagID)

        totalSize = SIP.size()
        inclusiveDates = SIP.dates()
        # Whole-collection extent in the units reported by SIP.size().
        extent = {
            "jsonmodel_type": "extent",
            "portion": "whole",
            "number": str(totalSize[0]),
            "extent_type": str(totalSize[1])
        }
        extentFiles = {
            "jsonmodel_type": "extent",
            "portion": "whole",
            "number": str(totalSize[2]),
            "extent_type": "Digital Files"
        }
        # Single-date records use "expression"; ranges use begin/end.
        if inclusiveDates[0] == inclusiveDates[1]:
            date = {
                "jsonmodel_type": "date",
                "date_type": "inclusive",
                "label": "creation",
                "begin": inclusiveDates[0],
                "expression": inclusiveDates[0]
            }
        else:
            date = {
                "jsonmodel_type": "date",
                "date_type": "inclusive",
                "label": "creation",
                "begin": inclusiveDates[0],
                "end": inclusiveDates[1]
            }
        if "extents" in accessionObject.keys():
            accessionObject["extents"].append(extent)
            accessionObject["extents"].append(extentFiles)
        else:
            accessionObject["extents"] = [extent, extentFiles]
        accessionObject["dates"].append(date)

        updateAccession = client.post(accessionObject["uri"],
                                      json=accessionObject)
        if updateAccession.status_code == 200:
            print("\tSuccessfully updated accession " + accessionID)
        else:
            print(updateAccession.text)
            print("\tERROR " + str(updateAccession.status_code) +
                  "! Failed to update accession: " + accessionID)

    return SIP
Example #13
0
#What Time is is? Trello Time! Set time to now
# NOTE(review): naive UTC timestamp; compared against ASpace create dates below.
current_time = datetime.utcnow()

#Set time interval here (to get accessions created in last 24 hours)
current_time_minus_day = current_time - timedelta(hours=24)

#Convert time to ISO format for comparing to create dates in ASpace
current_time_minus_day = current_time_minus_day.isoformat()

print("Getting all Accessions created since: " + str(current_time_minus_day))

#ASNAKE
#Log Into ASpace and set repo to RL
aspace_client = ASnakeClient(baseurl="[ArchivesSpace backend API URL]",
                             username="******",
                             password="******")
aspace_client.authorize()
#Set Target Repository
repo = aspace_client.get("repositories/2").json()
print(repo['name'])

accessions_list = aspace_client.get(
    "repositories/2/accessions?all_ids=true").json()
#Sort accessions by ASpace ID (e.g. repositories/2/accessions/1234)
accessions_sorted = sorted(accessions_list)

#Just get the last 20 created accessions in ASpace (based on IDs, not create time)
#assuming we won't create more than 20 accessions in time interval between cron jobs
#get last 20 accessions in list (most recent accession will be last in list)
last_20_accessions = accessions_sorted[-20:]
Example #14
0
def get_aspace_log(defaults):
    """
    Gets a user's ArchivesSpace credentials.
    There are 3 components to it, the setup code, correct_creds while loop, and the window_asplog_active while loop. It
    uses ASnake.client to authenticate and stay connected to ArchivesSpace. Documentation for ASnake can be found here:
    https://archivesspace-labs.github.io/ArchivesSnake/html/index.html
    Args:
        defaults (UserSetting class): contains the data from defaults.json file, all data the user has specified as default
    Returns:
        close_program (bool): if a user exits the popup, this will return true and end run_gui()
        connect_client (ASnake.client object): the ArchivesSpace ASnake client for accessing and connecting to the API
        repositories (dict): maps repository display names to their numeric ids
    """
    connect_client = None
    repositories = {}
    save_button_asp = " Save and Continue "
    window_asplog_active = True
    correct_creds = False
    close_program = False
    while correct_creds is False:
        # NOTE(review): the username/password label rows below were destroyed
        # by credential redaction ("******") in the published source;
        # reconstructed from the intact API-URL row.
        asplog_col1 = [
            [psg.Text("ArchivesSpace username:", font=("Roboto", 11))],
            [psg.Text("ArchivesSpace password:", font=("Roboto", 11))],
            [psg.Text("ArchivesSpace API URL:", font=("Roboto", 11))]
        ]
        asplog_col2 = [[psg.InputText(focus=True, key="_ASPACE_UNAME_")],
                       [
                           psg.InputText(password_char='*',
                                         key="_ASPACE_PWORD_")
                       ],
                       [psg.InputText(defaults["as_api"], key="_ASPACE_API_")]]
        layout_asplog = [[
            psg.Column(asplog_col1, key="_ASPLOG_COL1_", visible=True),
            psg.Column(asplog_col2, key="_ASPLOG_COL2_", visible=True)
        ],
                         [
                             psg.Button(save_button_asp,
                                        bind_return_key=True,
                                        key="_SAVE_CLOSE_LOGIN_")
                         ]]
        window_login = psg.Window("ArchivesSpace Login Credentials",
                                  layout_asplog)
        while window_asplog_active is True:
            event_log, values_log = window_login.Read()
            if event_log == "_SAVE_CLOSE_LOGIN_":
                try:
                    connect_client = ASnakeClient(
                        baseurl=values_log["_ASPACE_API_"],
                        username=values_log["_ASPACE_UNAME_"],
                        password=values_log["_ASPACE_PWORD_"])
                    connect_client.authorize()
                    defaults["as_api"] = values_log["_ASPACE_API_"]
                    repo_results = connect_client.get('/repositories')
                    repo_results_dec = json.loads(
                        repo_results.content.decode())
                    # Repository URIs look like /repositories/<id>.
                    for result in repo_results_dec:
                        uri_components = result["uri"].split("/")
                        repositories[result["name"]] = int(uri_components[-1])
                    window_asplog_active = False
                    correct_creds = True
                except Exception as e:
                    # Split "a: b: c" error strings onto separate lines
                    # for a readable popup.
                    error_message = ""
                    if ":" in str(e):
                        error_divided = str(e).split(":")
                        for line in error_divided:
                            error_message += line + "\n"
                    else:
                        error_message = str(e)
                    psg.Popup(
                        "Your username and/or password were entered incorrectly. Please try again.\n\n"
                        + error_message)
            if event_log is None or event_log == 'Cancel':
                window_login.close()
                window_asplog_active = False
                correct_creds = True
                close_program = True
                break
        window_login.close()
    return close_program, connect_client, repositories
from asnake.client import ASnakeClient
import re
import logging
from secrets import *

id_field_regex = re.compile(r"(^id_+\d)")
logging.basicConfig(filename="unpublish.log", level=logging.INFO)
as_username = input("Enter your ArchivesSpace username: "******"Enter your ArchivesSpace password: "******"repositories").json()
    for repo in repos:
        print(repo["name"])
        repo_id = repo["uri"].split("/")[2]
        resources = client.get("repositories/{}/resources".format(repo_id), params={"all_ids": True}).json()
        for resource_id in resources:
            resource = client.get("repositories/{}/resources/{}".format(repo_id, resource_id)).json()
            combined_id = ""
            for field, value in resource.items():
                id_match = id_field_regex.match(field)
                if id_match:
                    combined_id += value + "-"
            combined_id = combined_id[:-1]
            if "[CLOSED]" in combined_id:
                logging.info("Unpublishing {} from {}".format(combined_id, repo["name"]))
                print(combined_id)
                all_uris = client.get("repositories/{}/resources/{}/ordered_records".format(repo_id,
Example #16
0
        f = f.replace('\n', '')  # account for new line in text file
        count += 1
        make_row(get_ao(f), f)
        if not count % 25:
            print(str(count) + ' rows added')


# enter aspace login info
config = configparser.ConfigParser()
config.read('local_settings.cfg')
baseurl = config.get('ArchivesSpace', 'baseURL')
# NOTE(review): the next line was mangled by credential redaction ("******")
# and is not valid Python as published; it presumably read
# user = input('ArchivesSpace username: ') and pw = getpass('ArchivesSpace password:').
user = input('ArchivesSpace username: '******'ArchivesSpace password:')

# start aspace session
client = ASnakeClient(baseurl=baseurl, username=user, password=pw)
print("Logging into ArchivesSpace...")
client.authorize()

# create spreadsheet
spreadsheet = open("find_on_demand.csv", "w")
writer = csv.writer(spreadsheet)
column_headings = [
    "Ref_id", "Title", "Component End Year", "Ancestor", "Parent Collection",
    "Remainder of Finding Aid Title", "Resource ID", "Resource Start Year"
]
writer.writerow(column_headings)


def create_file_list():
    print("Removing refids to ignore...")
def buildSelections(colID, refID=None, filter=None, date=False, verbose=False):
    """Harvest Hyrax catalog results for a collection (or one component) into
    sorted JSON files under /media/SPE/uploads.

    colID: collection number used in catalog facets and output filenames.
    refID: optional ArchivesSpace ref_id limiting results to one component.
    filter: extra query-string fragment appended to the catalog URL.
    date: optional year or "start-end" range; items outside it are dropped.
    verbose: accepted for caller compatibility; not used in this body.
    """

    client = ASnakeClient()
    client.authorize()

    collection = []
    page = 1

    outDir = "/media/SPE/uploads"

    # Build the catalog query plus matching output/description file paths.
    if refID:
        url = "https://archives.albany.edu/catalog?f[record_parent_sim][]=" + refID + "&format=json&per_page=100"
        outFile = os.path.join(outDir, refID + ".json")
        descriptionURL = "https://archives.albany.edu/description/catalog/" + colID.replace(".", "-") + "aspace_" + refID
        outDesc = os.path.join(outDir, "desc_" + refID + ".json")
    else:
        url = "https://archives.albany.edu/catalog?f[collection_number_sim][]=" + colID + "&format=json&per_page=100"
        outFile = os.path.join(outDir, colID.replace(".", "-") + ".json")
        descriptionURL = "https://archives.albany.edu/description/catalog/" + colID.replace(".", "-")
        outDesc = os.path.join(outDir, "desc_" + colID.replace(".", "-") + ".json")
    if filter:
        url = url + "&" + filter

    # NOTE(review): verify=False disables TLS certificate checks — confirm this
    # is intentional for this host.
    print (descriptionURL + "?format=json")
    r = requests.get(descriptionURL + "?format=json", verify=False)
    print (r.status_code)
    with open(outDesc, 'w', encoding='utf-8', newline='') as f:
        json.dump(r.json()["response"], f, ensure_ascii=True, indent=4)


    def getPage(page, collection, url):
        # Recursively walk paginated catalog results, appending items to
        # ``collection``.

        r = requests.get(url + "&page=" + str(page), verify=False)
        print (r.status_code)
        for item in r.json()["response"]["docs"]:

            obj = {}
            obj["title"] = item["title_tesim"][0]
            obj["date"] = item["date_created_tesim"][0]
            #print (item)
            ref_id = item["archivesspace_record_tesim"][0]
            obj["thumb"] = "https://archives.albany.edu" + item["thumbnail_path_ss"]
            obj["url"] = "https://archives.albany.edu/concern/" + item["has_model_ssim"][0].lower() + "s/" + item["id"]

            # Resolve the ASpace archival object to get a normalized date
            # ("begin" or "begin/end").
            record = client.get("repositories/2/find_by_id/archival_objects?ref_id[]=" + ref_id).json()
            ao = client.get(record["archival_objects"][0]["ref"]).json()
            print (ao["ref_id"])
            dateNormal = ao["dates"][0]["begin"]
            if "end" in ao["dates"][0].keys():
                dateNormal = dateNormal + "/" + ao["dates"][0]["end"]
            # "9999" makes undated items sort last.
            if "undated" in ao["dates"][0]["expression"].lower():
                obj["date_normal"] = "9999"
            else:
                obj["date_normal"] = dateNormal

            if date:
                # Extract a leading year from the display date, then keep the
                # item only if it falls inside the requested year/range.
                if not obj["date"].lower() == "undated":
                    if obj["date"].lower().startswith("ca."):
                        objDate = obj["date"].split(" ")[1]
                    else:
                        if "-" in obj["date"]:
                            objDate = obj["date"].split("-")[0]
                        else:
                            objDate = obj["date"].split(" ")[0]
                    print (objDate)
                    try:
                        if "-" in date:
                            if int(objDate) >= int(date.split("-")[0]) and int(objDate) <= int(date.split("-")[1]):
                                collection.append(obj)
                        else:
                            if int(objDate) < int(date):
                                collection.append(obj)
                    except:
                        print ("Date Error: " + objDate)
            else:
                collection.append(obj)
        if r.json()["response"]["pages"]["last_page?"] == False:
            getPage(page + 1, collection, url)

    getPage(page, collection, url)


    #print (collection)
    # Sort by first word of title, then (stable sort) by normalized date.
    sortedTitle = sorted(collection, key = lambda i: i['title'].split(" ")[0])
    sortedCollection = sorted(sortedTitle, key = lambda i: i['date_normal'].split(" ")[0])
    print (len(sortedCollection))

    with open(outFile, 'w', encoding='utf-8', newline='') as f:
        json.dump(sortedCollection, f, ensure_ascii=True, indent=4)
Example #18
0
import dacs
import time
import csv
import shutil
from git import Repo
from datetime import datetime
from subprocess import Popen, PIPE, STDOUT
import asnake.logging as logging
from asnake.client import ASnakeClient
#from asnake.aspace import ASpace

print (str(datetime.now()) + " Exporting Records from ArchivesSpace")

print ("\tConnecting to ArchivesSpace")
# Local backend instance on port 8092 (credentials redacted in this example).
client = ASnakeClient(baseurl="http://localhost:8092",
                      username="******",
                      password="******")
client.authorize()
# NOTE(review): sys and os are used below but not imported in this snippet —
# confirm they are imported in the full script.
logging.setup_logging(stream=sys.stdout, level='INFO')

#repo = ASpace().repositories(3)

__location__ = os.path.dirname(os.path.realpath(__file__))

lastExportTime = time.time()
try:
    timePath = os.path.join(__location__, "lastExport.txt")
    with open(timePath, 'r') as timeFile:
        startTime = int(timeFile.read().replace('\n', ''))
        timeFile.close()
except:
from datetime import datetime
from asnake.client import ASnakeClient
from asnake.aspace import ASpace

import asnake.logging as logging
# DEBUG-level log of all wrapper-removal actions, appended across runs.
logging.setup_logging(level='DEBUG',
                      filename="remove_fake_wrapper.log",
                      filemode="a")

aspace = ASpace(baseurl="[ASPACE API URL]",
                username="******",
                password="******")

#Log Into ASpace and set repo to RL
aspace_client = ASnakeClient(baseurl="[ASPACE API URL]",
                             username="******",
                             password="******")
aspace_client.authorize()
#Set target repo
repo = aspace_client.get("repositories/2").json()
print("Logged into: " + repo['name'])

rl_repo = aspace.repositories(2)

#input is output of SQL query above
input_csv = input("Path to CSV Input: ")
#output will be input CSV plus some extra columns for reporting on actions taken, errors, etc.
updated_resources_csv = input("Path to CSV Output: ")


#Test if more than one direct child of Resource Object
Example #20
0
def main():
    """Tally MS/UA extents from all resources and write a JSON report.

    Collects every resource's extents (converting megabytes to gigabytes),
    sums linear/cubic/gigabyte totals split by MS vs UA id prefix, and
    dumps the rounded totals to extent_calculator_<date>.json.
    Relies on module-level ``tqdm``.
    """
    client = ASnakeClient(baseurl='XXXX', username='******', password='******')
    client.authorize()

    # unit family -> extent_type spellings that belong to it
    catalog = {
        'linear': ['linear_feet', 'Linear Feet', 'linear ft.', 'Linear Foot'],
        'cubic': ['cubic_feet', 'Cubic Feet'],
        'gb': ['gigabytes', 'Gigabytes']
    }

    res_records = (client.get('repositories/2/resources',
                              params={'all_ids': True})).json()

    data_list = []

    print('Compiling resource records from API...')

    for record in tqdm(res_records):
        res_record = client.get(
            'repositories/2/resources/{0}'.format(record)).json()
        try:
            extents = res_record['extents']
            for x in extents:
                if x['extent_type'] == 'megabytes':
                    # Normalize megabytes to gigabytes up front.
                    data_list.append({
                        'id': res_record['id_0'],
                        'amount': str(float(x['number']) / 1000),
                        'units': 'gigabytes'
                    })
                else:
                    data_list.append({
                        'id': res_record['id_0'],
                        'amount': x['number'],
                        'units': x['extent_type']
                    })
        except (KeyError, ValueError):
            # Skip records missing extents/id_0/number, or with a
            # non-numeric megabyte amount (was a silent bare except).
            pass

    linear_ms = 0
    linear_ua = 0
    gb_ms = 0
    gb_ua = 0
    cubic_ms = 0
    cubic_ua = 0

    print('Analyzing extents in resource data...')

    for entry in data_list:
        try:
            if entry['id'].startswith(
                    'MS') and entry['units'] in catalog['linear']:
                linear_ms += float(entry['amount'])
            elif entry['id'].startswith(
                    'UA') and entry['units'] in catalog['linear']:
                linear_ua += float(entry['amount'])
            elif entry['id'].startswith(
                    'MS') and entry['units'] in catalog['gb']:
                gb_ms += float(entry['amount'])
            elif entry['id'].startswith(
                    'UA') and entry['units'] in catalog['gb']:
                gb_ua += float(entry['amount'])
            elif entry['id'].startswith(
                    'MS') and entry['units'] in catalog['cubic']:
                cubic_ms += float(entry['amount'])
            elif entry['id'].startswith(
                    'UA') and entry['units'] in catalog['cubic']:
                cubic_ua += float(entry['amount'])
            else:
                pass
        except ValueError:
            # float() failed on a non-numeric amount: ask the operator
            # whether to skip this entry or abort (was a bare except).
            exception = input(
                'Uh oh, looks like the analysis ran into a snag; most likely, '
                'a unit of extent for {0} ({1}) is not a pure number. Enter '
                '\'stop\' to kill the process so you can fix the record. Alternatively, '
                'you can enter \'continue\' to skip this entry and keep the analysis '
                'going.'.format(entry['id'], entry['amount']))
            if (exception.lower()).strip() == 'continue':
                pass
            elif (exception.lower()).strip() == 'stop':
                quit()

    report = {
        'MS Linear feet': round(linear_ms, 2),
        'UA Linear feet': round(linear_ua, 2),
        'Total linear feet': round((linear_ua + linear_ms), 2),
        'MS GB': round(gb_ms, 2),
        'UA GB': round(gb_ua, 2),
        'Total GB': round((gb_ms + gb_ua), 2),
        'MS Cubic feet': round(cubic_ms, 2),
        'UA Cubic feet': round(cubic_ua, 2),
        'Total Cubic feet': round((cubic_ua + cubic_ms), 2)
    }

    print('Generating report as JSON...')

    with open(('extent_calculator_' +
               (datetime.datetime.today().strftime('%Y-%m-%d')) + '.json'),
              'w') as f:
        json.dump(report, f)
    # create formatter and add it to the handlers
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fh.setFormatter(formatter)
    ch.setFormatter(formatter)
    # add the handlers to the logger
    logger.addHandler(fh)
    logger.addHandler(ch)

    config = configparser.ConfigParser()
    config.read('settings.ini')
    args.config = config

    try:
        client = ASnakeClient(
            baseurl=config['aspace_credentials']['api_host'],
            username=config['aspace_credentials']['username'],
            password=config['aspace_credentials']['password'])
    except KeyError as e:
        logger.error('settings.ini does not exist or is invalid')
        raise e

    # Simple sanity check to make sure client is setup
    try:
        resp = client.get('/')
        if not resp.ok:
            resp.raise_for_status()
    except:
        logger.error('Unable to contact ArchivesSpace instance at %s' %
                     config['aspace_credentials']['api_host'])
        raise APIContactError(
            'Unable to contact ArchivesSpace instance at %s' %
Example #22
0
            combined_aspace_id_clean = id_combined_regex.sub('', combined_id)
            if resource.json()["publish"] is True:
                if resource.status_code == 200:
                    export_ead = client.get(
                        "repositories/{}/resource_descriptions/{}.xml".format(
                            repo_id, resource_id),
                        params={
                            "include_unpublished": False,
                            "include_daos": True,
                            "numbered_cs": True,
                            "print_pdf": False,
                            "ead3": False
                        })
                    filepath = str(Path(source_path,
                                        combined_aspace_id_clean)) + ".xml"
                    with open(filepath, "wb") as local_file:
                        local_file.write(export_ead.content)
                        local_file.close()
                        print("Exported: {}".format(combined_id))
                else:
                    print(
                        "\nThe following errors were found when exporting {}:\n{}: {}\n"
                        .format(combined_id, resource, resource.text))
        print("-" * 100)


# Prompt for the export folder, then connect and run the EAD export.
# NOTE(review): as_api/as_un/as_pw are defined elsewhere in the full script.
sourcepath = input("Enter folder path for exported EADs: ")
asp_client = ASnakeClient(baseurl=as_api, username=as_un, password=as_pw)
asp_client.authorize()
export_eads(asp_client, sourcepath)
Example #23
0
import csv, json

from asnake.client import ASnakeClient
# Module-level session shared by the CSV helper functions below.
client = ASnakeClient()
client.authorize()


def startCSV(CSV):
    '''Creates the CSV with field names and writes header'''
    header_fields = [
        'lock_version', 'indicator', 'uri', 'collection_identifier',
        'series_identifier'
    ]
    with open(CSV, 'w', newline='') as handle:
        csv.DictWriter(handle, fieldnames=header_fields).writeheader()


def addCSV(CSV, lock, ind, uri, coll_id, ser_id):
    '''Opens CSV, appends row'''
    fieldnames = [
        'lock_version', 'indicator', 'uri', 'collection_identifier',
        'series_identifier'
    ]
    with open(CSV, 'a', newline='') as outputCSV:
        writer = csv.DictWriter(outputCSV, fieldnames=fieldnames)
        writer.writerow({
            'lock_version': lock,
            'indicator': ind,
            'uri': uri,
            'collection_identifier': coll_id,
Example #24
0
import dacs
import time
import csv
import shutil

import csv
import requests
import json
from asnake.client import ASnakeClient
import asnake.logging as logging


print ("\tConnecting to ArchivesSpace")

# Local backend instance on port 8092 (credentials redacted in this example).
client = ASnakeClient(baseurl="http://localhost:8092",
                      username="******",
                      password="******")
client.authorize()

# NOTE(review): sys is not imported in this snippet — confirm in full script.
logging.setup_logging(stream=sys.stdout, level='INFO')

with open('items_output.csv', mode='r') as csv_file:
    csv_reader = csv.DictReader(csv_file)
    line_count = 0
    for row in csv_reader:
        if line_count == 0:
            line_count += 1

  
        title=str(row['Title'])
        identifier=str(row['Identifier'])