from copy import deepcopy

import asnake.logging as logging
from asnake.client import ASnakeClient
from tqdm import tqdm

logger = logging.get_logger("update_extent_types")


def main():
    client = ASnakeClient(baseurl='XXXX', username='******', password='******')
    client.authorize()

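    # Map each canonical extent_type value to the variant spellings it replaces.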
    changes = {
        'linear_feet': ['Linear Feet', 'linear ft.', 'Linear Foot'],
        'cubic_feet': ['Cubic Feet'],
        'gigabytes': ['Gigabytes']
    }

    res_records = (client.get('repositories/2/resources',
                              params={'all_ids': True})).json()
    found_records = set()

    for record in tqdm(res_records):
        rec_uri = 'repositories/2/resources/{0}'.format(record)
        res_record = client.get(rec_uri).json()
        updated_record = deepcopy(res_record)
        try:
            extents = res_record['extents']
            for ext_index, extent in enumerate(extents):
                for key, value in changes.items():
                    if extent['extent_type'] in value:
                        updated_record['extents'][ext_index][
                            'extent_type'] = key
                        break
            if res_record['extents'] != updated_record['extents']:
                response = client.post(rec_uri, json=updated_record)
                if response.status_code == 200:
                    logger.info('Extent change successfully pushed',
                                rec=record,
                                response=response)
                    found_records.add(record)
                else:
                    logger.info('Extent change failed',
                                rec=record,
                                response=response)
        except KeyError:
            # Record has no 'extents' key; skip it.
            pass

    print('{0} resource records checked; {1} records updated.'.format(
        len(res_records), len(found_records)))
Example 2

                types.append(child_type)
                unknown_count += 1
    for indicator in indicators:
        try:
            del instance["sub_container"][indicator]
        except Exception as e:
            print("There was an error when deleting the unknown indicator: {}".format(e))
            print(instance)
    for child_type in types:
        try:
            del instance["sub_container"][child_type]
        except Exception as e:
            print("There was an error when deleting the unknown child/grandchild type: {}".format(e))
            print(instance)
    if indicators and types:
        update_ao = client.post(node.uri, json=ao_json).json()
        print(update_ao)
else:
    indicators = []
    types = []
    for key, value in instance.items():
        if "indicator_" in key:
            if "unknown container" == value:
                child_type = "type_" + str(key[-1])
                indicators.append(key)
                types.append(child_type)
                unknown_count += 1
    for indicator in indicators:
        try:
            del instance[indicator]
        except Exception as e:
Example 3

import csv

from asnake.client import ASnakeClient

# print instructions
print(
    'This script takes viafCorporateResults.csv and posts the organizations as corporate_entities to ArchivesSpace.'
)
input('Press Enter to continue...')

# This is where we connect to ArchivesSpace.
client = ASnakeClient()
client.authorize()  # login, using default values

targetFile = 'viafCorporateResults.csv'
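# Inferred viafCorporateResults.csv layout (from the row keys used below);
# values are illustrative only:
# result,lc
# "Example Organization",http://id.loc.gov/authorities/names/n00000000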

# Avoid shadowing the csv module: bind the reader to its own name.
reader = csv.DictReader(open(targetFile))

orgList = []
for row in reader:
    orgRecord = {}
    # changed this since ASpace doesn't come with 'viaf' as a source out of the box;
    # note DictReader yields '' (not None) for blank cells, hence the truthiness check.
    source = 'naf' if row.get('lc') else 'local'
    orgRecord['names'] = [{
        'primary_name': row['result'],
        'sort_name': row['result'],
        'source': source,
        'authority_id': row['lc']
    }]

    post = client.post('/agents/corporate_entities', json=orgRecord).json()
    print(post, '\n')

print("Check out your instance of ArchivesSpace to see what's new.")
Example 4

def main(ID, path=None, accession=None):

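    # defaultPath and SubmissionInformationPackage are defined in the surrounding
    # ingest script (see the Transfer-Method URL below); they are not part of
    # this excerpt.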
    if path is None:
        if not os.path.isdir(defaultPath):
            raise Exception("ERROR: default path " + defaultPath +
                            " does not exist.")
        path = os.path.join(defaultPath, ID)
        if not os.path.isdir(path):
            raise Exception("ERROR: no " + ID +
                            " directory exists for ingest in " + defaultPath)
    else:
        if not os.path.isdir(path):
            raise Exception("ERROR: " + str(path) + " is not a valid path.")
    print("Reading " + path)

    if accession is None:
        print("Building SIP...")
        SIP = SubmissionInformationPackage()
        SIP.create(ID)
        SIP.package(path)
        print("SIP " + SIP.bagID + " created.")

    else:
        print("Reading accession " + accession)
        import asnake.logging as logging
        from asnake.client import ASnakeClient
        client = ASnakeClient()
        client.authorize()

        logging.setup_logging(stream=sys.stdout, level='INFO')

        query = json.dumps({"query": {"field": "identifier",
                                      "value": accession,
                                      "jsonmodel_type": "field_query"}})
        accessionResponse = client.get("repositories/2/search",
                                       params={"page": 1, "aq": query}).json()
        if len(accessionResponse["results"]) < 1:
            raise Exception("ERROR: Could not find accession with ID: " +
                            accession)
        else:
            accessionObject = json.loads(
                accessionResponse["results"][0]["json"])
            if "id_1" in accessionObject.keys():
                accessionID = accessionObject["id_0"] + "-" + accessionObject[
                    "id_1"]
            if accession != accessionID:
                raise Exception(
                    "ERROR: Could not find exact accession with ID: " +
                    accession)
            if not "content_description" in accessionObject.keys():
                raise Exception("ERROR: no content description in " +
                                accessionID + " accession, " +
                                accessionObject["uri"])
            if len(accessionObject["related_resources"]) < 1:
                raise Exception("ERROR: no related resource for " +
                                accessionID + " accession, " +
                                accessionObject["uri"])
            else:
                resource = client.get(
                    accessionObject["related_resources"][0]["ref"]).json()
                creator = resource["title"]
                if ID.lower() != resource["id_0"].lower():
                    raise Exception("ERROR: accession " + accessionID +
                                    " does not link to collection ID " + ID +
                                    ". Instead linked to " + resource["id_0"])
                description = accessionObject["content_description"]

                print("Building SIP...")
                SIP = SubmissionInformationPackage()
                SIP.create(ID)
                SIP.package(path)
                print("SIP " + SIP.bagID + " created.")

                SIP.bag.info["Accession-Identifier"] = accessionID
                SIP.bag.info["ArchivesSpace-URI"] = accessionObject["uri"]
                SIP.bag.info["Records-Creator"] = creator
                SIP.bag.info["Content-Description"] = description
                if "condition_description" in accessionObject.keys():
                    SIP.bag.info["Condition-Description"] = accessionObject[
                        "condition_description"]
                if "provenance" in accessionObject.keys():
                    SIP.bag.info["Provenance"] = accessionObject["provenance"]
                if "general_note" in accessionObject.keys():
                    SIP.bag.info["General-Note"] = accessionObject[
                        "general_note"]
                SIP.bag.info["Source-Location"] = path
                SIP.bag.info[
                    "Transfer-Method"] = "https://github.com/UAlbanyArchives/ingest-processing-workflow/ingest.py"

    print("Writing checksums...")
    SIP.bag.save(manifests=True)
    print("SIP Saved!")

    # List files in txt for processing
    print("(not) Listing files for processing...")
    #listFiles(ID)

    if accession is None:
        SIP.extentLog(
            "/media/SPE/DigitizationExtentTracker/DigitizationExtentTracker.xlsx"
        )
        print("Logged ingest to DigitizationExtentTracker.")
    else:
        print("Updating accession " + accessionID)
        if "disposition" in accessionObject.keys():
            accessionObject["disposition"] = accessionObject[
                "disposition"] + "\n" + str(SIP.bagID)
        else:
            accessionObject["disposition"] = str(SIP.bagID)

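        # SIP.size() is consumed below as (number, extent_type, file_count) and
        # SIP.dates() as (begin, end); both come from the SIP class, which is
        # not shown in this excerpt.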
        totalSize = SIP.size()
        inclusiveDates = SIP.dates()
        extent = {
            "jsonmodel_type": "extent",
            "portion": "whole",
            "number": str(totalSize[0]),
            "extent_type": str(totalSize[1])
        }
        extentFiles = {
            "jsonmodel_type": "extent",
            "portion": "whole",
            "number": str(totalSize[2]),
            "extent_type": "Digital Files"
        }
        if inclusiveDates[0] == inclusiveDates[1]:
            date = {
                "jsonmodel_type": "date",
                "date_type": "inclusive",
                "label": "creation",
                "begin": inclusiveDates[0],
                "expression": inclusiveDates[0]
            }
        else:
            date = {
                "jsonmodel_type": "date",
                "date_type": "inclusive",
                "label": "creation",
                "begin": inclusiveDates[0],
                "end": inclusiveDates[1]
            }
        if "extents" in accessionObject.keys():
            accessionObject["extents"].append(extent)
            accessionObject["extents"].append(extentFiles)
        else:
            accessionObject["extents"] = [extent, extentFiles]
        accessionObject["dates"].append(date)

        updateAccession = client.post(accessionObject["uri"],
                                      json=accessionObject)
        if updateAccession.status_code == 200:
            print("\tSuccessfully updated accession " + accessionID)
        else:
            print(updateAccession.text)
            print("\tERROR " + str(updateAccession.status_code) +
                  "! Failed to update accession: " + accessionID)

    return SIP
Example 5
import json, csv, runtime
from asnake.client import ASnakeClient
# print instructions
print(
    'This script replaces existing fauxcodes with real barcodes (linked in a separate csv file) in ArchivesSpace.'
)
input('Press Enter to connect to ArchivesSpace and post those barcodes...')

# This is where we connect to ArchivesSpace.  See authenticate.py
client = ASnakeClient()
client.authorize()

# open csv and generate dict
reader = csv.DictReader(open('barcodes.csv'))
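# Inferred barcodes.csv layout (from the row keys used below); values are
# illustrative only:
# uri,real
# /repositories/2/top_containers/123,39002012345678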

# GET each top_container listed in top_containers and add to records
print('The following barcodes have been updated in ArchivesSpace:')
for row in reader:
    uri = row['uri']
    container = client.get(uri).json()
    container['barcode'] = row['real']
    post = client.post(uri, json=container).json()
    print(post)
Example 6
        title = str(row['Title'])
        identifier = str(row['Identifier'])
        urlRaw = str(row['url'])
        # Have to clean the URL: rewrite the API path to its public 'show' form.
        url = urlRaw.replace("/api/items", "/items/show")
#        print(title+identifier+url)

#        file_version = {
#                "jsonmodel_type":"digital_object",
#                "file_uri":url,
#                "is_representative":False,
#                "caption":title+" ["+url+"]",
#                "use_statement":"Image-Service",
#                "publish":True}

        data = { "jsonmodel_type":"digital_object",
                "file_versions": [{
                "jsonmodel_type":"file_version",
                "file_uri":url,
                "is_representative":False,
                "caption":title+" ["+url+"]",
                "use_statement":"Image-Service",
                "publish":True}],
                "digital_object_id":identifier,
                "title":title}

        r = client.post('repositories/3/digital_objects', json=data)

        print(json.dumps(data))

Example 7
    
    idList = []
    for aID in accessions:
        entry = client.get("repositories/2/accessions/" + str(aID)).json()
        if entry["accession_date"].split("-")[0] == "2019":
            idList.append(int(entry["id_1"]))
    # Next sequential id for the year, zero-padded to three digits.
    newID = str(max(idList) + 1).zfill(3)
    print ("Creating new accession " + year + "-" + str(newID) + "...")
    newAccession["id_0"] = year
    newAccession["id_1"] = str(newID)
    accessionID = year + "-" + str(newID)
    updateAccession = client.post("repositories/2/accessions", json=newAccession)
    if updateAccession.status_code == 200:
        print ("\tSuccessfully updated accession " + newAccession["id_0"] + "-" + newAccession["id_1"])
    else:
        print ("\tERROR " + str(updateAccession.status_code) + "! Failed to update accession: " + newAccession["id_0"] + "-" + newAccession["id_1"])
        
    print ("Waiting for new accession to be indexed...")
    time.sleep(120)
    
print ("Ingesting records at " + path)
arrangementSwitch = False
arrangementsPath = "/media/Masters/Archives/arrangements"
arrangements = os.path.join(arrangementsPath, args.ID.upper())
if os.path.isdir(arrangements):
    arrangementSwitch = True
Example 8
                # Remove normalized end date
                try:
                    if date['end']:
                        print('normalized end date exists; will be removed')
                        date.pop('end')
                        update = True
                except KeyError:
                    print('no normalized end date')

                print()
                print('Dates will be updated to:')
                print(date)
                print()

        if update:
            update = client.post(collectJson['uri'], json=collectJson)
            print(update.status_code)

            # Print all dates
            num = collectJson['uri'].replace('/repositories/2/resources/', '')
            collection = repo.resources(num)
            for date in collection.dates:
                date_type = ''
                date_expression = ''
                date_begin = ''
                date_end = ''

                try:
                    date_type = date.date_type
                    date_expression = date.expression
                    date_begin = date.begin
Example 9
import json
import time
from asnake.client import ASnakeClient

startTime = time.time()

client = ASnakeClient()
client.authorize()

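# Illustrative shape for one entry in all_AOs.json (an assumption; any valid
# archival_object JSON the API accepts will work):
# {"jsonmodel_type": "archival_object", "title": "Folder 1", "level": "file",
#  "resource": {"ref": "/repositories/3/resources/1"}}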
records = json.load(open('all_AOs.json'))
for record in records:
    post = client.post('/repositories/3/archival_objects', json=record).json()
    print(post)

elapsedTime = time.time() - startTime
m, s = divmod(elapsedTime, 60)
h, m = divmod(m, 60)
print('Total script run time: ', '%d:%02d:%02d' % (h, m, s))
Example 10
from getpass import getpass

from asnake.client import ASnakeClient
from secrets import *  # expected to provide as_api (the backend URL)

as_username = input("ArchivesSpace username: ")
as_password = getpass("ArchivesSpace password: ")
client = ASnakeClient(baseurl=as_api, username=as_username,
                      password=as_password)
client.authorize()

repos = client.get("repositories").json()
print("Publishing Digital Objects...", end='', flush=True)
for repo in repos:
    digital_object = {}
    dig_objs_per_repo = []
    repo_digital_objects = client.get(repo["uri"] + "/digital_objects?all_ids=true").json()
    for dig_obj_id in repo_digital_objects:
        object_request = repo["uri"] + "/digital_objects/" + str(dig_obj_id) + "/publish"
        try:
            client.post(object_request)
        except Exception as e:
            print("Error found when requesting id: " + str(e) + "\n" + object_request)
    #     digital_object[dig_obj_id] = client.get(repo["uri"] + "/digital_objects/" + str(dig_obj_id)).json()
    #     dig_objs_per_repo.append(digital_object)
    # repo_dig_objects[repo['name']] = dig_objs_per_repo
print("Done")
# print(json_data)
Example 11
})
# Can't use get_paged because this endpoint returns raw Solr
results = client.get(endpoint, params={
    'aq': advanced_query
}).json()["response"]["docs"]

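# gen_dict_extract is not defined in this excerpt. A minimal sketch of the
# usual recursive helper (walks nested dicts/lists and yields every value
# stored under the given key):
def gen_dict_extract(key, var):
    if isinstance(var, dict):
        for k, v in var.items():
            if k == key:
                yield v
            elif isinstance(v, (dict, list)):
                yield from gen_dict_extract(key, v)
    elif isinstance(var, list):
        for item in var:
            yield from gen_dict_extract(key, item)
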
# populate top_containers with the ids of each top_container in search results
top_containers = []
for value in gen_dict_extract('id', results):
    top_containers.append(value)

# GET each top_container listed in top_containers and add to records
records = []
for top_container in top_containers:
    output = client.get(top_container).json()
    records.append(output)

# have user enter container profile id
profile_id = input(
    'Enter container profile ID (e.g. 9; any ID that exists in your instance of ArchivesSpace): '
)

# Add container profile to records and post
print('The following records have been updated in ArchivesSpace:')
for record in records:
    record['container_profile'] = {'ref': '/container_profiles/' + profile_id}
    uri = record['uri']
    post = client.post(uri, json=record).json()
    print(post)
Example 12
from openpyxl import load_workbook
from secrets import *
from asnake.aspace import ASpace
from asnake.client import ASnakeClient

aspace = ASpace(baseurl=as_api, username=as_un, password=as_pw)
client = ASnakeClient(baseurl=as_api, username=as_un, password=as_pw)
client.authorize()

resource_id = input("Enter ASpace URI: ")
excel_filepath = input("Enter full filepath for spreadsheet: ")
wb = load_workbook(excel_filepath)
sheet = wb.active
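# Inferred spreadsheet layout (from the row indexes used below): column A holds
# the archival_object URI; column F holds the new indicator_2 value.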
for row in sheet.iter_rows(min_row=2, values_only=True):
    archival_object = client.get(row[0]).json()
    print("Converting: {} > {} ... ".format(
        archival_object["instances"][0]["sub_container"]["indicator_2"],
        row[5]),
          end='',
          flush=True)
    archival_object["instances"][0]["sub_container"]["indicator_2"] = str(
        row[5])
    update_ao = client.post(row[0], json=archival_object)
    print("Done. Response: {}".format(update_ao.json()))
Example 13
# Note: if this script is re-run to create new digital objects or updated to include additional rows
# (e.g. after changing the do object IDs, or if you attempt to add another row that references a preceding archival object)
# then only the most-recently created digital objects will be linked
# and the previously-created digital objects will be orphaned records.
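# A possible guard against those orphans (not part of the original script):
# look up an existing digital object by identifier before posting and update it
# in place. The indexed field name 'digital_object_id' is an assumption.
def find_existing_do(client, digital_object_id):
    query = json.dumps({"query": {"field": "digital_object_id",
                                  "value": digital_object_id,
                                  "jsonmodel_type": "field_query"}})
    results = client.get("repositories/2/search",
                         params={"page": 1, "aq": query}).json()["results"]
    return json.loads(results[0]["json"])["uri"] if results else None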

# Parse csv and update ArchivesSpace.
for row in csv_dict:
    file_uri = row['fileuri']
    title = row['title']
    digital_object_id = row['objectid']
    ref_ID = row['refID']
    # Construct new digital object from csv
    doRecord = {'title': title, 'digital_object_id': digital_object_id, 'publish': False}
    doRecord['file_versions'] = [{'file_uri': file_uri, 'publish': False, 'file_format_name': 'jpeg'}]

    doPost = client.post('/repositories/2/digital_objects', json=doRecord).json()
    print(doPost)
    # Store uri of newly posted digital objects because we'll need it
    uri = doPost['uri']
    # Find AOs based on refIDs supplied in csv
    AOQuery = json.dumps({
        "query": {
            "jsonmodel_type":"boolean_query",
            "op":"AND",
            "subqueries":[
                {
                    "jsonmodel_type":"field_query",
                    "field":"primary_type",
                    "value":"archival_object",
                    "literal":True
                },
Example 14
                  repository_processing_note)
            raw_input = input("Proceed anyway? y/n?")
            if raw_input == "n":
                break
            else:
                pass
        except Exception:
            pass

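        # has_unpublished_nodes() comes from the surrounding script and is not
        # defined in this excerpt. A rough sketch, assuming the classic /tree
        # endpoint and its JSON shape:
        def has_unpublished_nodes():
            tree = aspace_client.get(resource_uri + '/tree').json()

            def walk(node):
                if not node.get('publish', True):
                    return True
                return any(walk(child) for child in node.get('children', []))

            return walk(tree)
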
        # Test for unpublished nodes; only proceed with publish and export if none.
        if not has_unpublished_nodes():

            #If the finding aid status is already set to publish, just export the EAD
            if "published" in publish_status:
                # Set publish to 'true' for all levels, components, notes, etc.  Same as clicking "publish all" in staff UI
                resource_publish_all = aspace_client.post(resource_uri +
                                                          '/publish')
                print(eadID + ' | resource and all children set to published')
                #Pause for 10 seconds so publish action takes effect...maybe?
                print(eadID +
                      " | Pausing for 10 seconds to index publish action...")
                time.sleep(10.0)
                print(eadID + " | Exporting EAD file...")
                ead = aspace_client.get(id_uri_string + '.xml' +
                                        export_options).text
                f = io.open(destination + eadID + '.xml',
                            mode='w',
                            encoding='utf-8')
                f.write(ead)
                f.close()
                print(eadID + '.xml'
                      ' | ' + resource_id + ' | ' + aspace_id_short + ' | ' +
print("Logged into: " + repo['name'])

print("Getting list of resources...")
resources_list = aspace_client.get(
    "repositories/2/resources?all_ids=true").json()
resources_sorted = sorted(resources_list, reverse=True)

for resource in resources_sorted:

    try:
        resource_json = aspace_client.get("repositories/2/resources/" +
                                          str(resource)).json()
        #print (resource_json)
        resource_uri = resource_json['uri']
        print("updating: " + resource_uri)
        resource_update = aspace_client.post(resource_json['uri'],
                                             json=resource_json)
        response = resource_update.json()
        logger.info('update_resource',
                    action='updating-resource',
                    data={
                        'resource_uri': resource_uri,
                        'response': response
                    })
        print(response['status'])
    except Exception as e:
        print("ERROR: " + str(e))
print("All Done with Resources...")

print("Getting list of archival_objects...")
ao_list = aspace_client.get(
Example 16
import json, time, runtime
from asnake.client import ASnakeClient
from asnake.client.web_client import ASnakeAuthError

# Create a client
client = ASnakeClient()
client.authorize()  # login, using default values

# print instructions
print ("This script will add the container_profiles included in a separate json file to ArchivesSpace.")
input("Press Enter to continue...")

# post container_profiles
print ("The following container profiles have been added to ArchivesSpace:")
jsonfile = open("containerProfiles.json")
jsonfile = json.load(jsonfile)
for container_profile in jsonfile:
    post = client.post("/container_profiles", json=container_profile).json()
    print (post)

print ("You've just completed your first API POST.  Congratulations!")
Example 17
        except Exception:
            properDates = ''
        viafid = response['viafID']
    except Exception:
        label = ''
        viafid = ''
    if viafid != '':
        links = requests.get('http://viaf.org/viaf/' + viafid +
                             '/justlinks.json').json()
        viafid = 'http://viaf.org/viaf/' + viafid
    toPost = {
        "lock_version": lockVersion,
        "names": [{
        "names": [{
            "primary_name": properPrimary.strip(),
            "rest_of_name": properSecondary.strip(),
            "dates": properDates.strip(),
            "sort_name": properName,
            "authorized": True,
            "is_display_name": True,
            "source": "viaf",
            "rules": "dacs",
            "name_order": "inverted",
            "jsonmodel_type": "name_person",
            "authority_id": viafid
        }]
    }
    post = client.post(uri, json=toPost).json()
    print(post)
Example 18

                  str(len(children_of_fake_wrapper_component_dict)) +
                  " Direct Children of Wrapper Component")
            row.append(
                str(len(children_of_fake_wrapper_component_dict)) +
                " children of Wrapper AO")
            print("Reposting children as direct children of resource...")

            #Capture any error responses and save to output CSV report
            success_ao_keys_list = []
            fail_ao_keys_list = []

            for key, value in children_of_fake_wrapper_component_dict.items():
                #Repost AOs as direct children of Resource (to flatten the tree) based on Dict info
                update_resource = aspace_client.post(resource_object.uri +
                                                     '/accept_children',
                                                     params={
                                                         'children[]': key,
                                                         'position': value
                                                     }).json()
                try:
                    if update_resource['status'] == "Updated":
                        print("Status: " + update_resource['status'])
                        success_ao_keys_list.append(key)
                    else:
                        print("NOT UPDATED")
                        fail_ao_keys_list.append(key)
                except KeyError:  # error responses lack a 'status' key
                    print("ERROR REPOSTING AO")
                    fail_ao_keys_list.append(key)

            if len(fail_ao_keys_list) != 0:
                row.append("ERROR Posting AOs: " + str(fail_ao_keys_list))
Example 19
            print('Digital object already exists.')
        else:
            doPost['digital_object_id'] = doid
            doPost['title'] = 'Web crawl of ' + crawl['original']
            doPost['dates'] = [{
                'expression': crawl['timestamp'],
                'date_type': 'single',
                'label': 'creation'
            }]
            doPost['file_versions'] = [{
                'file_uri': crawl['filename'],
                'checksum': crawl['digest'],
                'checksum_method': 'sha-1'
            }]
        if doPost:
            post = client.post('/repositories/2/digital_objects',
                               json=doPost).json()
            print(post)
            doItem = {}
            doItem['digital_object'] = {'ref': post['uri']}
            doItem['instance_type'] = 'digital_object'
            newInstances.append(doItem)
    aoGet = client.get(uri).json()
    existingInstances = aoGet['instances']
    existingInstances = existingInstances + newInstances
    aoGet['instances'] = existingInstances
    aoUpdate = client.post(uri, json=aoGet).json()
    print('The following archival objects have been updated in ArchivesSpace:')
    print(aoUpdate)

# TO DO LATER
# Parse dates for ArchivesSpace record, push to AOs above