Esempio n. 1
0
def test_trees():
    """Exercise resource trees: node types, tree walks, and subtree walks."""
    aspace = ASpace()
    tree = aspace.repositories(2).resources(1).tree
    assert isinstance(tree.children[0], TreeNode)
    walked = list(tree.walk)
    # The first record yielded by walk is the resource record itself.
    assert aspace.repositories(2).resources(1).json() == walked[0].json()
    assert isinstance(walked[1], ComponentObject)
    # Walking a component's own subtree starts at that same component.
    inner = list(walked[1].tree.walk)
    assert walked[1].uri == inner[0].uri
Esempio n. 2
0
class ContainerCreator:
    """Creates ArchivesSpace top containers from rows in a spreadsheet."""

    def __init__(self, spreadsheet):
        config = configparser.ConfigParser()
        config.read('local_settings.cfg')
        self.aspace = ASpace(baseurl=config.get('ArchivesSpace', 'baseURL'),
                             username=config.get('ArchivesSpace', 'username'),
                             password=config.get('ArchivesSpace', 'password'))
        self.repo_id = config.get('ArchivesSpace', 'repository')
        # Active worksheet; assumed layout: col 2 = indicator,
        # col 9 = barcode, col 10 = container profile ref — TODO confirm
        # against the source spreadsheet.
        self.container_data = openpyxl.load_workbook(spreadsheet).active

    @staticmethod
    def _normalize_indicator(value):
        """Return a container indicator of the form "R<number>".

        Strips a literal "Reel " or "R" prefix from ``value`` before
        re-prefixing with "R". The previous implementation used
        ``lstrip("Reel ").lstrip("R")``, which removes any run of the
        characters 'R', 'e', 'l' and ' ' — not the literal prefix — and
        so mangled indicators whose digits follow those letters.
        """
        text = str(value).strip()
        if text.startswith("Reel"):
            text = text[len("Reel"):].lstrip()
        if text.startswith("R"):
            text = text[1:]
        return "R{}".format(text)

    def run(self):
        """
        Creates top containers and returns the URI. Or, if a top container with
        a matching barcode already exists, returns its URI instead.
        """
        out = {}
        for row in self.container_data.iter_rows():
            indicator = self._normalize_indicator(row[2].value)
            barcode = row[9].value if row[9].value else None
            container_data = {"indicator": indicator,
                              "type": "reel",
                              "barcode": barcode,
                              "container_profile": {"ref": row[10].value}}
            new_container = self.aspace.client.post(
                "repositories/{}/top_containers".format(self.repo_id),
                json=container_data).json()
            if new_container.get('error'):
                print(new_container['error'])
                existing = self.get_existing_container(barcode)
                if existing is None:
                    # No match by barcode either; the original crashed here
                    # calling .json() on None. Report and move on instead.
                    print("No existing container found for barcode {}".format(
                        barcode))
                    continue
                new_container = existing.json()
            print({indicator: new_container.get('uri')})
            out[indicator] = new_container.get('uri')
            # Rewrite the full mapping each iteration so progress survives
            # a crash partway through the spreadsheet.
            with open('created.txt', 'w') as out_file:
                out_file.write(json.dumps(out))
        print(out)

    def get_existing_container(self, barcode):
        """Return the first top container matching ``barcode``, or None."""
        results = self.aspace.repositories(self.repo_id).search.with_params(
            q='primary_type:top_container AND barcode_u_sstr:{}'.format(
                barcode))
        return next(iter(results), None)
Esempio n. 3
0
def test_fetch():
    """Repositories resolve to JSONModel relations and objects."""
    aspace = ASpace()
    repos = aspace.repositories
    assert isinstance(repos, JSONModelRelation)
    first = list(repos)[0]
    assert first.jsonmodel_type == "repository"
    # Fetching by the id parsed from the URI yields a concrete object.
    repo_id = first.uri.split("/")[-1]
    assert isinstance(aspace.repositories(repo_id), JSONModelObject)
Esempio n. 4
0
def main():
    """Main function, which is run when this script is executed"""
    args = get_parser().parse_args()
    aspace = ASpace(baseurl=config.get("ArchivesSpace", "baseurl"),
                    username=config.get("ArchivesSpace", "username"),
                    password=config.get("ArchivesSpace", "password"))
    repo = aspace.repositories(config.get("ArchivesSpace", "repository"))
    process_tree(aspace.client, repo.resources(args.resource_id))
class ContainerDeleter:
    """Deletes empty top containers from the configured repository."""

    def __init__(self):
        config = configparser.ConfigParser()
        config.read('local_settings.cfg')
        self.aspace = ASpace(baseurl=config.get('ArchivesSpace', 'baseURL'),
                             username=config.get('ArchivesSpace', 'username'),
                             password=config.get('ArchivesSpace', 'password'))
        self.repo = self.aspace.repositories(
            config.get('ArchivesSpace', 'repository'))

    def run(self):
        """Delete every empty top container and record the deleted URIs.

        The original wrote ``json.dumps(out)`` to deleted.txt, but ``out``
        was never defined in this method, so the first deletion raised a
        NameError. Track the deleted URIs explicitly instead.
        """
        deleted_uris = []
        for container in self.repo.search.with_params(
                q="types:top_container AND empty_u_sbool:true", all_ids=True):
            self.aspace.client.delete(container.uri)
            print(container.uri)
            deleted_uris.append(container.uri)
            # Rewrite the file each iteration so progress survives a crash.
            with open('deleted.txt', 'w') as out_file:
                out_file.write(json.dumps(deleted_uris))
Esempio n. 6
0
def main():
    """Main function, which is run when this script is executed."""
    start_time = time.time()
    parser = get_parser()
    args = parser.parse_args()
    global aspace
    aspace = ASpace(
        baseurl=config.get("ArchivesSpace", "baseURL"),
        username=config.get("ArchivesSpace", "user"),
        password=config.get("ArchivesSpace", "password"),
    )
    global writer
    # Open via a context manager so the spreadsheet is flushed and closed
    # even if process_tree raises; the bare open() in the original leaked
    # the handle. newline="" is the csv module's requirement for writers.
    with open(spreadsheet_path, "w", newline="") as spreadsheet_file:
        writer = csv.writer(spreadsheet_file)
        create_spreadsheet(["Box Number", "Archival Object URI"])
        process_tree(
            args,
            aspace.repositories(config.get(
                "ArchivesSpace", "repository")).resources(args.resource_id))
    elapsed_time = time.time() - start_time
    print("Time Elapsed: " +
          time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))
Esempio n. 7
0
class ContainerDeleter:
    """Removes empty top containers from the configured repository."""

    def __init__(self):
        settings = configparser.ConfigParser()
        settings.read('local_settings.cfg')
        self.aspace = ASpace(baseurl=settings.get('ArchivesSpace', 'baseURL'),
                             username=settings.get('ArchivesSpace', 'username'),
                             password=settings.get('ArchivesSpace', 'password'))
        self.repo = self.aspace.repositories(
            settings.get('ArchivesSpace', 'repository'))

    def run(self):
        """Delete all empty top containers, then append a dated summary."""
        query = "types:top_container AND empty_u_sbool:true"
        delete_count = 0
        for container in self.repo.search.with_params(q=query, all_ids=True):
            self.aspace.client.delete(container.uri)
            print(container.uri)
            delete_count += 1
        summary = "Deleted {} top containers on {}.\n".format(
            delete_count, str(date.today()))
        with open('deleted.txt', 'a') as out_file:
            out_file.write(summary)
Esempio n. 8
0
class LabelPrinter:
    """Builds box-label CSV data for every container in a resource."""

    def __init__(self, resource):
        config = configparser.ConfigParser()
        config.read("local_settings.cfg")
        self.aspace = ASpace(
            baseurl=config.get("ArchivesSpace", "baseURL"),
            username=config.get("ArchivesSpace", "user"),
            password=config.get("ArchivesSpace", "password"),
        )
        self.resource = self.aspace.repositories(2).resources(resource)

    def run(self):
        """Walk the resource tree and collect one label row per object
        that has instances, then write the de-duplicated rows to CSV."""
        label_data = []
        resource_title = self.resource.title
        resource_id = self.get_id()
        for obj in self.resource.tree.walk:
            if len(obj.instances):
                parent = self.get_parent(obj)
                container = self.get_containers(obj)
                print(resource_title, resource_id, parent, container)
                label_data.append(
                    [resource_title, resource_id, parent, container])
        # set() of tuples removes duplicate label rows before writing.
        self.make_csv(set(tuple(row) for row in label_data))

    def make_csv(self, label_data):
        """Write label rows (any iterable of tuples) to box_labels.csv."""
        # newline="" is required by the csv module to avoid blank rows
        # on Windows; the original omitted it.
        with open("box_labels.csv", "w", newline="") as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(
                ["ResourceTitle", "ResourceID", "ParentTitle", "Container"])
            writer.writerows(label_data)

    def get_id(self):
        """
        Gets and returns the id of a resource record.
        """
        number = []
        for n in range(0, 3):
            try:
                # str() guards join() against non-string id components.
                number.append(str(getattr(self.resource, "id_{}".format(n))))
            except AttributeError:
                # id_<n> parts are optional; stop at the first gap.
                break
        return ":".join(number)

    def get_containers(self, obj):
        """
        Iterates through the instances in an object and returns the
        container type and indicator.
        """
        containers = []
        for instance in obj.instances:
            try:
                top_container = instance.sub_container.top_container
                containers.append("{} {}".format(
                    top_container.type.capitalize(), top_container.indicator))
            except KeyError:
                # Instance without a sub_container/top_container: skip it.
                pass
        return ", ".join(containers)

    def get_parent(self, obj):
        """
        Checks whether the object has a parent and returns its title,
        or an empty string when it has none.
        """
        try:
            return obj.parent.title
        except AttributeError:
            return ""
import json
import csv
import datetime
from datetime import timezone
import dateutil.parser
from asnake.aspace import ASpace
                
aspace = ASpace()
repo = aspace.repositories(2)

# Only report accessions created within the last 30 days.
last_month = datetime.datetime.now(timezone.utc) - datetime.timedelta(days=30)

# Context manager replaces the original bare open(), which never closed
# the CSV file; `accession_id` replaces the original `id`, which shadowed
# the builtin.
with open('new_scua_accessions.csv', 'w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    for accession in repo.accessions:
        created = dateutil.parser.parse(accession.create_time)
        if created > last_month:
            accession_id = accession.id_0 + '-' + accession.id_1
            writer.writerow([accession.title, accession.uri, accession_id,
                             accession.create_time])
Esempio n. 10
0
class DataFetcher:
    """Walks a resource tree and writes archival-object data to data.csv."""

    def __init__(self, resource_id):
        config = configparser.ConfigParser()
        config.read('local_settings.cfg')
        if os.path.isfile('data.csv'):
            raise Exception(
                "data.csv already exists and would be overwritten. Please move or delete this file before running the script again."
            )
        self.aspace = ASpace(baseurl=config.get('ArchivesSpace', 'baseURL'),
                             username=config.get('ArchivesSpace', 'username'),
                             password=config.get('ArchivesSpace', 'password'))
        self.repo = self.aspace.repositories(2)
        self.resource_id = int(resource_id)

    def run(self):
        """Write one CSV row per archival object in the resource tree."""
        # Context manager replaces the original bare open(), which leaked
        # the handle and could lose buffered rows on a crash.
        with open('data.csv', 'w', newline='') as csv_file:
            writer = csv.writer(csv_file)
            for record in self.repo.resources(self.resource_id).tree.walk:
                if record.jsonmodel_type == 'archival_object':
                    data = self.get_object_data(record)
                    print(data)
                    writer.writerow(data)

    def get_object_data(self, obj):
        """Assemble the full CSV row for one archival object."""
        return [
            obj.uri, obj.resource.title, obj.ancestors[0].title,
            self.get_title(obj),
            self.get_dates(obj.dates),
            self.get_instances(obj.instances),
            self.get_folders(obj.instances),
            self.get_notes(obj.notes),
            self.get_location(obj.instances)
        ]

    def get_title(self, data):
        """Return the object's title, falling back to its display string."""
        try:
            return data.title
        except (AttributeError, KeyError):
            # Narrowed from a bare except, which also swallowed
            # KeyboardInterrupt and genuine bugs.
            return data.display_string

    def get_dates(self, dates_array):
        """Return a comma-separated string of human-readable dates.

        Prefers each date's expression, then "begin-end", then end alone.
        """
        dates = []
        for date in dates_array:
            try:
                dates.append(date.expression)
            except (AttributeError, KeyError):
                try:
                    dates.append("{}-{}".format(date.begin, date.end))
                except (AttributeError, KeyError):
                    dates.append(date.end)
        return ", ".join(dates)

    def get_instances(self, instances_array):
        """Return display strings for each instance's top container."""
        instances = []
        for instance in instances_array:
            top_container = self.aspace.client.get(
                instance.sub_container.top_container.ref).json()
            instances.append(top_container['display_string'])
        return ", ".join(instances)

    def get_folders(self, instances_array):
        """Return "type indicator" strings for each instance's sub_container."""
        folders = []
        for instance in instances_array:
            folders.append("{} {}".format(instance.sub_container.type_2,
                                          instance.sub_container.indicator_2))
        return ", ".join(folders)

    def get_notes(self, notes_array):
        """Return note text, skipping access and use restriction notes."""
        notes = []
        for note in notes_array:
            if note.type not in ['accessrestrict', 'userestrict']:
                if note.jsonmodel_type == 'note_singlepart':
                    notes.append("{}: {}".format(
                        note.type, " ".join([c for c in note.content])))
                else:
                    notes.append("{}: {}".format(
                        note.type,
                        " ".join([c.content for c in note.subnotes])))
        return ', '.join(notes)

    def get_location(self, instances_array):
        """Return titles of all locations holding the object's containers."""
        locations = []
        for instance in instances_array:
            top_container = self.aspace.client.get(
                instance.sub_container.top_container.ref).json()
            for loc in top_container['container_locations']:
                location = self.aspace.client.get(loc['ref']).json()
                locations.append(location['title'])
        return ", ".join(locations)
Esempio n. 11
0
            out[header] = cell_value(row[idx], header)
        yield out


if __name__ == '__main__':
    args = ap.parse_args()
    setup_logging(filename=args.logfile)
    log = get_logger('update_containers')

    aspace = ASpace()

    log.info('start_ingest')

    for row in dictify_sheet(next(iter(args.excel))):
        # Pre-bind so the except block can safely log a row that failed
        # during the fetch itself; previously `container` was unbound in
        # that case and the handler raised NameError, masking the error.
        container = None
        try:
            container = aspace.repositories(args.repo_id).top_containers(
                row['Container Record ID']).json()
            container['barcode'] = row['Barcode']
        except (AttributeError, RuntimeError) as e:
            log.error('FAILED update_container',
                      response=container,
                      data=row,
                      exc_info=e)
            continue

        if row['Location']:
            container['container_locations'].append(
                JM.container_location(status='current',
                                      start_date=row['Location Start Date'],
                                      ref=f'/locations/{row["Location"]}'))

        res = aspace.client.post(container['uri'], json=container)
# NOTE(review): as_api, as_un and as_pw are assumed to be defined earlier
# in this file (API base URL and credentials) — confirm before running.
aspace = ASpace(baseurl=as_api, username=as_un, password=as_pw)
client = ASnakeClient(baseurl=as_api, username=as_un, password=as_pw)
client.authorize()
# Resource records whose trees are scanned below for "unknown container"
# indicators.
resource_ids = ["/repositories/4/resources/4103", "/repositories/4/resources/4064", "/repositories/4/resources/2798",
                "/repositories/4/resources/1001", "/repositories/4/resources/4048", "/repositories/2/resources/633",
                "/repositories/2/resources/723", "/repositories/2/resources/748", "/repositories/2/resources/414"]
# "/repositories/5/resources/5071" - UA collection - Steve to check with Kat

for resource_id in resource_ids:
    unknown_count = 0
    uri_breakup = resource_id.split("/")
    res_id = uri_breakup[4]
    repo_id = uri_breakup[2]
    try:
        rl_repo = aspace.repositories(repo_id)
        resource_record = rl_repo.resources(res_id).tree
        resource_tree = resource_record.walk
        print(rl_repo.resources(res_id).json()["title"])
        for node in resource_tree:
            ao_json = client.get(node.uri).json()
            for instance in ao_json["instances"]:
                if "sub_container" in instance.keys():
                    indicators = []
                    types = []
                    for key, value in instance["sub_container"].items():
                        if "indicator_" in key:
                            if "unknown container" == value:
                                child_type = "type_" + str(key[-1])
                                indicators.append(key)
                                types.append(child_type)
Esempio n. 13
0
def test_solr_route():
    """Solr search params are accepted on the top_containers route."""
    aspace = ASpace()
    results = aspace.repositories(2).top_containers.search.with_params(
        q="barcode_field:1234")
    list(results)
Esempio n. 14
0
def test_with_params():
    """with_params forwards q and fq to the repository search endpoint."""
    aspace = ASpace()
    search = aspace.repositories(2).search
    list(search.with_params(q="primary_type:resource", fq="publish:true"))
Esempio n. 15
0
class InstanceSplitter:
    """Repoints microform instances at new top containers.

    Reads a JSON mapping of reel indicators ("R<number>") to top container
    URIs and, for each object in a resource tree that has instances,
    rewrites (or appends) instance sub_container refs to match.
    """

    def __init__(self, separator, resource, containers_list):
        # separator: string used to split multi-reel indicators.
        # resource: id of the resource whose tree is processed.
        # containers_list: path to a JSON file mapping "R<n>" -> container URI.
        config = configparser.ConfigParser()
        config.read('local_settings.cfg')
        self.aspace = ASpace(baseurl=config.get('ArchivesSpace', 'baseURL'),
                             username=config.get('ArchivesSpace', 'username'),
                             password=config.get('ArchivesSpace', 'password'))
        self.repo = self.aspace.repositories(
            config.get('ArchivesSpace', 'repository'))
        self.separator = separator
        self.resource = resource
        self.containers_list = containers_list

    def run(self):
        """Walk the resource tree and update each object's instances."""
        with open(self.containers_list, 'r') as cl:
            containers = json.load(cl)
            for obj in self.repo.resources(self.resource).tree.walk:
                if len(obj.instances):
                    data = obj.json()
                    reel_numbers = self.get_reel_numbers(obj.instances)
                    for i, n in enumerate(reel_numbers):
                        try:
                            # Repoint the i-th existing instance at the
                            # mapped container. Note lstrip("R-") strips any
                            # leading 'R'/'-' characters before re-prefixing.
                            data['instances'][i]['sub_container'][
                                'top_container']['ref'] = containers[
                                    "R{}".format(n.lstrip("R-"))]
                        except KeyError:
                            # No mapping for this reel number; report and
                            # move on to the next reel.
                            print(
                                "Could not find top container matching indicator {} for object {}"
                                .format("R{}".format(n.lstrip("R-")), obj.uri))
                            continue
                        except IndexError:
                            # More reels than existing instances: append a
                            # new instance modeled on the first one's type.
                            data['instances'].append({
                                "instance_type":
                                data['instances'][0]['instance_type'],
                                "jsonmodel_type":
                                "instance",
                                "sub_container": {
                                    "jsonmodel_type": "sub_container",
                                    "top_container": {
                                        "ref":
                                        containers["R{}".format(
                                            n.lstrip("R-"))]
                                    }
                                }
                            })
                    updated = self.aspace.client.post(obj.uri,
                                                      json=data).json()
                    try:
                        print(updated['uri'])
                    except KeyError:
                        # Post failed; dump the error response instead.
                        print(updated)

    def get_reel_numbers(self, instances):
        """Return reel indicator strings for all microform instances.

        Fetches each microform instance's top container and splits its
        indicator on self.separator, so one container can yield several
        reel numbers.
        """
        reel_numbers = []
        for instance in instances:
            if instance.instance_type == 'microform':
                top_container = self.aspace.client.get(
                    instance.sub_container.top_container.ref).json()
                for r in top_container.get('indicator').split(self.separator):
                    reel_numbers.append(r)
        return reel_numbers
Esempio n. 16
0
from asnake.client import ASnakeClient
import asnake.logging as logging
from asnake.aspace import ASpace
from configparser import ConfigParser, ExtendedInterpolation

logging.setup_logging(filename='logging.txt', level='INFO', filemode='a')
logger = logging.get_logger()

# The original called configparser.ConfigParser(), but only the class is
# imported above (``from configparser import ConfigParser``), so the module
# name ``configparser`` is unbound here and raised a NameError.
config = ConfigParser()
config.read('local_settings.cfg')

aspace = ASpace(baseurl=config['ArchivesSpace']['baseURL'],
                username=config['ArchivesSpace']['user'],
                password=config['ArchivesSpace']['password'])
repo = aspace.repositories(config['ArchivesSpace']['repository'])


def get_collection():
    """Returns a collection corresponding to an ID provided by user input"""
    try:
        resource_id = int(input('Resource ID: '))
        return repo.resources(resource_id)
    except Exception as e:
        raise Exception("Unable to get collection: {}".format(e))


def has_local_name(agent):
    """Loops through all the agent names and returns True if any of them have
    a source of `local`, `ingest`, `nad`, or `naf`"""
    for name in agent.names:
Esempio n. 17
0
import os
import json
import time
from asnake.aspace import ASpace
from asnake.utils import get_note_text
from configparser import ConfigParser

config = ConfigParser()
config.read("local_settings.cfg")

aspace = ASpace(
    baseurl=config.get("ArchivesSpace", "baseURL"),
    username=config.get("ArchivesSpace", "user"),
    password=config.get("ArchivesSpace", "password"),
)
# The repository handle was assigned twice in the original; once is enough.
repo = aspace.repositories(2)
start_time = time.time()


def note_content_identical(notes):
    """True when at least two of the given notes contain the same text."""
    texts = [" ".join(get_note_text(note, aspace.client)) for note in notes]
    # Fewer unique texts than notes means some notes are identical.
    return len(set(texts)) < len(notes)


def get_resources_notes():
    for object in aspace.resources:
        if object.id_0.startswith('FA') and object.publish:
Esempio n. 18
0
from datetime import datetime
from os.path import exists, expanduser, dirname, realpath
import os
import pprint
import json
from asnake.aspace import ASpace
# NOTE(review): the visible import block above lacks ``logging`` and
# ``sys``; both are used below, so import them here to avoid NameErrors.
import logging
import sys

logname = os.path.dirname(os.path.realpath(__file__)) + "/logs/sch_dos.log"
logging.basicConfig(filename=logname, level=logging.INFO)
main_log = logging.getLogger(__name__)
main_log.setLevel(logging.INFO)

pp = pprint.PrettyPrinter(indent=4)
repo_id = '/repositories/8'

aspace = ASpace()
repo = aspace.repositories(8)
# Confirm the target repository with the operator before publishing.
yn = input("Repository " + repo.name + " continue? Y/N: ")
if yn.lower() != 'y':
    print("... exiting")
    sys.exit(0)
ctr = 0
for do in repo.search.with_params(
        q="primary_type:digital_object AND publish:false"):
    ctr += 1
    # Search results embed the full record as a JSON string under 'json'.
    do_json = json.loads(do.json()['json'])
    do_uri = do_json['uri']
    # Publish the object and every file version attached to it.
    for fv in do_json['file_versions']:
        fv['publish'] = True
    do_json['publish'] = True
    main_log.info("Updating {} [{}]".format(do.title, do_uri))
Esempio n. 19
0
#
# The Special Collections @ DU ingest process requires a uri.txt file
# alongside the digital object files, so that the repository knows from which
# ArchivesSpace record it should be pulling metadata.
#
# This script iterates over a list of folders in a directory (provided by the
# user), searching for the call number in the folder's title and writing the
# URI it finds to that folder's uri.txt file.
#
###############################################################################

import json, os
from asnake.aspace import ASpace

# Client handle and the repository (id 2) whose records are searched below.
AS = ASpace()
repo = AS.repositories(2)


def get_path():
    """Prompt for a directory path and return it once validated."""
    folder = input('Path to the folder containing your objects: ')
    if os.path.isdir(folder):
        return folder
    raise ValueError("Not a directory: {}".format(folder))


# this is a bit janky, but the search API returns frontend *and* PUI results,
# so we need to filter out the PUI results for de-duplication
def search_for(object):
    r = AS.client.get('/repositories/2/search',
                      params={
                          'q': object,
Esempio n. 20
0
def test_stays_ref_on_repr():
    """Taking the repr of a ref-state object must not resolve it."""
    aspace = ASpace()
    agent = aspace.repositories(2).agent_representation
    assert agent.is_ref
    repr(agent)
    # Still a ref after repr — no accidental resolution happened.
    assert agent.is_ref
Esempio n. 21
0
class DateCalculator:
    """Fills in missing dates on archival objects using the ArchivesSpace
    date calculator endpoint."""

    def __init__(self, levels, always_add=False, resource=None):
        config = configparser.ConfigParser()
        config.read('local_settings.cfg')
        self.aspace = ASpace(baseurl=config.get('ArchivesSpace', 'baseURL'),
                             username=config.get('ArchivesSpace', 'username'),
                             password=config.get('ArchivesSpace', 'password'))
        self.repo = self.aspace.repositories(2)
        self.levels = levels if levels else LEVELS
        self.always_add = always_add
        self.resource = resource

    def run(self):
        """Main method for this class, which processes a list of objects."""
        for obj in self.get_objects():
            if (obj.level in self.levels) and (self.is_undated(obj)):
                date = self.calculate_date(obj.uri)
                if date:
                    obj_json = obj.json()
                    if self.always_add:
                        obj_json['dates'] = [date]
                    else:
                        obj_json['dates'].append(date)
                    self.save_obj(obj_json)
                else:
                    # The original printed this unconditionally — even for
                    # objects whose dates were just calculated and saved.
                    print("Cannot calculate dates for {}".format(obj.uri))

    def get_objects(self):
        """
        If a resource is passed, fetches all the archival objects from that
        resource tree, otherwise returns all archival objects.
        """
        objects = self.repo.resources(
            self.resource
        ).tree.walk if self.resource else self.repo.archival_objects
        return objects

    def is_undated(self, obj):
        """
        Returns True unless there is a date object with a date expression that
        doesn't match common patterns for undated materials.
        """
        if not self.always_add:
            if len(obj.dates) > 0:
                for date in obj.dates:
                    # NOTE(review): assumes every date has an expression —
                    # confirm records without one cannot occur here.
                    if date.expression not in [
                            'undated', 'Undated', 'unknown', 'nd', 'n.d.'
                    ]:
                        return False
        return True

    def calculate_date(self, uri):
        """Calls the date calculator endpoint and returns a date object."""
        calculated = self.aspace.client.get('/date_calculator',
                                            params={
                                                'record_uri': uri
                                            }).json()
        date = None
        if 'error' in calculated:
            print(calculated)
        if (calculated.get('min_begin') and calculated.get('max_end')):
            expression = "{}-{}".format(calculated['min_begin'],
                                        calculated['max_end'])
            date = {
                'expression': expression,
                'begin': calculated['min_begin_date'],
                'end': calculated['max_end_date'],
                'date_type': 'inclusive',
                'label': 'creation'
            }
        return date

    def save_obj(self, obj_json):
        """Saves an updated object to ArchivesSpace"""
        updated = self.aspace.client.post(obj_json['uri'], json=obj_json)
        if updated.status_code == 200:
            print("Dates updated for {}".format(obj_json['uri']))
        else:
            print(updated.json())