Example #1
    def __init__(self, **config):
        global log

        if 'config_file' in config:
            self.config = conf.ASnakeConfig(config['config_file'])
        else:
            self.config = conf.ASnakeConfig()

        self.config.update(config)

        # Only a subset of logging config can be supported in config
        # For more complex setups (configuring output format, say),
        # configure logs in Python code prior to loading
        #
        # Properties supported are:
        #    filename, filemode, level, and default_config
        # Default config can be any of the default configurations exposed in logging
        if not log:
            if not logging.already_configured and 'logging_config' in self.config:
                if 'default_config' in self.config['logging_config']:
                    default_logging_config = logging.configurations.get(
                        self.config['logging_config']['default_config'])
                    del self.config['logging_config']['default_config']
                else:
                    default_logging_config = None

                logging.setup_logging(config=default_logging_config,
                                      **self.config['logging_config'])

            log = logging.get_logger(__name__)

        if not hasattr(self, 'session'):
            self.session = Session()
        self.session.headers.update({
            'Accept': 'application/json',
            'User-Agent': 'ArchivesSnake/0.1'
        })
        log.debug("client created")
Example #2
    return {
        k: (json.dumps(v) if k in ('ao_ids', 'component_ids') else v)
        for k, v in row.items()
    }


def chain_aos(for_aos):
    for row in for_aos:
        yield from row['ao_ids']


if __name__ == '__main__':
    args = ap.parse_args()

    setup_logging(filename=args.logfile)
    log = get_logger('map_box_numbers')

    log.info('start')

    aspace = ASpace()
    log.info('aspace_connect')

    # note: fields match the fields in the MySQL query, plus an additional
    # field for the proposed box number
    in_fields = [
        'container_id', 'barcode', 'component_ids', 'ao_ids', 'shared'
    ]
    out_fields = (
        *in_fields[0:2],
        'proposed_box_number',
        *in_fields[2:],
    )
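
    # Added sketch (not in the original, which is truncated here): out_fields is
    # the sort of field list csv.DictWriter expects, and the dict comprehension
    # at the top of this example JSON-encodes the list-valued columns so each
    # fits in a single CSV cell. The filename and row below are made up.
    import csv, json  # assumed to be imported at the top of the original script
    demo_row = {'container_id': 1, 'barcode': 'B-001', 'proposed_box_number': 3,
                'component_ids': ['c1', 'c2'], 'ao_ids': [10, 11], 'shared': 0}
    demo_row = {k: (json.dumps(v) if k in ('ao_ids', 'component_ids') else v)
                for k, v in demo_row.items()}
    with open('demo_boxes.csv', 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=out_fields)
        writer.writeheader()
        writer.writerow(demo_row)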
Example #3
#!/usr/bin/env python
import json, glob, datetime, re, os
import asnake.logging as logging

# set up logging. mutter profanity.

logname = 'logs/uploading_updated_resources_' + datetime.datetime.now(
).strftime('%Y-%m-%d-T-%H-%M') + '.log'

logfile = open(logname, 'w')
logging.setup_logging(stream=logfile)
logger = logging.get_logger('upload_updated_resources')

# add ASnake Client
from asnake.client import ASnakeClient
# validate ASnake client
client = ASnakeClient()
client.authorize()


def upload_updated_resources(file_directory, file_prefix, repo_num):
    '''Change into the directory the user supplied, glob its JSON files, and derive each resource number from the filename using the supplied prefix.'''
    filename_strip = '.*' + file_prefix
    os.chdir(file_directory)
    resources = glob.glob('*.json')
    for file in resources:
        # rstrip('.json') would strip any trailing '.', 'j', 's', 'o' or 'n'
        # characters rather than the extension, so use a regex instead.
        res_num = re.sub(r'\.json$', '', file)
        res_num = re.sub(filename_strip, '', res_num)
        resource = json.load(open(file))
        response = client.post('repositories/' + repo_num + '/resources/' +
                               res_num,
Example #4
from tqdm import tqdm
import datetime
from copy import deepcopy
from asnake.client import ASnakeClient
import asnake.logging as logging
today_date = datetime.datetime.today().strftime('%Y-%m-%d')
logging.setup_logging(filename='extent_type_changer_' + str(today_date) +
                      '.log')
logger = logging.get_logger('extent_type_changes_log')


def main():
    client = ASnakeClient(baseurl='XXXX', username='******', password='******')
    client.authorize()

    changes = {
        'linear_feet': ['Linear Feet', 'linear ft.', 'Linear Foot'],
        'cubic_feet': ['Cubic Feet'],
        'gigabytes': ['Gigabytes']
    }
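
    # Added illustration (not part of the original script): the mapping above is
    # often easier to apply when inverted, so each variant spelling looks up its
    # canonical extent_type, e.g.
    #   variant_to_canonical = {v: k for k, vs in changes.items() for v in vs}
    #   variant_to_canonical['Linear Feet']  # -> 'linear_feet'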

    res_records = (client.get('repositories/2/resources',
                              params={'all_ids': True})).json()
    found_records = set([])

    for record in tqdm(res_records):
        rec_uri = 'repositories/2/resources/{0}'.format(record)
        res_record = client.get(rec_uri).json()
        updated_record = deepcopy(res_record)
        try:
            extents = res_record['extents']
Example #5
    return instance


def populate_skiplists(log_entries):
    for entry in log_entries:
        if entry['event'] in {'create_container', 'skip_container'}:
            temp_id2id[entry['temp_id']] = entry['id']
        if entry['event'] in {'update_ao', 'skip_ao'}:
            ao_processed.add(entry.get('ao_id', entry.get(
                'id', None)))  # id was used in early versions of the script
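
# Added illustration (not part of the original script): populate_skiplists
# expects one JSON object per line of a previous run's log; the field names
# mirror the lookups above, and the values here are made up, e.g.
#   {"event": "create_container", "temp_id": "tmp_001", "id": 12345}
#   {"event": "update_ao", "ao_id": 67890}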


if __name__ == '__main__':
    args = ap.parse_args()
    setup_logging(filename=args.logfile)
    log = get_logger('import_container_data')
    log.info('start_ingest')
    aspace = ASpace()

    # Global variables referenced from local functions
    temp_id2id = {}
    ao_processed = set()
    failures = set()

    if args.skip_via_log:
        with open(expanduser(args.skip_via_log)) as f:
            populate_skiplists(map(json.loads, f))

    ao_sheet, container_sheet = args.excel
    # containers
    for c_row in dictify_sheet(container_sheet):
Example #6
#!/usr/bin/env python

import re
from tqdm import tqdm
import pandas as pd
import datetime
import asnake.logging as logging
today_date = datetime.datetime.today().strftime('%Y-%m-%d')
logging.setup_logging(filename='comma_end_logfile_funct_' + str(today_date) +
                      '.log')
logger = logging.get_logger('comma_end_changes_log')
from asnake.client import ASnakeClient

client = ASnakeClient(baseurl='xxx', username='******', password='******')
client.authorize()


def pattern_matcher(x):
    """Match a resource title that ends with a comma."""
    pattern_match = re.compile(r'^.*,$')
    result = pattern_match.match(x)
    return result
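
# Added usage note (not part of the original script): pattern_matcher returns a
# match object for titles ending in a comma and None otherwise.
assert pattern_matcher('Correspondence, 1901-1910,') is not None
assert pattern_matcher('Correspondence, 1901-1910') is None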


def extract_resources(y):
    """Look for ArchivesSpace resources that match pattern_matcher, then save them in a list and generate a CSV report."""
    if y == 'resources':
        obj_type = 'resource_records'
        all_records = client.get('repositories/2/resources',
                                 params={
                                     'all_ids': True
Example #7
#!/usr/bin/env python
import glob, json, datetime

# Setting up the log
import asnake.logging as logging

logname = 'logs/new_subject_upload_' + datetime.datetime.now().strftime(
    '%Y-%m-%d-T-%H-%M') + '.log'

logfile = open(logname, 'w')
logging.setup_logging(stream=logfile)
logger = logging.get_logger("upload-new-subjects")

# Bring in the client to work at a very basic level.
from asnake.client import ASnakeClient

# Create and authorize the client
client = ASnakeClient()
client.authorize()

# Actually run the upload: log the start of the batch, gather the JSON files, and upload each one. A plain POST is enough because new subjects are being created and no record number is needed.


def upload_json_as_new_subjects(file_dir, batch):
    logger.info("upload_start", batch_name=batch)
    subjects = glob.glob(
        file_dir + "/" + "*.json"
    )  # globs all the .json objects in the directory where the files are located.
    for file in subjects:
        subject = json.load(open(file))
        response = client.post('subjects', json=subject).json()
Example #8
                help='path to print log to')
ap.add_argument('--green_containers',
                help="Excel file with container barcodes of interest")


def top_container_barcodes(excel_filename):
    xl = load_workbook(excel_filename)
    rows = iter(xl.worksheets[0])
    next(rows)  # skip header
    return ",".join(f"'{row[0].value}'" for row in rows)


if __name__ == '__main__':
    args = ap.parse_args()
    setup_logging(filename=args.logfile)
    log = get_logger('green_barcodes_cid2bc_and_components')

    log.info('start')

    conn = pymysql.connect(host=args.host,
                           user=args.user,
                           database=args.database,
                           cursorclass=pymysql.cursors.DictCursor,
                           password=getpass(
                               "Please enter MySQL password for {}: ".format(
                                   args.user)))
    log.info('mysql_connect')

    with open('green_cid2bc_and_components.csv', 'w') as gc2bac_report, conn:
        db = conn.cursor()
Example #9
                log.info('ao_update_failed',
                         ao=res.json(),
                         status_code=res.status_code)
                failures[barcode].append(ao_info)
    else:
        log.info('create_tc_failed',
                 tc=res.json(),
                 status_code=res.status_code)
        for ao_info in ao_infos:
            failures[barcode].append(ao_info)


if __name__ == "__main__":
    args = ap.parse_args()
    setup_logging(filename=args.logfile)
    log = get_logger('barcodes_report')

    log.info('start')

    aspace = ASpace()
    log.info('aspace_connect')

    bc_csv_fields = [
        'original_barcode', 'original_container_id', 'location_id',
        'new_barcode', 'new_container_id', 'box_number', 'component_id',
        'ao_id'
    ]

    loc_csv_fields = ['location_barcode', 'location_id']

    # Barcodes expected to be in first column of single-worksheet excel
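
    # Added sketch (not in the original, which is truncated here): those
    # barcodes could be read with openpyxl along the lines of the snippet
    # below; the 'args.excel' argument name is an assumption.
    #   wb = load_workbook(args.excel)
    #   rows = iter(wb.worksheets[0])
    #   next(rows)  # skip header
    #   barcodes = [row[0].value for row in rows]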
Example #10
#!/usr/bin/env python
import glob, json, datetime

# Setting up the log
import asnake.logging as logging

logname = 'logs/new_subject_upload_' + datetime.datetime.now().strftime(
    '%Y-%m-%d-T-%H-%M') + '.log'

logfile = open(logname, 'w')
logging.setup_logging(stream=logfile)
logger = logging.get_logger('upload-new-subjects')

# Bring in the client to work at a very basic level.
from asnake.client import ASnakeClient

# Create and authorize the client
client = ASnakeClient()
client.authorize()


def upload_json_as_new_subjects(file_dir, batch):
    '''Run the upload: log the start of the batch, gather the JSON files, and upload each one. A plain POST is enough because new subjects are being created and no record number is needed.'''
    logger.info("upload_start", batch_name=batch)
    subjects = glob.glob(
        file_dir + '/' + '*.json'
    )  # globs all the .json objects in the directory where the files are located.
    for file in subjects:
        subject = json.load(open(file))
        response = client.post('subjects', json=subject).json()
        response['title'] = subject['title']
Example #11
                help="Spreadsheet of location attrs")
ap.add_argument('--host', default='localhost', help="host of ASpace database")
ap.add_argument('--user',
                default='pobocks',
                help='MySQL user to run as when connecting to ASpace database')
ap.add_argument('--database',
                default='tuftschivesspace',
                help="Name of MySQL database")
ap.add_argument('--logfile',
                default='create_locations.log',
                help='path to print log to')

if __name__ == "__main__":
    args = ap.parse_args()
    setup_logging(filename=args.logfile)
    log = get_logger('create_locations')

    log.info('start')

    aspace = ASpace()
    log.info('aspace_connect')

    log.info('process_spreadsheet')
    rows = args.spreadsheet.worksheets[0].values
    headers = dict(enumerate(first(rows)))
    JSONS = []

    conn = pymysql.connect(host=args.host,
                           user=args.user,
                           database=args.database,
                           cursorclass=pymysql.cursors.DictCursor,
Example #12
#!/usr/bin/env python
import json, csv, datetime
import asnake.logging as logging

# set up logging. mutter profanity.

logname = 'logs/deleting_subjects_' + datetime.datetime.now().strftime(
    '%Y-%m-%d-T-%H-%M') + '.log'

logfile = open(logname, 'w')
logging.setup_logging(stream=logfile)
logger = logging.get_logger('delete_subjects')

# add ASnake Client
from asnake.client import ASnakeClient
# validate ASnake client
client = ASnakeClient()
client.authorize()

# expects a CSV file with column subject_id of subjects to be deleted


def delete_subjects(data):
    '''Open the CSV file, read the subject_id column, delete each subject, and log the response. The target ID is recorded in case the subject is not found or another error occurs.'''
    with open(data, newline='') as data:
        reader = csv.DictReader(data)
        for row in reader:
            sub_id = str(row['subject_id'])  # cast to str just in case
            response = client.delete('subjects/' + sub_id).json()
            logger.info('delete',
                        target=sub_id,
Example #13
# Sample data:
# location_uri,id
# /locations/6423,6631
# /locations/4025,24592|23842|23232
#  Admin rights seem to be needed to run these updates.
# See README.md for information on preparing the data.

import csv
import datetime

import asnake.logging as logging
from asnake.client import ASnakeClient
client = ASnakeClient()
client.authorize()

logname = 'logs/update_top_containers_' + datetime.datetime.now().strftime(
    '%Y-%m-%d-T-%H-%M') + '.log'
logfile = open(logname, 'w')
logging.setup_logging(stream=logfile)
logger = logging.get_logger('batch-update-top-containers')


def post_batch_updates(csvName, batch, repo_num):
    '''Log the batch name, then open the CSV and read it line by line. For each row, build a list of integer IDs from the 'id' column, post the location update to ASpace, and log the response along with which resources were updated (since that is not part of the ASpace response). The logfile is closed at the end.'''
    logger.info('updates', batch_name=batch)
    with open(csvName, newline='') as data:
        reader = csv.DictReader(data)
        for row in reader:
            # ASpace is really particular that it get a list of integers and no dang strings.
            id_group = [int(i) for i in row['id'].split('|')]
            location = row['location_uri']
            response = client.post('repositories/' + repo_num +
                                   '/top_containers/batch/location',
Example #14
)
ap.add_argument('--host', default='localhost', help="host of ASpace database")
ap.add_argument('--user',
                default='pobocks',
                help='MySQL user to run as when connecting to ASpace database')
ap.add_argument('--database',
                default='tuftschivesspace',
                help="Name of MySQL database")
ap.add_argument('--logfile',
                default='dupe_report.log',
                help='path to print log to')

if __name__ == '__main__':
    args = ap.parse_args()
    setup_logging(filename=args.logfile)
    log = get_logger('report_duplicates')

    log.info('start')

    aspace = ASpace()
    log.info('aspace_connect')

    log.info('end')

    conn = pymysql.connect(host=args.host,
                           user=args.user,
                           database=args.database,
                           cursorclass=pymysql.cursors.DictCursor,
                           password=getpass(
                               "Please enter MySQL password for {}: ".format(
                                   args.user)))
Example #15
#!/usr/bin/python3
#~/anaconda3/bin/python
from asnake.client import ASnakeClient
import asnake.logging as logging

logging.setup_logging(filename="date_update.log", filemode="a")
logger = logging.get_logger("date_updating")

# Log into ASpace and set repo to RL
aspace_client = ASnakeClient(baseurl="[backendURL]",
                             username="******",
                             password="******")
aspace_client.authorize()
repo = aspace_client.get("repositories/2").json()
print("Logged into: " + repo['name'])

print("Getting list of resources...")
resources_list = aspace_client.get(
    "repositories/2/resources?all_ids=true").json()
resources_sorted = sorted(resources_list, reverse=True)

for resource in resources_sorted:

    try:
        resource_json = aspace_client.get("repositories/2/resources/" +
                                          str(resource)).json()
        #print (resource_json)
        resource_uri = resource_json['uri']
        print("updating: " + resource_uri)
        resource_update = aspace_client.post(resource_json['uri'],
                                             json=resource_json)
Example #16
from asnake.aspace import ASpace
import asnake.logging as logging
import argparse
import datetime
import csv

logger = logging.get_logger('upload_accessions')


RELATOR_DICT = {
    'artist': 'art',
    'author': 'aut',
    'donor': 'dnr',
    'editor': 'edt',
    'publisher': 'pbl',
    'translator': 'trl'
}
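
# Added note (not part of the original script): RELATOR_DICT maps spreadsheet
# role labels to MARC relator codes, e.g. RELATOR_DICT['donor'] -> 'dnr';
# presumably it is used when building linked_agents for each accession (the
# rest of the script is not shown here).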

DATE = str(datetime.date.today())


def make_ex_doc(accession):
    eds = []

    if len(accession['external_documents1_title']) >= 1:
        ed_dict = {'jsonmodel_type': 'external_document',
                'location': '',
                'publish': bool,
                'title': ''}
        ed_dict['title'] = accession['external_documents1_title']
Example #17
def dictify_sheet(sheet):
    rows = iter(sheet)
    rowmap = [cell.value.strip() for cell in next(rows) if cell.value]

    for row in rows:
        out = {}
        for idx, header in enumerate(rowmap):
            out[header] = cell_value(row[idx], header)
        yield out
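
# Added usage sketch (not part of the original script): dictify_sheet yields one
# dict per data row, keyed by the stripped header cells, e.g.
#   sheet = load_workbook('containers.xlsx').worksheets[0]  # filename made up
#   for row in dictify_sheet(sheet):
#       row['Container Record ID'], row['Barcode']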


if __name__ == '__main__':
    args = ap.parse_args()
    setup_logging(filename=args.logfile)
    log = get_logger('update_containers')

    aspace = ASpace()

    log.info('start_ingest')

    for row in dictify_sheet(next(iter(args.excel))):
        container = None  # ensures the failure log below works even if the GET raises
        try:
            container = aspace.repositories(args.repo_id).top_containers(
                row['Container Record ID']).json()
            container['barcode'] = row['Barcode']
        except (AttributeError, RuntimeError) as e:
            log.error('FAILED update_container',
                      response=container,
                      data=row,
                      exc_info=e)
Example #18
#!/usr/bin/env python
import re, json, csv, requests, glob, datetime, os
import asnake.logging as logging

# expects a 2-column CSV in which the first column has the resource ID and the second has the subject ID. Multiple subject IDs should be pipe-separated, e.g. "24|133|1313|234" or just 24. These subjects should only be _new_ subjects you're adding.

logname = 'logs/resource_processing_' + datetime.datetime.now().strftime(
    '%Y-%m-%d-T-%H-%M') + '.log'

logfile = open(logname, 'w')
logging.setup_logging(stream=logfile)
logger = logging.get_logger("process-CSV-to-resources")

# add ASnake Client
from asnake.client import ASnakeClient

client = ASnakeClient()
client.authorize()


# WRITE OUT THE ORIGINAL TO ANOTHER DIRECTORY AS A BACKUP. The backup directory is a place where one could take a parameter. Remember to either commit your backups each time or move them entirely; otherwise they will be overwritten the next time you run this.
def quick_backup(resource_id, resource):
    original = "backups/original-" + resource_id + ".json"
    with open(original, "w") as backup:
        json.dump(resource, backup, indent=4)


# This function handles the case where the subject array is empty: it simply builds the array and fills in the subjects.
def no_original_subjects(resource, new_subjects):
    subjects = []
    for subject in new_subjects:
Example #19
from requests import Session
from urllib.parse import urljoin, quote
from numbers import Number
from collections.abc import Sequence, Mapping

import json
import asnake.configurator as conf
import asnake.logging as logging

log = logging.get_logger(__name__)


class ASnakeAuthError(Exception):
    pass


class ASnakeWeirdReturnError(Exception):
    pass


def listlike_seq(seq):
    '''Determine if a thing is a list-like (sequence of values) sequence that's not string-like.'''
    return isinstance(seq, Sequence) and not isinstance(seq, (str, bytes, Mapping))
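
# Added usage note (not part of the original source): listlike_seq is True for
# list/tuple-style sequences and False for strings, bytes, and mappings, e.g.
#   listlike_seq([1, 2, 3])   # -> True
#   listlike_seq('a string')  # -> False
#   listlike_seq({'a': 1})    # -> False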


def http_meth_factory(meth):
Example #20
#!/usr/bin/env python3

import csv
import configparser
import json
import requests
import time

from asnake.client import ASnakeClient
import asnake.logging as logging
from asnake.aspace import ASpace
from configparser import ConfigParser, ExtendedInterpolation

logging.setup_logging(filename='logging.txt', level='INFO', filemode='a')
logger = logging.get_logger()

config = configparser.ConfigParser()
config.read('local_settings.cfg')
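
# Added note (not part of the original script): local_settings.cfg is assumed to
# look something like this (section and key names taken from the lookups below):
#   [ArchivesSpace]
#   baseURL = http://localhost:8089
#   user = admin
#   password = admin
#   repository = 2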

aspace = ASpace(baseurl=config['ArchivesSpace']['baseURL'],
                username=config['ArchivesSpace']['user'],
                password=config['ArchivesSpace']['password'])
repo = aspace.repositories(config['ArchivesSpace']['repository'])


def get_collection():
    """Returns a collection corresponding to an ID provided by user input"""
    try:
        identifier = input('Resource ID: ')
        return repo.resources(int(identifier))
    except Exception as e: