import json
import os
import pprint
import re
import subprocess

import requests

#admin and schema are local helper modules used throughout these snippets for
#login/session setup, CSV/text file handling, and schema parsing
import admin
import schema


def update_sc_components(*fields):
    #fields[0] names the CSV column holding the record URI; the remaining
    #arguments name the sub_container keys to update. An optional 'position'
    #column selects which instance to update (defaults to the first).
    values = admin.login()
    csvfile = admin.opencsvdict()
    for row in csvfile:
        record_uri = row[fields[0]]
        fieldlist = fields[1:]
        print(fieldlist)
        print(record_uri)
        record_json = requests.get(values[0] + record_uri,
                                   headers=values[1]).json()
        #'position' is an instance index, not a sub_container field, so it is
        #read here and skipped in the update loop below
        position = int(row['position']) if 'position' in fieldlist else 0
        for field in fieldlist:
            if field == 'position':
                continue
            if field in row:
                print(row[field])
                record_json['instances'][position]['sub_container'][
                    field] = row[field]
        record_data = json.dumps(record_json)
        record_update = requests.post(values[0] + record_uri,
                                      headers=values[1],
                                      data=record_data).json()
        print(record_update)
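#A minimal usage sketch (the CSV layout and column names are illustrative, not
#from the source). With a CSV opened by admin.opencsvdict() that looks like:
#    record_uri,indicator_2,position
#    /repositories/2/archival_objects/123,7,1
#the call update_sc_components('record_uri', 'indicator_2', 'position') would
#set indicator_2 on the second instance of each listed archival object.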
def create_container_profiles():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        name = row[0]
        extent_dimension = row[1]
        height = row[2]
        width = row[3]
        depth = row[4]
        dimension_units = row[5]
        #takes data from spreadsheet and builds JSON
        new_container_profile = {
            'jsonmodel_type': 'container_profile',
            'name': name,
            'extent_dimension': extent_dimension,
            'height': height,
            'width': width,
            'depth': depth,
            'dimension_units': dimension_units
        }
        container_profile_data = json.dumps(new_container_profile)
        #Posts JSON to ArchivesSpace
        create_profile = requests.post(values[0] + '/container_profiles',
                                       headers=values[1],
                                       data=container_profile_data).json()
        #Prints what is happening to IDLE window - will add an error log as well
        print(create_profile)
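#Expected CSV column order, per the indexing above (the example row is
#illustrative): name, extent_dimension, height, width, depth, dimension_units
#    half-size box,width,5,12,15,inches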
def delete_do_instance():
    values = admin.login()
    csvfile = admin.opencsv()
    txtfile = admin.opentxt()
    for row in csvfile:
        archival_object_uri = row[0]
        #if row[1] is empty, every digital object instance is removed;
        #otherwise only the instance linking to that digital object URI
        digital_object_uri = row[1]
        try:
            archival_object_json = requests.get(values[0] + archival_object_uri, headers=values[1]).json()
            instance_list = list(archival_object_json['instances'])
            if digital_object_uri == '':
                for instance in instance_list:
                    if instance['instance_type'] == 'digital_object':
                        archival_object_json['instances'].remove(instance)
            else:
                for instance in instance_list:
                    if 'digital_object' in instance:
                        if instance['digital_object'] == {'ref': digital_object_uri}:
                            archival_object_json['instances'].remove(instance)
            archival_object_data = json.dumps(archival_object_json)
            archival_object_update = requests.post(values[0] + archival_object_uri, headers=values[1], data=archival_object_data).json()
            admin.writetxt(txtfile, archival_object_update)
            print(archival_object_update)
        except Exception:
            txtfile.write('error, could not update ' + str(archival_object_uri) + '\n')
            continue
def create_records(jsonmodel_type, *subrecords):
    values = admin.login()
    csvfile = admin.opencsvdict()
    data_dict = schema.parse_schema()
    #Loops through each row in the CSV
    for row in csvfile:
        new_record = {'jsonmodel_type': jsonmodel_type}
        for k, v in row.items():
            if k in data_dict[jsonmodel_type]:
                new_record[k] = v
        if 'publish' in row.keys():
            if row['publish'] == 'True':
                new_record['publish'] = True
            elif row['publish'] == 'False':
                new_record['publish'] = False
        newdict = dict.fromkeys(subrecords)
        for subrecord in subrecords:
            newdict[subrecord] = [{}]
            if subrecord in data_dict[jsonmodel_type]:
                print(data_dict[jsonmodel_type][subrecord])
                for key in data_dict[jsonmodel_type][subrecord].keys():
                    if key in row.keys():
                        for member in newdict[subrecord]:
                            member.update({key: row[key]})
        new_record.update(newdict)    
        pprint.pprint(new_record)
        record_data = json.dumps(new_record)
        record_create = requests.post(values[0] + row['repo_uri'] + '/' + jsonmodel_type + 's', headers=values[1], data=record_data).json()
        print(record_create)
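#A hedged usage sketch (the jsonmodel_type, subrecord name, and CSV columns are
#assumptions): with a CSV whose header row includes repo_uri plus top-level
#accession fields and extent fields, create_records('accession', 'extents')
#posts each row to <backend><repo_uri>/accessions, nesting any extent columns
#recognized by schema.parse_schema() under the 'extents' subrecord list.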
def get_global_ids(record_type):
    values = admin.login()
    output = admin.opentxt()
    get_ids = requests.get(values[0] + '/' + str(record_type) +
                           '?all_ids=true',
                           headers=values[1]).json()
    #the endpoint returns a JSON list of ids, and write() needs a string
    output.write(str(get_ids))
def create_resources():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        repo_uri = row[0]
        identifier = row[1]
        title = row[2]
        language = row[3]
        level = row[4]
        begin_date = row[5]
        end_date = row[6]
        date_type = row[7]
        date_label = row[8]
        extent_type = row[9]
        extent_portion = row[10]
        extent_number = row[11]
        container_summary = row[12]
        new_resource = {'id_0': identifier, 'title': title, 'language': language, 'level': level,
                        'dates' : [{'begin': begin_date, 'end': end_date, 'date_type': date_type, 'label': date_label, 
                                    'jsonmodel_type': 'date'}],
                        'extents': [{'extent_type': extent_type, 'portion': extent_portion, 'number': extent_number, 
                                     'container_summary': container_summary, 'jsonmodel_type': 'extent'}], 
                        'repository': {'ref': repo_uri}, 'jsonmodel_type': 'resource'}
        resource_data = json.dumps(new_resource)
        print(resource_data)
        resource_create = requests.post(values[0] + repo_uri + '/resources', headers=values[1], data=resource_data).json()
        print(resource_create)
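#Expected CSV column order, per the indexing above: repo_uri, identifier,
#title, language, level, begin_date, end_date, date_type, date_label,
#extent_type, extent_portion, extent_number, container_summary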
def etv_ead():
    values = admin.login()
    inputfile = admin.opentxtin()
    outputfile = admin.opentxt()
    dirpath = admin.setdirectory()
    print('Downloading EAD files to directory')
    for ead_uri in inputfile:
        print('Retrieving ' + str(ead_uri).rstrip())
        get_ead = requests.get(values[0] + ead_uri +
                               '.xml?include_unpublished=true',
                               headers=values[1],
                               stream=True).text
        #Finds URLs with 2-digit repo ids
        if re.search(r'[0-9]', ead_uri[15]):
            outfile = admin.openxml(dirpath, ead_uri[39:].rstrip())
            outfile.write(str(get_ead).rstrip())
        #Others - assumes it's a 1-digit repo id. How many institutions will have more than 99 repositories?
        else:
            outfile = admin.openxml(dirpath, ead_uri[38:].rstrip())
            outfile.write(str(get_ead).rstrip())
    '''the subprocess call cannot take the EAD from AS directly as input. First need to save the file, and then run the 
    transformation over each file'''
    print('Done!')
    print('Transforming EAD files to Yale Best Practices guidelines')
    filelist = os.listdir(dirpath)
    os.makedirs(dirpath + '/outfiles')
    for file in filelist:
        #finds all the EAD files in the working directory
        if file[-3:] == 'xml':
            #haven't changed the hard coding of the command or the xsl file yet
            subprocess.run([
                "java", "-cp",
                "/usr/local/Cellar/saxon/9.8.0.4/libexec/saxon9he.jar",
                "net.sf.saxon.Transform", "-s:" + dirpath + '/' + file,
                "-xsl:" + dirpath +
                "/transformations/yale.aspace_v112_to_yalebpgs.xsl",
                "-o:" + dirpath + '/outfiles/' + file[:-4] + "_out.xml"
            ])
    '''next we need to validate each output file against the EAD 2002 schema and the local Yale schematron'''
    print('Done!')
    print('Validating transformations against EAD 2002 and Schematron schemas')
    newfilelist = os.listdir(dirpath + '/outfiles')
    for outfile in newfilelist:
        subprocess.Popen([
            "/Users/aliciadetelich/git/crux/target/crux-1.3-SNAPSHOT-all.jar",
            "-s", dirpath + "/transformations/yale.aspace.ead2002.sch",
            dirpath + '/outfiles/' + outfile
        ],
                         stdout=outputfile,
                         stderr=subprocess.PIPE,
                         encoding='utf-8')
        subprocess.Popen([
            "/Users/aliciadetelich/git/crux/target/crux-1.3-SNAPSHOT-all.jar",
            dirpath + '/outfiles/' + outfile
        ],
                         stdout=outputfile,
                         stderr=subprocess.PIPE,
                         encoding='utf-8')
    print('All Done! Check outfile for validation report')
def create_instances():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        archival_object_uri = row[0]
        top_container_uri = row[1]
        child_type = row[2]
        child_indicator = row[3]
        grandchild_type = row[4]
        grandchild_indicator = row[5]
        instance_type = row[6]
        archival_object_json = requests.get(values[0] + archival_object_uri,
                                            headers=values[1]).json()
        if grandchild_type != '':
            new_instance = {
                "instance_type": instance_type,
                "jsonmodel_type": "instance",
                "sub_container": {
                    "jsonmodel_type": "sub_container",
                    "indicator_2": child_indicator,
                    "type_2": child_type,
                    "indicator_3": grandchild_indicator,
                    "type_3": grandchild_type,
                    "top_container": {
                        "ref": top_container_uri
                    }
                }
            }
        elif child_type != '':
            new_instance = {
                "instance_type": instance_type,
                "jsonmodel_type": "instance",
                "sub_container": {
                    "jsonmodel_type": "sub_container",
                    "indicator_2": child_indicator,
                    "type_2": child_type,
                    "top_container": {
                        "ref": top_container_uri
                    }
                }
            }
        else:
            new_instance = {
                "instance_type": instance_type,
                "jsonmodel_type": "instance",
                "sub_container": {
                    "jsonmodel_type": "sub_container",
                    "top_container": {
                        "ref": top_container_uri
                    }
                }
            }
        archival_object_json["instances"].append(new_instance)
        archival_object_data = json.dumps(archival_object_json)
        archival_object_update = requests.post(
            values[0] + archival_object_uri,
            headers=values[1],
            data=archival_object_data).json()
        print(archival_object_update)
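#Expected CSV column order, per the indexing above: archival_object_uri,
#top_container_uri, child_type, child_indicator, grandchild_type,
#grandchild_indicator, instance_type. Leaving the grandchild (or child)
#columns empty falls through to the simpler sub_container shapes.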
def get_enumerations():
    values = admin.login()
    output = admin.opentxt()
    enumerations = requests.get(values[0] +
                                '/config/enumerations?all_ids=true',
                                headers=values[1]).json()
    json.dump(enumerations, output, indent=4, separators=(',', ':'))
    output.close()
def create_repositories():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        repo_name = row[0]
        #the repository schema requires repo_code and name; creating a record
        #is a POST, not a DELETE as in the original
        record_json = {'jsonmodel_type': 'repository', 'repo_code': repo_name, 'name': repo_name}
        record_data = json.dumps(record_json)
        record_create = requests.post(values[0] + '/repositories', headers=values[1], data=record_data).json()
        print(record_create)
def delete_subrecord_components():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        record_uri = row[0]
        #a second column naming the subrecord list to clear (e.g. 'notes',
        #'extents') is an assumption; the original issued a DELETE against the
        #whole record, which removes the record rather than its subrecords
        subrecord = row[1]
        record_json = requests.get(values[0] + record_uri, headers=values[1]).json()
        record_json[subrecord] = []
        record_data = json.dumps(record_json)
        record_update = requests.post(values[0] + record_uri, headers=values[1], data=record_data).json()
        print(record_update)
def create_note_bibliography():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        record_uri = row[0]
        #a second column holding the note text is an assumption; the original
        #stub posted the record back unchanged without adding a note
        record_json = requests.get(values[0] + record_uri, headers=values[1]).json()
        new_note = {'jsonmodel_type': 'note_bibliography', 'content': [row[1]]}
        record_json['notes'].append(new_note)
        record_data = json.dumps(record_json)
        record_update = requests.post(values[0] + record_uri, headers=values[1], data=record_data).json()
        print(record_update)
def export_ead(repo_id, resource):
    values = admin.login()
    output = admin.opentxt()
    get_ead = requests.get(values[0] + '/repositories/' + str(repo_id) +
                           '/resource_descriptions/' + str(resource) +
                           '.xml?include_unpublished=true',
                           headers=values[1],
                           stream=True).text
    output.write(str(get_ead))
    output.close()
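#Usage sketch (the ids are illustrative): export_ead(2, 138) fetches the EAD
#for resource 138 in repository 2, including unpublished components, and
#writes it to the file opened via admin.opentxt().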
def create_top_containers():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        barcode = row[0]
        indicator = row[1]
        container_profile_uri = row[2]
        locations = row[3]
        start_date = row[4]
        repo_num = row[5]
        create_tc = {
            'jsonmodel_type': 'top_container',
            'indicator': indicator,
            'container_profile': {
                'ref': container_profile_uri
            },
            'container_locations': [{
                'jsonmodel_type': 'container_location',
                'status': 'current',
                'start_date': start_date,
                'ref': locations
            }],
            'repository': {
                'ref': repo_num
            }
        }
        #barcode is optional; only include it when the CSV provides one
        if barcode != '':
            create_tc['barcode'] = barcode
        tcdata = json.dumps(create_tc)
        tcupdate = requests.post(values[0] + repo_num + '/top_containers',
                                 headers=values[1],
                                 data=tcdata).json()
        print(tcupdate)
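#Expected CSV column order, per the indexing above: barcode (may be empty),
#indicator, container_profile_uri, locations, start_date, repo_num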
def get_enums():
    values = admin.login()
    #scheme = parse_schema()
    enumerations = requests.get(values[0] +
                                '/config/enumerations?all_ids=true',
                                headers=values[1]).json()
    for enumeration in enumerations:
        #want to match the schema keys (and subkeys) with the names
        pprint.pprint(enumeration['name'])
        #if there is a match want to append the whole valuelist as a value of the schema key/subkeys
        pprint.pprint(enumeration['values'])
def link_agent():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        agent_uri = row[0]
        description_uri = row[1]
        description_json = requests.get(values[0] + description_uri, headers=values[1]).json()
        description_json['linked_agents'].append({'ref': agent_uri})
        description_data = json.dumps(description_json)
        description_post = requests.post(values[0] + description_uri, headers=values[1], data=description_data).json()
        print(description_post)
def create_external_documents():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        record_uri = row[0]
        record_json = requests.get(values[0] + record_uri, headers=values[1]).json()
        #title and location columns are assumptions; the external_document
        #schema requires both fields, so the original empty subrecord would
        #not validate
        new_ext_doc = {'jsonmodel_type': 'external_document', 'title': row[1], 'location': row[2]}
        record_json['external_documents'].append(new_ext_doc)
        record_data = json.dumps(record_json)
        record_update = requests.post(values[0] + record_uri, headers=values[1], data=record_data).json()
        print(record_update)
def update_subject_component():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        subject_uri = row[0]
        component_to_update = row[1]
        updated_text = row[2]
        #row[0] is assumed to be the numeric subject id; build the same URI
        #for the GET and the POST so both hit the same record
        subject_json = requests.get(values[0] + '/subjects/' + subject_uri, headers=values[1]).json()
        subject_json[component_to_update] = updated_text
        subject_data = json.dumps(subject_json)
        subject_update = requests.post(values[0] + '/subjects/' + subject_uri, headers=values[1], data=subject_data).json()
        print(subject_update)
def link_records():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        classification = row[0]
        record = row[1]
        new_rec_link = requests.get(values[0] + classification, headers=values[1]).json()
        new_rec_link['linked_records'].append({'ref': record})
        print(new_rec_link)
        new_link_json = json.dumps(new_rec_link)
        new_link_post = requests.post(values[0] + classification, headers=values[1], data=new_link_json).json()
        print(new_link_post)
def create_file_versions():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        record_uri = row[0]
        file_uri = row[1]
        record_json = requests.get(values[0] + record_uri, headers=values[1]).json()
        new_file_version = {'file_uri': file_uri, 'jsonmodel_type': 'file_version'}
        record_json['file_versions'].append(new_file_version)
        record_data = json.dumps(record_json)
        record_update = requests.post(values[0] + record_uri, headers=values[1], data=record_data).json()
        print(record_update)
def update_record_component():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        record_uri = row[0]
        component_to_update = row[1]
        updated_text = row[2]
        record_json = requests.get(values[0] + record_uri, headers=values[1]).json()
        record_json[component_to_update] = updated_text
        record_data = json.dumps(record_json)
        record_update = requests.post(values[0] + record_uri, headers=values[1], data=record_data).json()
        print(record_update)
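#Expected CSV column order, per the indexing above (the example row is
#illustrative): record_uri, component_to_update, updated_text
#    /repositories/2/accessions/45,title,New accession title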
def create_digital_objects():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        repo_uri = row[0]
        digital_object_id = row[1]
        title = row[2]
        new_do = {'digital_object_id': digital_object_id, 'jsonmodel_type': 'digital_object', 
                  'title': title, 'repository': {'ref': repo_uri}}
        do_data = json.dumps(new_do)
        do_create = requests.post(values[0] + repo_uri + '/digital_objects', headers=values[1], data=do_data).json()
        print(do_create)
def create_top_level():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        identifier = row[0]
        title = row[1]
        description = row[2]
        repository = row[3]
        new_class_term = {"identifier": identifier, "title": title, "description": description, "publish": True,
                          "repository": {'ref': repository}}
        new_class_json = json.dumps(new_class_term)
        #post to the repository given in the CSV rather than the hard-coded
        #'/repositories/12' path in the original
        new_class_post = requests.post(values[0] + repository + '/classifications', headers=values[1], data=new_class_json).json()
        print(new_class_post)
def create_dig_object_components():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        repo_uri = row[0]
        parent_uri = row[1]
        component_id = row[2]
        title = row[3]
        new_doc = {'component_id': component_id, 'title': title, 'parent': {'ref': parent_uri},
                   'repository': {'ref': repo_uri}, 'jsonmodel_type': 'digital_object_component'}
        doc_data = json.dumps(new_doc)
        doc_create = requests.post(values[0] + repo_uri + '/digital_object_components', headers=values[1], data=doc_data).json()
        print(doc_create)
def create_accessions():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        repo_uri = row[0]
        identifier = row[1]
        title = row[2]
        accession_date = row[3]
        new_accession = {'id_0': identifier, 'title': title, 'accession_date': accession_date, 'repository': {'ref': repo_uri}, 
                         'jsonmodel_type': 'accession'}
        accession_data = json.dumps(new_accession)
        accession_create = requests.post(values[0] + repo_uri + '/accessions', headers=values[1], data=accession_data).json()
        print(accession_create)
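#Expected CSV column order, per the indexing above: repo_uri, identifier,
#title, accession_date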
def export_eads(repo_id):
    values = admin.login()
    infile = admin.readtxt()
    dirpath = admin.setdirectory()
    for resource in infile:
        get_ead = requests.get(values[0] + '/repositories/' + str(repo_id) +
                               '/resource_descriptions/' + str(resource) +
                               '.xml?include_unpublished=true',
                               headers=values[1],
                               stream=True).text
        outfile = admin.openxml(dirpath, resource)
        outfile.write(str(get_ead))
        outfile.close()
    infile.close()
def update_tc_component():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        top_container_uri = row[0]
        component_to_update = row[1]
        update_text = row[2]
        tc_json = requests.get(values[0] + top_container_uri,
                               headers=values[1]).json()
        tc_json[component_to_update] = update_text
        tc_data = json.dumps(tc_json)
        tc_update = requests.post(values[0] + top_container_uri,
                                  headers=values[1],
                                  data=tc_data).json()
        print(tc_update)
def update_record_pub_status():
    values = admin.login()
    csvfile = admin.opencsv()
    for row in csvfile:
        record_uri = row[0]
        #1 for publish, 0 for unpublish
        updated_status = row[1]
        record_json = requests.get(values[0] + record_uri, headers=values[1]).json()
        if updated_status == '1':
            record_json['publish'] = True
        elif updated_status == '0':
            record_json['publish'] = False
        record_data = json.dumps(record_json)
        record_update = requests.post(values[0] + record_uri, headers=values[1], data=record_data).json()
        print(record_update)
def delete_records():
    values = admin.login()
    csvfile = admin.opencsv()
    txtfile = admin.opentxt()
    for row in csvfile:
        record_uri = row[0]
        try:
            record_json = requests.get(values[0] + record_uri, headers=values[1]).json()
            record_data = json.dumps(record_json)
            delete = requests.delete(values[0] + record_uri, headers=values[1], data=record_data).json()
            admin.writetxt(txtfile, delete)
            print(delete)
        except:
            txtfile.write('error, could not delete ' + str(record_uri))
            continue
def get_global_json(record_type):
    values = admin.login()
    output = admin.opentxt()
    get_ids = requests.get(values[0] + '/' + str(record_type) +
                           '?all_ids=true',
                           headers=values[1]).json()
    for x, i in enumerate(get_ids, 1):
        resource = requests.get(values[0] + '/' + str(record_type) + '/' +
                                str(i),
                                headers=values[1]).json()
        json.dump(resource, output, indent=4, separators=(',', ':'))
        print('Dumping ' + str(x) + ' of ' + str(len(get_ids)))
    output.close()
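#Usage sketch (the endpoint name is illustrative): get_global_json('subjects')
#pulls the full id list via ?all_ids=true, then fetches each record and dumps
#its JSON to the file opened via admin.opentxt().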