Ejemplo n.º 1
0
def get_global_ids(record_type):
    """Write the list of all IDs for a global record type to the output file.

    Requests ``/<record_type>?all_ids=true`` from the API and stores the
    decoded response as JSON text in the file returned by
    ``admin.opentxt()``.

    Args:
        record_type: Path segment naming the record type; it is converted
            with ``str()`` before being placed in the URL.
    """
    # Local import so this block does not depend on a file-level import.
    import json

    # values[0] is used as the base URL and values[1] as the request headers.
    values = admin.login()
    output = admin.opentxt()
    try:
        get_ids = requests.get(values[0] + '/' + str(record_type) +
                               '?all_ids=true',
                               headers=values[1]).json()
        # .json() returns a decoded Python object (presumably a list of IDs),
        # which a text file's write() rejects; serialise it explicitly.
        json.dump(get_ids, output)
    finally:
        output.close()
Ejemplo n.º 2
0
def _is_do_instance_to_delete(instance, digital_object_uri):
    """Return True when *instance* should be removed from its archival object."""
    if digital_object_uri == '':
        # No specific digital object requested: drop every digital object instance.
        return instance['instance_type'] == 'digital_object'
    # Otherwise drop only instances that link to the requested digital object.
    return instance.get('digital_object') == {'ref': digital_object_uri}


def delete_do_instance():
    """Remove digital object instances from the archival objects listed in a CSV.

    Each CSV row supplies an archival object URI (column 0) and a digital
    object URI (column 1). When column 1 is empty, every instance whose
    ``instance_type`` is ``'digital_object'`` is removed; otherwise only the
    instances whose ``digital_object`` equals ``{'ref': <column 1>}`` are
    removed. The modified record is posted back to the same URI and the API
    response is logged to the output text file and printed.

    Rows that fail are recorded in the output file (one line per failure) and
    processing continues with the next row.
    """
    # values[0] is used as the base URL and values[1] as the request headers.
    values = admin.login()
    csvfile = admin.opencsv()
    txtfile = admin.opentxt()
    for row in csvfile:
        archival_object_uri = row[0]
        digital_object_uri = row[1]
        try:
            archival_object_json = requests.get(values[0] + archival_object_uri, headers=values[1]).json()
            # Build a new list instead of removing while looping over a copy.
            archival_object_json['instances'] = [
                instance for instance in archival_object_json['instances']
                if not _is_do_instance_to_delete(instance, digital_object_uri)
            ]
            archival_object_data = json.dumps(archival_object_json)
            archival_object_update = requests.post(values[0] + archival_object_uri, headers=values[1], data=archival_object_data).json()
            admin.writetxt(txtfile, archival_object_update)
            print(archival_object_update)
        except Exception:
            # Best-effort batch job: log the failure and move on. Exception
            # (rather than a bare except) lets Ctrl-C still stop the run.
            txtfile.write('error, could not update ' + str(archival_object_uri) + '\n')
            continue
    txtfile.close()
Ejemplo n.º 3
0
def etv_ead(saxon_jar="/usr/local/Cellar/saxon/9.8.0.4/libexec/saxon9he.jar",
            crux_jar="/Users/aliciadetelich/git/crux/target/crux-1.3-SNAPSHOT-all.jar"):
    """Export, transform and validate EAD files.

    The function works in three stages:

    1. For every URI listed in the input text file, download
       ``<uri>.xml?include_unpublished=true`` and save it as an XML file in
       the working directory (named after the last path segment of the URI).
    2. Run the Saxon XSLT transform
       ``<dir>/transformations/yale.aspace_v112_to_yalebpgs.xsl`` over every
       ``.xml`` file in the working directory, writing
       ``<dir>/outfiles/<name>_out.xml``.
    3. Run the crux validator on every file in ``<dir>/outfiles`` twice, once
       with the ``yale.aspace.ead2002.sch`` Schematron and once without
       (presumably validating against the default EAD 2002 schema), sending
       the validators' stdout to the output text file.

    Args:
        saxon_jar: Path to the Saxon HE jar placed on the Java classpath.
        crux_jar: Path to the crux validator, executed directly.
    """
    # values[0] is used as the base URL and values[1] as the request headers.
    values = admin.login()
    inputfile = admin.opentxtin()
    outputfile = admin.opentxt()
    dirpath = admin.setdirectory()
    print('Downloading EAD files to directory')
    for line in inputfile:
        # Strip the line ending so it does not end up inside the request URL.
        ead_uri = str(line).strip()
        if not ead_uri:
            continue
        print('Retrieving' + ead_uri)
        get_ead = requests.get(values[0] + ead_uri +
                               '.xml?include_unpublished=true',
                               headers=values[1],
                               stream=True).text
        # The last path segment is the resource id regardless of how many
        # digits the repository id has.
        resource_id = ead_uri.rsplit('/', 1)[-1]
        outfile = admin.openxml(dirpath, resource_id)
        try:
            outfile.write(str(get_ead).rstrip())
        finally:
            # Close (and so flush) before the transform stage reads the file.
            outfile.close()
    # The transform cannot take the EAD from the API directly as input, so the
    # files are saved first and then transformed one by one.
    print('Done!')
    print('Transforming EAD files to Yale Best Practices guidelines')
    filelist = os.listdir(dirpath)
    outdir = dirpath + '/outfiles'
    # exist_ok lets the function be re-run in the same working directory.
    os.makedirs(outdir, exist_ok=True)
    for filename in filelist:
        # Only the EAD files in the working directory are transformed.
        if filename.endswith('.xml'):
            subprocess.run([
                "java", "-cp",
                saxon_jar,
                "net.sf.saxon.Transform", "-s:" + dirpath + '/' + filename,
                "-xsl:" + dirpath +
                "/transformations/yale.aspace_v112_to_yalebpgs.xsl",
                "-o:" + outdir + '/' + filename[:-4] + "_out.xml"
            ])
    # Next, validate each output file with and without the local Schematron.
    print('Done!')
    print('Validating transformations against EAD 2002 and Schematron schemas')
    for outname in os.listdir(outdir):
        target = outdir + '/' + outname
        commands = (
            [crux_jar, "-s",
             dirpath + "/transformations/yale.aspace.ead2002.sch", target],
            [crux_jar, target],
        )
        for command in commands:
            # run() waits for the validator and drains stderr, so reports are
            # written in order and are complete before the final message.
            result = subprocess.run(command,
                                    stdout=outputfile,
                                    stderr=subprocess.PIPE,
                                    encoding='utf-8')
            if result.returncode != 0 and result.stderr:
                print(result.stderr)
    outputfile.close()
    print('All Done! Check outfile for validation report')
Ejemplo n.º 4
0
def get_enumerations():
    """Save the API's enumeration list to the output text file as JSON.

    Fetches ``/config/enumerations?all_ids=true`` and writes the decoded
    response with a four-space indent and compact ``','`` / ``':'``
    separators, then closes the file.
    """
    # session[0] is used as the base URL, session[1] as the request headers.
    session = admin.login()
    report = admin.opentxt()
    endpoint = session[0] + '/config/enumerations?all_ids=true'
    response = requests.get(endpoint, headers=session[1])
    json.dump(response.json(), report, indent=4, separators=(',', ':'))
    report.close()
Ejemplo n.º 5
0
def export_ead(repo_id, resource):
    """Download one resource description as EAD XML into the output text file.

    Args:
        repo_id: Repository identifier placed in the URL (converted to str).
        resource: Resource identifier placed in the URL (converted to str).
    """
    # session[0] is used as the base URL, session[1] as the request headers.
    session = admin.login()
    target = admin.opentxt()
    path = ('/repositories/{!s}/resource_descriptions/{!s}'
            '.xml?include_unpublished=true').format(repo_id, resource)
    reply = requests.get(session[0] + path,
                         headers=session[1],
                         stream=True)
    target.write(str(reply.text))
    target.close()
Ejemplo n.º 6
0
def get_rids(repo_id=12):
    """Translate input lines into resource description URIs via a CSV lookup.

    The CSV maps column 0 to column 1. Each line of the input text file is
    matched against the column 1 values; for the first CSV key whose value
    matches, ``/repositories/<repo_id>/resource_descriptions/<key>`` is
    written to the output text file. Lines with no match are reported on
    stdout and skipped instead of aborting the run.

    Args:
        repo_id: Repository id used in the generated URIs. Defaults to 12,
            the value that was previously hard-coded.
    """
    csvfile = admin.opencsv()
    txtinput = admin.opentxtin()
    txtoutput = admin.opentxt()
    new_dict = {}
    for row in csvfile:
        new_dict[row[0]] = row[1]
    # Invert the mapping once so each lookup is O(1). setdefault keeps the
    # first key seen for a duplicated value, i.e. the first match.
    key_by_value = {}
    for key, value in new_dict.items():
        key_by_value.setdefault(value, key)
    prefix = '/repositories/' + str(repo_id) + '/resource_descriptions/'
    for line in txtinput:
        line = line.rstrip()
        if line not in key_by_value:
            print('no match found for ' + line)
            continue
        txtoutput.write(prefix + key_by_value[line] + '\n')
    txtoutput.close()
Ejemplo n.º 7
0
def delete_records():
    """Delete every record whose URI is listed in column 0 of the CSV.

    For each row the record is fetched first, then a DELETE request carrying
    the fetched JSON as its body is sent to the same URI. The API response is
    logged to the output text file and printed. Failures are logged (one line
    each) and the loop continues with the next row.
    """
    # values[0] is used as the base URL and values[1] as the request headers.
    values = admin.login()
    csvfile = admin.opencsv()
    txtfile = admin.opentxt()
    for row in csvfile:
        record_uri = row[0]
        try:
            record_json = requests.get(values[0] + record_uri, headers=values[1]).json()
            record_data = json.dumps(record_json)
            delete = requests.delete(values[0] + record_uri, headers=values[1], data=record_data).json()
            admin.writetxt(txtfile, delete)
            print(delete)
        except Exception:
            # Best-effort batch job: log and continue. Catching Exception
            # (not a bare except) keeps Ctrl-C able to abort the deletions.
            txtfile.write('error, could not delete ' + str(record_uri) + '\n')
            continue
    txtfile.close()
Ejemplo n.º 8
0
def get_global_json(record_type):
    """Dump the JSON of every record of a global record type to a text file.

    First requests ``/<record_type>?all_ids=true``, then fetches
    ``/<record_type>/<id>`` for each returned id and appends it to the output
    file as indented JSON, printing a progress line after each record.

    Args:
        record_type: Path segment naming the record type (converted to str).
    """
    # session[0] is used as the base URL, session[1] as the request headers.
    session = admin.login()
    sink = admin.opentxt()
    collection_url = session[0] + '/' + str(record_type)
    all_ids = requests.get(collection_url + '?all_ids=true',
                           headers=session[1]).json()
    for position, record_id in enumerate(all_ids, start=1):
        record = requests.get(collection_url + '/' + str(record_id),
                              headers=session[1]).json()
        json.dump(record, sink, indent=4, separators=(',', ':'))
        print('Dumping ' + str(position) + ' of ' + str(len(all_ids)))
    sink.close()
Ejemplo n.º 9
0
def create_singlepart_notes():
    """Append a single-part note to each record listed in the CSV.

    CSV columns: 0 = record URI, 1 = note text, 2 = note type. The record is
    fetched, a ``note_singlepart`` note is appended to its ``notes`` list, and
    the record is posted back. Each API response is logged and printed.
    """
    # session[0] is used as the base URL, session[1] as the request headers.
    session = admin.login()
    rows = admin.opencsv()
    log = admin.opentxt()
    for row in rows:
        uri, text, kind = row[0], row[1], row[2]
        endpoint = session[0] + uri
        record = requests.get(endpoint, headers=session[1]).json()
        record['notes'].append({
            'jsonmodel_type': 'note_singlepart',
            'content': [text],
            'type': kind,
        })
        payload = json.dumps(record)
        response = requests.post(endpoint, headers=session[1], data=payload).json()
        admin.writetxt(log, response)
        print(response)
Ejemplo n.º 10
0
def delete_notes():
    """Blank out notes identified by a value (e.g. a persistent ID) from a CSV.

    CSV columns: 0 = resource URI, 1 = the value to look for. Every note in
    the resource's ``notes`` list that has any top-level value equal to
    column 1 is emptied in place with ``dict.clear()`` (the now-empty dict
    stays in the list). The record is then posted back and the API response
    is logged and printed.
    """
    # session[0] is used as the base URL, session[1] as the request headers.
    session = admin.login()
    rows = admin.opencsv()
    log = admin.opentxt()
    for row in rows:
        uri, wanted = row[0], row[1]
        endpoint = session[0] + uri
        resource = requests.get(endpoint, headers=session[1]).json()
        # A record without a 'notes' key is posted back unchanged.
        for note in resource.get('notes', []):
            if any(value == wanted for value in note.values()):
                note.clear()
        payload = json.dumps(resource)
        response = requests.post(endpoint, headers=session[1], data=payload).json()
        admin.writetxt(log, response)
        print(response)
Ejemplo n.º 11
0
def create_multipart_notes():
    """Append a published multipart note to each record listed in the CSV.

    CSV columns: 0 = record URI, 1 = note text, 2 = note type. The record is
    fetched, a ``note_multipart`` note with one ``note_text`` subnote is
    appended to its ``notes`` list, and the record is posted back. Each API
    response is logged and printed.

    If the fetched JSON has no usable ``notes`` list, the row is reported on
    stdout and skipped; nothing is posted for it.
    """
    # values[0] is used as the base URL and values[1] as the request headers.
    values = admin.login()
    csvfile = admin.opencsv()
    txtfile = admin.opentxt()
    for row in csvfile:
        record_uri = row[0]
        note_text = row[1]
        note_type = row[2]
        record_json = requests.get(values[0] + record_uri, headers=values[1]).json()
        new_note = {"jsonmodel_type": "note_multipart",
                    "subnotes": [{'content': note_text, 'jsonmodel_type': 'note_text', 'publish': True}],
                    'type': note_type, 'publish': True}
        try:
            record_json['notes'].append(new_note)
        except (KeyError, TypeError, AttributeError):
            # Missing 'notes' key, or a response that is not a record dict.
            # Posting the unchanged JSON back would be pointless, so skip it.
            print('note did not append')
            continue
        record_data = json.dumps(record_json)
        record_update = requests.post(values[0] + record_uri, headers=values[1], data=record_data).json()
        admin.writetxt(txtfile, record_update)
        print(record_update)
    txtfile.close()
Ejemplo n.º 12
0
def update_subrecord_components(subrecord, *field):
    """Update several fields of the first subrecord of each record in a CSV.

    The CSV is read as dictionaries and must have a ``uri`` column. For each
    name in *field* that is also a CSV column, the value from the row is
    written into ``record_json[subrecord][0]``; a ``repository`` column is
    wrapped as ``{'ref': value}``. The record is posted back and the outcome
    is logged. A summary (elapsed time, attempts, successes) is appended to
    the output text file at the end.

    Args:
        subrecord: Key of the subrecord list inside the record JSON.
        *field: Names of the CSV columns / subrecord fields to update.
    """
    starttime = time.time()
    # values[0] is used as the base URL and values[1] as the request headers.
    values = admin.login()
    csvfile = admin.opencsvdict()
    txtout = admin.opentxt()
    x = 0  # update attempts
    y = 0  # responses that contained a 'status' key
    for row in csvfile:
        x = x + 1
        record_uri = row['uri']
        try:
            record_json = requests.get(values[0] + record_uri, headers=values[1]).json()
            for f in field:
                if f in row:
                    value = row[f]
                    # Only the first subrecord is updated: there is no
                    # identifier available to address the others.
                    if f == 'repository':
                        record_json[subrecord][0][f] = {'ref': value}
                    else:
                        record_json[subrecord][0][f] = value
            record_data = json.dumps(record_json)
            record_update = requests.post(values[0] + record_uri, headers=values[1], data=record_data).json()
            print(record_update)
            if 'status' in record_update:
                y = y + 1
            if 'error' in record_update:
                txtout.write('error: could not update ' + str(record_uri) + '\n')
                txtout.write('log: ' + str(record_update.get('error')) + '\n')
        except Exception:
            # Best-effort batch job: log and continue. Exception (not a bare
            # except) keeps Ctrl-C able to stop the run.
            txtout.write('could not locate object ' + str(record_uri) + '\n')
            continue
    elapsedtime = time.time() - starttime
    m, s = divmod(elapsedtime, 60)
    h, m = divmod(m, 60)
    txtout.write('Total time elapsed: ')
    txtout.write('%d:%02d:%02d' % (h, m, s))
    txtout.write('\n' + 'Total update attempts: ' + str(x) + '\n')
    txtout.write('Records updated successfully: ' + str(y) + '\n')
    txtout.close()
Ejemplo n.º 13
0
def replace_note_by_id():
    """Replace the text of notes selected by persistent ID.

    CSV columns: 0 = resource URI, 1 = persistent ID, 2 = new note text.
    For a matching ``note_multipart`` note the content of its first subnote
    is replaced; for a matching ``note_singlepart`` note the ``content`` list
    is replaced with a one-element list. Other note types are left alone.
    The record is posted back and the API response is logged and printed.
    """
    # session[0] is used as the base URL, session[1] as the request headers.
    session = admin.login()
    rows = admin.opencsv()
    log = admin.opentxt()
    for row in rows:
        uri, wanted_id, replacement = row[0], row[1], row[2]
        endpoint = session[0] + uri
        resource = requests.get(endpoint, headers=session[1]).json()
        for note in resource['notes']:
            kind = note['jsonmodel_type']
            if kind not in ('note_multipart', 'note_singlepart'):
                continue
            if note['persistent_id'] != wanted_id:
                continue
            if kind == 'note_multipart':
                note['subnotes'][0]['content'] = replacement
            else:
                note['content'] = [replacement]
        payload = json.dumps(resource)
        response = requests.post(endpoint, headers=session[1], data=payload).json()
        admin.writetxt(log, response)
        print(response)
Ejemplo n.º 14
0
def create_rights_restrictions():
    """Attach a rights restriction to notes selected by persistent ID.

    CSV columns: 0 = record URI, 1 = persistent ID, 2 = begin, 3 = end,
    4 = local access restriction type, 5 = restriction note type. Every note
    of the record whose ``persistent_id`` matches gets its
    ``rights_restriction`` set to the new restriction. The record is posted
    back and the API response is logged and printed.
    """
    # session[0] is used as the base URL, session[1] as the request headers.
    session = admin.login()
    rows = admin.opencsv()
    log = admin.opentxt()
    for row in rows:
        uri, wanted_id, begin, end, local_type, note_type = (
            row[index] for index in range(6))
        endpoint = session[0] + uri
        record = requests.get(endpoint, headers=session[1]).json()
        restriction = {
            'begin': begin,
            'end': end,
            'local_access_restriction_type': [local_type],
            'restriction_note_type': note_type,
            'jsonmodel_type': 'rights_restriction',
        }
        matching_notes = [note for note in record['notes']
                          if note['persistent_id'] == wanted_id]
        for note in matching_notes:
            note['rights_restriction'] = restriction
        payload = json.dumps(record)
        response = requests.post(endpoint, headers=session[1], data=payload).json()
        admin.writetxt(log, response)
        print(response)
Ejemplo n.º 15
0
def update_subrecord_component(subrecord, component):
    """Set one field on every subrecord of each record listed in the CSV.

    CSV columns: 0 = record URI, 1 = new value. For each row the record is
    fetched, ``component`` is set to the new value on every entry of
    ``resource_json[subrecord]``, and the record is posted back once. The
    outcome is logged, and a summary (elapsed time, attempts, successes) is
    appended to the output text file at the end.

    Args:
        subrecord: Key of the subrecord list inside the record JSON.
        component: Field name to overwrite on each subrecord.
    """
    starttime = time.time()
    # values[0] is used as the base URL and values[1] as the request headers.
    values = admin.login()
    csvfile = admin.opencsv()
    txtout = admin.opentxt()
    x = 0  # update attempts
    y = 0  # responses that contained a 'status' key
    for row in csvfile:
        x = x + 1
        resource_uri = row[0]
        updated_text = row[1]
        try:
            resource_json = requests.get(values[0] + resource_uri, headers=values[1]).json()
            subrecords = resource_json[subrecord]
            if not subrecords:
                # Nothing to change, so nothing is posted for this record.
                continue
            # Careful: every subrecord of this kind is overwritten.
            for entry in subrecords:
                entry[component] = updated_text
            # Post once, after all subrecords are updated, so the record is
            # not re-sent repeatedly from the same fetched JSON and the
            # success counter counts records, not subrecords.
            resource_data = json.dumps(resource_json)
            resource_update = requests.post(values[0] + resource_uri, headers=values[1], data=resource_data).json()
            print(resource_update)
            if 'status' in resource_update:
                y = y + 1
            if 'error' in resource_update:
                txtout.write('error: could not update ' + str(resource_uri) + '\n')
                # NOTE: the original author flagged this log line as not working.
                txtout.write('log: ' + str(resource_update.get('error')) + '\n')
        except Exception:
            # Best-effort batch job: log and continue. Exception (not a bare
            # except) keeps Ctrl-C able to stop the run.
            txtout.write('could not locate object ' + str(resource_uri) + '\n')
            continue
    elapsedtime = time.time() - starttime
    m, s = divmod(elapsedtime, 60)
    h, m = divmod(m, 60)
    txtout.write('Total time elapsed: ')
    txtout.write('%d:%02d:%02d' % (h, m, s))
    txtout.write('\n' + 'Total update attempts: ' + str(x) + '\n')
    txtout.write('Records updated successfully: ' + str(y) + '\n')
    txtout.close()