Beispiel #1
0
class ASpace():
    '''Convenience entry point to an ArchivesSpace instance.

    Wraps an :class:`ASnakeClient` and exposes API routes as attributes
    (e.g. ``aspace.repositories``) via :meth:`__getattr__`.'''

    # this happens when you call ASpace()
    def __init__(self, **config):
        # Connect to ASpace using .archivessnake.yml (kwargs override it)
        self.client = ASnakeClient(**config)
        self.client.authorize()
        # The version endpoint's text contains "(vX.Y.Z)"; capture just "X.Y.Z".
        # BUGFIX: the group previously included the closing paren
        # (r'\(v?(.+\))'), so self.version came out like "2.8.1)".  Also use
        # search instead of match so a leading product name before the
        # parenthesis (e.g. "ArchivesSpace (v2.8.1)") does not defeat the
        # pattern — re.match only matches at the start of the string.
        m = re.search(r'\(v?(.+)\)', self.client.get('version').text)
        if m:
            self.version = m[1]
        else:
            self.version = 'unknown version'

    def __getattr__(self, attr):
        '''returns the JSONModelRelation representing the route with the same name as the attribute requested.'''
        if attr.startswith('_'):
            # BUGFIX: previously fell through and returned None for dunder /
            # private lookups, which breaks hasattr(), the copy/pickle
            # protocols, and silently hides attribute typos.  __getattr__ must
            # raise AttributeError for names it cannot resolve.
            raise AttributeError(attr)
        return JSONModelRelation("/{}".format(attr), params={"all_ids": True}, client = self.client)

    @property
    def resources(self):
        '''return all resources from every repo.'''
        repo_uris = [r['uri'] for r in self.client.get('repositories').json()]
        # Chain the per-repository pagers into one flat stream of wrapped records.
        for resource in chain(*[self.client.get_paged('{}/resources'.format(uri)) for uri in repo_uris]):
            yield wrap_json_object(resource, self.client)

    @property
    def agents(self):
        '''returns an AgentRelation.'''
        return AgentRelation("/agents", {}, self.client)

    def by_external_id(self, external_id, record_types=None):
        '''return any resources fetched from the 'by-external-id' route.

Note: while the route will return differently depending on how many records are returned,
this method deliberately flattens that out - it will _always_ return a generator, even if only
one record is found.'''
        params = {"eid": external_id}
        if record_types:
            params['type[]'] = record_types

        res = self.client.get('by-external-id', params=params)
        if res.status_code == 404:
            # No matches: a bare return ends the generator, yielding nothing.
            # (The previous `return []` behaved the same but misleadingly
            # suggested a list was returned to the caller.)
            return
        elif res.status_code == 300: # multiple returns, bare list of uris
            yield from (wrap_json_object({"ref": uri}, self.client) for uri in IndexedSet(res.json()))
        elif res.status_code == 200: # single obj, redirects to obj with 303->200
            yield wrap_json_object(res.json(), self.client)
        else:
            raise ASnakeBadReturnCode("by-external-id call returned '{}'".format(res.status_code))
    elif isinstance(repo, dict):  # For pulling all repos from ASpace
        # A full repository record: pull its human-readable code, uri, and
        # numeric id (uris look like /repositories/<n>).
        repo_code = repo['repo_code']
        repo_uri = repo['uri']
        repo_no = repo_uri.split('/')[-1]
    else:
        sys.exit('List of repositories is not valid')

    # For each repository search the API for the terms in the file list
    for search_term in search_terms:
        term = search_term['term']
        regex = search_term['regex']

        # NOTE(review): the "# NOT types:pui" text below is INSIDE the f-string
        # (the trailing backslashes continue the string literal), so that text —
        # including the '#' — is sent to the search backend verbatim rather than
        # acting as a Python comment.  Confirm whether the NOT clause was meant
        # to be active query syntax or removed entirely.
        results = client.get_paged(f'repositories/{repo_no}/search',
                                   params={
                                       "q":
                                       f"primary_type:{primary_types} \
                        # NOT types:pui \
                        AND (title:/{regex}/ OR notes:/{regex}/)"
                                   })
        # Materialize the pager so it can be counted and iterated.
        search_results = list(results)
        print(repo_code, repo_uri, term, len(search_results))

        ## Process the search results for each term
        for result in search_results:
            # Each search hit carries its full record as a JSON string under 'json'.
            json_data = json.loads(result.get('json'))

            matches = []
            # NOTE(review): duplicate of the json.loads call a few lines above —
            # one of the two assignments is redundant and could be removed.
            json_data = json.loads(result.get('json'))

            ## Process title
            title = json_data.get('title')
Beispiel #3
0
        "op":
        "AND",
        "subqueries": [{
            "jsonmodel_type": "field_query",
            "field": "primary_type",
            "value": "agent_person",
            "literal": True
        }, {
            "jsonmodel_type": "field_query",
            "field": "source",
            "value": "viaf",
            "literal": True
        }]
    }
})
ASoutput = list(client.get_paged("/search", params={"filter": query}))
print('Found ' + str(len(ASoutput)) + ' agents.')

# grab uri out of agent
for person in ASoutput:
    uri = person['uri']
    personRecord = client.get(uri).json()
    lockVersion = str(personRecord['lock_version'])
    primary_name = personRecord['names'][0]['primary_name']
    try:
        secondary_name = personRecord['names'][0]['rest_of_name']
    except:
        secondary_name = ''
    try:
        dates = personRecord['names'][0]['dates']
    except:
Beispiel #4
0
                    "value": ref_ID, "literal":True
                },
                {
                    "jsonmodel_type":"field_query",
                    "field":"types",
                    "value":"pui",
                    "literal":True
                }
            ]
        }
    })

    # it can take some time for the posted DOs to be indexed, so...
    showed_up_yet = None
    while not showed_up_yet:
        aoSearch = list(client.get_paged('search', params={"filter": AOQuery}))
        if any(aoSearch):
            showed_up_yet = True
        else:
            print("DOs not present in search yet, waiting a second for the indexer to catch up")
            sleep(1)
    linked_ao_uri = aoSearch[0]['uri']
    # Get and store archival objects from above search
    aoRecord = client.get(linked_ao_uri).json()
    # Find existing instances and create new ones from new digital objects
    exising_instance = aoRecord['instances'][0]
    new_instance = {"instance_type": "digital_object", "digital_object": {"ref": uri}}

    # Merge old and new instances
    instances_new = []
    instances_new.append(exising_instance)
Beispiel #5
0
# provide instructions
print ('This script is used to link all top_containers in a single collection (identified by the ArchivesSpace resource ID number) to a single container_profile (identified by the ArchivesSpace container_profile ID number).')
input('Press Enter to continue...')

# have user enter resource id
# NOTE(review): the repository number (2) is hard-coded into both the endpoint
# and the resource uri below — parameterize if this should run elsewhere.
resource_id = input('Enter resource ID (in this case, you should enter 1): ')

# search for top_containers linked to entered resource id
# The top_containers search endpoint takes an advanced query ('aq') serialized
# as JSON; filtering on collection_uri_u_sstr restricts hits to containers
# linked to the chosen resource.
endpoint = '/repositories/2/top_containers/search'
advanced_query = json.dumps({
    "filter_term": {
        "field": "collection_uri_u_sstr",
        "value": "/repositories/2/resources/" + resource_id,
        "jsonmodel_type":"field_query"}
})
results = list(client.get_paged(endpoint, params={'aq': advanced_query}))

# populate top_containers with the ids of each top_container in search results
# gen_dict_extract is defined elsewhere in this script; it appears to walk the
# result documents and yield every value found under an 'id' key — TODO confirm.
top_containers = []
for value in gen_dict_extract('id', results):
    top_containers.append(value)

# GET each top_container listed in top_containers and add to records
# (the extracted 'id' values are evidently full URIs, since they are passed
# straight to client.get)
records = []
for top_container in top_containers:
    output = client.get(top_container).json()
    records.append(output)

# have user enter container profile id
profile_id = input('Enter container profile ID (I am going to enter 9. You can select another value, as long that ID is in your instance of ArchivesSpace.): ')
Beispiel #6
0
# search AS for archival_object's with level "Web archive"

# Build the boolean query up front as a plain dict: archival_object records
# whose level is "Web archive", restricted to the public-ui ("pui") document
# type.  The query is serialized with json.dumps because it travels as a GET
# parameter in the URL.
_warchive_query = {
    "query": {
        "jsonmodel_type": "boolean_query",
        "op": "AND",
        "subqueries": [
            {"jsonmodel_type": "field_query",
             "field": "primary_type",
             "value": "archival_object",
             "literal": True},
            {"jsonmodel_type": "field_query",
             "field": "level",
             "value": "Web archive",
             "literal": True},
            {"jsonmodel_type": "field_query",
             "field": "types",
             "value": "pui",
             "literal": True},
        ],
    }
}

# get_paged returns an iterator, so wrap in list since we use it multiple times
warchives = list(client.get_paged('search', params={"filter": json.dumps(_warchive_query)}))

print('Found ' + str(len(warchives)) +
      ' archival objects with the instance type "Web archive."')