def repair_ark(self, obj):
    """Add the missing ARK identifiers to ``obj`` and save it.

    Looks up the unqualified ARK target for ``obj.noid`` via
    ``self.pidman``, records the ARK in the object's MODS identifiers
    (when the object has a ``mods`` attribute) or in its Dublin Core
    identifier list otherwise, and saves the object.

    Counters updated on ``self``:

    * ``unrepaired_count`` -- the ARK target lookup failed, or the
      ``dry_run`` option is set (the object is modified in memory but
      never saved).
    * ``repaired_count`` -- the object was saved successfully.

    Returns ``None`` in every case.
    """
    try:
        ark_target = self.pidman.get_ark_target(noid=obj.noid, qualifier='')
    except Exception:
        # Best effort: any lookup failure is counted and logged, and the
        # object is skipped.  ``Exception`` (rather than a bare except)
        # lets KeyboardInterrupt / SystemExit still stop the run.
        self.unrepaired_count += 1
        self.log(level=WARNING,
                 message="Failed to find ARK target for %s" % (obj.pid))
        return

    ark_uri = ark_target['access_uri']
    parsed_ark = parse_ark(ark_uri)
    naan = parsed_ark['naan']
    noid = parsed_ark['noid']

    if hasattr(obj, 'mods'):
        # store both the short-form ark and the full resolvable uri
        obj.mods.content.identifiers.extend([
            mods.Identifier(type='ark', text='ark:/%s/%s' % (naan, noid)),
            mods.Identifier(type='uri', text=ark_uri),
        ])
    else:
        # identifier_list is a list field: append to it (it is not callable)
        obj.dc.content.identifier_list.append(ark_uri)

    if self.options['dry_run']:
        # dry run: report what was found, but do not save anything
        self.unrepaired_count += 1
        self.log(message='ARK target found for %s' % obj.pid)
        return

    # save the object with the updated ark
    try:
        self.log(level=INFO, message="Attempting to save %s" % obj.pid)
        obj.save(logMessage='Fixing missing ARK')
        self.repaired_count += 1
    except DigitalObjectSaveFailure:
        self.log(message="An error occurred while saving %s" % (obj.pid))
def test_create_mods(self):
    """Build a MODS record from scratch and check that it is schema-valid.

    Exercises the sub-xmlobject definitions (title info, name, abstract,
    note, origin info, record info, identifiers, access conditions,
    related items, subjects and parts) by populating each one, then
    checks the serialized output and schema validity.
    """
    mymods = mods.MODS()

    # titleInfo subfields
    mymods.create_title_info()
    mymods.title_info.non_sort = 'A '
    mymods.title_info.title = 'Record'
    mymods.title_info.subtitle = ': for testing'
    mymods.title_info.part_number = '1'
    mymods.title_info.part_name = 'first installment'
    mymods.title_info_list.append(mods.TitleInfo(
        non_sort='An ', title='Alternative Title',
        subtitle=': for testing', part_number='1',
        part_name='first installment', label='First line'))

    mymods.resource_type = 'text'

    # name, with name parts and a role
    mymods.create_name()
    mymods.name.type = 'personal'
    mymods.name.authority = 'local'
    mymods.name.name_parts.extend([
        mods.NamePart(type='family', text='Schmoe'),
        mods.NamePart(type='given', text='Joe'),
    ])
    mymods.name.roles.append(
        mods.Role(type='text', authority='local', text='Test Subject'))

    mymods.create_abstract()
    mymods.abstract.text = 'A testing record with made up content.'

    mymods.create_note()
    mymods.note.type = 'general'
    mymods.note.text = 'general note'

    mymods.create_origin_info()
    mymods.origin_info.created.append(mods.DateCreated(date='2001-10-02'))
    mymods.origin_info.issued.append(mods.DateIssued(date='2001-12-01'))

    mymods.create_record_info()
    mymods.record_info.record_id = 'id:1'

    mymods.identifiers.extend([
        mods.Identifier(type='uri', text='http://ur.l'),
        mods.Identifier(type='local', text='332'),
    ])
    mymods.access_conditions.extend([
        mods.AccessCondition(type='restriction', text='unavailable'),
        mods.AccessCondition(type='use', text='Tuesdays only'),
    ])
    mymods.related_items.extend([
        mods.RelatedItem(type='host', title='EU Archives'),
        mods.RelatedItem(type='isReferencedBy', title='Finding Aid'),
    ])
    mymods.subjects.extend([
        mods.Subject(authority='keyword', topic='automated testing'),
        mods.Subject(authority='keyword', topic='test records'),
    ])

    # a single part with volume/issue details and a page extent
    mymods.parts.append(mods.Part())
    mymods.parts[0].details.extend([
        mods.PartDetail(type='volume', number='90'),
        mods.PartDetail(type='issue', number='2'),
    ])
    mymods.parts[0].create_extent()
    mymods.parts[0].extent.unit = 'pages'
    mymods.parts[0].extent.start = '339'
    mymods.parts[0].extent.end = '361'

    xml = mymods.serialize(pretty=True)
    # assertIn replaces the deprecated assert_ alias and reports the
    # serialized content when the check fails
    self.assertIn(b'<mods:mods ', xml)
    self.assertIn(b'xmlns:mods="http://www.loc.gov/mods/v3"', xml)
    self.assertTrue(mymods.is_valid(),
                    "MODS created from scratch should be schema-valid")
def get_default_pid(self):
    '''Default pid logic for DigitalObjects in the Keep.

    When a PID manager client is configured, mint a new ARK through it,
    record the ARK on the object (as MODS identifiers if the object has
    a ``mods`` attribute, otherwise as a Dublin Core identifier) and
    return a pid made of the default pidspace plus the noid portion of
    the ARK.  Without a PID manager, defer to the parent class.
    '''
    if pidman is None:
        # no pid manager available: fall back to default pid behavior
        return super(DigitalObject, self).get_default_pid()

    # pidman wants a target url for the new pid; build one by reversing
    # the new-object view with a placeholder pid token
    token_pid = '%s:%s' % (self.default_pidspace, self.PID_TOKEN)
    view_path = reverse(self.NEW_OBJECT_VIEW, kwargs={'pid': token_pid})
    # reverse() encodes the PID_TOKEN and the ':' - undo that (the url
    # shouldn't contain anything else that needs escaping), then make
    # the path absolute so it includes scheme & server
    target_url = absolutize_url(urllib.unquote(view_path))

    # a name is not required by pidman, but helps when managing pids
    ark_name = self.label
    # request a new ark in the configured pidman domain; pidman returns
    # the full, resolvable ark
    ark = pidman.create_ark(settings.PIDMAN_DOMAIN, target_url,
                            name=ark_name)

    ark_parts = parse_ark(ark)
    naan = ark_parts['naan']    # name authority number
    noid = ark_parts['noid']    # nice opaque identifier

    if not hasattr(self, 'mods'):
        # no mods datastream: add the full uri ARK to dc:identifier
        self.dc.content.identifier_list.append(ark)
    else:
        # mods datastream: store short-form ark and full uri
        short_ark = 'ark:/%s/%s' % (naan, noid)
        self.mods.content.identifiers.extend([
            mods.Identifier(type='ark', text=short_ark),
            mods.Identifier(type='uri', text=ark),
        ])

    # the noid becomes the pid within the configured pidspace
    return '%s:%s' % (self.default_pidspace, noid)
def get_default_pid(self):
    """Return a pid built from a re-used ARK when one is available.

    If no unused pids are cached on ``self._unused_pid_result``, search
    the pid manager for pids whose target is ``UNUSED_PID_URL`` and
    cache the results.  When an unused pid is available, pop it, point
    its ARK name and default target at this object, record the ARK in
    the object metadata (MODS identifiers when the object has a ``mods``
    attribute, and always dc:identifier) and return
    ``<default_pidspace>:<noid>``.  When none are available, defer to
    the parent class.
    """
    # Create the client up front: it is needed below both when results
    # were just fetched and when they were cached by an earlier call.
    # (Binding it only inside the search branch left the name unbound
    # whenever cached results were used.)
    pidman = DjangoPidmanRestClient()

    if not self._unused_pid_result:
        result = pidman.search_pids(target=UNUSED_PID_URL)
        # if any were found, use results
        if result and result['results_count']:
            self._unused_pid_result = result['results']

    if not self._unused_pid_result:
        # if we run out of pids to re-use, fall back to default behavior
        return super(PidReuseDigitalObject, self).get_default_pid()

    # pop one unused pid off the cache and use it
    pid_info = self._unused_pid_result.pop()
    ark = pid_info['targets'][0]['access_uri']
    parsed_ark = parse_ark(ark)
    naan = parsed_ark['naan']  # name authority number
    noid = parsed_ark['noid']  # nice opaque identifier

    # use noid as basis for new pid in the configured pidspace
    pid = '%s:%s' % (self.default_pidspace, noid)

    # calculate target to new object; reverse() returns a path only, so
    # absolutize to get scheme & server also
    target = absolutize_url(
        reverse(self.NEW_OBJECT_VIEW, kwargs={'pid': pid}))

    # update pid ark label from object
    pidman.update_ark(noid, name=self.label)
    # update default ark target for new object url
    pidman.update_ark_target(noid, target_uri=target, active=True)

    # if we have a mods datastream, store the ARK as mods:identifier
    if hasattr(self, 'mods'):
        # store full uri and short-form ark
        self.mods.content.identifiers.extend([
            mods.Identifier(type='ark', text='ark:/%s/%s' % (naan, noid)),
            mods.Identifier(type='uri', text=ark),
        ])

    # always add full uri ARK to dc:identifier
    self.dc.content.identifier_list.append(ark)

    return pid
def test_ark_access_uri(self):
    """Check ``ark_access_uri`` on both DC-based and MODS-based objects.

    For each object type the property is expected to be None before any
    identifiers are added, and to equal ``self.testark`` once the test
    ARK has been added alongside an unrelated uri.
    """
    # Dublin Core object
    dc_object = DcDigitalObject(Mock())
    # nothing in dc yet
    self.assertEqual(None, dc_object.ark_access_uri)
    dc_identifiers = ['http://some.other/uri/foo/', self.testark]
    dc_object.dc.content.identifier_list.extend(dc_identifiers)
    self.assertEqual(self.testark, dc_object.ark_access_uri)

    # MODS object
    mods_object = ModsDigitalObject(Mock())
    # nothing in mods yet
    self.assertEqual(None, mods_object.ark_access_uri)
    other_uri = mods.Identifier(type='uri', text='http://yet.an/other/url')
    ark_uri = mods.Identifier(type='uri', text=self.testark)
    mods_object.mods.content.identifiers.extend([other_uri, ark_uri])
    self.assertEqual(self.testark, mods_object.ark_access_uri)
def get_new_pid(self, obj):
    """Return the pid to use for ``obj``, re-using an unused pid if any.

    Searches the pid manager for pids in the configured rushdie domain
    whose target is the configured "unused" uri.  If at least one is
    found, the first result is re-assigned to ``obj``: its ARK name and
    target are updated, the ARK is recorded on ``obj`` (MODS identifiers
    when ``obj`` has a ``mods`` attribute, otherwise dc:identifier) and
    ``<default_pidspace>:<noid>`` is returned.  Otherwise the result of
    ``obj.get_default_pid()`` is returned.
    """
    # TODO: first, make sure object label is set appropriately before
    # minting new pid or updating an existing one

    # look for unused pids in the rushdie collection that can be
    # re-assigned
    search_result = pidman.search_pids(
        domain_uri=settings.PIDMAN_RUSHDIE_DOMAIN,
        target=settings.PIDMAN_RUSHDIE_UNUSED_URI)
    total_found = search_result.get('results_count', 0)
    logger.debug('Found %d unused rushdie pids' % total_found)

    if not total_found:
        # TEST this: can we use default get next pid for arrangement
        # objects (including email)?
        return obj.get_default_pid()

    # unused pids were found: take the first one
    pid_info = search_result['results'][0]
    noid = pid_info['pid']
    plural = 's' if total_found != 1 else ''
    print('Found %d unused rushdie pid%s, using %s' %
          (total_found, plural, noid))

    # update pid metadata to reflect the updated object, starting with
    # the ark name
    pidman.update_ark(noid=noid, name=obj.label)

    # generate the keep url for this object, using the same logic as
    # keep.common.fedora uses for minting new pids
    pid = ':'.join((obj.default_pidspace, noid))
    view_path = reverse(obj.NEW_OBJECT_VIEW, kwargs={'pid': pid})
    # reverse() encodes the ':', so unquote the url (it shouldn't
    # contain anything else that needs escaping), then absolutize it to
    # include the configured keep domain
    keep_url = absolutize_url(urllib.unquote(view_path))
    # point the existing pid at the new Keep url and ensure it is active
    pidman.update_ark_target(noid=noid, target_uri=keep_url, active=True)

    ark_uri = pid_info['targets'][0]['access_uri']
    naan = parse_ark(ark_uri)['naan']   # name authority number
    # short form of ark identifier
    ark = 'ark:/%s/%s' % (naan, noid)

    # NOTE: adding to the old object metadata is semi useless, since the
    # old object will not be saved in the migration, but it provides
    # convenient access to ark and ark_uri.
    # (this logic is duplicated from the base get_default_pid method)
    if not hasattr(obj, 'mods'):
        # no mods datastream: add full uri ARK to dc:identifier
        obj.dc.content.identifier_list.append(ark_uri)
    else:
        # mods datastream: store short-form ark and full uri
        obj.mods.content.identifiers.extend([
            mods.Identifier(type='ark', text=ark),
            mods.Identifier(type='uri', text=ark_uri),
        ])

    # return the pid to be used
    return pid