Esempio n. 1
0
    def repair_ark(self, obj):
        '''Look up the ARK for an object in the pid manager and store it
        in the object's metadata.

        The ARK target is requested from ``self.pidman`` by ``obj.noid``
        (unqualified target).  If the lookup fails, the object is counted
        as unrepaired and nothing else is done.  Otherwise the ARK is
        added to the MODS identifiers (short-form ark and full uri) when
        the object has a ``mods`` attribute, or to the Dublin Core
        identifier list when it does not.  Unless the ``dry_run`` option
        is set, the object is then saved.

        :param obj: digital object to repair; must provide ``noid``,
            ``pid``, ``save`` and either ``mods`` or ``dc``
        '''
        try:
            ark_target = self.pidman.get_ark_target(noid=obj.noid,
                                                    qualifier='')
        except Exception:
            # Any lookup failure is treated as "no ARK to repair with";
            # Exception (rather than a bare except) lets KeyboardInterrupt
            # and SystemExit still stop the script.
            self.unrepaired_count += 1
            self.log(level=WARNING,
                     message="Failed to find ARK target for %s" % (obj.pid))
            return

        access_uri = ark_target['access_uri']
        parsed_ark = parse_ark(access_uri)
        naan = parsed_ark['naan']
        noid = parsed_ark['noid']

        if hasattr(obj, 'mods'):
            # store both the short-form ark and the full resolvable uri
            obj.mods.content.identifiers.extend([
                mods.Identifier(type='ark', text='ark:/%s/%s' % (naan, noid)),
                mods.Identifier(type='uri', text=access_uri)
            ])
        else:
            # no MODS available: add the full uri ARK to dc:identifier
            # (identifier_list is a list field, so it must be appended to,
            # not called)
            obj.dc.content.identifier_list.append(access_uri)

        if self.options['dry_run']:
            # dry run: report what was found, but do not save
            self.unrepaired_count += 1
            self.log(message='ARK target found for %s' % obj.pid)
            return

        # save the collection object w/ updated ark
        try:
            self.log(level=INFO, message="Attempting to save %s" % obj.pid)
            obj.save(logMessage='Fixing missing ARK')
            self.repaired_count += 1
        except DigitalObjectSaveFailure:
            # NOTE(review): a failed save is logged but not added to
            # unrepaired_count -- confirm whether that is intended.
            self.log(message="An error occurred while saving %s" % (obj.pid))
Esempio n. 2
0
    def test_create_mods(self):
        '''Build a MODS record from scratch and check that it serializes
        with the MODS namespace and is schema-valid.

        Exercises the sub-xmlobject definitions (title info, name, abstract,
        note, origin info, record info, identifiers, access conditions,
        related items, subjects and parts) by populating each of them.
        '''
        mymods = mods.MODS()
        mymods.create_title_info()
        # titleInfo subfields
        mymods.title_info.non_sort = 'A '
        mymods.title_info.title = 'Record'
        mymods.title_info.subtitle = ': for testing'
        mymods.title_info.part_number = '1'
        mymods.title_info.part_name = 'first installment'
        mymods.title_info_list.append(mods.TitleInfo(
            non_sort='An ', title='Alternative Title',
            subtitle=': for testing', part_number='1',
            part_name='first installment', label='First line'))
        mymods.resource_type = 'text'
        # name with name parts and a role
        mymods.create_name()
        mymods.name.type = 'personal'
        mymods.name.authority = 'local'
        mymods.name.name_parts.extend([
            mods.NamePart(type='family', text='Schmoe'),
            mods.NamePart(type='given', text='Joe')])
        mymods.name.roles.append(mods.Role(type='text', authority='local',
                                           text='Test Subject'))
        # abstract and note
        mymods.create_abstract()
        mymods.abstract.text = 'A testing record with made up content.'
        mymods.create_note()
        mymods.note.type = 'general'
        mymods.note.text = 'general note'
        # origin info dates and record info
        mymods.create_origin_info()
        mymods.origin_info.created.append(mods.DateCreated(date='2001-10-02'))
        mymods.origin_info.issued.append(mods.DateIssued(date='2001-12-01'))
        mymods.create_record_info()
        mymods.record_info.record_id = 'id:1'
        # repeatable top-level fields
        mymods.identifiers.extend([
            mods.Identifier(type='uri', text='http://ur.l'),
            mods.Identifier(type='local', text='332')])
        mymods.access_conditions.extend([
            mods.AccessCondition(type='restriction', text='unavailable'),
            mods.AccessCondition(type='use', text='Tuesdays only')])
        mymods.related_items.extend([
            mods.RelatedItem(type='host', title='EU Archives'),
            mods.RelatedItem(type='isReferencedBy', title='Finding Aid')])
        mymods.subjects.extend([
            mods.Subject(authority='keyword', topic='automated testing'),
            mods.Subject(authority='keyword', topic='test records')])
        # part with details and a page extent
        mymods.parts.append(mods.Part())
        mymods.parts[0].details.extend([
            mods.PartDetail(type='volume', number='90'),
            mods.PartDetail(type='issue', number='2')])
        mymods.parts[0].create_extent()
        mymods.parts[0].extent.unit = 'pages'
        mymods.parts[0].extent.start = '339'
        mymods.parts[0].extent.end = '361'

        xml = mymods.serialize(pretty=True)
        # assertIn replaces the deprecated assert_ alias (removed from
        # unittest in Python 3.12)
        self.assertIn(b'<mods:mods ', xml)
        self.assertIn(b'xmlns:mods="http://www.loc.gov/mods/v3"', xml)

        self.assertTrue(mymods.is_valid(), "MODS created from scratch should be schema-valid")
Esempio n. 3
0
    def get_default_pid(self):
        '''Generate the default pid for a new DigitalObject in the Keep.

        When a pid manager client is configured, a new ARK is created
        there, the ARK is recorded in the object's MODS identifiers (if
        the object has a ``mods`` attribute) or otherwise in the Dublin
        Core identifier list, and the pid is built from the
        ``default_pidspace`` and the noid portion of the ARK.  Without a
        pid manager, the parent class's default pid logic is used.
        '''
        if pidman is None:
            # if pidmanager is not available, fall back to default pid behavior
            return super(DigitalObject, self).get_default_pid()

        # pidman wants a target for the new pid: build a pidman-ready
        # target url for the named view, using the pid token in place of
        # a real pid
        token_pid = '%s:%s' % (self.default_pidspace, self.PID_TOKEN)
        view_path = reverse(self.NEW_OBJECT_VIEW, kwargs={'pid': token_pid})
        # reverse() encodes the PID_TOKEN and the :, so unquote the url
        # (it shouldn't contain anything else that needs escaping);
        # reverse() also returns only a path, so absolutize the result to
        # get scheme & server as well
        target_url = absolutize_url(urllib.unquote(view_path))

        # a name is not required by pidman, but helps when managing pids
        label = self.label
        # request a new ark in the configured pidman domain;
        # pidman returns the full, resolvable ark
        full_ark = pidman.create_ark(settings.PIDMAN_DOMAIN, target_url,
                                     name=label)

        # split the ark into its parts (nma, naan, and noid)
        ark_parts = parse_ark(full_ark)
        naan = ark_parts['naan']  # name authority number
        noid = ark_parts['noid']  # nice opaque identifier

        if not hasattr(self, 'mods'):
            # no mods datastream: add the full uri ARK to dc:identifier
            self.dc.content.identifier_list.append(full_ark)
        else:
            # mods datastream: store short-form ark and full uri as
            # mods:identifier
            short_ark = mods.Identifier(type='ark',
                                        text='ark:/%s/%s' % (naan, noid))
            uri_ark = mods.Identifier(type='uri', text=full_ark)
            self.mods.content.identifiers.extend([short_ark, uri_ark])

        # the noid becomes the pid within the configured pidspace
        return '%s:%s' % (self.default_pidspace, noid)
Esempio n. 4
0
    def get_default_pid(self):
        '''Generate a pid by re-using an unused ARK from the pid manager.

        If no unused pids are cached on ``self._unused_pid_result``, the
        pid manager is searched for pids whose target is
        ``UNUSED_PID_URL`` and any results are cached.  When an unused pid
        is available, one is popped from the cache, its ARK name and
        default target are updated for this object, the ARK is stored in
        the object's metadata, and the new pid is returned.  When none are
        available, the parent class's default pid logic is used.
        '''
        # Create the client up front: it is needed both for searching and
        # for updating the re-used ark.  Binding it only inside the search
        # branch left it undefined whenever cached results were still
        # available from an earlier call.
        pidman = DjangoPidmanRestClient()

        if not self._unused_pid_result:
            result = pidman.search_pids(target=UNUSED_PID_URL)
            # if any were found, use results
            if result and result['results_count']:
                self._unused_pid_result = result['results']

        if not self._unused_pid_result:
            # if we run out of pids to re-use, fall back to default behavior
            return super(PidReuseDigitalObject, self).get_default_pid()

        # we have unused pids: pop one off and use it
        pid_info = self._unused_pid_result.pop()
        ark = pid_info['targets'][0]['access_uri']
        parsed_ark = parse_ark(ark)
        naan = parsed_ark['naan']  # name authority number
        noid = parsed_ark['noid']  # nice opaque identifier

        # use noid as basis for new pid
        pid = '%s:%s' % (self.default_pidspace, noid)
        # calculate target to new object
        target = reverse(self.NEW_OBJECT_VIEW, kwargs={'pid': pid})
        # reverse() returns a full path - absolutize so we get scheme & server also
        target = absolutize_url(target)
        # update pid ark label from object
        pidman.update_ark(noid, name=self.label)
        # update default ark target for new object url
        pidman.update_ark_target(noid, target_uri=target, active=True)

        # if we have a mods datastream, store the ARK as mods:identifier
        if hasattr(self, 'mods'):
            # store full uri and short-form ark
            self.mods.content.identifiers.extend([
                mods.Identifier(type='ark', text='ark:/%s/%s' % (naan, noid)),
                mods.Identifier(type='uri', text=ark)
            ])

        # always add full uri ARK to dc:identifier
        self.dc.content.identifier_list.append(ark)

        # the pid is the noid in the configured pidspace
        return pid
Esempio n. 5
0
    def test_ark_access_uri(self):
        '''Check ``ark_access_uri`` for both a Dublin Core object and a
        MODS object: None before any identifier is set, and the test ARK
        once it is present alongside another, unrelated uri.'''
        test_ark = self.testark

        # --- object with Dublin Core metadata ---
        dc_object = DcDigitalObject(Mock())
        # nothing in dc yet, so no ark uri
        self.assertEqual(None, dc_object.ark_access_uri)
        dc_identifiers = dc_object.dc.content.identifier_list
        dc_identifiers.extend(['http://some.other/uri/foo/', test_ark])
        self.assertEqual(test_ark, dc_object.ark_access_uri)

        # --- object with MODS metadata ---
        mods_object = ModsDigitalObject(Mock())
        # nothing in mods yet, so no ark uri
        self.assertEqual(None, mods_object.ark_access_uri)
        other_uri = mods.Identifier(type='uri', text='http://yet.an/other/url')
        ark_uri = mods.Identifier(type='uri', text=test_ark)
        mods_object.mods.content.identifiers.extend([other_uri, ark_uri])
        self.assertEqual(test_ark, mods_object.ark_access_uri)
Esempio n. 6
0
    def get_new_pid(self, obj):
        '''Determine the pid to use for an object, re-using an unused
        rushdie pid from the pid manager when one is available.

        Searches the pid manager for pids in the configured rushdie domain
        whose target is the configured "unused" uri.  If any are found,
        the first one is re-assigned: its ARK name and target are updated
        for ``obj``, the ARK is added to the metadata of ``obj`` (MODS
        identifiers if ``obj`` has a ``mods`` attribute, otherwise the
        Dublin Core identifier list), and the resulting pid is returned.
        If none are found, ``obj.get_default_pid()`` is returned instead.

        :param obj: object that needs a pid; must provide ``label``,
            ``default_pidspace`` and ``NEW_OBJECT_VIEW``
        :returns: pid string
        '''
        # TODO: first, make sure object label is set appropriately before
        # minting new pid or updating an existing one

        # check to see if there are any unused pids in the rushdie collection
        # that can be re-assigned
        unused_pids = pidman.search_pids(
            domain_uri=settings.PIDMAN_RUSHDIE_DOMAIN,
            target=settings.PIDMAN_RUSHDIE_UNUSED_URI)

        total_found = unused_pids.get('results_count', 0)
        logger.debug('Found %d unused rushdie pids', total_found)

        if not total_found:
            # TEST this: can we use default get next pid for arrangement
            # objects (including email)?
            return obj.get_default_pid()

        # unused pids were found: use the first one
        next_pid = unused_pids['results'][0]
        noid = next_pid['pid']

        # single parenthesized argument: prints the same text whether
        # print is a statement (Python 2) or a function
        print('Found %d unused rushdie pid%s, using %s' %
              (total_found, 's' if total_found != 1 else '', noid))

        # update pid metadata to reflect the updated object:
        # first update the ark name to match the current object
        pidman.update_ark(noid=noid, name=obj.label)

        # then update the ark target and ensure it is active.
        # generate the keep url for this object, using the same logic
        # in keep.common.fedora for minting new pids
        pid = ':'.join([obj.default_pidspace, noid])
        target = reverse(obj.NEW_OBJECT_VIEW, kwargs={'pid': pid})
        # reverse() encodes the PID_TOKEN and the :, so just unquote the url
        # (shouldn't contain anything else that needs escaping)
        target = urllib.unquote(target)
        # absolutize the url to include configured keep domain
        target = absolutize_url(target)
        # update the existing pid with the new Keep url
        pidman.update_ark_target(noid=noid, target_uri=target, active=True)

        ark_uri = next_pid['targets'][0]['access_uri']
        parsed_ark = parse_ark(ark_uri)
        naan = parsed_ark['naan']  # name authority number
        # short form of ark identifier
        ark = 'ark:/%s/%s' % (naan, noid)

        # NOTE: adding to the old object metadata is of limited use,
        # since the old object will not be saved in the migration,
        # but it provides convenient access to ark and ark_uri

        # store the ark in the object metadata
        # (this logic duplicated from base get_default_pid method)
        if hasattr(obj, 'mods'):
            # mods datastream: store full uri and short-form ark
            # as mods:identifier
            obj.mods.content.identifiers.extend([
                mods.Identifier(type='ark', text=ark),
                mods.Identifier(type='uri', text=ark_uri)
            ])
        else:
            # otherwise, add full uri ARK to dc:identifier
            obj.dc.content.identifier_list.append(ark_uri)

        # return the pid to be used
        return pid