예제 #1
0
def view(request, pid):
    '''View a single :class:`~keep.video.models.Video`.

    The user must either have the general ``video.view_video`` permission,
    or have ``video.view_researcher_video`` while the object is
    researcher accessible (``obj.researcher_access`` is truthy).

    :param request: current request; also used to initialize the
        repository connection
    :param pid: pid of the object to display
    :returns: the rendered ``video/view.html`` template with the object
        available as ``resource``, or the result of
        :func:`prompt_login_or_403` when the user may not view it
    :raises Http404: if the object does not have the requisite content
        models, or if checking them fails
    '''
    repo = Repository(request=request)
    obj = repo.get_object(pid=pid, type=Video)

    # user either needs view video permissions OR
    # researcher video permissions on a researcher-accessible object
    user = request.user
    viewable = user.has_perm('video.view_video') or \
        (user.has_perm('video.view_researcher_video') and
         bool(obj.researcher_access))
    if not viewable:
        return prompt_login_or_403(request)

    # Any failure while checking content models is reported as "not found".
    # Catch Exception rather than using a bare except so SystemExit and
    # KeyboardInterrupt are not swallowed.
    try:
        is_video = obj.has_requisite_content_models
    except Exception:
        raise Http404
    if not is_video:
        raise Http404

    return render(request, 'video/view.html', {"resource": obj})
예제 #2
0
def view(request, pid):
    '''View a single :class:`~keep.video.models.Video`.

    The user must either have the general ``video.view_video`` permission,
    or have ``video.view_researcher_video`` while the object is
    researcher accessible (``obj.researcher_access`` is truthy).

    :param request: current request; also used to initialize the
        repository connection
    :param pid: pid of the object to display
    :returns: the rendered ``video/view.html`` template with the object
        available as ``resource``, or the result of
        :func:`prompt_login_or_403` when the user may not view it
    :raises Http404: if the object does not have the requisite content
        models, or if checking them fails
    '''
    repo = Repository(request=request)
    obj = repo.get_object(pid=pid, type=Video)

    # user either needs view video permissions OR
    # researcher video permissions on a researcher-accessible object
    user = request.user
    viewable = user.has_perm('video.view_video') or \
        (user.has_perm('video.view_researcher_video') and
         bool(obj.researcher_access))
    if not viewable:
        return prompt_login_or_403(request)

    # Any failure while checking content models is reported as "not found".
    # Catch Exception rather than using a bare except so SystemExit and
    # KeyboardInterrupt are not swallowed.
    try:
        is_video = obj.has_requisite_content_models
    except Exception:
        raise Http404
    if not is_video:
        raise Http404

    return render(request, 'video/view.html', {"resource": obj})
예제 #3
0
파일: models.py 프로젝트: jrhoads/TheKeep
    def find_by_collection_number(num, parent=None):
        '''Search Solr for collections with the given collection number
        (source id), optionally restricted to one parent archive, and
        yield each match loaded from the repository.

        :param num: collection number to search for (aka source id);
            converted with ``int()`` before searching
        :param parent: optional; pid of the archive the collection must
            belong to (a leading ``info:fedora/`` is stripped)
        :return: generator of matching items, as instances of
            :class:`CollectionObject`
        '''
        search = solr_interface().query(
            content_model=CollectionObject.COLLECTION_CONTENT_MODEL,
            pid='%s:*' % settings.FEDORA_PIDSPACE,
            source_id=int(num))

        if parent is not None:
            # archive ids are searched as bare pids, so drop any uri prefix
            uri_prefix = 'info:fedora/'
            if parent.startswith(uri_prefix):
                parent = parent[len(uri_prefix):]
            search = search.query(archive_id=parent)

        # solr returns only 10 rows by default; ask for (up to) 1000
        # - the response is a list of dictionaries with collection info
        matches = search.paginate(start=0, rows=1000).execute()

        # initialize each match as a CollectionObject
        repo = Repository()
        for match in matches:
            yield repo.get_object(match['pid'], type=CollectionObject)
예제 #4
0
파일: models.py 프로젝트: jrhoads/TheKeep
    def archives(format=None):
        """Find Archives objects, to which CollectionObjects belong.

        The Solr response is stored on ``CollectionObject._archives`` the
        first time it is needed and reused on later calls.

        :param format: pass ``dict`` to get the stored Solr response
            instead of repository objects
        :returns: list of :class:`CollectionObject`, or the stored Solr
            response when ``format`` is ``dict``
        :rtype: list
        """
        # NOTE: formerly called top-level collections or Repository /
        # Owning Repository; should now be called archive and labeled
        # as such anywhere user-facing

        # TODO: search logic very similar to item_collections and
        # subcollections methods; consider refactoring search logic
        # into a common search method.

        if CollectionObject._archives is None:
            # archives are collection objects with NO parent collection id.
            # NOTE: not filtering on pidspace, since top-level objects are
            # loaded as fixtures and may not match the configured pidspace
            # in a dev environment
            all_collections = solr_interface().query(
                content_model=CollectionObject.COLLECTION_CONTENT_MODEL)
            top_level = all_collections.exclude(archive_id__any=True)
            # keep the solr response format
            CollectionObject._archives = \
                top_level.sort_by('title_exact').execute()

        stored = CollectionObject._archives
        if format == dict:
            return stored

        # otherwise, initialize as instances of CollectionObject
        repo = Repository()
        loaded = []
        for arch in stored:
            loaded.append(repo.get_object(arch['pid'], type=CollectionObject))
        return loaded
예제 #5
0
    def handle(self, *args, **options):
        '''Repair ARKs for the requested objects and log a summary.

        Each pid in ``args`` is loaded as a :class:`CollectionObject` or,
        failing that, as an :class:`AudioObject`.  When no pids are given,
        every object with the collection content model is processed.
        '''
        self.options = options
        self.repaired_count = 0
        self.unrepaired_count = 0

        repo = Repository()
        self.pidman = DjangoPidmanRestClient()

        if args:
            # populate list of objects to be processed from the given pids
            objects = []
            for pid in args:
                try:
                    # try collection first, then audio; keep the first
                    # type whose content models match
                    for candidate in (CollectionObject, AudioObject):
                        obj = repo.get_object(pid=pid, type=candidate)
                        if obj.has_requisite_content_models:
                            objects.append(obj)
                            break
                except Exception:
                    self.log(message="Could not find Collection or Audio object for: %s" % pid)
        else:
            # no pids specified: get all collections from the repository,
            # limited to the COLLECTION_CONTENT_MODEL and returned as
            # Keep-specific collection objects
            objects = repo.get_objects_with_cmodel(
                CollectionObject.COLLECTION_CONTENT_MODEL,
                type=CollectionObject)

        if not objects:
            self.log(message="No Collections were found.")

        for obj in objects:
            self.repair_ark(obj)

        summary = "\n\n%s ARKs repaired\n%s ARKs were not repaired" % \
            (self.repaired_count, self.unrepaired_count)
        self.log(message=summary, no_label=True)
예제 #6
0
    def setUp(self):
        '''Create a repository connection and an unsaved test
        :class:`EmailMessage` with sender, recipient and subject set.'''
        self.repo = Repository()
        self.pids = []

        # test EmailMessage
        self.email = self.repo.get_object(type=EmailMessage)
        cerp_content = self.email.cerp.content
        cerp_content.from_list = ['*****@*****.**']
        cerp_content.to_list = ['*****@*****.**']
        cerp_content.subject_list = ['Interesting Subject']
예제 #7
0
 def archives(format=None):
     '''Return the fixture archives configured in ``settings.PID_ALIASES``.

     With ``format=dict`` a list of ``{'title': ..., 'pid': ...}``
     dictionaries is built from the aliases; otherwise a list of
     :class:`CollectionObject` instances is returned, created once and
     then stored on ``FedoraFixtures._archives``.
     '''
     if format == dict:
         summaries = []
         for nick, pid in settings.PID_ALIASES.iteritems():
             summaries.append({'title': nick, 'pid': pid})
         return summaries

     # reuse previously-initialized objects when available
     if hasattr(FedoraFixtures, '_archives'):
         return FedoraFixtures._archives

     repo = Repository()
     loaded = []
     for pid in settings.PID_ALIASES.itervalues():
         loaded.append(repo.get_object(pid, type=CollectionObject))
     FedoraFixtures._archives = loaded
     return FedoraFixtures._archives
예제 #8
0
def download(request, pid):
    '''Download disk image datastream contents.

    The response is sent as an attachment named from the object's noid
    and the name of its latest format in the provenance metadata.
    '''
    repo = Repository(request=request)
    obj = repo.get_object(pid, type=DiskImage)

    noid = obj.noid
    format_name = obj.provenance.content.object.latest_format.name
    disposition = "attachment; filename=%s.%s" % (noid, format_name)
    extra_headers = {'Content-Disposition': disposition}

    return raw_datastream(request, pid, DiskImage.content.id,
                          repo=repo, headers=extra_headers)
예제 #9
0
 def englishdocs_collection():
     '''Build the "English documents collection" fixture: a new
     :class:`CollectionObject` with title, source id, parent archive and
     start/end creation dates set.  The object is not saved here.'''
     title = 'English documents collection'
     repo = Repository()
     obj = repo.get_object(type=CollectionObject)
     obj.label = title

     record = obj.mods.content
     record.title = title
     record.source_id = '309'
     # belongs to the second fixture archive
     obj.collection = repo.get_object(FedoraFixtures.archives()[1].uri)

     record.create_origin_info()
     for year, point in ((1509, 'start'), (1805, 'end')):
         record.origin_info.created.append(
             mods.DateCreated(date=year, point=point))
     return obj
예제 #10
0
 def simple_collection(label=None, status=None, pid=None):
     '''Build a new, unsaved :class:`SimpleCollection` fixture.

     :param label: optional object label
     :param status: optional text for the MODS access restrictions
     :param pid: optional pid to assign to the object
     :returns: the initialized :class:`SimpleCollection`
     '''
     obj = Repository().get_object(type=SimpleCollection)
     if label is not None:
         obj.label = label

     # the restrictions element is always created, even without a status
     record = obj.mods.content
     record.create_restrictions_on_access()
     if status is not None:
         record.restrictions_on_access.text = status

     if pid is not None:
         obj.pid = pid
     return obj
예제 #11
0
 def simple_collection(label=None, status=None, pid=None):
     '''Build a new, unsaved :class:`SimpleCollection` fixture.

     :param label: optional object label
     :param status: optional text for the MODS access restrictions
     :param pid: optional pid to assign to the object
     :returns: the initialized :class:`SimpleCollection`
     '''
     obj = Repository().get_object(type=SimpleCollection)
     if label is not None:
         obj.label = label

     # the restrictions element is always created, even without a status
     record = obj.mods.content
     record.create_restrictions_on_access()
     if status is not None:
         record.restrictions_on_access.text = status

     if pid is not None:
         obj.pid = pid
     return obj
예제 #12
0
 def all():
     '''Find all Audio objects by content model within the configured
     pidspace.

     :returns: the result of ``Repository.find_objects`` for the search,
         initialized as :class:`AudioObject`
     '''
     # restrict to objects in configured pidspace
     pid_pattern = '%s:*' % settings.FEDORA_PIDSPACE
     # restrict by cmodel in dc:format
     cmodel = AudioObject.AUDIO_CONTENT_MODEL
     return Repository().find_objects(type=AudioObject,
                                      pid__contains=pid_pattern,
                                      format__contains=cmodel)
예제 #13
0
파일: views.py 프로젝트: jrhoads/TheKeep
def download(request, pid):
    '''Download disk image datastream contents.

    The response is sent as an attachment named from the object's noid
    and the name of its latest format in the provenance metadata.
    '''
    repo = Repository(request=request)
    obj = repo.get_object(pid, type=DiskImage)

    noid = obj.noid
    format_name = obj.provenance.content.object.latest_format.name
    disposition = "attachment; filename=%s.%s" % (noid, format_name)
    extra_headers = {'Content-Disposition': disposition}

    return raw_datastream(request, pid, DiskImage.content.id,
                          repo=repo, headers=extra_headers)
예제 #14
0
 def englishdocs_collection():
     '''Build the "English documents collection" fixture: a new
     :class:`CollectionObject` with title, source id, parent archive and
     start/end creation dates set.  The object is not saved here.'''
     title = 'English documents collection'
     repo = Repository()
     obj = repo.get_object(type=CollectionObject)
     obj.label = title

     record = obj.mods.content
     record.title = title
     record.source_id = '309'
     # belongs to the second fixture archive
     obj.collection = repo.get_object(FedoraFixtures.archives()[1].uri)

     record.create_origin_info()
     for year, point in ((1509, 'start'), (1805, 'end')):
         record.origin_info.created.append(
             mods.DateCreated(date=year, point=point))
     return obj
예제 #15
0
 def esterbrook_collection():
     '''Build the "Thomas Esterbrook letter books" fixture: a new
     :class:`CollectionObject` with title, source id, parent archive,
     start/end creation dates and a name set.  The object is not saved
     here.'''
     title = 'Thomas Esterbrook letter books'
     repo = Repository()
     obj = repo.get_object(type=CollectionObject)
     obj.label = title

     record = obj.mods.content
     record.title = title
     record.source_id = '123'
     # belongs to the third fixture archive
     obj.collection = repo.get_object(FedoraFixtures.archives()[2].uri)

     record.create_origin_info()
     for year, point in ((1855, 'start'), (1861, 'end')):
         record.origin_info.created.append(
             mods.DateCreated(date=year, point=point))

     record.create_name()
     record.name.name_parts.append(mods.NamePart(text='Thomas Esterbrook'))
     return obj
예제 #16
0
 def rushdie_collection():
     '''Build the "Salman Rushdie Collection" fixture: a new
     :class:`CollectionObject` with title, source id, parent archive,
     start/end creation dates and a name set.  The object is not saved
     here.'''
     title = 'Salman Rushdie Collection'
     repo = Repository()
     obj = repo.get_object(type=CollectionObject)
     obj.label = title

     record = obj.mods.content
     record.title = title
     record.source_id = '1000'
     # belongs to the second fixture archive
     obj.collection = repo.get_object(FedoraFixtures.archives()[1].uri)

     record.create_origin_info()
     for year, point in ((1947, 'start'), (2008, 'end')):
         record.origin_info.created.append(
             mods.DateCreated(date=year, point=point))

     record.create_name()
     record.name.name_parts.append(mods.NamePart(text='Salman Rushdie'))
     return obj
예제 #17
0
def _objects_by_type(type_uri, type=None):
    """
    Generator of objects whose ``RDF.type`` is the specified type_uri,
    each initialized as the specified type.

    :param type_uri: The uri of the type being searched
    :param type: The type of object that should be returned
    """
    repo = Repository()

    # collect all matching subjects up front, before loading any object
    matching_pids = list(repo.risearch.get_subjects(RDF.type, type_uri))

    for matching_pid in matching_pids:
        yield repo.get_object(pid=matching_pid, type=type)
예제 #18
0
    def archives(format=None):
        '''Return the fixture archives configured in ``settings.PID_ALIASES``.

        With ``format=dict`` a list of ``{'title': ..., 'pid': ...}``
        dictionaries is built from the aliases; otherwise a list of
        :class:`CollectionObject` instances is returned, created once and
        then stored on ``FedoraFixtures._archives``.
        '''
        if format == dict:
            summaries = []
            for nick, pid in settings.PID_ALIASES.iteritems():
                summaries.append({'title': nick, 'pid': pid})
            return summaries

        # reuse previously-initialized objects when available
        if hasattr(FedoraFixtures, '_archives'):
            return FedoraFixtures._archives

        repo = Repository()
        loaded = []
        for pid in settings.PID_ALIASES.itervalues():
            loaded.append(repo.get_object(pid, type=CollectionObject))
        FedoraFixtures._archives = loaded
        return FedoraFixtures._archives
예제 #19
0
파일: views.py 프로젝트: jrhoads/TheKeep
def tasks(request, pid):
    '''Manage tasks associated with an :class:`~keep.audio.models.AudioObject`.
    Currently, the only supported functionality is to queue access
    copy conversion; this should be done by POSTing the type of task to
    be queued, i.e. **generate access copy**.

    Supported tasks:

        * **generate access copy** - queue access copy conversion for an audio
            item by pid.  Returns a status message as the body of a plain/text response

    Requests using any method other than POST get a plain-text 405
    response; unsupported task types get a plain-text 500 response.

    :param request: current request; the task type is read from the
        ``task`` POST parameter
    :param pid: the pid of the object for which tasks should be queued
    :returns: plain-text :class:`HttpResponse` with a status message
    :raises Http404: if the object doesn't exist or isn't an audio item

    '''
    if request.method != 'POST':
        # Answer explicitly instead of falling off the end of the view and
        # returning None, which Django reports as a server error.
        response = HttpResponse('Method "%s" is not supported' % request.method,
                                content_type='text/plain',
                                status=405)
        response['Allow'] = 'POST'
        return response

    task_type = request.POST.get('task', None)

    # TODO May want to prevent queuing of more than one at a time or within a time period.
    # TODO For now javascript disables the link until the page is refreshed.

    # currently the only supported task is access copy generation
    if task_type != 'generate access copy':
        return HttpResponse('Task "%s" is not supported' % task_type,
                            content_type='text/plain',
                            status=500)

    try:
        repo = Repository(request=request)
        obj = repo.get_object(pid, type=AudioObject)

        # if object doesn't exist or isn't an audio item, 404
        if not obj.exists or not obj.has_requisite_content_models:
            raise Http404

        queue_access_copy(obj)
        status = 'Successfully queued access copy conversion'

    except Http404:
        # let any 404 propagate untouched
        raise
    except Exception as err:
        # any other failure is reported to the client in the response body
        logger.error('Error queueing access copy conversion for %s : %s',
                     pid, err)
        status = 'Error queueing access copy conversion (%s)' % err

    return HttpResponse(status, content_type='text/plain')
예제 #20
0
def create_from_findingaid(request):
    '''Find an existing collection, or create a new one from a finding
    aid, based on a submitted archive and collection number.

    Validates the posted :class:`FindCollection` form.  If Solr already
    has a matching collection, redirects to the view page for the first
    match.  Otherwise looks up the finding aid, generates and saves a new
    collection in the selected archive, and redirects to its edit page.
    On an invalid form or a handled lookup/save error, an error message
    is added and the user is redirected to the dashboard.

    :param request: current request; form data is read from ``request.POST``
    :returns: :class:`HttpResponseSeeOtherRedirect`
    '''
    # imported locally so this view does not rely on a module-level logger
    import logging

    form = FindCollection(request.POST)
    if not form.is_valid():
        messages.error(request, 'Form is not valid; please try again.')
    else:
        data = form.cleaned_data
        q = CollectionObject.item_collection_query()
        # submitted value is pid alias; lookup pid for solr query
        archive_id = settings.PID_ALIASES[data['archive']]
        q = q.query(archive_id=archive_id,
                    source_id=data['collection'])
        # count once and reuse the value rather than re-counting for each use
        found = q.count()
        # if collection is found, redirect to collection view with message
        if found:
            messages.info(request, 'Found %d collection%s for %s %s.' %
                          (found, 's' if found != 1 else '',
                           data['archive'].upper(), data['collection']))
            return HttpResponseSeeOtherRedirect(reverse('collection:view',
                kwargs={'pid': q[0]['pid']}))

        else:
            # otherwise, create the new record and redirect to new
            # collection edit page
            repo = Repository(request=request)
            coll_id = data['collection']
            try:
                archive = repo.get_object(archive_id, type=CollectionObject)
                fa = FindingAid.find_by_unitid(unicode(coll_id),
                                               archive.mods.content.title)
                coll = fa.generate_collection()
                coll.collection = archive
                coll.save()
                messages.info(request, 'Added %s for collection %s: %s'
                              % (coll, coll_id, coll.mods.content.title))

                return HttpResponseSeeOtherRedirect(
                    reverse('collection:edit', kwargs={'pid': coll.pid}))

            except DoesNotExist:
                messages.error(request, 'No EAD found for %s in %s' %
                               (coll_id, data['archive'].upper()))
            except ReturnedMultiple:
                messages.error(request, 'Multiple EADs found for %s in %s' %
                               (coll_id, data['archive'].upper()))
            except RequestFailed as err:
                # record the failure in the log instead of printing to stdout
                logging.getLogger(__name__).error(
                    'Failed to save new collection for %s: %s', coll_id, err)
                messages.error(request, 'Failed to save new collection')

    return HttpResponseSeeOtherRedirect(reverse('repo-admin:dashboard'))
예제 #21
0
def tasks(request, pid):
    '''Manage tasks associated with an :class:`~keep.audio.models.AudioObject`.
    Currently, the only supported functionality is to queue access
    copy conversion; this should be done by POSTing the type of task to
    be queued, i.e. **generate access copy**.

    Supported tasks:

        * **generate access copy** - queue access copy conversion for an audio
            item by pid.  Returns a status message as the body of a plain/text response

    Requests using any method other than POST get a plain-text 405
    response; unsupported task types get a plain-text 500 response.

    :param request: current request; the task type is read from the
        ``task`` POST parameter
    :param pid: the pid of the object for which tasks should be queued
    :returns: plain-text :class:`HttpResponse` with a status message
    :raises Http404: if the object doesn't exist or isn't an audio item

    '''
    if request.method != 'POST':
        # Answer explicitly instead of falling off the end of the view and
        # returning None, which Django reports as a server error.
        response = HttpResponse('Method "%s" is not supported' % request.method,
                                content_type='text/plain',
                                status=405)
        response['Allow'] = 'POST'
        return response

    task_type = request.POST.get('task', None)

    # TODO May want to prevent queuing of more than one at a time or within a time period.
    # TODO For now javascript disables the link until the page is refreshed.

    # currently the only supported task is access copy generation
    if task_type != 'generate access copy':
        return HttpResponse('Task "%s" is not supported' % task_type,
                            content_type='text/plain',
                            status=500)

    try:
        repo = Repository(request=request)
        obj = repo.get_object(pid, type=AudioObject)

        # if object doesn't exist or isn't an audio item, 404
        if not obj.exists or not obj.has_requisite_content_models:
            raise Http404

        queue_access_copy(obj)
        status = 'Successfully queued access copy conversion'

    except Http404:
        # let any 404 propagate untouched
        raise
    except Exception as err:
        # any other failure is reported to the client in the response body
        logger.error('Error queueing access copy conversion for %s : %s',
                     pid, err)
        status = 'Error queueing access copy conversion (%s)' % err

    return HttpResponse(status, content_type='text/plain')
예제 #22
0
    def setUp(self):
        '''Save a parent test collection and prepare an unsaved
        arrangement object that belongs to it.'''
        self.repo = Repository()
        self.pids = []

        # create test collection
        parent = self.repo.get_object(type=CollectionObject)
        parent.pid = '%s:parent-1' % settings.FEDORA_PIDSPACE
        parent.mods.content.source_id = '12345'
        parent.save()
        # remember the saved pid
        self.pids.append(parent.pid)

        # create test arrangement object
        self.arr = arrangement = self.repo.get_object(type=ArrangementObject)
        arrangement.pid = 'foo:1'
        arrangement.collection = parent
예제 #23
0
class Command(BaseCommand):
    '''Generate access copies for PIDs specified on the command line.'''
    help = __doc__

    def handle(self, *args, **options):
        '''Process every PID given as a positional argument.

        :param args: PIDs to process
        :param options: command options; ``verbosity`` is read from here
        '''
        self.verbosity = options['verbosity']
        self.repo = Repository()

        for pid in args:
            self.process_pid(pid)

    def process_pid(self, pid):
        '''Process a single PID by looking it up in the repository, figuring
        out what kind of processing it needs based on its object type, and
        doing that.

        Objects that do not exist are skipped; audio objects get an access
        copy queued; any other object type is skipped.

        :param pid: PID of the object to process
        '''

        obj = self.repo.get_object(pid=pid,
                                   type=self.repo.infer_object_subtype)
        if not obj.exists:
            if self.verbosity >= 1:
                print("No such PID; skipped: %s" % pid)
            # Skip missing objects at every verbosity level; previously the
            # return only happened when the message was printed.
            return

        if isinstance(obj, AudioObject):
            if self.verbosity >= 2:
                print("Generating audio access copy: %s" % pid)
            queue_access_copy(obj)
        elif self.verbosity >= 1:
            print("Unhandled  object type; skipped: %s" % pid)
예제 #24
0
 def rushdie_collection():
     '''Build the "Salman Rushdie Collection" fixture: a new
     :class:`CollectionObject` with title, source id, parent archive,
     start/end creation dates and a name set.  The object is not saved
     here.'''
     title = 'Salman Rushdie Collection'
     repo = Repository()
     obj = repo.get_object(type=CollectionObject)
     obj.label = title

     record = obj.mods.content
     record.title = title
     record.source_id = '1000'
     # belongs to the second fixture archive
     obj.collection = repo.get_object(FedoraFixtures.archives()[1].uri)

     record.create_origin_info()
     for year, point in ((1947, 'start'), (2008, 'end')):
         record.origin_info.created.append(
             mods.DateCreated(date=year, point=point))

     record.create_name()
     record.name.name_parts.append(mods.NamePart(text='Salman Rushdie'))
     return obj
예제 #25
0
 def esterbrook_collection():
     '''Build the "Thomas Esterbrook letter books" fixture: a new
     :class:`CollectionObject` with title, source id, parent archive,
     start/end creation dates and a name set.  The object is not saved
     here.'''
     title = 'Thomas Esterbrook letter books'
     repo = Repository()
     obj = repo.get_object(type=CollectionObject)
     obj.label = title

     record = obj.mods.content
     record.title = title
     record.source_id = '123'
     # belongs to the third fixture archive
     obj.collection = repo.get_object(FedoraFixtures.archives()[2].uri)

     record.create_origin_info()
     for year, point in ((1855, 'start'), (1861, 'end')):
         record.origin_info.created.append(
             mods.DateCreated(date=year, point=point))

     record.create_name()
     record.name.name_parts.append(mods.NamePart(text='Thomas Esterbrook'))
     return obj
class Command(BaseCommand):
    '''Generate access copies for PIDs specified on the command line.'''
    help = __doc__

    def handle(self, *args, **options):
        '''Process every PID given as a positional argument.

        :param args: PIDs to process
        :param options: command options; ``verbosity`` is read from here
        '''
        self.verbosity = options['verbosity']
        self.repo = Repository()

        for pid in args:
            self.process_pid(pid)

    def process_pid(self, pid):
        '''Process a single PID by looking it up in the repository, figuring
        out what kind of processing it needs based on its object type, and
        doing that.

        Objects that do not exist are skipped; audio objects get an access
        copy queued; any other object type is skipped.

        :param pid: PID of the object to process
        '''

        obj = self.repo.get_object(pid=pid, type=self.repo.infer_object_subtype)
        if not obj.exists:
            if self.verbosity >= 1:
                print("No such PID; skipped: %s" % pid)
            # Skip missing objects at every verbosity level; previously the
            # return only happened when the message was printed.
            return

        if isinstance(obj, AudioObject):
            if self.verbosity >= 2:
                print("Generating audio access copy: %s" % pid)
            queue_access_copy(obj)
        elif self.verbosity >= 1:
            print("Unhandled  object type; skipped: %s" % pid)
예제 #27
0
    def init_from_file(filename,
                       initial_label=None,
                       request=None,
                       checksum=None,
                       mimetype=None):
        '''Static method to create a new :class:`AudioObject` instance from
        a file.  Sets the object label and metadata title based on the initial
        label specified, or file basename.  Calculates and stores the duration
        based on the file. Also sets the following default metadata values:

            * mods:typeOfResource = "sound recording"
            * dt:codecQuality = "lossless"

        :param filename: full path to the audio file, as a string
        :param initial_label: optional initial label to use; if not specified,
            the base name of the specified file will be used
        :param request: :class:`django.http.HttpRequest` passed into a view method;
            must be passed in order to connect to Fedora as the currently-logged
            in user
        :param checksum: the checksum of the file being sent to fedora.
        :param mimetype: optional mimetype to set on the audio content
            datastream
        :returns: :class:`AudioObject` initialized from the file
        '''
        if initial_label is None:
            initial_label = os.path.basename(filename)
        repo = Repository(request=request)
        obj = repo.get_object(type=AudioObject)
        # set initial object label from the base filename
        obj.label = initial_label
        obj.dc.content.title = obj.mods.content.title = obj.label
        # open in binary mode: audio data must not go through text-mode
        # newline translation
        # FIXME: at what point does/should this get closed?
        obj.audio.content = open(filename, 'rb')
        # Set the file checksum, if set.
        obj.audio.checksum = checksum
        # set content datastream mimetype if passed in
        if mimetype is not None:
            obj.audio.mimetype = mimetype
        # Get the label, minus the ".wav" (mimetype indicates that)
        # NOTE(review): this always drops the last four characters, even
        # when the label does not end in ".wav" - confirm that is intended
        obj.audio.label = initial_label[:-4]
        # set initial mods:typeOfResource - all AudioObjects default to sound recording
        obj.mods.content.resource_type = 'sound recording'
        # set codec quality to lossless in digital tech metadata
        # - default for AudioObjects, should only accept lossless audio for master file
        obj.digitaltech.content.codec_quality = 'lossless'
        # get wav duration and store in digital tech metadata
        obj.digitaltech.content.duration = '%d' % round(wav_duration(filename))

        return obj
예제 #28
0
    def disk_images(self):
        '''Write to stdout the pids of a sample of disk images: up to ten
        of the smallest (by content size) for each format, excluding
        anything in the restricted collections.'''
        self.stderr.write('Disk images')
        ### disk images
        # representative sample of aff and ad1
        # DO NOT include anything in these collections:
        # Trethewey (ghsdj), Rushdie (94k9k), Mackey (g1btw),
        # Clifton (94kf4), and Grennan (9k0st)
        restricted = ('trethewey', 'rushdie', 'mackey', 'clifton', 'grennan')

        solr = solr_interface()
        repo = Repository()
        q = solr.query(content_model=DiskImage.DISKIMAGE_CONTENT_MODEL)
        for name in restricted:
            q = q.exclude(collection_id=self.collections[name])
        q = q.field_limit('pid')

        verbose = self.verbosity >= self.v_normal
        if verbose:
            self.stderr.write(
                'Found %d disk images not in restricted collections' %
                q.count())

        # currently there is no way to filter on format or size in either
        # solr or fedora risearch
        # so, go through individually and group them by type,
        # then sort by size and pick the smallest ones
        diskimgs_by_type = defaultdict(list)
        for result in q:
            pid = result['pid']
            diskimg = repo.get_object(pid, type=DiskImage)
            if diskimg.exists:
                fmt = diskimg.provenance.content.object.format.name
                diskimgs_by_type[fmt].append(diskimg)
            elif verbose:
                self.stderr.write('Referenced disk image %s does not exist or is inaccessible' \
                    % pid)

        for fmt, candidates in diskimgs_by_type.iteritems():
            if verbose:
                self.stderr.write('Selecting %s disk images' % fmt)
            # sort on binary file size so we sync the smallest ones
            by_size = sorted(candidates, key=lambda img: img.content.size)
            # use the first 10 of each type
            for selected in by_size[:10]:
                self.stdout.write(selected.pid)
예제 #29
0
파일: views.py 프로젝트: jrhoads/TheKeep
def view_audit_trail(request, pid):
    '''Access XML audit trail for an audio object.

    Delegates to ``raw_audit_trail`` using a repository connection
    initialized from the current request.
    '''
    # FIXME: redundant across collection/arrangement/audio apps; consolidate?
    user_repo = Repository(request=request)
    return raw_audit_trail(request, pid, type=AudioObject, repo=user_repo)
예제 #30
0
    def setUp(self):
        '''Create and save the simple collection, master collection and
        digital object fixtures, then configure a ``migrate_rushdie``
        command instance to run against them.'''
        self.repo = Repository()
        self.pids = []

        # Create a simple Collection
        self.sc = self.repo.get_object(type=SimpleCollection)
        self.sc.label = "SimpleCollection For Test"
        self.sc.save()
        self.pids.append(self.sc.pid)

        # Create a Master Collection
        self.mc = self.repo.get_object(type=CollectionObject)
        self.mc.label = "MasterCollection For Test"
        self.mc.save()
        self.pids.append(self.mc.pid)

        # Create a DigitalObject
        self.digObj = self.repo.get_object(type=RushdieArrangementFile)
        self.digObj.label = "Object For Test"
        self.digObj.save()
        self.pids.append(self.digObj.pid)
        # add both fixture datastreams (id and label are the same string)
        for dsid, fixture in (("MARBL-MACTECH", self.MM_FIXTURE),
                              ("MARBL-ANALYSIS", self.MA_FIXTURE)):
            self.digObj.api.addDatastream(self.digObj.pid,
                                          dsid,
                                          dsid,
                                          mimeType="application/xml",
                                          content=fixture)
        # Remove Arrangement model so it can be added later
        arrangement_model = (self.digObj.uriref, modelns.hasModel,
                             "info:fedora/emory-control:Arrangement-1.0")
        self.digObj.rels_ext.content.remove(arrangement_model)
        self.digObj.save()

        # Setup Command
        command = migrate_rushdie.Command()
        self.cmd = command
        command.verbosity = 1
        command.v_normal = 1
        command.v_none = 0
        command.simple_collection = self.sc
        command.stdout = sys.stdout
        command.CONTENT_MODELS = CONTENT_MODELS
        command.repo = self.repo
예제 #31
0
파일: models.py 프로젝트: jrhoads/TheKeep
    def find_by_field(field, value, repo=None):
        '''
        Static method to find a single :class:`EmailMessage` by an indexed
        value.  Searches Solr for the requested field and value (limited
        to the arrangement content model) and, when exactly one match is
        found, returns an :class:`EmailMessage` instance initialized from
        the repository.

        Raises :class:`django.core.exceptions.MultipleObjectsReturned`
        if more than one match is found; raises
        :class:`django.core.exceptions.ObjectDoesNotExist` if no
        matches are found in the Solr index.

        :param field: solr field to search
        :param value: value to search on in the specified field

        :param repo: optional :class:`eulfedora.server.Repository`
            to use an existing connection with specific credentials

        :returns: :class:`EmailMessage`
        '''
        solr = solr_interface()
        criteria = {
            field: value,
            'content_model': ArrangementObject.ARRANGEMENT_CONTENT_MODEL
        }
        matches = solr.query(**criteria).field_limit('pid')

        # require one and only one match; custom django exceptions are
        # borrowed for the not found / too many matches cases
        total = len(matches)
        if total > 1:
            raise MultipleObjectsReturned('Found %d records with %s %s' % \
                                          (total, field, value))
        if total == 0:
            raise ObjectDoesNotExist('No record found with %s %s' %
                                     (field, value))

        # fall back to a default connection when none was supplied
        connection = Repository() if repo is None else repo
        return connection.get_object(matches[0]['pid'], type=EmailMessage)
예제 #32
0
def view(request, pid):
    '''Display one :class:`~keep.collection.models.CollectionObject`
    together with a paginated list (30 per page) of the items that belong
    to it.

    Raises :class:`Http404` when the pid does not exist or is not a
    collection.  Returns the result of ``prompt_login_or_403`` when the
    user lacks the general collection view permission, has the
    researcher-only permission, and no items are visible to them.
    '''
    collection = Repository(request=request).get_object(
        pid, type=CollectionObject)
    # unknown pid, or an object that is not a collection: 404
    if not (collection.exists and collection.has_requisite_content_models):
        raise Http404

    # every item in this collection, sorted on creation date, issue date
    # and finally title
    items_query = collection.solr_items_query()
    for sort_field in ('date_created', 'date_issued', 'title_exact'):
        items_query = items_query.sort_by(sort_field)
    # limit to what the logged-in user is permitted to see
    # (adds the researcher-accessible filter when appropriate)
    items_query = filter_by_perms(items_query, request.user)

    # a researcher-only user who can see no items at all has no access
    # to this collection
    researcher_only = (
        not request.user.has_perm('collection.view_collection')
        and request.user.has_perm('collection.view_researcher_collection'))
    if researcher_only and items_query.count() == 0:
        return prompt_login_or_403(request)

    # paginate the solr results; a non-numeric page falls back to page 1,
    # an out-of-range page falls back to the last page
    paginator = Paginator(items_query, 30)
    try:
        page_number = int(request.GET.get('page', '1'))
    except ValueError:
        page_number = 1
    try:
        page_items = paginator.page(page_number)
    except (EmptyPage, InvalidPage):
        page_items = paginator.page(paginator.num_pages)

    # current query string minus 'page', for building pagination links
    link_params = request.GET.copy()
    if 'page' in link_params:
        del link_params['page']

    context = {
        'collection': collection,
        'items': page_items,
        'url_params': urlencode(link_params),
    }
    return TemplateResponse(request, 'collection/view.html', context)
예제 #33
0
    def setUp(self):
        '''Open a repository connection and prepare a test
        :class:`EmailMessage` with sender, recipient and subject set.
        The object is not saved in this method.'''
        self.repo = Repository()
        # pids of objects to clean up; nothing is added to it here
        self.pids = []

        # new EmailMessage whose CERP metadata is filled in below
        message = self.email = self.repo.get_object(type=EmailMessage)
        cerp_content = message.cerp.content
        cerp_content.from_list = ['*****@*****.**']
        cerp_content.to_list = ['*****@*****.**']
        cerp_content.subject_list = ['Interesting Subject']
예제 #34
0
def playlist(request, pid):
    '''Return a JSON playlist of the audio items in a
    :class:`CollectionObject` that have an access copy.

    Each entry carries the item title, ``free: False`` and a download
    url keyed by the audio type (``m4a`` or ``mp3``).  Raises
    :class:`Http404` when the pid does not exist or is not a collection.
    '''
    # FIXME: this needs last-modified so browser can cache!!!

    # NOTE: preliminary logic duplicated from view above
    collection = Repository(request=request).get_object(
        pid, type=CollectionObject)
    # unknown pid, or an object that is not a collection: 404
    if not (collection.exists and collection.has_requisite_content_models):
        raise Http404

    # every item in this collection, sorted on creation date, issue date
    # and finally title
    items_query = collection.solr_items_query()
    for sort_field in ('date_created', 'date_issued', 'title_exact'):
        items_query = items_query.sort_by(sort_field)
    # limit to what the logged-in user is permitted to see
    # (adds the researcher-accessible filter when appropriate)
    items_query = filter_by_perms(items_query, request.user)

    # a researcher-only user who can see no items at all has no access
    # to this collection
    researcher_only = (
        not request.user.has_perm('collection.view_collection')
        and request.user.has_perm('collection.view_researcher_collection'))
    if researcher_only and items_query.count() == 0:
        return prompt_login_or_403(request)

    tracks = []
    for item in items_query:
        # only audio items that have an access copy are playable
        if item['object_type'] == 'audio' and item['has_access_copy']:
            track = {
                'title': item['title'],
                'free': False,  # explicitly mark as not downloadable
            }
            # mp4 access copies are served as m4a, anything else as mp3
            is_mp4 = item['access_copy_mimetype'] == 'audio/mp4'
            extension = 'm4a' if is_mp4 else 'mp3'
            track[extension] = reverse(
                'audio:download-compressed-audio',
                kwargs={'pid': item['pid'], 'extension': extension})
            tracks.append(track)

    return HttpResponse(json.dumps(tracks), content_type='application/json')
예제 #35
0
    def find_by_field(field, value, repo=None):
        '''
        Find the single :class:`EmailMessage` whose indexed ``field``
        equals ``value``.

        The lookup is done in Solr, restricted to records with the
        ``ArrangementObject.ARRANGEMENT_CONTENT_MODEL`` content model and
        returning only the pid.  If exactly one record is found, the
        object is initialized from the repository as an
        :class:`EmailMessage`.

        :param field: solr field to search
        :param value: value to search on in the specified field
        :param repo: optional :class:`eulfedora.server.Repository`
            to use an existing connection with specific credentials;
            a new ``Repository()`` is created when omitted

        :returns: :class:`EmailMessage`

        :raises: :class:`django.core.exceptions.MultipleObjectsReturned`
            if more than one match is found
        :raises: :class:`django.core.exceptions.ObjectDoesNotExist`
            if no matches are found in the Solr index
        '''
        # content model is set after the caller's field so it takes
        # precedence if both use the same key
        search_terms = {field: value}
        search_terms['content_model'] = \
            ArrangementObject.ARRANGEMENT_CONTENT_MODEL
        results = solr_interface().query(**search_terms).field_limit('pid')

        # django's exceptions are borrowed to signal an ambiguous or
        # missing match
        total = len(results)
        if total > 1:
            raise MultipleObjectsReturned(
                'Found %d records with %s %s' % (total, field, value))
        if total == 0:
            raise ObjectDoesNotExist(
                'No record found with %s %s' % (field, value))

        repository = Repository() if repo is None else repo
        return repository.get_object(results[0]['pid'], type=EmailMessage)
예제 #36
0
    def disk_images(self):
        '''Write to stdout the pids of a sample of disk images: for each
        disk image format, up to ten objects with the smallest content
        size, leaving out everything in the restricted collections.

        Progress messages go to stderr when ``self.verbosity`` is at
        least ``self.v_normal``.
        '''
        self.stderr.write('Disk images')
        # representative sample of aff and ad1
        # DO NOT include anything in these collections:
        # Trethewey (ghsdj), Rushdie (94k9k), Mackey (g1btw),
        # Clifton (94kf4), and Grennan (9k0st)
        restricted = ('trethewey', 'rushdie', 'mackey', 'clifton', 'grennan')

        solr = solr_interface()
        repo = Repository()
        candidates = solr.query(content_model=DiskImage.DISKIMAGE_CONTENT_MODEL)
        for collection_name in restricted:
            candidates = candidates.exclude(
                collection_id=self.collections[collection_name])
        candidates = candidates.field_limit('pid')

        verbose = self.verbosity >= self.v_normal
        if verbose:
            self.stderr.write('Found %d disk images not in restricted collections' % candidates.count())

        # neither solr nor fedora risearch can currently filter on format
        # or size, so load each object, bucket it by format, and pick the
        # smallest per bucket afterwards
        images_by_format = defaultdict(list)
        for hit in candidates:
            image = repo.get_object(hit['pid'], type=DiskImage)
            if image.exists:
                format_name = image.provenance.content.object.format.name
                images_by_format[format_name].append(image)
            elif verbose:
                self.stderr.write('Referenced disk image %s does not exist or is inaccessible' \
                    % hit['pid'])

        for format_name, images in images_by_format.iteritems():
            if verbose:
                self.stderr.write('Selecting %s disk images' % format_name)
            # order by binary file size so the smallest ones are synced
            smallest_first = sorted(images, key=lambda img: img.content.size)
            # output the first 10 of each format
            for image in smallest_first[:10]:
                self.stdout.write(image.pid)
예제 #37
0
    def handle(self, batch_id=None, folder_path=None, verbosity=1, noact=False,
               max_ingest=None, skip_purge=False, purge_only=False, *args, **options):
        '''Purge the arrangement email objects belonging to a processing
        batch and ingest email from a Eudora folder.

        :param batch_id: pid of the :class:`ProcessingBatch` (required)
        :param folder_path: base path of the Eudora folder (required;
            must be an existing directory)
        :param verbosity: verbosity level; stored as an int on
            ``self.verbosity``
        :param noact: stored on ``self.noact``
        :param max_ingest: if given, stored as an int on
            ``self.max_ingest``
        :param skip_purge: when true, do not call
            ``remove_arrangement_emails``
        :param purge_only: when true, do not call ``ingest_email``
        :param options: may include ``username`` and ``password``, which
            are passed to :class:`Repository`
        :raises CommandError: if the batch id or folder path is missing
            or invalid, the batch does not exist, or the PID manager
            client cannot be initialized
        '''
        # check batch object
        if batch_id is None:
            raise CommandError('Processing batch id is required')
        self.verbosity = int(verbosity)  # ensure we compare int to int
        if max_ingest is not None:
            self.max_ingest = int(max_ingest)

        # check folder path
        if folder_path is None:
            raise CommandError('Eudora folder base path is required')
        if not os.path.isdir(folder_path):
            raise CommandError('Eudora folder path "%s" is not a directory' % folder_path)
        self.noact = noact

        # pass along any fedora credentials that were specified
        fedora_opts = dict((key, options[key])
                           for key in ('username', 'password')
                           if key in options)
        self.repo = Repository(**fedora_opts)
        batch = self.repo.get_object(batch_id, type=ProcessingBatch)
        if not batch.exists:
            raise CommandError('Processing batch %s not found' % batch_id)
        # single-argument print(...) behaves the same as a statement
        # (Python 2) and as a function (Python 3)
        print('Looking for email messages in processing batch "%s"'
              % batch.label)

        # the client is constructed only to confirm that it can be
        # initialized; the instance is not used in this method
        try:
            DjangoPidmanRestClient()
        except Exception:
            # narrowed from a bare except so that SystemExit and
            # KeyboardInterrupt are not reported as a settings problem
            raise CommandError('Error initializing PID manager client; ' +
                               'please check settings.')

        self.stats = defaultdict(int)
        # purge old metadata email 'arrangement' objects that belong to this batch
        if not skip_purge:
            self.remove_arrangement_emails(batch)
        # ingest new objects for email mailboxes & messages
        if not purge_only:
            self.ingest_email(folder_path)
예제 #38
0
def simple_edit(request, pid=None):
    ''' Edit an existing Fedora
    :class:`~keep.collection.models.SimpleCollection`.  If a pid is
    specified, attempts to retrieve an existing object.

    On POST with a valid form, a batch status update is queued when the
    submitted status differs from the object's current
    ``restrictions_on_access`` text; otherwise an informational message
    is added.  On any other method the form is initialized with the
    current status.

    Raises :class:`Http404` when Fedora reports a 404; any other
    :class:`RequestFailed` is re-raised.

    NOTE(review): no code path in this block returns a response, and
    ``form`` is never rendered; confirm that the rendering step was not
    lost.
    '''
    repo = Repository(request=request)

    try:
        obj = repo.get_object(pid=pid, type=SimpleCollection)

        if request.method == 'POST':
            form = SimpleCollectionEditForm(request.POST)
            if form.is_valid():
                status = form.cleaned_data['status']

                if status == obj.mods.content.restrictions_on_access.text:
                    # don't queue job if there is no change
                    messages.info(request, 'Status is unchanged')

                else:
                    # queue celery task to update items in this batch
                    queue_batch_status_update(obj, status)
                    messages.info(
                        request,
                        'Batch status update has been queued; ' +
                        'please check later via <a href="%s">recent tasks</a> page' %
                        reverse('tasks:recent')
                    )

        else:
            # just display the form, pre-filled with the current status
            form = SimpleCollectionEditForm(initial={'status': obj.mods.content.restrictions_on_access.text})

    except RequestFailed as e:
        # if there was a 404 accessing objects, raise http404
        # NOTE: this probably doesn't distinguish between object exists with
        # no MODS and object does not exist at all
        if e.code == 404:
            raise Http404
        # otherwise, re-raise and handle as a common fedora connection error
        raise
예제 #39
0
파일: models.py 프로젝트: jrhoads/TheKeep
    def by_arrangement_id(id, repo=None):
        '''
        Look up exactly one :class:`ArrangementObject` by its local or
        arrangement id.

        Solr is searched on ``arrangement_id``, restricted to records with
        the ``ArrangementObject.ARRANGEMENT_CONTENT_MODEL`` content model
        and returning only the pid.  When exactly one record matches, that
        pid is loaded from the repository as an
        :class:`ArrangementObject`.

        :param id: arrangement id or local id
        :param repo: optional :class:`eulfedora.server.Repository`
            to use an existing connection with specific credentials;
            a new ``Repository()`` is created when omitted

        :returns: :class:`ArrangementObject`

        :raises: :class:`django.core.exceptions.MultipleObjectsReturned`
            if more than one match is found
        :raises: :class:`django.core.exceptions.ObjectDoesNotExist`
            if no matches are found in the Solr index
        '''
        matches = solr_interface().query(
            arrangement_id=id,
            content_model=ArrangementObject.ARRANGEMENT_CONTENT_MODEL
        ).field_limit('pid')

        # exactly one match is required; django's exceptions are borrowed
        # to report "too many" and "none"
        match_count = len(matches)
        if match_count > 1:
            raise MultipleObjectsReturned(
                'Found %d records with arrangement id %s' % (match_count, id))
        if match_count == 0:
            raise ObjectDoesNotExist(
                'No record found with arrangement id %s' % id)

        repository = Repository() if repo is None else repo
        return repository.get_object(matches[0]['pid'], type=ArrangementObject)
예제 #40
0
    def by_arrangement_id(id, repo=None):
        '''
        Find the single :class:`ArrangementObject` that has the given
        local or arrangement id.

        The lookup is done in Solr on ``arrangement_id``, restricted to
        the ``ArrangementObject.ARRANGEMENT_CONTENT_MODEL`` content model
        and returning only the pid.  If exactly one record is found, the
        object is initialized from the repository as an
        :class:`ArrangementObject`.

        :param id: arrangement id or local id
        :param repo: optional :class:`eulfedora.server.Repository`
            to use an existing connection with specific credentials;
            a new ``Repository()`` is created when omitted

        :returns: :class:`ArrangementObject`

        :raises: :class:`django.core.exceptions.MultipleObjectsReturned`
            if more than one match is found
        :raises: :class:`django.core.exceptions.ObjectDoesNotExist`
            if no matches are found in the Solr index
        '''
        results = solr_interface().query(
            arrangement_id=id,
            content_model=ArrangementObject.ARRANGEMENT_CONTENT_MODEL
        ).field_limit('pid')

        # django's exceptions are borrowed to signal an ambiguous or
        # missing match
        total = len(results)
        if total > 1:
            raise MultipleObjectsReturned(
                'Found %d records with arrangement id %s' % (total, id))
        if total == 0:
            raise ObjectDoesNotExist(
                'No record found with arrangement id %s' % id)

        repository = Repository() if repo is None else repo
        return repository.get_object(results[0]['pid'], type=ArrangementObject)
예제 #41
0
    def handle(self, *args, **options):
        '''Run ``repair_ark`` on a set of repository objects and log a
        summary of repaired / unrepaired counts.

        :param args: pids to process.  Each pid is loaded as a
            :class:`CollectionObject` or, failing that, as an
            :class:`AudioObject`; pids that match neither content model,
            or that raise an error while loading, are logged and skipped.
            When no pids are given, all objects with the
            ``CollectionObject.COLLECTION_CONTENT_MODEL`` content model
            are processed.
        :param options: stored on ``self.options``
        '''
        self.options = options
        self.repaired_count = 0
        self.unrepaired_count = 0

        repo = Repository()
        self.pidman = DjangoPidmanRestClient()

        if args:
            # populate list of objects to be processed from the given pids
            objects = []
            for pid in args:
                match = None
                try:
                    # try collection first, then audio
                    for obj_type in (CollectionObject, AudioObject):
                        candidate = repo.get_object(pid=pid, type=obj_type)
                        if candidate.has_requisite_content_models:
                            match = candidate
                            break
                except Exception:
                    match = None

                if match is None:
                    # reported both for load errors and for pids that are
                    # neither a collection nor an audio object, so that
                    # no requested pid is dropped silently
                    self.log(
                        message="Could not find Collection or Audio object for: %s"
                        % pid)
                else:
                    objects.append(match)
        else:
            # get list of all collections from the repository
            # limited to the COLLECTION_CONTENT_MODEL as well as returns a Keep specific collection object
            objects = repo.get_objects_with_cmodel(
                CollectionObject.COLLECTION_CONTENT_MODEL,
                type=CollectionObject)

        if not objects:
            self.log(message="No Collections were found.")

        for obj in objects:
            self.repair_ark(obj)

        self.log(message="\n\n%s ARKs repaired\n%s ARKs were not repaired" %
                 (self.repaired_count, self.unrepaired_count),
                 no_label=True)
예제 #42
0
파일: views.py 프로젝트: jrhoads/TheKeep
def view(request, pid):
    '''View a single :class:`~keep.audio.models.AudioObject`.
    User must either have general view audio permissions, or if they have
    view researcher audio, the object must be researcher accessible
    (based on rights codes).

    Returns the result of ``prompt_login_or_403`` when the user may not
    view the object.  Raises :class:`Http404` when the object does not
    have the required content models or when checking them fails.
    '''
    repo = Repository(request=request)
    obj = repo.get_object(pid, type=AudioObject)
    # user either needs view audio permissions OR
    # view researcher audio on a researcher-accessible object
    viewable = request.user.has_perm('audio.view_audio') or \
        (request.user.has_perm('audio.view_researcher_audio') and
         bool(obj.researcher_access))
    if not viewable:
        return prompt_login_or_403(request)

    try:
        is_audio = obj.has_requisite_content_models
    except Exception:
        # any error while checking content models is treated as not
        # found; narrowed from a bare except so that SystemExit and
        # KeyboardInterrupt are not turned into a 404
        raise Http404
    if not is_audio:
        raise Http404

    return TemplateResponse(request, 'audio/view.html', {'resource': obj})
예제 #43
0
def view(request, pid):
    '''View a single :class:`~keep.audio.models.AudioObject`.
    User must either have general view audio permissions, or if they have
    view researcher audio, the object must be researcher accessible
    (based on rights codes).

    Returns the result of ``prompt_login_or_403`` when the user may not
    view the object.  Raises :class:`Http404` when the object does not
    have the required content models or when checking them fails.
    '''
    repo = Repository(request=request)
    obj = repo.get_object(pid, type=AudioObject)
    # user either needs view audio permissions OR
    # view researcher audio on a researcher-accessible object
    viewable = request.user.has_perm('audio.view_audio') or \
        (request.user.has_perm('audio.view_researcher_audio') and
         bool(obj.researcher_access))
    if not viewable:
        return prompt_login_or_403(request)

    try:
        is_audio = obj.has_requisite_content_models
    except Exception:
        # any error while checking content models is treated as not
        # found; narrowed from a bare except so that SystemExit and
        # KeyboardInterrupt are not turned into a 404
        raise Http404
    if not is_audio:
        raise Http404

    return TemplateResponse(request, 'audio/view.html', {'resource': obj})
예제 #44
0
    def setUp(self):
        '''Open a repository connection, save a parent test collection
        and prepare an :class:`ArrangementObject` that belongs to it.'''
        self.repo = Repository()
        self.pids = []

        # parent collection: fixed pid in the configured pidspace,
        # saved and recorded in self.pids
        collection = self.repo.get_object(type=CollectionObject)
        collection.pid = '%s:parent-1' % settings.FEDORA_PIDSPACE
        collection.mods.content.source_id = '12345'
        collection.save()
        self.pids.append(collection.pid)

        # arrangement object linked to the collection; not saved here
        arrangement = self.arr = self.repo.get_object(type=ArrangementObject)
        arrangement.pid = 'foo:1'
        arrangement.collection = collection
예제 #45
0
    def setUp(self):
        '''Create the repository fixtures (simple collection, master
        collection and a digital object carrying the two MARBL
        datastreams) and configure a ``migrate_rushdie`` command
        instance to run against them.

        The pid of every saved object is recorded in ``self.pids``.
        '''
        self.repo = Repository()
        self.pids = []

        # simple collection
        simple = self.sc = self.repo.get_object(type=SimpleCollection)
        simple.label = "SimpleCollection For Test"
        simple.save()
        self.pids.append(simple.pid)

        # master collection
        master = self.mc = self.repo.get_object(type=CollectionObject)
        master.label = "MasterCollection For Test"
        master.save()
        self.pids.append(master.pid)

        # digital object with the MARBL-MACTECH and MARBL-ANALYSIS
        # datastreams added through the API
        dig = self.digObj = self.repo.get_object(type=RushdieArrangementFile)
        dig.label = "Object For Test"
        dig.save()
        self.pids.append(dig.pid)
        for dsid, fixture in (("MARBL-MACTECH", self.MM_FIXTURE),
                              ("MARBL-ANALYSIS", self.MA_FIXTURE)):
            dig.api.addDatastream(dig.pid, dsid, dsid,
                                  mimeType="application/xml", content=fixture)
        # drop the Arrangement content model so it can be added later
        arrangement_model = (dig.uriref, modelns.hasModel,
                             "info:fedora/emory-control:Arrangement-1.0")
        dig.rels_ext.content.remove(arrangement_model)
        dig.save()

        # command under test, wired to the fixtures above
        command = self.cmd = migrate_rushdie.Command()
        command.verbosity = 1
        command.v_normal = 1
        command.v_none = 0
        command.simple_collection = simple
        command.stdout = sys.stdout
        command.CONTENT_MODELS = CONTENT_MODELS
        command.repo = self.repo
예제 #46
0
def manage_supplements(request, pid):
    '''Manage supplemental file datastreams associated with a
    :class:`~keep.file.models.DiskImage`.

    On POST, the submitted formset is processed: labels are updated,
    uploaded files replace datastream content (with mimetype and MD5
    checksum recalculated), and the object is saved.  After a successful
    save the user is redirected to the ``file:edit`` page; if the save
    fails the error is logged and the form is redisplayed with an error
    message.  For every other request method the formset is displayed,
    populated from the existing supplemental datastreams.

    Raises :class:`Http404` when the object does not exist or is not a
    disk image.
    '''
    repo = Repository(request=request)
    obj = repo.get_object(pid, type=DiskImage)
    if not obj.exists or not obj.has_requisite_content_models:
        raise Http404

    # generate initial data from any existing supplemental datastreams
    initial_data = [
        {'dsid': s.id, 'label': s.label,
         'file': DatastreamFile(obj.pid, s.id, s.label)}
        for s in obj.supplemental_content
    ]

    if request.method == 'POST':
        # on post, process the form and any updates/additions
        formset = SupplementalFileFormSet(request.POST, request.FILES,
            initial=initial_data)

        if formset.is_valid():
            m = magic.Magic(mime=True)

            # NOTE: because we currently don't support re-ordering
            # or deletion, simply counting to keep track of datastream ids
            s_id = 0
            modified = 0
            added = 0
            for file_info in formset.cleaned_data:
                # skip empty formset
                if not file_info:
                    continue

                if file_info.get('dsid', None):
                    # existing supplemental datastream
                    ds = obj.getDatastreamObject(file_info['dsid'],
                        dsobj_type=FileDatastreamObject)
                else:
                    # new datastream, numbered by its position among the
                    # non-empty forms
                    added += 1
                    ds = obj.getDatastreamObject('supplement%d' % s_id,
                        dsobj_type=FileDatastreamObject)

                # only set if changed so datastream isModified is accurate
                if file_info['label'] != ds.label:
                    ds.label = file_info['label']

                # if this is an uploaded file, replace content and calculate mimetype, checksum
                if isinstance(file_info['file'], UploadedFile):
                    filename = file_info['file'].temporary_file_path()
                    # keep only the part of the detected type before any
                    # ';' parameters
                    mimetype = m.from_file(filename).partition(';')[0]
                    ds.mimetype = mimetype
                    ds.checksum = md5sum(filename)
                    ds.content = file_info['file']

                if ds.exists and ds.isModified():
                    modified += 1

                s_id += 1

            try:
                obj.save('updating supplemental files')

                # summarize number of changes, if any
                if added or modified:
                    msg_add = 'added %d' % added if added else ''
                    msg_update = 'updated %d' % modified if modified else ''
                    msg = 'Successfully %s%s%s supplemental file%s' %  \
                        (msg_add, ' and ' if added and modified else '', msg_update,
                        's' if (added + modified) != 1 else '')
                    messages.success(request, msg)
                else:
                    # possible for the form to be valid but not make any changes
                    messages.info(request, 'No changes made to supplemental content')

                return HttpResponseSeeOtherRedirect(reverse('file:edit', args=[pid]))

            except Exception as e:
                logger.error('Error on supplemental file update: %s' % e)
                logger.debug("Error details:\n" + traceback.format_exc())

                messages.error(request, unicode(e))
                # for now, just redisplay the form with error message

    else:
        # GET and any other method (e.g. HEAD): just display the form.
        # Binding the formset here guarantees it is defined for the
        # render below whatever the request method is.
        formset = SupplementalFileFormSet(initial=initial_data)

    return TemplateResponse(request, 'file/supplemental_content.html',
        {'obj': obj, 'formset': formset})
예제 #47
0
def edit(request, pid):
    '''Edit the metadata for a single :class:`~keep.file.models.DiskImage`.

    On POST with a valid form, the object is updated from the form and
    saved (using the submitted comment, or ``"update metadata"`` when
    none is given); the user is then redirected to the dashboard unless
    ``_save_continue`` was posted, in which case the form is shown again.
    An invalid POST redisplays the form with an error message.  Any
    other method displays the form populated from the object.

    Raises :class:`Http404` when the object is not a disk image, does
    not exist, or Fedora reports a 404.  Returns a 403 response on
    :class:`PermissionDenied` for an existing object, and a plain-text
    500 response for other repository request failures.
    '''
    # FIXME: should be generic file (?) or possibly one of several supported files
    repo = Repository(request=request)
    obj = repo.get_object(pid, type=DiskImage)
    try:
        # if this is not actually a disk image, then 404 (object is not available at this url)
        if not obj.has_requisite_content_models:
            raise Http404

        if request.method == 'POST':

            # if data has been submitted, initialize form with request data and object mods
            form = DiskImageEditForm(request.POST, instance=obj)
            if form.is_valid():     # includes schema validation
                # update foxml object with data from the form
                form.update_instance()
                # a missing or empty comment falls back to the default
                comment = form.cleaned_data.get('comment') or "update metadata"

                obj.save(comment)
                messages.success(request, 'Successfully updated <a href="%s">%s</a>' % \
                        (reverse('file:edit', args=[pid]), pid))
                # save & continue functionality - same as collection edit
                if '_save_continue' not in request.POST:
                    return HttpResponseSeeOtherRedirect(reverse('repo-admin:dashboard'))
                # otherwise - fall through to display edit form again

            # form was posted but not valid
            else:
                # if we attempted to save and failed, add a message since the error
                # may not be obvious or visible in the first screenful of the form
                messages.error(request,
                    '''Your changes were not saved due to a validation error.
                    Please correct any required or invalid fields indicated below and save again.''')

        else:
            # GET - display the form for editing, pre-populated with content from the object
            form = DiskImageEditForm(instance=obj)

        class AdminOpts(object):
            app_label = 'file'
            model_name = 'application'

        # options for generating admin link to edit/add file application db info
        admin_fileapp = AdminOpts()

        return TemplateResponse(request, 'file/edit.html', {'obj': obj, 'form': form,
            'admin_fileapp': admin_fileapp})

    except PermissionDenied:
        # Fedora may return a PermissionDenied error when accessing a datastream
        # where the datastream does not exist, object does not exist, or user
        # does not have permission to access the datastream

        # check that the object exists - if not, 404
        if not obj.exists:
            raise Http404
        # for now, assuming that if object exists and has correct content models,
        # it will have all the datastreams required for this view

        return HttpResponseForbidden('Permission Denied to access %s' % pid,
                                     content_type='text/plain')

    except RequestFailed as rf:
        # if fedora actually returned a 404, propagate it
        if rf.code == 404:
            raise Http404

        # this view edits disk images, so the message refers to disk
        # image data
        msg = 'There was an error contacting the digital repository. ' + \
              'This prevented us from accessing disk image data. If this ' + \
              'problem persists, please alert the repository ' + \
              'administrator.'
        return HttpResponse(msg, content_type='text/plain', status=500)
예제 #48
0
class TestMigrateRushdie(TestCase):
    '''Tests for helper methods of the ``migrate_rushdie`` management
    command.

    :meth:`setUp` creates its fixture objects through
    :class:`Repository` and :meth:`tearDown` purges every pid recorded
    in ``self.pids``.
    '''

    # content added to the test object as the MARBL-MACTECH datastream
    MM_FIXTURE = '''<macfs:document xmlns:macfs="info:fedora/emory-control:Rushdie-MacFsData-1.0">
  <macfs:md5>ffcf48e5df673fc7de985e1b859eeeec</macfs:md5>
  <macfs:file>
    <macfs:computer>Performa 5400</macfs:computer>
    <macfs:path>/Hard Disk/MIDNIGHT&apos;S CHILDREN/MISC. MATERIAL/x - the roles</macfs:path>
    <macfs:rawpath>L0hhcmQgRGlzay9NSUROSUdIVCdTIENISUxEUkVOL01JU0MuIE1BVEVSSUFML3ggLSB0aGUgcm9sZXM=</macfs:rawpath>
    <macfs:attributes>avbstclInmedz</macfs:attributes>
    <macfs:created>1997-01-19T19:29:32</macfs:created>
    <macfs:modified>1997-01-19T19:29:32</macfs:modified>
    <macfs:type>TEXT</macfs:type>
    <macfs:creator>ttxt</macfs:creator>
  </macfs:file>
</macfs:document>'''

    # content added to the test object as the MARBL-ANALYSIS datastream
    MA_FIXTURE = '''<marbl:analysis xmlns:marbl="info:fedora/emory-control:Rushdie-MarblAnalysis-1.0">
  <marbl:series>Writings by Rushdie</marbl:series>
  <marbl:subseries>Fiction</marbl:subseries>
  <marbl:verdict>As is</marbl:verdict>
</marbl:analysis>'''

    # series lookup passed to _convert_ds; shared by all tests, so it
    # must never be modified in place
    SERIES_FIXTURE = {
        'Writings by Rushdie': {
            'series_info': {
                'base_ark': 'http://testpid.library.emory.edu/ark:/25593/80mvk',
                'id': 'rushdie1000_series2',
                'short_id': 'series2',
                'uri': 'https://findingaids.library.emory.edu/documents/rushdie1000/series2',
            },
            'subseries_info': {
                'Fiction': {
                    'base_ark': 'http://testpid.library.emory.edu/ark:/25593/80mvk',
                    'id': 'rushdie1000_subseries2.1',
                    'short_id': 'subseries2.1',
                    'uri': 'https://findingaids.library.emory.edu/documents/rushdie1000/series2/subseries2.1',
                },
            },
        },
    }

    def setUp(self):
        '''Create a simple collection, a master collection and a digital
        object with the two MARBL datastreams, then configure the
        command instance under test.'''
        self.repo = Repository()
        self.pids = []

        # create a simple collection
        self.sc = self.repo.get_object(type=SimpleCollection)
        self.sc.label = "SimpleCollection For Test"
        self.sc.save()
        self.pids.append(self.sc.pid)

        # create a master collection
        self.mc = self.repo.get_object(type=CollectionObject)
        self.mc.label = "MasterCollection For Test"
        self.mc.save()
        self.pids.append(self.mc.pid)

        # create a digital object and add the MARBL datastreams to it
        self.digObj = self.repo.get_object(type=RushdieArrangementFile)
        self.digObj.label = "Object For Test"
        self.digObj.save()
        self.pids.append(self.digObj.pid)
        self.digObj.api.addDatastream(self.digObj.pid, "MARBL-MACTECH",
                                      "MARBL-MACTECH", mimeType="application/xml",
                                      content=self.MM_FIXTURE)
        self.digObj.api.addDatastream(self.digObj.pid, "MARBL-ANALYSIS",
                                      "MARBL-ANALYSIS", mimeType="application/xml",
                                      content=self.MA_FIXTURE)
        # remove Arrangement model so it can be added later
        relation = (self.digObj.uriref, modelns.hasModel,
                    "info:fedora/emory-control:Arrangement-1.0")
        self.digObj.rels_ext.content.remove(relation)
        self.digObj.save()

        # set up the command
        self.cmd = migrate_rushdie.Command()
        self.cmd.verbosity = 1
        self.cmd.v_normal = 1
        self.cmd.v_none = 0
        self.cmd.simple_collection = self.sc
        self.cmd.stdout = sys.stdout
        self.cmd.CONTENT_MODELS = CONTENT_MODELS
        self.cmd.repo = self.repo

    def tearDown(self):
        '''Purge every object recorded in ``self.pids``.'''
        for pid in self.pids:
            self.repo.purge_object(pid)

    def test__add_to_simple_collection(self):
        '''The object becomes a member of the simple collection.'''
        self.cmd._add_to_simple_collection(self.digObj)
        self.assertTrue((self.sc.uriref, relsextns.hasMember,
                         self.digObj.uriref) in self.sc.rels_ext.content,
                        "%s shold be a member of the Simplecollection" % self.digObj.pid)

    def test__get_unique_objects(self):
        '''Duplicate pids are processed only once.'''
        objs = self.cmd._get_unique_objects([self.digObj.pid, self.digObj.pid])
        self.assertEqual(len(objs), 1, "No dup pids should be processed")

    def test__convert_ds(self):
        '''All fields are carried over by ``_convert_ds`` and the MARBL
        datastreams are removed.'''
        obj = self.cmd._convert_ds(self.digObj, self.mc, self.SERIES_FIXTURE, False)
        series_lookup = self.SERIES_FIXTURE["Writings by Rushdie"]
        series_info = series_lookup["series_info"]
        subseries_info = series_lookup["subseries_info"]["Fiction"]

        # filetech
        filetech = obj.filetech.content.file[0]
        self.assertEqual(filetech.md5, "ffcf48e5df673fc7de985e1b859eeeec")
        self.assertEqual(filetech.computer, "Performa 5400")
        self.assertEqual(filetech.path, "/Hard Disk/MIDNIGHT'S CHILDREN/MISC. MATERIAL/x - the roles")
        self.assertEqual(filetech.rawpath, "L0hhcmQgRGlzay9NSUROSUdIVCdTIENISUxEUkVOL01JU0MuIE1BVEVSSUFML3ggLSB0aGUgcm9sZXM=")
        self.assertEqual(filetech.attributes, "avbstclInmedz")
        self.assertEqual(filetech.created, "1997-01-19T19:29:32")
        self.assertEqual(filetech.modified, "1997-01-19T19:29:32")
        self.assertEqual(filetech.type, "TEXT")
        self.assertEqual(filetech.creator, "ttxt")

        # MODS: the subseries is the direct series entry ...
        mods_series = obj.mods.content.series
        self.assertEqual(mods_series.title, "Fiction")
        self.assertEqual(mods_series.uri, subseries_info["uri"])
        self.assertEqual(mods_series.base_ark, subseries_info["base_ark"])
        self.assertEqual(mods_series.full_id, subseries_info["id"])
        self.assertEqual(mods_series.short_id, subseries_info["short_id"])
        # ... and the series is nested inside it
        self.assertEqual(mods_series.series.title, "Writings by Rushdie")
        self.assertEqual(mods_series.series.uri, series_info["uri"])
        self.assertEqual(mods_series.series.base_ark, series_info["base_ark"])
        self.assertEqual(mods_series.series.full_id, series_info["id"])
        self.assertEqual(mods_series.series.short_id, series_info["short_id"])

        # Rights
        self.assertEqual(obj.rights.content.access_status.code, "2")

        # RELS-EXT
        self.assertTrue((obj.uriref, relsextns.isMemberOf, self.mc.uriref) in obj.rels_ext.content, "Object should have isMember relation to master collection")
        self.assertTrue((obj.uriref, modelns.hasModel, URIRef("info:fedora/emory-control:ArrangementAccessAllowed-1.0")) in obj.rels_ext.content, "Object should have Allowed Content Model")

        # label, owner and DC title
        self.assertEqual(obj.label, "x - the roles", "Label should be set to last part of path")
        self.assertEqual(obj.owner, "thekeep-project", "owner should be set to 'thekeep-project'")
        self.assertEqual(obj.dc.content.title, "x - the roles", "DC title should be set to last part of path")

        # datastreams: have to reload obj from repository to get DS update
        obj = self.repo.get_object(pid=obj.pid, type=ArrangementObject)
        self.assertFalse("MARBL-MACTECH" in obj.ds_list, "MARBL-MACTECH should have been removed")
        self.assertFalse("MARBL-ANALYSIS" in obj.ds_list, "MARBL-ANALYSIS should have been removed")

    def test_missing_series_info(self):
        '''Series and subseries titles are still set when the lookup has
        no subseries info.'''
        # Build a lookup without subseries info.  The per-series dicts
        # are rebuilt rather than edited in place: a shallow copy would
        # share them with the class-level fixture, and deleting from
        # them would break every test that runs afterwards.
        series = dict(
            (name, dict((key, val) for key, val in info.items()
                        if key != "subseries_info"))
            for name, info in self.SERIES_FIXTURE.items())
        obj = self.cmd._convert_ds(self.digObj, self.mc, series, False)

        self.assertEqual(obj.mods.content.series.title, "Fiction")
        self.assertEqual(obj.mods.content.series.series.title, "Writings by Rushdie")
예제 #49
0
class EmailMessageTest(KeepTestCase):
    '''Tests for :class:`EmailMessage`: header access, generated labels,
    index data, and the Solr-backed ``by_checksum`` / ``by_message_id``
    lookups.'''

    # NOTE(review): the address literals in this class ('*****@*****.**' in
    # the fixture values and '[email protected]' in the expected labels)
    # look masked and do not agree with each other; confirm the real
    # values before relying on the label assertions.

    def setUp(self):
        self.repo = Repository()
        # pids collected here are purged from the repository in tearDown
        self.pids = []

        # test EmailMessage (not saved here; only CERP content is set)
        self.email = self.repo.get_object(type=EmailMessage)
        self.email.cerp.content.from_list = ['*****@*****.**']
        self.email.cerp.content.to_list = ['*****@*****.**']
        self.email.cerp.content.subject_list = ['Interesting Subject']

    def tearDown(self):
        for pid in self.pids:
            self.repo.purge_object(pid)

    def test_headers(self):
        # headers added to the CERP content should be available by name
        # through the ``headers`` mapping on the email object
        h1 = cerp.Header()
        h1.name = "HEADER 1"
        h1.value = "value for header 1"
        h2 = cerp.Header()
        h2.name = "HEADER 2"
        h2.value = "value for header 2"
        self.email.cerp.content.headers.append(h1)
        self.email.cerp.content.headers.append(h2)
        self.assertEqual(self.email.headers['HEADER 1'], 'value for header 1')
        self.assertEqual(self.email.headers['HEADER 2'], 'value for header 2')

    def test_email_label(self):
        # no object label and one person in to field
        label = self.email.email_label()
        self.assertEqual('Email from [email protected] to [email protected] Interesting Subject',
                         label,
                         'Should construct label when it does not exist')

        # more than one person in to list
        self.email.cerp.content.to_list.append('*****@*****.**')
        label = self.email.email_label()
        self.assertEqual('Email from [email protected] to [email protected] et al. Interesting Subject',
                         label,
                         'only show first to email address when there are more than one')

        # no subject
        self.email.cerp.content.subject_list = []
        self.assertEqual('Email from [email protected] to [email protected] et al.',
                         self.email.email_label(),
                         'Display message without subject when no subject is present')

        # has a date
        date_header = cerp.Header()
        date_header.name = 'Date'
        date_header.value = 'Friday 13 200 13:00'
        self.email.cerp.content.headers.append(date_header)
        label = self.email.email_label()
        self.assertEqual('Email from [email protected] to [email protected] et al. on Friday 13 200 13:00',
                         label,
                         'label should include the date when a Date header is present')

        # object label already exists
        self.email.label = "label we want to keep"
        label = self.email.email_label()
        self.assertEqual(self.email.label, label, 'label should be preserved when it exists')

    def test_index_data(self):
        # NOTE: logic for creating the label is in the label test

        # test to make sure label exists in index data
        data = self.email.index_data()
        self.assertIn('label', data)
        # mime_data does not exist, so no checksum should be indexed
        self.assertNotIn('content_md5', data,
                         'content_md5 should not be set when mime data does not exist')

        # patch mime data to test the exists / checksum case
        with patch.object(self.email, 'mime_data', Mock()) as mock_mime:
            mock_mime.exists = True
            mock_mime.checksum = 'test checksum value'

            data = self.email.index_data()
            self.assertEqual(self.email.mime_data.checksum, data['content_md5'])

    @patch('keep.arrangement.models.solr_interface', spec=sunburnt.SolrInterface)
    def test_by_checksum(self, mocksolr):
        # no match
        self.assertRaises(ObjectDoesNotExist, EmailMessage.by_checksum,
                          42)
        solr = mocksolr.return_value
        solr.query.assert_called_with(content_md5=42,
                                      content_model=ArrangementObject.ARRANGEMENT_CONTENT_MODEL)
        solr.query.return_value.field_limit.assert_called_with('pid')

        # too many matches
        solr.query.return_value.field_limit.return_value = [{'pid': 'pid:1'},
                                                            {'pid': 'pid:2'}]
        self.assertRaises(MultipleObjectsReturned, EmailMessage.by_checksum,
                          42)

        # one match
        solr.query.return_value.field_limit.return_value = [{'pid': 'pid:1'}]
        em = EmailMessage.by_checksum(42)
        self.assertIsInstance(em, EmailMessage)

        # custom repo object
        mockrepo = Mock()
        em = EmailMessage.by_checksum(42, mockrepo)
        mockrepo.get_object.assert_called_with('pid:1', type=EmailMessage)

    @patch('keep.arrangement.models.solr_interface', spec=sunburnt.SolrInterface)
    def test_by_message_id(self, mocksolr):
        # no match
        self.assertRaises(ObjectDoesNotExist, EmailMessage.by_message_id,
                          '<*****@*****.**>')
        solr = mocksolr.return_value
        solr.query.assert_called_with(arrangement_id='<*****@*****.**>',
                                      content_model=ArrangementObject.ARRANGEMENT_CONTENT_MODEL)
        solr.query.return_value.field_limit.assert_called_with('pid')
예제 #50
0
def upload(request):
    '''Display the upload form and ingest any files submitted with it.

    A POST is handled in one of two ways, depending on its content type:

    * ``multipart/form-data`` is treated as a regular form submission:
      the :class:`UploadForm` is validated, the selected collection is
      loaded, and the files reported by the form are passed to
      ``ingest_files``; per-file results are added to the template
      context as ``ingest_results``.
    * any other content type is handed off to ``ajax_upload`` and its
      response is returned directly.

    On GET, and after a processed form submission, the upload template
    is rendered with a fresh, unbound form.
    '''
    repo = Repository(request=request)
    template_name = 'file/upload.html'

    context = {
        # list of allowed file types, serialized for use in javascript
        'js_allowed_types': mark_safe(json.dumps(allowed_upload_types(request.user)))
    }

    if request.method == 'POST':
        raw_content_type = request.META.get('CONTENT_TYPE', 'application/octet-stream')
        # drop any options after ';' and normalize case, since content
        # type is technically case-insensitive
        media_type = raw_content_type.partition(';')[0].strip().lower()

        if media_type != 'multipart/form-data':
            # POST but not form data - handle ajax file upload
            return ajax_upload(request)

        form = UploadForm(request.POST, request.FILES)

        # Invalid form (e.g., no collection specified, no or mismatched
        # files uploaded): redisplay it with its error messages.
        if not form.is_valid():
            context['form'] = form
            return TemplateResponse(request, template_name, context)

        # Valid form: load the collection and the optional user comment,
        # falling back to a generic ingest comment
        collection = repo.get_object(pid=form.cleaned_data['collection'],
                                     type=CollectionObject)
        comment = form.cleaned_data['comment'] or 'initial repository ingest'
        # dictionary of file path -> filename, based on form data
        pending_files = form.files_to_ingest()

        # process all files submitted for ingest (single or batch mode)
        # and expose the per-file results to the template
        if pending_files:
            context['ingest_results'] = ingest_files(pending_files, collection,
                                                     comment, request)
        # fall through to display the upload template

    # on GET or after a processed form POST, display an empty upload form
    context['form'] = UploadForm()
    return TemplateResponse(request, template_name, context)
예제 #51
0
class ArrangementObjectTest(KeepTestCase):
    '''Tests for :class:`ArrangementObject`: lookup by arrangement id,
    status / access content-model handling, index data, ARK label
    updates, and generated PREMIS object and event metadata.'''

    def setUp(self):
        self.repo = Repository()
        # pids of objects saved during setup; purged in tearDown
        self.pids = []

        # create test collection
        coll = self.repo.get_object(type=CollectionObject)
        coll.pid = '%s:parent-1' % settings.FEDORA_PIDSPACE
        coll.mods.content.source_id = '12345'
        coll.save()
        self.pids.append(coll.pid)

        # create test arrangement object (not saved)
        self.arr = self.repo.get_object(type=ArrangementObject)
        self.arr.pid = 'foo:1'
        self.arr.collection = coll

    def tearDown(self):
        for pid in self.pids:
            self.repo.purge_object(pid)

    @patch('keep.arrangement.models.solr_interface', spec=sunburnt.SolrInterface)
    def test_by_arrangement_id(self, mocksolr):
        # no match
        self.assertRaises(ObjectDoesNotExist, ArrangementObject.by_arrangement_id,
                          42)
        solr = mocksolr.return_value
        solr.query.assert_called_with(arrangement_id=42,
                                      content_model=ArrangementObject.ARRANGEMENT_CONTENT_MODEL)
        solr.query.return_value.field_limit.assert_called_with('pid')

        # too many matches
        solr.query.return_value.field_limit.return_value = [{'pid': 'pid:1'},
                                                            {'pid': 'pid:2'}]
        self.assertRaises(MultipleObjectsReturned, ArrangementObject.by_arrangement_id,
                          42)

        # one match
        solr.query.return_value.field_limit.return_value = [{'pid': 'pid:1'}]
        ao = ArrangementObject.by_arrangement_id(42)
        self.assertIsInstance(ao, ArrangementObject)

        # custom repo object
        mockrepo = Mock()
        ao = ArrangementObject.by_arrangement_id(42, mockrepo)
        mockrepo.get_object.assert_called_with('pid:1', type=ArrangementObject)

    def test_arrangement_status(self):
        obj = ArrangementObject(Mock())
        obj.arrangement_status = 'processed'
        self.assertEqual('A', obj.state)
        self.assertEqual('processed', obj.arrangement_status)

        obj.arrangement_status = 'accessioned'
        self.assertEqual('I', obj.state)
        self.assertEqual('accessioned', obj.arrangement_status)

        # attempting to assign an unknown status should raise a ValueError
        with self.assertRaises(ValueError):
            obj.arrangement_status = 'bogus'

    def test_update_access_cmodel(self):
        obj = ArrangementObject(Mock())
        # no status set - should be set to restricted
        obj._update_access_cmodel()

        restricted = (obj.uriref, modelns.hasModel, URIRef(ACCESS_RESTRICTED_CMODEL))
        allowed = (obj.uriref, modelns.hasModel, URIRef(ACCESS_ALLOWED_CMODEL))
        self.assertIn(restricted, obj.rels_ext.content)
        self.assertNotIn(allowed, obj.rels_ext.content)

        # set to status code 2 = access allowed
        obj.rights.content.create_access_status()
        obj.rights.content.access_status.code = '2'

        obj._update_access_cmodel()

        self.assertNotIn(restricted, obj.rels_ext.content)
        self.assertIn(allowed, obj.rels_ext.content)

    def test_index_data(self):
        idx_data = self.arr.index_data()
        self.assertEqual('born-digital', idx_data['object_type'])
        self.assertEqual(self.arr.pid, idx_data['pid'])
        self.assertIn(self.arr.owner, idx_data['owner'])
        self.assertEqual(self.arr.collection.pid, idx_data['collection_id'])
        self.assertEqual(self.arr.collection.mods.content.source_id, idx_data['collection_source_id'])

    # Test the update_ark_label method in the keep.common.fedora
    # Note that this test is a simplified version of keep.common.fedora:ArkPidDigitalObject.test_update_ark_label
    # The update_ark_label here is an overridden method that is more specific, and is used on Arrangement objects
    @patch('keep.arrangement.models.pidman')  # mock the pidman client (the API service)
    def test_update_ark_label(self, mockpidman):

        # Create an ArrangementObject
        arrangement_object = ArrangementObject(Mock())

        # Set a pid on the object so that it could internally generate a noid etc.
        arrangement_object.pid = "test:1234"

        # Simulate when the object doesn't exist (or hasn't been saved)
        # By default it appears as if it doesn't exist
        arrangement_object.update_ark_label()

        # What we should expect is that get_ark is not called on pidman
        # Also there shouldn't be any errors
        # Use the mock ``called`` flag to check whether the method was called
        self.assertFalse(mockpidman.get_ark.called)

        # Mock when the object exists (returns True)
        # Note: Need to set the Mock on the class and not the object because
        # this (exists) is a property method
        with patch.object(ArrangementObject, 'exists', new=Mock(return_value=True)):
            arrangement_object.update_ark_label()
            self.assertFalse(mockpidman.get_ark.called)

        # Set the label before the object exists so we don't trigger API calls
        arrangement_object.dc.content.title = "testpid"
        with patch.object(ArrangementObject, 'exists', new=Mock(return_value=True)):
            mockpidman.get_ark.return_value = {"name": arrangement_object.dc.content.title}
            arrangement_object.update_ark_label()
            mockpidman.get_ark.assert_called_with(arrangement_object.noid)  # assert that it is called with a noid too
            self.assertFalse(mockpidman.update_ark.called)

            # When the label is different from that in Pidman
            mockpidman.get_ark.return_value = {"name": "another pid"}
            arrangement_object.update_ark_label()
            mockpidman.get_ark.assert_called_with(arrangement_object.noid)  # assert that it is called with a noid too
            mockpidman.update_ark.assert_called_with(noid=arrangement_object.noid, name=arrangement_object.dc.content.title)

    def test_set_premis_object(self):
        mockapi = Mock()
        arrangement_object = ArrangementObject(mockapi)
        arrangement_object.pid = "test:1234"
        arrangement_object.mods.content.ark = 'ark:/1234/987'

        # return empty iterator for original data to checksum
        mockapi.getDatastreamDissemination.return_value = []
        with patch.object(arrangement_object, 'getDatastreamObject') as mockgetds:
            mockgetds.return_value.checksum = '123456789'
            mockgetds.return_value.mimetype = 'text/plain'
            arrangement_object.set_premis_object()

        self.assertTrue(arrangement_object.provenance.content.object)
        premis = arrangement_object.provenance.content
        # FIXME: placeholder tests for placeholder functionality,
        # should be updated to use ARK uri once that is implemented
        self.assertEqual('ark', premis.object.id_type)
        self.assertEqual(arrangement_object.mods.content.ark, premis.object.id)
        self.assertEqual('p:file', premis.object.type)
        self.assertEqual(0, premis.object.composition_level)
        self.assertEqual('MD5', premis.object.checksums[0].algorithm)
        self.assertEqual('123456789',
                         premis.object.checksums[0].digest)
        # sha1 for an empty file
        empty_sha1 = 'da39a3ee5e6b4b0d3255bfef95601890afd80709'
        self.assertEqual('SHA-1', premis.object.checksums[1].algorithm)
        self.assertEqual(empty_sha1,
                         premis.object.checksums[1].digest)
        # object format should be original mimetype
        self.assertEqual('text/plain', premis.object.format.name)

        # generated premis should be valid
        self.assertTrue(premis.is_valid())

    def test_identifier_change_event(self):
        mockapi = Mock()
        # NOTE(review): this username literal looks masked, while the agent
        # id assertion below expects 'fedoraAdmin'; confirm the real value.
        mockapi.username = '******'
        arrangement_object = ArrangementObject(mockapi)
        arrangement_object.pid = 'test:1234'
        arrangement_object.mods.content.ark = 'ark:/1234/987'

        # set object premis so we can validate
        mockapi.getDatastreamDissemination.return_value = []
        with patch.object(arrangement_object, 'getDatastreamObject') as mockgetds:
            mockgetds.return_value.checksum = '123456789'
            mockgetds.return_value.mimetype = 'text/plain'
            arrangement_object.set_premis_object()

        arrangement_object.identifier_change_event('old-pid:1')
        premis = arrangement_object.provenance.content
        self.assertEqual(1, len(premis.events))
        event = premis.events[0]
        self.assertEqual('UUID', event.id_type)
        # id should be set, we don't care what it is exactly
        self.assertTrue(event.id)
        self.assertEqual('identifier assignment', event.type)
        self.assertEqual('program="keep"; version="%s"' % __version__,
                         event.detail)
        self.assertEqual('Pass', event.outcome)
        msg = 'Persistent identifier reassigned from %s to %s' % \
            ('old-pid:1', arrangement_object.pid)
        self.assertEqual(msg, event.outcome_detail)
        self.assertEqual('fedora user', event.agent_type)
        self.assertEqual('fedoraAdmin', event.agent_id)

        # generated premis should be valid
        self.assertTrue(premis.is_valid())
예제 #52
0
def batch_set_status(pid, status):
    '''Set the Fedora state of every member of a :class:`SimpleCollection`
    and record the status on the collection itself.

    :param pid: pid of the SimpleCollection whose members should be updated
    :param status: form status label; must be ``'Processed'`` or
        ``'Accessioned'``
    :returns: success message with the number of member items updated
    :raises Exception: if the status is unknown, if any member object
        fails to update (the collection is then left unchanged), or if
        saving the collection fails
    '''
    repo = Repository()
    batch = repo.get_object(pid, type=SimpleCollection)
    # keep track of totals for success and failure
    success = 0
    error = 0

    # translate form status codes to fedora state code
    # TODO: shift this logic to arrangement object for re-use ?
    codes = {'Processed': 'A', 'Accessioned': 'I'}

    # target state for every object in the collection
    if status not in codes:
        err_msg = 'Status %s unknown' % status
        logger.error(err_msg)
        raise Exception(err_msg)
    state = codes[status]

    # find all pids associated with this object
    member_pids = list(
        batch.rels_ext.content.objects(batch.uriref, relsextns.hasMember))

    # use a distinct loop name so the ``pid`` parameter is not shadowed
    for member_pid in member_pids:
        try:
            # pass in api from batch object to retain user credentials
            obj = ArrangementObject(batch.api, member_pid)
            obj.state = state
            obj.save('Marking as %s via SimpleCollection %s' %
                     (status, batch.pid))
            success += 1
        except Exception as e:
            # best effort: record the failure and continue with the rest
            logger.error('Failed to update %s : %s', member_pid, e)
            error += 1

    info = {
        'success': success,
        'error': error,
        'success_plural': '' if success == 1 else 's',
        'error_plural': '' if error == 1 else 's',
        'status': status
    }

    summary_msg = "Successfully updated %(success)s item%(success_plural)s; error updating %(error)s" % info

    # if not all objects were updated correctly, exit with error
    if error > 0:
        raise Exception(summary_msg)

    # FIXME: this is based on the current form logic, but could leave
    # some member items stranded in a different status than the parent object

    batch.mods.content.create_restrictions_on_access()
    batch.mods.content.restrictions_on_access.text = status  # Change collection status
    try:
        batch.save(
            'Marking as %(status)s; updated %(success)s member item%(success_plural)s'
            % info)

    except Exception as e:
        # report the collection's own pid; the last member object would be
        # the wrong pid here, and is undefined when there are no members
        save_err = "Error updating SimpleCollection %s - %s" % (batch.pid, e)
        logger.error(save_err)
        raise Exception('%s; %s' % (save_err, summary_msg))

    # success
    return 'Successfully updated %(success)s item%(success_plural)s' % info
예제 #53
0
파일: tasks.py 프로젝트: jrhoads/TheKeep
def migrate_aff_diskimage(self, pid):
    '''Migrate an AFF :class:`DiskImage` to E01 format and ingest the
    result as a new object.

    The AFF content is downloaded into a temporary directory, converted
    with the ``ftkimager`` command line tool, and both files are run
    through ``ftkimager_verify`` so their checksums can be compared.  The
    E01 is then ingested as a new :class:`DiskImage` with the ftkimager
    output attached as supplemental datastreams and with descriptive and
    rights metadata copied from the original.  Finally the original
    object is updated with a PREMIS migration event and a relationship
    to the migrated object.

    :param self: not referenced in the function body
        (NOTE(review): presumably a bound task instance supplied by a
        decorator that is not visible here -- confirm)
    :param pid: pid of the AFF disk image object to migrate
    :returns: summary message naming the original and migrated pids
    :raises Exception: if the object does not exist, is not a DiskImage,
        is not in AFF format, or was already migrated; if the download,
        conversion, or verification fails; if duplicate content is
        detected on ingest; or if the ingested checksum does not match
    '''
    creating_application = 'AccessData FTK Imager'
    application_version = 'v3.1.1 CLI'
    migration_event_detail = 'program="%s"; version="%s"' % \
        (creating_application, application_version)
    migration_event_outcome = 'AFF reformatted as E01 using command line ' + \
        'FTK program with settings: --e01 --compress 0 --frag 100T --quiet'

    # use the configured ingest staging area as the base tmp dir
    # for all temporary files
    staging_dir = getattr(settings, 'LARGE_FILE_STAGING_DIR', None)
    # create a tempdir within the large file staging area
    tmpdir = tempfile.mkdtemp(suffix='-aff-migration', dir=staging_dir)
    logger.debug('Using tmpdir %s', tmpdir)

    # Retrieve the object to be migrated
    repo = Repository()
    original = repo.get_object(pid, type=DiskImage)

    # check object before migrating
    # - exists in fedora
    if not original.exists:
        raise Exception('%s not found in Fedora' % original.pid)
    # - is a disk image
    if not original.has_requisite_content_models:
        raise Exception('%s is not a DiskImage object' % original.pid)
    # - is an AFF disk image
    if original.provenance.content.object.format.name != 'AFF':
        raise Exception('%s DiskImage format is not AFF' % original.pid)
    # - has not already been migrated
    if original.migrated is not None:
        raise Exception('%s has already been migrated' % original.pid)

    # download the aff disk image to a tempfile
    aff_file = tempfile.NamedTemporaryFile(suffix='.aff',
                                           prefix='keep-%s_' % original.noid,
                                           dir=tmpdir,
                                           delete=False)
    logger.debug('Saving AFF as %s for conversion (datastream size: %s)' \
        % (aff_file.name, filesizeformat(original.content.size)))
    try:
        for chunk in original.content.get_chunked_content():
            aff_file.write(chunk)
    except Exception as err:
        # log the underlying cause; the raised message is kept unchanged
        logger.error('Error downloading %s AFF for conversion: %s',
                     original.pid, err)
        raise Exception('Error downloading %s AFF for conversion' %
                        original.pid)
    finally:
        # always close the file handle, both to avoid leaking it on
        # failure and in case of weird interactions with ftkimager
        aff_file.close()

    aff_size = os.path.getsize(aff_file.name)
    logger.debug('Downloaded %s' % filesizeformat(aff_size))

    # run ftkimager to generate the E01 version
    logger.debug('Running ftkimager to generate E01')
    e01_file = tempfile.NamedTemporaryFile(suffix='.E01',
                                           prefix='keep-%s_' % original.noid,
                                           dir=tmpdir,
                                           delete=False)
    # close the file handle in case of weird interactions with ftkimager
    e01_file.close()
    # file handle to capture console output from ftkimager
    ftk_output = tempfile.NamedTemporaryFile(suffix='.txt',
                                             prefix='keep-%s-ftkimager_' %
                                             original.noid,
                                             dir=tmpdir)
    logger.debug('E01 temp file is %s' % e01_file.name)
    logger.debug('ftkimager output temp file is %s' % ftk_output.name)
    # ftkimager adds .E01 to the specified filename, so pass in filename without
    e01_file_basename = os.path.splitext(e01_file.name)[0]

    convert_command = [
        'ftkimager', aff_file.name, e01_file_basename, '--e01', '--compress',
        '0', '--frag', '100T', '--quiet'
    ]
    # quiet simply suppresses progress output, which is not meaningful
    # in a captured text file
    logger.debug('conversion command is %s' % ' '.join(convert_command))
    return_val = subprocess.call(convert_command,
                                 stdout=ftk_output,
                                 stderr=subprocess.STDOUT)
    logger.debug('ftkimager return value is %s' % return_val)
    ftk_detail_output = '%s.txt' % e01_file.name

    e01_size = os.path.getsize(e01_file.name)
    if e01_size == 0:
        raise Exception('Generated E01 file is 0 size')

    logger.info('Generated E01 (%s) from %s AFF (%s)' % \
        (filesizeformat(e01_size), original.pid, filesizeformat(aff_size)))

    # use ftkimager to verify aff and e01 and compare checksums
    aff_checksums = ftkimager_verify(aff_file.name)
    if not aff_checksums:
        raise Exception('Error running ftkimager verify on AFF for %s' %
                        original.pid)
    e01_checksums = ftkimager_verify(e01_file.name)
    if not e01_checksums:
        raise Exception('Error running ftkimager verify on E01 for %s' %
                        original.pid)

    logger.debug('AFF verify checksums: %s' % \
        ', '.join('%s: %s' % (k, v) for k, v in aff_checksums.items()))
    logger.debug('E01 verify checksums: %s' % \
        ', '.join('%s: %s' % (k, v) for k, v in e01_checksums.items()))
    if aff_checksums != e01_checksums:
        raise Exception('AFF and E01 ftkimager verify checksums do not match')

    # create a new diskimage object from the file
    # - calculate file uri for content location
    e01_file_uri = fedora_file_uri(e01_file.name)
    logger.debug('E01 fedora file URI is %s', e01_file_uri)

    # change permissions on tmpdir + files to ensure fedora can access them
    # (``0o`` octal literals are valid on Python 2.6+ and Python 3)
    os.chmod(tmpdir, 0o775)
    os.chmod(e01_file.name, 0o666)
    os.chmod(ftk_output.name, 0o666)
    os.chmod(ftk_detail_output, 0o666)

    migrated = DiskImage.init_from_file(e01_file.name,
                                        initial_label=original.label,
                                        content_location=e01_file_uri)

    # add ftkimager text output & details as supplemental files
    # - console output captured from subprocess call
    dsobj = migrated.getDatastreamObject('supplement0',
                                         dsobj_type=FileDatastreamObject)
    dsobj.label = 'ftkimager_output.txt'
    dsobj.mimetype = 'text/plain'
    dsobj.checksum = md5sum(ftk_output.name)
    logger.debug('Adding ftkimager console output as supplemental dastream %s label=%s mimetype=%s checksum=%s' % \
                (dsobj.id, dsobj.label, dsobj.mimetype, dsobj.checksum))
    # read via a context manager so the file handle is closed promptly
    with open(ftk_output.name) as console_output:
        dsobj.content = console_output.read()
    # - text file generated by ftkimager alongside the E01
    dsobj2 = migrated.getDatastreamObject('supplement1',
                                          dsobj_type=FileDatastreamObject)
    dsobj2.label = 'ftkimager_summary.txt'
    dsobj2.mimetype = 'text/plain'
    dsobj2.checksum = md5sum(ftk_detail_output)
    logger.debug('Adding ftkimager summary as supplemental dastream %s label=%s mimetype=%s checksum=%s' % \
                (dsobj2.id, dsobj2.label, dsobj2.mimetype, dsobj2.checksum))
    with open(ftk_detail_output) as summary_output:
        dsobj2.content = summary_output.read()

    # set metadata based on original disk image
    # - associate with original
    migrated.original = original
    # copy over descriptive & rights metadata
    # - collection membership
    migrated.collection = original.collection
    # - mods title, covering dates, abstract
    migrated.mods.content.title = original.mods.content.title
    migrated.mods.content.abstract = original.mods.content.abstract
    migrated.mods.content.coveringdate_start = original.mods.content.coveringdate_start
    migrated.mods.content.coveringdate_end = original.mods.content.coveringdate_end
    # - entire rights datastream
    migrated.rights.content = original.rights.content

    ### Update generated premis to describe migration.
    premis_ds = migrated.provenance.content
    premis_ds.object.composition_level = 0
    # these values are the same for all migrated AFFs
    premis_ds.object.create_creating_application()
    premis_ds.object.creating_application.name = creating_application
    premis_ds.object.creating_application.version = application_version
    premis_ds.object.creating_application.date = date.today()

    # add relationship to the original object
    rel = PremisRelationship(type='derivation')
    rel.subtype = 'has source'
    rel.related_object_type = 'ark'
    rel.related_object_id = original.mods.content.ark
    # relationship must also reference the migration event on the
    # original, which doesn't exist yet.  Generate a migration event
    # id now to use for both
    migration_event_id = uuid.uuid1()
    rel.related_event_type = 'UUID'
    rel.related_event_id = migration_event_id
    premis_ds.object.relationships.append(rel)

    ## NOTE: Due to a Fedora bug with checksums and file uri ingest,
    ## content datastream checksum must be cleared out before ingest
    ## and manually checked after.

    # store datastream checksum that would be sent to fedora
    e01_checksum = migrated.content.checksum
    # clear it out so Fedora can ingest without erroring
    migrated.content.checksum = None

    # ingest
    try:
        migrated.save('Ingest migrated version of %s' % original.pid)
        logger.debug('Migrated object ingested as %s' % migrated.pid)
    except DuplicateContent as err:
        # interpolate the details into the message; passing them as extra
        # Exception arguments would leave the placeholders unformatted
        raise Exception('Duplicate content detected for %s: %s %s' %
                        (original.pid, err, ', '.join(err.pids)))
    # would probably be good to catch other fedora errors

    # remove temporary files
    for tmpfilename in [
            aff_file.name, e01_file.name, ftk_output.name, ftk_detail_output
    ]:
        os.remove(tmpfilename)

    # reinitialize migrated object, just to avoid any issues
    # with accessing ark uri for use in original object premis
    migrated = repo.get_object(migrated.pid, type=DiskImage)
    # verify checksum
    if migrated.content.checksum != e01_checksum:
        raise Exception('Checksum mismatch detected on E01 for %s' %
                        migrated.pid)

    # once migrated object has been ingested,
    # update original object with migration information
    # - add rels-ext reference to migrated object
    original.migrated = migrated
    # - update premis with migration event and relationship
    migration_event = PremisEvent()
    migration_event.id_type = 'UUID'
    migration_event.id = migration_event_id
    migration_event.type = 'migration'
    migration_event.date = datetime.now().isoformat()
    migration_event.detail = migration_event_detail
    migration_event.outcome = 'Pass'
    migration_event.outcome_detail = migration_event_outcome
    migration_event.agent_type = 'fedora user'
    migration_event.agent_id = repo.username
    # premis wants both source and outcome objects linked in the event
    link_source = PremisLinkingObject(id_type='ark')
    link_source.id = original.mods.content.ark
    link_source.role = 'source'
    link_outcome = PremisLinkingObject(id_type='ark')
    link_outcome.id = migrated.mods.content.ark
    link_outcome.role = 'outcome'
    migration_event.linked_objects.extend([link_source, link_outcome])
    original.provenance.content.events.append(migration_event)
    # add relation to migrated object in to premis object
    rel = PremisRelationship(type='derivation')
    rel.subtype = 'is source of'
    rel.related_object_type = 'ark'
    rel.related_object_id = migrated.mods.content.ark
    rel.related_event_type = 'UUID'
    rel.related_event_id = migration_event.id
    original.provenance.content.object.relationships.append(rel)
    original.save()
    logger.debug('Original disk image updated with migration data')

    # remove aff migration temp dir and any remaining contents
    try:
        shutil.rmtree(tmpdir)
    except OSError:
        # tempdir removal could fail due to nfs files
        # wait a few seconds and try again
        time.sleep(3)
        try:
            shutil.rmtree(tmpdir)
        except OSError as os_err:
            logger.warning('Failed to remove tmpdir %s : %s', tmpdir, os_err)

    logger.info('Migrated %s AFF to %s E01' % (original.pid, migrated.pid))
    return 'Migrated %s to %s' % (original.pid, migrated.pid)
예제 #54
0
def batch_set_status(pid, status):
    '''Set the status of a :class:`SimpleCollection` and of every object
    listed as one of its members.

    The requested status is translated to a Fedora object state, which
    is set on each member object found via the ``hasMember`` relation
    in the collection's RELS-EXT.  The collection's own MODS
    restrictions-on-access text is only updated (and the collection
    saved) when every member was updated without error.

    :param pid: pid of the SimpleCollection to update
    :param status: status label; must be ``'Processed'`` or
        ``'Accessioned'``
    :returns: a summary message with the number of member items updated
    :raises Exception: if the status is not recognized, if any member
        object could not be updated, or if the collection itself could
        not be saved
    '''
    repo = Repository()
    batch = repo.get_object(pid, type=SimpleCollection)

    # translate form status codes to fedora state code
    # TODO: shift this logic to arrangement object for re-use ?
    codes = {'Processed': 'A', 'Accessioned': 'I'}

    if status not in codes:
        err_msg = 'Status %s unknown' % status
        logger.error(err_msg)
        raise Exception(err_msg)

    # target state for every object in the collection
    state = codes[status]

    # find all pids associated with this object
    member_pids = list(batch.rels_ext.content.objects(batch.uriref,
                                                      relsextns.hasMember))

    # keep track of totals for success and failure
    success = 0
    error = 0

    for member_pid in member_pids:
        try:
            # pass in api from batch object to retain user credentials
            obj = ArrangementObject(batch.api, member_pid)
            obj.state = state
            obj.save('Marking as %s via SimpleCollection %s'
                     % (status, batch.pid))
            success += 1
        except Exception as e:
            # best-effort: record the failure and keep going so that a
            # single bad member does not block the rest of the batch
            logger.error('Failed to update %s : %s', member_pid, e)
            error += 1

    info = {
        'success': success,
        'error': error,
        'success_plural': '' if success == 1 else 's',
        'error_plural': '' if error == 1 else 's',
        'status': status
    }

    summary_msg = "Successfully updated %(success)s item%(success_plural)s; error updating %(error)s" % info

    # if not all objects were updated correctly, exit with error
    if error > 0:
        raise Exception(summary_msg)

    # FIXME: this is based on the current form logic, but could leave
    # some member items stranded in a different status than the parent object

    batch.mods.content.create_restrictions_on_access()
    batch.mods.content.restrictions_on_access.text = status  # Change collection status
    try:
        batch.save('Marking as %(status)s; updated %(success)s member item%(success_plural)s'
                   % info)
    except Exception as e:
        # report the collection's own pid here: the loop variable ``obj``
        # refers to the last member (or is unbound when there are no
        # members), so it must not be used in this message
        save_err = "Error updating SimpleCollection %s - %s" % (batch.pid, e)
        logger.error(save_err)
        raise Exception('%s; %s' % (save_err, summary_msg))

    # success
    return 'Successfully updated %(success)s item%(success_plural)s' % info