def test_raw_datastream_version(self):
    # raw_datastream called with as_of_date should serve each version
    # listed in the TEXT datastream history
    request = Mock()
    request.method = 'GET'
    # a plain dict for META: the ETag condition check finds no headers
    request.META = {}

    # change the text datastream and save it before walking its history
    self.obj.text.content = 'second version content'
    self.obj.text.save()

    status_msg = 'Expected %s but returned %s for raw_datastream as of %s'
    mimetype_msg = 'Expected %s but returned %s for mimetype on raw_datastream as of %s'

    for version in self.obj.text.history().versions:
        as_of = version.created
        # datastream object as of this version; the reference for the response
        ds_at_version = self.obj.getDatastreamObject(self.obj.text.id,
                                                     as_of_date=as_of)
        response = raw_datastream(request, self.obj.pid, self.obj.text.id,
                                  as_of_date=as_of)

        status = response.status_code
        self.assertEqual(200, status, status_msg % (200, status, as_of))

        mimetype = response['Content-Type']
        self.assertEqual('text/plain', mimetype,
                         mimetype_msg % ('text/plain', mimetype, as_of))

        # checksum and size headers must be the version-specific values
        self.assertEqual(
            ds_at_version.checksum, response['Content-MD5'],
            'datastream checksum should be set as Content-MD5 header in the response')
        self.assertEqual(ds_at_version.size, int(response['Content-Length']))
        # body must be the content of that version
        self.assertEqual(ds_at_version.content, response.content)
def file(request, pid):
    """Serve the ``file`` datastream of a FileObject as an attachment.

    The download filename is the basename of the object's ``dc`` title.

    :param request: the current HTTP request, passed through to
        ``raw_datastream``
    :param pid: pid of the FileObject whose file datastream is returned
    :returns: whatever ``raw_datastream`` returns, with a
        Content-Disposition header added
    """
    dsid = FileObject.file.id
    repo = Repository()
    obj = repo.get_object(pid, type=FileObject)
    filename = os.path.basename(obj.dc.content.title)
    # The title is free text: send the filename as a quoted-string so
    # spaces or separators do not truncate it, and escape the two
    # characters that are special inside a quoted-string.
    quoted = filename.replace('\\', '\\\\').replace('"', '\\"')
    extra_headers = {
        'Content-Disposition': 'attachment; filename="%s"' % quoted,
    }
    return raw_datastream(request, pid, dsid, type=FileObject,
                          headers=extra_headers)
def rdfxml(request, aggId):
    """Serve the ``rdfxml`` datastream of an AggregationObject as an attachment.

    The download filename is the basename of the object's ``dc`` title.

    :param request: the current HTTP request, passed through to
        ``raw_datastream``
    :param aggId: pid of the AggregationObject whose rdfxml datastream
        is returned
    :returns: whatever ``raw_datastream`` returns, with a
        Content-Disposition header added
    """
    dsid = AggregationObject.rdfxml.id
    repo = Repository()
    obj = repo.get_object(aggId, type=AggregationObject)
    filename = os.path.basename(obj.dc.content.title)
    # The title is free text: send the filename as a quoted-string so
    # spaces or separators do not truncate it, and escape the two
    # characters that are special inside a quoted-string.
    quoted = filename.replace('\\', '\\\\').replace('"', '\\"')
    extra_headers = {
        'Content-Disposition': 'attachment; filename="%s"' % quoted,
    }
    return raw_datastream(request, aggId, dsid, type=AggregationObject,
                          headers=extra_headers)
def download(request, pid):
    '''Download disk image datastream contents.

    The response is given a Content-Disposition header whose filename is
    the object noid followed by the name of the object's latest format,
    as recorded in its provenance metadata.
    '''
    repo = Repository(request=request)
    disk_image = repo.get_object(pid, type=DiskImage)

    # filename is "<noid>.<latest format name>"
    noid = disk_image.noid
    format_name = disk_image.provenance.content.object.latest_format.name
    disposition = "attachment; filename=%s.%s" % (noid, format_name)

    return raw_datastream(request, pid, DiskImage.content.id, repo=repo,
                          headers={'Content-Disposition': disposition})
def view_datastream(request, pid, dsid):
    '''Access raw object datastreams.

    Delegates to the generic ``raw_datastream`` view, using a
    type-inferring repository initialized from the request (logged-in
    user credentials) so that rushdie files and generic arrangement
    objects are both picked up.

    :param request: the current HTTP request
    :param pid: pid of the object
    :param dsid: id of the datastream to return
    :returns: the response from ``raw_datastream``; a ``message/rfc822``
        Content-Type is rewritten to ``text/plain``
    '''
    response = raw_datastream(request, pid, dsid,
                              repo=TypeInferringRepository(request=request))

    # work-around for email MIME data: display as plain text so it
    # can be viewed in the browser.
    # Use .get() instead of indexing: a response without a Content-Type
    # header (e.g. Django's 304 Not Modified) would otherwise raise KeyError.
    if response.get('Content-Type') == 'message/rfc822':
        response['Content-Type'] = 'text/plain'
    return response
def view_datastream(request, pid, dsid):
    '''Access raw object datastreams.

    Calls the generic ``raw_datastream`` view with a type-inferring
    repository built from the request, then relabels email MIME data as
    plain text.
    '''
    # type-inferring repo (with the request's credentials) picks up
    # rushdie file or generic arrangement objects
    inferring_repo = TypeInferringRepository(request=request)
    response = raw_datastream(request, pid, dsid, repo=inferring_repo)

    # email MIME data is served as plain text so browsers display it
    is_email = response.get('Content-Type') == 'message/rfc822'
    if is_email:
        response['Content-Type'] = 'text/plain'
    return response
def download_file(request, pid):
    '''Download the master file datastream associated with a
    :class:`~genrepo.file.models.FileObject`.

    The object is initialized by content model via ``init_by_cmodel``;
    its master datastream id and its class are handed to the generic
    ``raw_datastream`` view.

    :param request: the current HTTP request
    :param pid: pid of the object to download from
    :returns: the response from ``raw_datastream``, with a
        Content-Disposition header naming the file
    '''
    repo = Repository(request=request)
    obj = init_by_cmodel(pid, request)
    # use original or edited filename as download filename; it is sent
    # as a quoted-string (with backslash and double-quote escaped) so
    # names containing spaces or separators are not truncated
    quoted = obj.master.label.replace('\\', '\\\\').replace('"', '\\"')
    extra_headers = {
        'Content-Disposition': 'attachment; filename="%s"' % quoted,
    }
    # use generic raw datastream view from eulcore
    # - use the datastream id and digital object type returned by cmodel init
    return raw_datastream(request, pid, obj.master.id, type=obj.__class__,
                          repo=repo, headers=extra_headers)
def test_raw_datastream_version(self):
    # each version in the TEXT datastream history should be served by
    # raw_datastream when its creation date is passed as as_of_date
    mock_request = Mock()
    mock_request.method = 'GET'
    # empty META dict: nothing for the ETag condition check to match
    mock_request.META = {}

    # modify and save the text datastream, then compare version by version
    self.obj.text.content = 'second version content'
    self.obj.text.save()

    for version in self.obj.text.history().versions:
        created = version.created
        dsid = self.obj.text.id
        # reference copy of the datastream as of this version
        reference = self.obj.getDatastreamObject(dsid, as_of_date=created)
        response = raw_datastream(mock_request, self.obj.pid,
                                  self.obj.text.id, as_of_date=created)

        code = response.status_code
        self.assertEqual(
            200, code,
            'Expected %s but returned %s for raw_datastream as of %s'
            % (200, code, created))

        content_type = response['Content-Type']
        self.assertEqual(
            'text/plain', content_type,
            'Expected %s but returned %s for mimetype on raw_datastream as of %s'
            % ('text/plain', content_type, created))

        # headers carry the checksum and size of this specific version
        self.assertEqual(
            reference.checksum, response['Content-MD5'],
            'datastream checksum should be set as Content-MD5 header in the response')
        self.assertEqual(reference.size, int(response['Content-Length']))
        # and the body is this version's content
        self.assertEqual(reference.content, response.content)
def test_raw_datastream(self):
    # tests for new version of raw_datastream introduced in 1.5,
    # based on old raw_datastream tests
    rqst = Mock()
    rqst.method = 'GET'
    # return empty headers for ETag condition check
    rqst.META = {}

    # DC
    response = raw_datastream(rqst, self.obj.pid, 'DC')
    expected, got = 200, response.status_code
    content = b''.join(response.streaming_content)
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for raw_datastream view of DC'
        % (expected, got))
    expected, got = 'text/xml', response['Content-Type']
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for mimetype on raw_datastream view of DC'
        % (expected, got))
    self.assertEqual(
        self.obj.dc.checksum, response['ETag'],
        'datastream checksum should be set as ETag header in the response')
    self.assertEqual(self.obj.dc.checksum, response['Content-MD5'])
    # assertIn instead of the deprecated assert_ alias; it also reports
    # both operands on failure
    self.assertIn('<dc:title>%s</dc:title>' % self.obj.dc.content.title,
                  force_text(content))

    # RELS-EXT
    response = raw_datastream(rqst, self.obj.pid, 'RELS-EXT')
    expected, got = 200, response.status_code
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for raw_datastream view of RELS-EXT'
        % (expected, got))
    expected, got = 'application/rdf+xml', response['Content-Type']
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for mimetype on raw_datastream view of RELS-EXT'
        % (expected, got))

    # TEXT (non-xml content)
    response = raw_datastream(rqst, self.obj.pid, 'TEXT')
    expected, got = 200, response.status_code
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for raw_datastream view of TEXT'
        % (expected, got))
    expected, got = 'text/plain', response['Content-Type']
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for mimetype on raw_datastream view of TEXT'
        % (expected, got))
    # non-xml datastreams should have content-md5 & content-length headers
    self.assertEqual(
        self.obj.text.checksum, response['Content-MD5'],
        'datastream checksum should be set as Content-MD5 header in the response')
    self.assertEqual(len(self.obj.text.content),
                     int(response['Content-Length']))

    # IMAGE (binary content)
    response = raw_datastream(rqst, self.obj.pid, 'IMAGE')
    expected, got = 200, response.status_code
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for raw_datastream view of IMAGE'
        % (expected, got))
    expected, got = 'image/png', response['Content-Type']
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for mimetype on raw_datastream view of IMAGE'
        % (expected, got))
    # non-xml datastreams should have content-md5 & content-length headers
    self.assertEqual(
        self.obj.image.checksum, response['Content-MD5'],
        'datastream checksum should be set as Content-MD5 header in the response')
    self.assertTrue(
        response.has_header('Content-Length'),
        'content-length header should be set in the response for binary datastreams')
    self.assertIsInstance(response, StreamingHttpResponse)

    # non-existent datastream should 404
    self.assertRaises(Http404, raw_datastream, rqst, self.obj.pid,
                      'BOGUS-DSID')
    # non-existent record should 404
    self.assertRaises(Http404, raw_datastream, rqst, 'bogus-pid:1', 'DC')

    # set extra headers in the response
    extra_headers = {'Content-Disposition': 'attachment; filename=foo.txt'}
    response = raw_datastream_old(rqst, self.obj.pid, 'TEXT',
                                  headers=extra_headers)
    self.assertTrue(response.has_header('Content-Disposition'))
    self.assertEqual(response['Content-Disposition'],
                     extra_headers['Content-Disposition'])

    # explicitly support GET and HEAD requests only
    rqst.method = 'POST'
    response = raw_datastream(rqst, self.obj.pid, 'DC')
    expected, got = 405, response.status_code
    self.assertEqual(
        expected, got,
        'Expected %s (Method not Allowed) but returned %s for POST to raw_datastream view'
        % (expected, got))

    # test HEAD request
    rqst.method = 'HEAD'
    response = raw_datastream(rqst, self.obj.pid, 'DC')
    expected, got = 200, response.status_code
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for HEAD request on raw_datastream view'
        % (expected, got))
    self.assertIsInstance(response, HttpResponse)
    self.assertEqual(b'', response.content)

    # test that range requests are passed through to fedora
    # use IMAGE for testing since it is binary content
    # set range header in the request; bytes=0- : entire datastream
    rqst.META['HTTP_RANGE'] = 'bytes=0-'
    rqst.method = 'GET'
    response = raw_datastream(rqst, self.obj.pid, 'IMAGE')
    expected, got = 206, response.status_code
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for raw_datastream range request'
        % (expected, got))
    content = b''.join(response.streaming_content)
    self.assertEqual(
        self.obj.image.size, len(content),
        'range request of bytes=0- should return entire content (expected %d, got %d)'
        % (self.obj.image.size, len(content)))
    self.assertEqual(
        self.obj.image.size, int(response['Content-Length']),
        'content-length header should be size of entire content (expected %d, got %d)'
        % (self.obj.image.size, int(response['Content-Length'])))
    expected = 'bytes 0-%d/%d' % (self.obj.image.size - 1,
                                  self.obj.image.size)
    self.assertEqual(
        expected, response['Content-Range'],
        'content range response header should indicate bytes returned (expected %s, got %s)'
        % (expected, response['Content-Range']))
    del response

    # set range request for partial beginning content; bytes=0-150
    # (byte ranges are inclusive, so this is 151 bytes)
    bytes_requested = 'bytes=0-150'
    rqst.META['HTTP_RANGE'] = bytes_requested
    response = raw_datastream(rqst, self.obj.pid, 'IMAGE')
    expected, got = 206, response.status_code
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for raw_datastream range request'
        % (expected, got))
    content_len = 151
    content = b''.join(response.streaming_content)
    self.assertEqual(
        content_len, len(content),
        'range request of %s should return %d bytes, got %d'
        % (bytes_requested, content_len, len(content)))
    self.assertEqual(
        content_len, int(response['Content-Length']),
        'content-length header should be set to partial size %d (got %d)'
        % (content_len, int(response['Content-Length'])))
    expected = 'bytes 0-150/%d' % self.obj.image.size
    self.assertEqual(
        expected, response['Content-Range'],
        'content range response header should indicate bytes returned (expected %s, got %s)'
        % (expected, response['Content-Range']))

    # complex ranges not yet supported
    bytes_requested = 'bytes=1-10,30-50'
    rqst.META['HTTP_RANGE'] = bytes_requested
    response = raw_datastream_old(rqst, self.obj.pid, 'IMAGE',
                                  accept_range_request=True)
    expected, got = 416, response.status_code
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for raw_datastream_old invalid range request %s'
        % (expected, got, bytes_requested))
def test_raw_datastream(self):
    # exercise the raw_datastream view against the DC, RELS-EXT, TEXT
    # and IMAGE datastreams of the test object, plus error cases,
    # extra headers and HTTP method handling
    rqst = Mock()
    rqst.method = 'GET'
    # return empty headers for ETag condition check
    rqst.META = {}

    # DC
    response = raw_datastream(rqst, self.obj.pid, 'DC')
    expected, got = 200, response.status_code
    content = response.content
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for raw_datastream view of DC'
        % (expected, got))
    expected, got = 'text/xml', response['Content-Type']
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for mimetype on raw_datastream view of DC'
        % (expected, got))
    self.assertEqual(
        self.obj.dc.checksum, response['ETag'],
        'datastream checksum should be set as ETag header in the response')
    self.assertEqual(self.obj.dc.checksum, response['Content-MD5'])
    # assertIn instead of the deprecated assert_ alias; it also reports
    # both operands on failure
    self.assertIn('<dc:title>%s</dc:title>' % self.obj.dc.content.title,
                  content)

    # RELS-EXT
    response = raw_datastream(rqst, self.obj.pid, 'RELS-EXT')
    expected, got = 200, response.status_code
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for raw_datastream view of RELS-EXT'
        % (expected, got))
    expected, got = 'application/rdf+xml', response['Content-Type']
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for mimetype on raw_datastream view of RELS-EXT'
        % (expected, got))

    # TEXT (non-xml content)
    response = raw_datastream(rqst, self.obj.pid, 'TEXT')
    expected, got = 200, response.status_code
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for raw_datastream view of TEXT'
        % (expected, got))
    expected, got = 'text/plain', response['Content-Type']
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for mimetype on raw_datastream view of TEXT'
        % (expected, got))
    # non-xml datastreams should have content-md5 & content-length headers
    self.assertEqual(
        self.obj.text.checksum, response['Content-MD5'],
        'datastream checksum should be set as Content-MD5 header in the response')
    self.assertEqual(len(self.obj.text.content),
                     int(response['Content-Length']))

    # IMAGE (binary content)
    response = raw_datastream(rqst, self.obj.pid, 'IMAGE')
    expected, got = 200, response.status_code
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for raw_datastream view of IMAGE'
        % (expected, got))
    expected, got = 'image/png', response['Content-Type']
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for mimetype on raw_datastream view of IMAGE'
        % (expected, got))
    # non-xml datastreams should have content-md5 & content-length headers
    self.assertEqual(
        self.obj.image.checksum, response['Content-MD5'],
        'datastream checksum should be set as Content-MD5 header in the response')
    self.assertTrue(
        response.has_header('Content-Length'),
        'content-length header should be set in the response for binary datastreams')

    # non-existent datastream should 404
    self.assertRaises(Http404, raw_datastream, rqst, self.obj.pid,
                      'BOGUS-DSID')
    # non-existent record should 404
    self.assertRaises(Http404, raw_datastream, rqst, 'bogus-pid:1', 'DC')

    # check type handling?

    # set extra headers in the response
    extra_headers = {'Content-Disposition': 'attachment; filename=foo.txt'}
    response = raw_datastream(rqst, self.obj.pid, 'TEXT',
                              headers=extra_headers)
    self.assertTrue(response.has_header('Content-Disposition'))
    self.assertEqual(response['Content-Disposition'],
                     extra_headers['Content-Disposition'])

    # explicitly support GET and HEAD requests only
    rqst.method = 'POST'
    response = raw_datastream(rqst, self.obj.pid, 'DC')
    expected, got = 405, response.status_code
    self.assertEqual(
        expected, got,
        'Expected %s (Method not Allowed) but returned %s for POST to raw_datastream view'
        % (expected, got))

    # HEAD request is handled internally, for efficiency
    rqst.method = 'HEAD'
    response = raw_datastream(rqst, self.obj.pid, 'DC')
    expected, got = 200, response.status_code
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for HEAD request on raw_datastream view'
        % (expected, got))
    self.assertEqual('', response.content)
def test_raw_datastream_range(self):
    # test http range requests against raw_datastream called with
    # accept_range_request=True; IMAGE is used since it is binary content
    request = Mock()
    request.method = 'GET'
    request.META = {}

    status_msg = 'Expected %s but returned %s for raw_datastream range request'
    range_header_msg = ('content range response header should indicate '
                        'bytes returned (expected %s, got %s)')

    def fetch(range_value):
        # set the Range header on the shared mock request and call the view
        request.META['HTTP_RANGE'] = range_value
        return raw_datastream(request, self.obj.pid, 'IMAGE',
                              accept_range_request=True)

    # bytes=0- : entire datastream
    response = fetch('bytes=0-')
    status = response.status_code
    self.assertEqual(206, status, status_msg % (206, status))
    body = response.content
    self.assertEqual(
        self.obj.image.size, len(body),
        'range request of bytes=0- should return entire content (expected %d, got %d)'
        % (self.obj.image.size, len(body)))
    self.assertEqual(
        self.obj.image.size, int(response['Content-Length']),
        'content-length header should be size of entire content (expected %d, got %d)'
        % (self.obj.image.size, int(response['Content-Length'])))
    expected = 'bytes 0-%d/%d' % (self.obj.image.size - 1,
                                  self.obj.image.size)
    self.assertEqual(expected, response['Content-Range'],
                     range_header_msg % (expected, response['Content-Range']))
    del response

    # partial content: beginning, middle and end of the datastream
    # (range header, expected body length, expected Content-Range template)
    # NOTE(review): HTTP byte ranges are inclusive, so 0-150 would normally
    # be 151 bytes; these lengths are the ones this test has always
    # asserted -- confirm they match the intended behavior of the view.
    partial_cases = [
        ('bytes=0-150', 150, 'bytes 0-150/%d'),
        ('bytes=10-150', 150 - 10, 'bytes 10-150/%d'),
        ('bytes=2000-3118', 3118 - 2000, 'bytes 2000-3118/%d'),
    ]
    for bytes_requested, content_len, range_template in partial_cases:
        response = fetch(bytes_requested)
        status = response.status_code
        self.assertEqual(206, status, status_msg % (206, status))
        self.assertEqual(
            content_len, len(response.content),
            'range request of %s should return %d bytes, got %d'
            % (bytes_requested, content_len, len(response.content)))
        self.assertEqual(
            content_len, int(response['Content-Length']),
            'content-length header should be set to partial size %d (got %d)'
            % (content_len, int(response['Content-Length'])))
        expected = range_template % self.obj.image.size
        self.assertEqual(
            expected, response['Content-Range'],
            range_header_msg % (expected, response['Content-Range']))

    # invalid or unsupported ranges should return 416, range not satisfiable
    unsatisfiable = (
        'bytes=10-9',        # start > end
        'bytes=1-10,30-50',  # complex ranges not yet supported
    )
    for bytes_requested in unsatisfiable:
        response = fetch(bytes_requested)
        status = response.status_code
        self.assertEqual(
            416, status,
            'Expected %s but returned %s for raw_datastream invalid range request %s'
            % (416, status, bytes_requested))
def download_audio(request, pid, type, extension=None):
    '''Serve out an audio datastream for the fedora object specified by pid.
    Can be used to download the original (WAV) audio file or the access
    copy (MP3 or M4A).

    :param pid: pid of the :class:`~keep.audio.models.AudioObject`
        instance from which the audio datastream should be returned
    :param type: which audio datastream to return - should be one of
        'original' or 'access'
    :param extension: optional filename extension for the access copy, to
        distinguish between different types of access copies
        (currently MP3 or M4A)

    The response is given a Content-Disposition header that prompts a
    download, with a filename built from the object noid and a file
    extension appropriate to the type of audio requested.  Unsupported
    types, and access requests whose extension does not match the
    datastream mimetype, raise :class:`Http404`.
    '''
    repo = Repository(request=request)
    # the object is needed for permission checks and the download filename
    obj = repo.get_object(pid, type=AudioObject)

    # User needs either *play* or *download* permissions; either may be
    # for any audio or for researcher-accessible audio only, in which case
    # the object must also be researcher-accessible.
    # For now, the presence of 'HTTP_RANGE' in the request is used to tell
    # jplayer requests apart from straight downloads.
    # NOTE: a savvy user who knows what is checked could circumvent this;
    # it is intended mainly to prevent unwanted access by staff and
    # researchers in the reading room.
    user = request.user
    if 'HTTP_RANGE' in request.META:
        # range request: play permissions, and only for the access copy
        permitted = (
            (user.has_perm('audio.play_audio') and type == 'access')
            or (user.has_perm('audio.play_researcher_audio')
                and bool(obj.researcher_access) and type == 'access')
        )
    else:
        # plain download: download permission, or researcher download
        # permission on a researcher-accessible object
        permitted = (
            user.has_perm('audio.download_audio')
            or (user.has_perm('audio.download_researcher_audio')
                and bool(obj.researcher_access))
        )
    if not permitted:
        return prompt_login_or_403(request)

    # determine which datastream is requested; set datastream id & extension
    if type == 'original':
        dsid, file_ext = AudioObject.audio.id, 'wav'
    elif type == 'access':
        dsid = AudioObject.compressed_audio.id
        # the requested extension must match the datastream mimetype
        # (mimetypes other than these two are not checked)
        required_ext = {
            'audio/mp4': 'm4a',
            'audio/mpeg': 'mp3',
        }.get(obj.compressed_audio.mimetype)
        if required_ext is not None and extension != required_ext:
            raise Http404
        file_ext = extension
    else:
        # any other type is not supported
        raise Http404

    disposition = 'attachment; filename="%s.%s"' % (obj.noid, file_ext)
    # use generic raw datastream view from eulfedora
    return raw_datastream(request, pid, dsid, repo=repo,
                          headers={'Content-Disposition': disposition})
def test_raw_datastream(self):
    # tests for new version of raw_datastream introduced in 1.5,
    # based on old raw_datastream tests
    rqst = Mock()
    rqst.method = 'GET'
    # return empty headers for ETag condition check
    rqst.META = {}

    # DC
    response = raw_datastream(rqst, self.obj.pid, 'DC')
    expected, got = 200, response.status_code
    content = b''.join(response.streaming_content)
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for raw_datastream view of DC'
        % (expected, got))
    expected, got = 'text/xml', response['Content-Type']
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for mimetype on raw_datastream view of DC'
        % (expected, got))
    self.assertEqual(
        self.obj.dc.checksum, response['ETag'],
        'datastream checksum should be set as ETag header in the response')
    self.assertEqual(self.obj.dc.checksum, response['Content-MD5'])
    # assertIn instead of the deprecated assert_ alias; it also reports
    # both operands on failure
    self.assertIn('<dc:title>%s</dc:title>' % self.obj.dc.content.title,
                  force_text(content))

    # RELS-EXT
    response = raw_datastream(rqst, self.obj.pid, 'RELS-EXT')
    expected, got = 200, response.status_code
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for raw_datastream view of RELS-EXT'
        % (expected, got))
    expected, got = 'application/rdf+xml', response['Content-Type']
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for mimetype on raw_datastream view of RELS-EXT'
        % (expected, got))

    # TEXT (non-xml content)
    response = raw_datastream(rqst, self.obj.pid, 'TEXT')
    expected, got = 200, response.status_code
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for raw_datastream view of TEXT'
        % (expected, got))
    expected, got = 'text/plain', response['Content-Type']
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for mimetype on raw_datastream view of TEXT'
        % (expected, got))
    # non-xml datastreams should have content-md5 & content-length headers
    self.assertEqual(
        self.obj.text.checksum, response['Content-MD5'],
        'datastream checksum should be set as Content-MD5 header in the response')
    self.assertEqual(len(self.obj.text.content),
                     int(response['Content-Length']))

    # IMAGE (binary content)
    response = raw_datastream(rqst, self.obj.pid, 'IMAGE')
    expected, got = 200, response.status_code
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for raw_datastream view of IMAGE'
        % (expected, got))
    expected, got = 'image/png', response['Content-Type']
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for mimetype on raw_datastream view of IMAGE'
        % (expected, got))
    # non-xml datastreams should have content-md5 & content-length headers
    self.assertEqual(
        self.obj.image.checksum, response['Content-MD5'],
        'datastream checksum should be set as Content-MD5 header in the response')
    self.assertTrue(
        response.has_header('Content-Length'),
        'content-length header should be set in the response for binary datastreams')
    self.assertIsInstance(response, StreamingHttpResponse)

    # non-existent datastream should 404
    self.assertRaises(Http404, raw_datastream, rqst, self.obj.pid,
                      'BOGUS-DSID')
    # non-existent record should 404
    self.assertRaises(Http404, raw_datastream, rqst, 'bogus-pid:1', 'DC')

    # set extra headers in the response
    extra_headers = {'Content-Disposition': 'attachment; filename=foo.txt'}
    response = raw_datastream_old(rqst, self.obj.pid, 'TEXT',
                                  headers=extra_headers)
    self.assertTrue(response.has_header('Content-Disposition'))
    self.assertEqual(response['Content-Disposition'],
                     extra_headers['Content-Disposition'])

    # explicitly support GET and HEAD requests only
    rqst.method = 'POST'
    response = raw_datastream(rqst, self.obj.pid, 'DC')
    expected, got = 405, response.status_code
    self.assertEqual(
        expected, got,
        'Expected %s (Method not Allowed) but returned %s for POST to raw_datastream view'
        % (expected, got))

    # test HEAD request
    rqst.method = 'HEAD'
    response = raw_datastream(rqst, self.obj.pid, 'DC')
    expected, got = 200, response.status_code
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for HEAD request on raw_datastream view'
        % (expected, got))
    self.assertIsInstance(response, HttpResponse)
    self.assertEqual(b'', response.content)

    # test that range requests are passed through to fedora
    # use IMAGE for testing since it is binary content
    # set range header in the request; bytes=0- : entire datastream
    rqst.META['HTTP_RANGE'] = 'bytes=0-'
    rqst.method = 'GET'
    response = raw_datastream(rqst, self.obj.pid, 'IMAGE')
    expected, got = 206, response.status_code
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for raw_datastream range request'
        % (expected, got))
    content = b''.join(response.streaming_content)
    self.assertEqual(
        self.obj.image.size, len(content),
        'range request of bytes=0- should return entire content (expected %d, got %d)'
        % (self.obj.image.size, len(content)))
    self.assertEqual(
        self.obj.image.size, int(response['Content-Length']),
        'content-length header should be size of entire content (expected %d, got %d)'
        % (self.obj.image.size, int(response['Content-Length'])))
    expected = 'bytes 0-%d/%d' % (self.obj.image.size - 1,
                                  self.obj.image.size)
    self.assertEqual(
        expected, response['Content-Range'],
        'content range response header should indicate bytes returned (expected %s, got %s)'
        % (expected, response['Content-Range']))
    del response

    # set range request for partial beginning content; bytes=0-150
    # (byte ranges are inclusive, so this is 151 bytes)
    bytes_requested = 'bytes=0-150'
    rqst.META['HTTP_RANGE'] = bytes_requested
    response = raw_datastream(rqst, self.obj.pid, 'IMAGE')
    expected, got = 206, response.status_code
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for raw_datastream range request'
        % (expected, got))
    content_len = 151
    content = b''.join(response.streaming_content)
    self.assertEqual(
        content_len, len(content),
        'range request of %s should return %d bytes, got %d'
        % (bytes_requested, content_len, len(content)))
    self.assertEqual(
        content_len, int(response['Content-Length']),
        'content-length header should be set to partial size %d (got %d)'
        % (content_len, int(response['Content-Length'])))
    expected = 'bytes 0-150/%d' % self.obj.image.size
    self.assertEqual(
        expected, response['Content-Range'],
        'content range response header should indicate bytes returned (expected %s, got %s)'
        % (expected, response['Content-Range']))

    # complex ranges not yet supported
    bytes_requested = 'bytes=1-10,30-50'
    rqst.META['HTTP_RANGE'] = bytes_requested
    response = raw_datastream_old(rqst, self.obj.pid, 'IMAGE',
                                  accept_range_request=True)
    expected, got = 416, response.status_code
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for raw_datastream_old invalid range request %s'
        % (expected, got, bytes_requested))
def download_video(request, pid, type, extension=None):
    '''Serve out a video datastream for the fedora object specified by pid.
    Can be used to download the original file or the access copy.

    :param request: current request; ``request.META`` is inspected for
        ``HTTP_RANGE`` and ``request.user`` is used for permission checks
    :param pid: pid of the :class:`~keep.video.models.Video` instance
        from which the video datastream should be returned
    :param type: which video datastream to return - should be one of
        'original' or 'access'
    :param extension: optional filename extension for access copy to
        distinguish between different types of access copies
        (NOTE: not currently read anywhere in this view)
    :returns: the response from the generic ``raw_datastream`` view, or the
        result of ``prompt_login_or_403`` when the user lacks permission
    :raises Http404: if ``type`` is neither 'original' nor 'access'

    The :class:`django.http.HttpResponse` returned will have a
    Content-Disposition set to prompt the user to download the file with a
    filename based on the object noid and an appropriate file extension for
    the type of video requested.
    '''
    repo = Repository(request=request)
    # retrieve the object so we can use it to set the download filename
    obj = repo.get_object(pid, type=Video)
    user = request.user

    # user needs either *play* or *download* permissions
    # - could be any video or researcher-accessible only, which additionally
    #   requires checking object is researcher-accessible

    # for now, use presence of 'HTTP_RANGE' in request to differentiate
    # jplayer requests from straight downloads
    # NOTE: this would not be too difficult for a savvy user to circumvent
    # (if they know what we are checking), but is intended mainly to prevent
    # unwanted access by staff and researchers in the reading room
    if 'HTTP_RANGE' in request.META:
        # play permissions: the request must be for the access copy, not the
        # original, no matter which play permission the user holds.  The
        # access-copy test is applied to BOTH alternatives; previously
        # and/or precedence let the researcher branch bypass it.
        allowed = type == 'access' and (
            user.has_perm('video.play_video')
            or (user.has_perm('video.play_researcher_video')
                and bool(obj.researcher_access)))
    else:
        # download permissions: user either has general download video
        # permission OR can download researcher video and the object is
        # researcher-accessible
        allowed = (
            user.has_perm('video.download_video')
            or (user.has_perm('video.download_researcher_video')
                and bool(obj.researcher_access)))

    if not allowed:
        return prompt_login_or_403(request)

    # determine which datastream is requested & set datastream id & file extension
    if type == 'original':
        dsid = Video.content.id
        # set file extension based on the datastream content type,
        # with a fallback for generic binary (should not happen in production)
        file_ext = Video.allowed_master_mimetypes.get(obj.content.mimetype, 'bin')
    elif type == 'access':
        dsid = Video.access_copy.id
        # set file extension based on the datastream content
        # (no fallback here: an unlisted mimetype raises KeyError)
        file_ext = Video.allowed_access_mimetypes[obj.access_copy.mimetype]
    else:
        # any other type is not supported
        raise Http404

    extra_headers = {
        'Content-Disposition': 'attachment; filename="%s.%s"' % (obj.noid, file_ext)
    }
    # use generic raw datastream view from eulfedora
    return raw_datastream(request, pid, dsid, repo=repo, headers=extra_headers)
    # errors accessing Fedora will fall through to default 500 error handling
def view_datastream(request, pid, dsid):
    '''Access raw object datastreams (MODS, RELS-EXT, DC, DigitalTech,
    SourceTech, JHOVE).

    :param request: current request, used to initialize the repository
    :param pid: pid of the object whose datastream is requested
    :param dsid: id of the datastream to return
    :returns: whatever the generic ``raw_datastream`` view returns
    '''
    # build a repository connection from the request (logged-in user
    # credentials), then delegate everything else to the generic view
    user_repo = Repository(request=request)
    return raw_datastream(request, pid, dsid, repo=user_repo)
def download_video(request, pid, type, extension=None):
    '''Serve out a video datastream for the fedora object specified by pid.
    Can be used to download the original file or the access copy.

    :param request: current request; ``request.META`` is inspected for
        ``HTTP_RANGE`` and ``request.user`` is used for permission checks
    :param pid: pid of the :class:`~keep.video.models.Video` instance
        from which the video datastream should be returned
    :param type: which video datastream to return - should be one of
        'original' or 'access'
    :param extension: optional filename extension for access copy to
        distinguish between different types of access copies
        (NOTE: not currently read anywhere in this view)
    :returns: the response from the generic ``raw_datastream`` view, or the
        result of ``prompt_login_or_403`` when the user lacks permission
    :raises Http404: if ``type`` is neither 'original' nor 'access'

    The :class:`django.http.HttpResponse` returned will have a
    Content-Disposition set to prompt the user to download the file with a
    filename based on the object noid and an appropriate file extension for
    the type of video requested.
    '''
    repo = Repository(request=request)
    # retrieve the object so we can use it to set the download filename
    obj = repo.get_object(pid, type=Video)
    user = request.user

    # user needs either *play* or *download* permissions
    # - could be any video or researcher-accessible only, which additionally
    #   requires checking object is researcher-accessible

    # for now, use presence of 'HTTP_RANGE' in request to differentiate
    # jplayer requests from straight downloads
    # NOTE: this would not be too difficult for a savvy user to circumvent
    # (if they know what we are checking), but is intended mainly to prevent
    # unwanted access by staff and researchers in the reading room
    if 'HTTP_RANGE' in request.META:
        # play permissions: the request must be for the access copy, not the
        # original, no matter which play permission the user holds.  The
        # access-copy test is applied to BOTH alternatives; previously
        # and/or precedence let the researcher branch bypass it.
        allowed = type == 'access' and (
            user.has_perm('video.play_video')
            or (user.has_perm('video.play_researcher_video')
                and bool(obj.researcher_access)))
    else:
        # download permissions: user either has general download video
        # permission OR can download researcher video and the object is
        # researcher-accessible
        allowed = (
            user.has_perm('video.download_video')
            or (user.has_perm('video.download_researcher_video')
                and bool(obj.researcher_access)))

    if not allowed:
        return prompt_login_or_403(request)

    # determine which datastream is requested & set datastream id & file extension
    if type == 'original':
        dsid = Video.content.id
        # set file extension based on the datastream content type,
        # with a fallback for generic binary (should not happen in production)
        file_ext = Video.allowed_master_mimetypes.get(obj.content.mimetype, 'bin')
    elif type == 'access':
        dsid = Video.access_copy.id
        # set file extension based on the datastream content
        # (no fallback here: an unlisted mimetype raises KeyError)
        file_ext = Video.allowed_access_mimetypes[obj.access_copy.mimetype]
    else:
        # any other type is not supported
        raise Http404

    extra_headers = {
        'Content-Disposition': 'attachment; filename="%s.%s"' % (obj.noid, file_ext)
    }
    # use generic raw datastream view from eulfedora
    return raw_datastream(request, pid, dsid, repo=repo, headers=extra_headers)
def view_datastream(request, pid, dsid):
    '''Access raw object datastreams (MODS, RELS-EXT, DC).

    :param request: current request, used to initialize the repository
    :param pid: pid of the object whose datastream is requested
    :param dsid: id of the datastream to return
    :returns: whatever the generic ``raw_datastream`` view returns
    '''
    # build a repository connection from the request (logged-in user
    # credentials), then delegate everything else to the generic view
    user_repo = Repository(request=request)
    return raw_datastream(request, pid, dsid, repo=user_repo)
def test_raw_datastream(self):
    '''Exercise the raw_datastream view against the DC, RELS-EXT, TEXT and
    IMAGE datastreams of ``self.obj``.

    Checks status code and Content-Type for each datastream, checksum
    headers, 404 handling for a bogus datastream id or pid, pass-through of
    extra response headers, and that only GET and HEAD are supported.
    '''
    rqst = Mock()
    rqst.method = 'GET'
    # return empty headers for ETag condition check
    rqst.META.get.return_value = None

    def fetch_ok(dsid, mimetype):
        # request one datastream of self.obj; require a 200 status and the
        # given Content-Type, then hand the response back for further checks
        response = raw_datastream(rqst, self.obj.pid, dsid)
        expected, got = 200, response.status_code
        self.assertEqual(
            expected, got,
            'Expected %s but returned %s for raw_datastream view of %s'
            % (expected, got, dsid))
        expected, got = mimetype, response['Content-Type']
        self.assertEqual(
            expected, got,
            'Expected %s but returned %s for mimetype on raw_datastream view of %s'
            % (expected, got, dsid))
        return response

    # DC
    response = fetch_ok('DC', 'text/xml')
    self.assertEqual(
        self.obj.dc.checksum, response['ETag'],
        'datastream checksum should be set as ETag header in the response')
    self.assertEqual(self.obj.dc.checksum, response['Content-MD5'])
    # assertTrue rather than the deprecated assert_ alias
    self.assertTrue(
        '<dc:title>%s</dc:title>' % self.obj.dc.content.title
        in response.content,
        'DC title should be included in the response content')

    # RELS-EXT
    fetch_ok('RELS-EXT', 'application/rdf+xml')

    # TEXT (non-xml content)
    response = fetch_ok('TEXT', 'text/plain')
    # non-xml datastreams should have content-md5 & content-length headers
    self.assertEqual(
        self.obj.text.checksum, response['Content-MD5'],
        'datastream checksum should be set as Content-MD5 header in the response')
    self.assertEqual(len(self.obj.text.content),
                     int(response['Content-Length']))

    # IMAGE (binary content)
    response = fetch_ok('IMAGE', 'image/png')
    # non-xml datastreams should have content-md5 & content-length headers
    self.assertEqual(
        self.obj.image.checksum, response['Content-MD5'],
        'datastream checksum should be set as Content-MD5 header in the response')
    self.assertTrue(
        response.has_header('Content-Length'),
        'content-length header should be set in the response for binary datastreams')

    # non-existent datastream should 404
    self.assertRaises(Http404, raw_datastream, rqst, self.obj.pid,
                      'BOGUS-DSID')

    # non-existent record should 404
    self.assertRaises(Http404, raw_datastream, rqst, 'bogus-pid:1', 'DC')

    # check type handling?

    # set extra headers in the response
    extra_headers = {'Content-Disposition': 'attachment; filename=foo.txt'}
    response = raw_datastream(rqst, self.obj.pid, 'TEXT',
                              headers=extra_headers)
    self.assertTrue(response.has_header('Content-Disposition'))
    self.assertEqual(response['Content-Disposition'],
                     extra_headers['Content-Disposition'])

    # explicitly support GET and HEAD requests only
    rqst.method = 'POST'
    response = raw_datastream(rqst, self.obj.pid, 'DC')
    expected, got = 405, response.status_code
    self.assertEqual(
        expected, got,
        'Expected %s (Method not Allowed) but returned %s for POST to raw_datastream view'
        % (expected, got))

    # HEAD request is handled internally, for efficiency
    rqst.method = 'HEAD'
    response = raw_datastream(rqst, self.obj.pid, 'DC')
    expected, got = 200, response.status_code
    self.assertEqual(
        expected, got,
        'Expected %s but returned %s for HEAD request on raw_datastream view'
        % (expected, got))
    # NOTE(review): compares against a text-type empty string; confirm this
    # matches the type of response.content on the Python/Django in use
    self.assertEqual('', response.content)