def test_ingest_utf8(self):
    """Ingest objects whose audit log message and label contain non-ASCII
    text, and verify the unicode round-trips through Fedora intact."""
    # ingest with unicode log message
    obj = self.loadFixtureData('basic-object.foxml')
    response = self.rest_api.ingest(obj, logMessage=self.unicode_test_str)
    pid = response.text
    self.assertTrue(pid)
    response = self.rest_api.getObjectXML(pid)
    response.encoding = 'utf-8'   # ensure requests decodes as utf-8
    self.assert_(u'<audit:justification>%s</audit:justification>'
                 % self.unicode_test_str in response.text)
    self.rest_api.purgeObject(force_text(pid))

    # ingest with unicode object label
    # convert to text to replace string, then convert back to bytes
    obj = force_bytes(force_text(obj).replace(
        u"A test object", self.unicode_test_str))
    response = self.rest_api.ingest(obj)
    pid = response.text
    self.assertTrue(pid)
    # object label in profile should match the unicode sent
    response = self.rest_api.getObjectProfile(pid)
    response.encoding = 'utf-8'   # ensure requests decodes as utf-8
    self.assert_(u'<objLabel>%s</objLabel>' % self.unicode_test_str
                 in response.text)
    self.rest_api.purgeObject(force_text(pid))
def get_datastream_info(self, dsinfo): '''Use regular expressions to pull datastream [version] details (id, mimetype, size, and checksum) for binary content, in order to sanity check the decoded data. :param dsinfo: text content just before a binaryContent tag :returns: dict with keys for id, mimetype, size, type and digest, or None if no match is found ''' # we only need to look at the end of this section of content dsinfo = dsinfo[-750:] # if not enough content is present, include the end of # the last read chunk, if available if len(dsinfo) < 750 and self.end_of_last_chunk is not None: dsinfo = self.end_of_last_chunk + dsinfo # force text needed for python 3 compatibility (in python 3 # dsinfo is bytes instead of a string) try: text = force_text(dsinfo) except UnicodeDecodeError as err: # it's possible to see a unicode character split across # read blocks; if we get an "invalid start byte" unicode # decode error, try converting the text without the first # character; if that's the problem, it's not needed # for datastream context if 'invalid start byte' in force_text(err): text = force_text(dsinfo[1:]) else: raise err infomatch = self.dsinfo_regex.search(text) if infomatch: return infomatch.groupdict()
def test_encrypt_decrypt(text):
    """Round-trip *text* through cryptutil and check the result."""
    ciphertext = cryptutil.encrypt(text)
    # encryption must actually transform the input
    self.assertNotEqual(text, ciphertext,
                        "encrypted text should not match original")
    decrypted = cryptutil.decrypt(ciphertext)
    plaintext = force_text(decrypted)
    self.assertEqual(
        text, plaintext,
        "decrypted text (%s) should match original encrypted text (%s)"
        % (plaintext, text))
def test_purgeRelationship(self):
    """Purge an existing relationship; verify purging a non-existent one
    reports failure and a bogus pid raises RequestFailed."""
    # add a relation so there is something to purge
    # (fix: removed leftover debug print() statements)
    self.rest_api.addRelationship(
        self.pid,
        "info:fedora/%s" % self.pid,
        predicate=force_text(modelns.hasModel),
        object="info:fedora/pid:123",
    )
    purged = self.rest_api.purgeRelationship(
        self.pid,
        "info:fedora/%s" % self.pid,
        force_text(modelns.hasModel),
        "info:fedora/pid:123",
    )
    self.assertEqual(purged, True)

    # purge non-existent rel on valid pid
    purged = self.rest_api.purgeRelationship(
        self.pid, "info:fedora/%s" % self.pid, self.rel_owner, "johndoe",
        isLiteral=True
    )
    self.assertFalse(purged)

    # bogus pid
    self.assertRaises(
        RequestFailed,
        self.rest_api.purgeRelationship,
        "bogus:pid",
        "info:fedora/bogus:pid",
        self.rel_owner,
        "johndoe",
        True,
    )
def test_purgeRelationship(self):
    """Purge an existing relationship; verify purging a non-existent one
    reports failure and a bogus pid raises RequestFailed."""
    # add a relation so there is something to purge
    # (fix: removed leftover debug print() statements)
    self.rest_api.addRelationship(self.pid, 'info:fedora/%s' % self.pid,
                                  predicate=force_text(modelns.hasModel),
                                  object='info:fedora/pid:123')
    purged = self.rest_api.purgeRelationship(self.pid,
                                             'info:fedora/%s' % self.pid,
                                             force_text(modelns.hasModel),
                                             'info:fedora/pid:123')
    self.assertEqual(purged, True)

    # purge non-existent rel on valid pid
    purged = self.rest_api.purgeRelationship(self.pid,
                                             'info:fedora/%s' % self.pid,
                                             self.rel_owner, 'johndoe',
                                             isLiteral=True)
    self.assertFalse(purged)

    # bogus pid
    self.assertRaises(RequestFailed, self.rest_api.purgeRelationship,
                      "bogus:pid", 'info:fedora/bogus:pid',
                      self.rel_owner, "johndoe", True)
def test_encrypt_decrypt(text):
    """Encrypt then decrypt *text*, asserting the round trip is lossless."""
    encrypted = cryptutil.encrypt(text)
    self.assertNotEqual(
        text, encrypted,
        "encrypted text should not match original")
    # decryption must recover exactly the original text
    recovered = force_text(cryptutil.decrypt(encrypted))
    self.assertEqual(
        text, recovered,
        "decrypted text (%s) should match original encrypted text (%s)"
        % (recovered, text))
def test_ingest_without_pid(self):
    """Ingest a fixture without specifying a pid, then verify an ingest
    log message is recorded in the object's AUDIT trail."""
    foxml = self.loadFixtureData('basic-object.foxml')
    pid = self.repo.ingest(force_bytes(foxml))
    self.assertTrue(pid)
    self.repo.purge_object(force_text(pid))

    # ingest again with a log message; the message is stored in the
    # AUDIT datastream and can currently only be seen by retrieving
    # the entire object xml
    pid = self.repo.ingest(foxml, "this is my test ingest message")
    response = self.repo.api.getObjectXML(force_text(pid))
    self.assertTrue("this is my test ingest message" in response.text)

    purged = self.repo.purge_object(force_text(pid),
                                    "removing test ingest object")
    self.assertTrue(purged)
def test_getRelationships(self):
    """Add two relationships, then verify getRelationships returns them,
    both unfiltered and filtered by predicate."""
    # add relations to retrieve
    self.rest_api.addRelationship(
        self.pid, "info:fedora/%s" % self.pid, force_text(modelns.hasModel),
        "info:fedora/pid:123", False
    )
    self.rest_api.addRelationship(self.pid, "info:fedora/%s" % self.pid,
                                  self.rel_owner, "johndoe", True)

    r = self.rest_api.getRelationships(self.pid)
    graph = parse_rdf(r.content, r.url)
    # check total number: fedora-system cmodel + two just added
    self.assertEqual(3, len(list(graph)))
    # newly added triples should be included in the graph
    self.assert_((URIRef("info:fedora/%s" % self.pid),
                  modelns.hasModel,
                  URIRef("info:fedora/pid:123")) in graph)
    self.assertEqual(
        "johndoe",
        str(graph.value(subject=URIRef("info:fedora/%s" % self.pid),
                        predicate=URIRef(self.rel_owner)))
    )

    # get rels for a single predicate
    r = self.rest_api.getRelationships(self.pid, predicate=self.rel_owner)
    graph = parse_rdf(r.content, r.url)
    # should include just the one we asked for
    self.assertEqual(1, len(list(graph)))
    self.assertEqual(
        "johndoe",
        str(graph.value(subject=URIRef("info:fedora/%s" % self.pid),
                        predicate=URIRef(self.rel_owner)))
    )
def curl_upload_file(filename):
    """Upload *filename* to the Fedora upload endpoint via pycurl as a
    multipart POST, displaying a progress bar during the transfer."""
    print("curl upload")
    conn = pycurl.Curl()
    # HTTP Basic auth header: base64-encoded user:password
    auth = base64.b64encode(force_bytes("%s:%s" % (testsettings.FEDORA_USER,
                                                   testsettings.FEDORA_PASSWORD)))
    headers = {"Authorization": "Basic %s" % force_text(auth)}
    conn.setopt(conn.URL, "%supload" % testsettings.FEDORA_ROOT_NONSSL)
    conn.setopt(pycurl.VERBOSE, 1)
    conn.setopt(pycurl.HTTPHEADER, ["%s: %s" % t for t in headers.items()])

    filesize = os.path.getsize(filename)
    widgets = [
        "Upload: ", progressbar.widgets.Percentage(), " ",
        progressbar.widgets.Bar(), " ", progressbar.widgets.ETA(), " ",
        progressbar.widgets.FileTransferSpeed(),
    ]
    # set initial progressbar size based on file; will be slightly larger because
    # of multipart boundary content
    pbar = progressbar.ProgressBar(widgets=widgets, maxval=filesize).start()

    def progress(dl_total, dl, up_total, up):
        # update the progressbar to actual maxval (content + boundary)
        pbar.max_value = up_total
        # update current status
        pbar.update(up)

    conn.setopt(
        conn.HTTPPOST,
        [
            (
                "file",
                (
                    # upload the contents of this file
                    conn.FORM_FILE, filename,
                    # specify a different file name for the upload
                    conn.FORM_FILENAME, "file",
                    # specify a different content type
                    # conn.FORM_CONTENTTYPE, 'application/x-python',
                ),
            )
        ],
    )
    # conn.setopt(conn.CURLOPT_READFUNCTION)
    conn.setopt(conn.XFERINFOFUNCTION, progress)
    conn.setopt(conn.NOPROGRESS, False)
    conn.perform()

    # HTTP response code, e.g. 200.
    print("Status: %d" % conn.getinfo(conn.RESPONSE_CODE))
    # Elapsed time for the transfer.
    print("Time: %f" % conn.getinfo(conn.TOTAL_TIME))
    conn.close()
def test_getRelationships(self):
    """Add two relationships, then verify getRelationships returns them,
    both unfiltered and filtered by predicate."""
    # add relations to retrieve
    self.rest_api.addRelationship(self.pid, 'info:fedora/%s' % self.pid,
                                  force_text(modelns.hasModel),
                                  "info:fedora/pid:123", False)
    self.rest_api.addRelationship(self.pid, 'info:fedora/%s' % self.pid,
                                  self.rel_owner, "johndoe", True)

    r = self.rest_api.getRelationships(self.pid)
    graph = parse_rdf(r.content, r.url)
    # check total number: fedora-system cmodel + two just added
    self.assertEqual(3, len(list(graph)))
    # newly added triples should be included in the graph
    self.assert_((URIRef('info:fedora/%s' % self.pid),
                  modelns.hasModel,
                  URIRef('info:fedora/pid:123')) in graph)
    self.assertEqual(
        'johndoe',
        str(
            graph.value(subject=URIRef('info:fedora/%s' % self.pid),
                        predicate=URIRef(self.rel_owner))))

    # get rels for a single predicate
    r = self.rest_api.getRelationships(self.pid, predicate=self.rel_owner)
    graph = parse_rdf(r.content, r.url)
    # should include just the one we asked for
    self.assertEqual(1, len(list(graph)))
    self.assertEqual(
        'johndoe',
        str(
            graph.value(subject=URIRef('info:fedora/%s' % self.pid),
                        predicate=URIRef(self.rel_owner))))
def index_data(request, id, repo=None):
    '''Return the fields and values to be indexed for a single object
    as JSON.  Index content is generated via
    :meth:`eulfedora.models.DigitalObject.index_data`.

    :param id: id of the object to be indexed; in this case a Fedora pid
    '''
    # Ensure permission to this resource is allowed. Currently based on IP only.
    if _permission_denied_check(request):
        return HttpResponseForbidden('Access to this web service was denied.',
                                     content_type='text/html')
    if repo is None:
        repo_opts = {}
        # if credentials are specified via Basic Auth, use them for Fedora access
        auth_info = request.META.get('HTTP_AUTHORIZATION', None)
        basic = 'Basic '
        if auth_info and auth_info.startswith(basic):
            basic_info = auth_info[len(basic):]
            basic_info_decoded = base64.b64decode(force_bytes(basic_info))
            # NOTE: codecs.decode works everywhere but python 3.3. which
            # complains about an unknown encoding
            # basic_info_decoded = codecs.decode(force_bytes(basic_info), 'base64')
            # split on the first colon only: the password itself may
            # legitimately contain colons (RFC 7617)
            u, p = force_text(basic_info_decoded).split(':', 1)
            repo_opts.update({'username': u, 'password': p})
        repo = TypeInferringRepository(**repo_opts)
    try:
        obj = repo.get_object(id)
        return HttpResponse(json.dumps(obj.index_data()),
                            content_type='application/json')
    except RequestFailed:
        # for now, treat any failure getting the object from Fedora as a 404
        # (could also potentially be a permission error)
        raise Http404
def test_login_and_store_credentials_in_session(self):
    """Verify the login view stores an encrypted copy of the user's
    password in the session on success, and nothing on failure."""
    # only testing custom logic, which happens on POST
    # everything else is handled by django.contrib.auth
    mockrequest = Mock()
    mockrequest.method = 'POST'

    def not_logged_in(rqst):
        rqst.user.is_authenticated.return_value = False

    def set_logged_in(rqst):
        rqst.user.is_authenticated.return_value = True
        rqst.POST.get.return_value = "TEST_PASSWORD"

    # failed login
    with patch('eulfedora.views.authviews.login',
               new=Mock(side_effect=not_logged_in)):
        mockrequest.session = dict()
        response = login_and_store_credentials_in_session(mockrequest)
        self.assert_(FEDORA_PASSWORD_SESSION_KEY not in mockrequest.session,
                     'user password for fedora should not be stored in session on failed login')

    # successful login
    with patch('eulfedora.views.authviews.login',
               new=Mock(side_effect=set_logged_in)):
        response = login_and_store_credentials_in_session(mockrequest)
        self.assert_(FEDORA_PASSWORD_SESSION_KEY in mockrequest.session,
                     'user password for fedora should be stored in session on successful login')
        # test password stored in the mock request
        pwd = mockrequest.POST.get()
        # encrypted password stored in session
        sessionpwd = mockrequest.session[FEDORA_PASSWORD_SESSION_KEY]
        self.assertNotEqual(pwd, sessionpwd,
                            'password should not be stored in the session without encryption')
        self.assertEqual(pwd, force_text(cryptutil.decrypt(sessionpwd)),
                         'user password stored in session is encrypted')
def to_blocksize(password):
    """Pad *password* with ENCRYPT_PAD_CHARACTER so its length becomes a
    multiple of the encryption algorithm's block size."""
    block_size = EncryptionAlgorithm.block_size
    # amount of padding needed to reach the next block boundary
    padded_width = len(password) + (block_size - len(password) % block_size)
    # pad character must be text on python 3, bytes on python 2
    pad_char = force_text(ENCRYPT_PAD_CHARACTER) if six.PY3 \
        else ENCRYPT_PAD_CHARACTER
    return password.ljust(padded_width, pad_char)
def test_addRelationship(self):
    """Add resource and literal relationships, confirm they show up in
    RELS-EXT, and check that a bogus pid raises RequestFailed."""
    # relationship to another resource
    added = self.rest_api.addRelationship(
        self.pid, "info:fedora/%s" % self.pid,
        force_text(modelns.hasModel), "info:fedora/pid:123", False)
    self.assertTrue(added)
    rels = self.rest_api.getDatastreamDissemination(self.pid, "RELS-EXT")
    self.assert_("<hasModel" in rels.text)
    self.assert_('rdf:resource="info:fedora/pid:123"' in rels.text)

    # literal relationship
    added = self.rest_api.addRelationship(
        self.pid, "info:fedora/%s" % self.pid,
        self.rel_owner, "johndoe", True)
    self.assertTrue(added)
    rels = self.rest_api.getDatastreamDissemination(self.pid, "RELS-EXT")
    self.assert_("<owner" in rels.text)
    self.assert_(">johndoe<" in rels.text)

    # bogus pid
    self.assertRaises(
        RequestFailed, self.rest_api.addRelationship,
        "bogus:pid", "info:fedora/bogus:pid",
        self.rel_owner, "johndoe", True)
def test_addDatastream(self):
    """Add a text datastream and verify its profile, listing, and content."""
    # returns result from addDatastream call and info used for add
    ((added, msg), ds) = self._add_text_datastream()

    self.assertTrue(added)  # response from addDatastream
    r = self.rest_api.getObjectXML(self.pid)
    message = r.content
    self.assert_(ds['logMessage'] in force_text(message))
    r = self.rest_api.listDatastreams(self.pid)
    self.assert_('<datastream dsid="%(id)s" label="%(label)s" mimeType="%(mimeType)s" />'
                 % ds in r.text)
    r = self.rest_api.getDatastream(self.pid, ds['id'])
    ds_profile = r.text
    self.assert_('dsID="%s"' % ds['id'] in ds_profile)
    self.assert_('<dsLabel>%s</dsLabel>' % ds['label'] in ds_profile)
    self.assert_('<dsVersionID>%s.0</dsVersionID>' % ds['id'] in ds_profile)
    self.assert_('<dsCreateDate>%s' % self.today in ds_profile)
    self.assert_('<dsState>A</dsState>' in ds_profile)
    self.assert_('<dsMIME>%s</dsMIME>' % ds['mimeType'] in ds_profile)
    self.assert_('<dsControlGroup>%s</dsControlGroup>' % ds['controlGroup'] in ds_profile)
    self.assert_('<dsVersionable>true</dsVersionable>' in ds_profile)

    # content returned from fedora should be exactly what we started with
    r = self.rest_api.getDatastreamDissemination(self.pid, ds['id'])
    self.assertEqual(self.TEXT_CONTENT, r.text)
def index_data(request, id, repo=None):
    '''Return the fields and values to be indexed for a single object
    as JSON.  Index content is generated via
    :meth:`eulfedora.models.DigitalObject.index_data`.

    :param id: id of the object to be indexed; in this case a Fedora pid
    '''
    # Ensure permission to this resource is allowed. Currently based on IP only.
    if _permission_denied_check(request):
        return HttpResponseForbidden('Access to this web service was denied.',
                                     content_type='text/html')
    if repo is None:
        repo_opts = {}
        # if credentials are specified via Basic Auth, use them for Fedora access
        auth_info = request.META.get('HTTP_AUTHORIZATION', None)
        basic = 'Basic '
        if auth_info and auth_info.startswith(basic):
            basic_info = auth_info[len(basic):]
            basic_info_decoded = codecs.decode(force_bytes(basic_info), 'base64')
            # split on the first colon only: the password itself may
            # legitimately contain colons (RFC 7617)
            u, p = force_text(basic_info_decoded).split(':', 1)
            repo_opts.update({'username': u, 'password': p})
        repo = TypeInferringRepository(**repo_opts)
    try:
        obj = repo.get_object(id)
        return HttpResponse(json.dumps(obj.index_data()),
                            content_type='application/json')
    except RequestFailed:
        # for now, treat any failure getting the object from Fedora as a 404
        # (could also potentially be a permission error)
        raise Http404
def index_data(request, id, repo=None):
    """Return the fields and values to be indexed for a single object as JSON.

    Index content is generated via
    :meth:`eulfedora.models.DigitalObject.index_data`.

    :param id: id of the object to be indexed; in this case a Fedora pid
    """
    # Ensure permission to this resource is allowed. Currently based on IP only.
    if _permission_denied_check(request):
        return HttpResponseForbidden("Access to this web service was denied.", content_type="text/html")
    if repo is None:
        repo_opts = {}
        # if credentials are specified via Basic Auth, use them for Fedora access
        auth_info = request.META.get("HTTP_AUTHORIZATION", None)
        basic = "Basic "
        if auth_info and auth_info.startswith(basic):
            basic_info = auth_info[len(basic):]
            basic_info_decoded = codecs.decode(force_bytes(basic_info), "base64")
            # split on the first colon only: the password itself may
            # legitimately contain colons (RFC 7617)
            u, p = force_text(basic_info_decoded).split(":", 1)
            repo_opts.update({"username": u, "password": p})
        repo = TypeInferringRepository(**repo_opts)
    try:
        obj = repo.get_object(id)
        return HttpResponse(json.dumps(obj.index_data()), content_type="application/json")
    except RequestFailed:
        # for now, treat any failure getting the object from Fedora as a 404
        # (could also potentially be a permission error)
        raise Http404
def ingestFixture(self, fname):
    """Ingest the named fixture file and record its pid for cleanup."""
    foxml = self.loadFixtureData(fname)
    pid = self.repo.ingest(force_text(foxml))
    # we'd like this always to be true. if ingest fails we should
    # throw an exception. that probably hasn't been thoroughly
    # tested yet, though, so we'll check it until it has been.
    if pid:
        self.append_pid(pid)
def curl_upload_file(filename):
    """Upload *filename* to the Fedora upload endpoint via pycurl as a
    multipart POST, displaying a progress bar during the transfer."""
    print('curl upload')
    conn = pycurl.Curl()
    # HTTP Basic auth header: base64-encoded user:password
    auth = base64.b64encode(
        force_bytes("%s:%s" % (testsettings.FEDORA_USER,
                               testsettings.FEDORA_PASSWORD)))
    headers = {'Authorization': 'Basic %s' % force_text(auth)}
    conn.setopt(conn.URL, '%supload' % testsettings.FEDORA_ROOT_NONSSL)
    conn.setopt(pycurl.VERBOSE, 1)
    conn.setopt(pycurl.HTTPHEADER, ["%s: %s" % t for t in headers.items()])

    filesize = os.path.getsize(filename)
    widgets = ['Upload: ', progressbar.widgets.Percentage(), ' ',
               progressbar.widgets.Bar(), ' ', progressbar.widgets.ETA(),
               ' ', progressbar.widgets.FileTransferSpeed()]
    # set initial progressbar size based on file; will be slightly larger because
    # of multipart boundary content
    pbar = progressbar.ProgressBar(widgets=widgets, maxval=filesize).start()

    def progress(dl_total, dl, up_total, up):
        # update the progressbar to actual maxval (content + boundary)
        pbar.max_value = up_total
        # update current status
        pbar.update(up)

    conn.setopt(
        conn.HTTPPOST,
        [
            ('file', (
                # upload the contents of this file
                conn.FORM_FILE, filename,
                # specify a different file name for the upload
                conn.FORM_FILENAME, 'file',
                # specify a different content type
                # conn.FORM_CONTENTTYPE, 'application/x-python',
            )),
        ])
    # conn.setopt(conn.CURLOPT_READFUNCTION)
    conn.setopt(conn.XFERINFOFUNCTION, progress)
    conn.setopt(conn.NOPROGRESS, False)
    conn.perform()

    # HTTP response code, e.g. 200.
    print('Status: %d' % conn.getinfo(conn.RESPONSE_CODE))
    # Elapsed time for the transfer.
    print('Time: %f' % conn.getinfo(conn.TOTAL_TIME))
    conn.close()
def test_purgeRelationship(self):
    """Purge an existing relationship; verify purging a missing one
    returns False and a bogus pid raises RequestFailed."""
    # add a relation so there is something to purge
    self.rest_api.addRelationship(
        self.pid, 'info:fedora/%s' % self.pid,
        predicate=force_text(modelns.hasModel),
        object='info:fedora/pid:123')

    purged = self.rest_api.purgeRelationship(
        self.pid, 'info:fedora/%s' % self.pid,
        force_text(modelns.hasModel), 'info:fedora/pid:123')
    self.assertEqual(purged, True)

    # purging a relation that does not exist reports failure
    purged = self.rest_api.purgeRelationship(
        self.pid, 'info:fedora/%s' % self.pid,
        self.rel_owner, 'johndoe', isLiteral=True)
    self.assertFalse(purged)

    # a non-existent object should raise an error
    self.assertRaises(
        RequestFailed, self.rest_api.purgeRelationship,
        "bogus:pid", 'info:fedora/bogus:pid',
        self.rel_owner, "johndoe", True)
def test_addDatastream(self):
    """Add a text datastream and verify profile, listing, and content;
    exercise checksum validation and non-existent-object error handling."""
    # returns result from addDatastream call and info used for add
    ((added, msg), ds) = self._add_text_datastream()

    self.assertTrue(added)  # response from addDatastream
    r = self.rest_api.getObjectXML(self.pid)
    message = r.content
    self.assert_(ds['logMessage'] in force_text(message))
    r = self.rest_api.listDatastreams(self.pid)
    self.assert_('<datastream dsid="%(id)s" label="%(label)s" mimeType="%(mimeType)s" />'
                 % ds in r.text)
    r = self.rest_api.getDatastream(self.pid, ds['id'])
    ds_profile = r.text
    self.assert_('dsID="%s"' % ds['id'] in ds_profile)
    self.assert_('<dsLabel>%s</dsLabel>' % ds['label'] in ds_profile)
    self.assert_('<dsVersionID>%s.0</dsVersionID>' % ds['id'] in ds_profile)
    self.assert_('<dsCreateDate>%s' % self.today in ds_profile)
    self.assert_('<dsState>A</dsState>' in ds_profile)
    self.assert_('<dsMIME>%s</dsMIME>' % ds['mimeType'] in ds_profile)
    self.assert_('<dsControlGroup>%s</dsControlGroup>' % ds['controlGroup'] in ds_profile)
    self.assert_('<dsVersionable>true</dsVersionable>' in ds_profile)
    # content returned from fedora should be exactly what we started with
    r = self.rest_api.getDatastreamDissemination(self.pid, ds['id'])
    self.assertEqual(self.TEXT_CONTENT, r.text)

    # invalid checksum
    self.assertRaises(
        ChecksumMismatch, self.rest_api.addDatastream, self.pid,
        "TEXT2", "text datastream", mimeType="text/plain",
        logMessage="creating TEXT2", content='<some> text content</some>',
        checksum='totally-bogus-not-even-an-MD5', checksumType='MD5')

    # invalid checksum without a checksum type - warning, but no checksum mismatch
    with warnings.catch_warnings(record=True) as w:
        self.rest_api.addDatastream(
            self.pid, "TEXT2", "text datastream", mimeType="text/plain",
            logMessage="creating TEXT2", content='<some> text content</some>',
            checksum='totally-bogus-not-even-an-MD5', checksumType=None)
        self.assertEqual(1, len(w),
                         'calling addDatastream with checksum but no checksum type should generate a warning')
        self.assert_('Fedora will ignore the checksum' in str(w[0].message))

    # attempt to add to a non-existent object
    FILE = tempfile.NamedTemporaryFile(mode="w", suffix=".txt")
    FILE.write("bogus")
    FILE.flush()
    with open(FILE.name) as textfile:
        self.assertRaises(RequestFailed, self.rest_api.addDatastream,
                          'bogus:pid', 'TEXT', 'text datastream',
                          mimeType='text/plain',
                          logMessage='creating new datastream',
                          controlGroup='M', content=textfile)
    FILE.close()
def test_addDatastream(self):
    """Add a text datastream and verify profile, listing, and content;
    exercise checksum validation and non-existent-object error handling."""
    # returns result from addDatastream call and info used for add
    ((added, msg), ds) = self._add_text_datastream()

    self.assertTrue(added)  # response from addDatastream
    r = self.rest_api.getObjectXML(self.pid)
    message = r.content
    self.assert_(ds['logMessage'] in force_text(message))
    r = self.rest_api.listDatastreams(self.pid)
    self.assert_('<datastream dsid="%(id)s" label="%(label)s" mimeType="%(mimeType)s" />'
                 % ds in r.text)
    r = self.rest_api.getDatastream(self.pid, ds['id'])
    ds_profile = r.text
    self.assert_('dsID="%s"' % ds['id'] in ds_profile)
    self.assert_('<dsLabel>%s</dsLabel>' % ds['label'] in ds_profile)
    self.assert_('<dsVersionID>%s.0</dsVersionID>' % ds['id'] in ds_profile)
    self.assert_('<dsCreateDate>%s' % self.today in ds_profile)
    self.assert_('<dsState>A</dsState>' in ds_profile)
    self.assert_('<dsMIME>%s</dsMIME>' % ds['mimeType'] in ds_profile)
    self.assert_('<dsControlGroup>%s</dsControlGroup>' % ds['controlGroup'] in ds_profile)
    self.assert_('<dsVersionable>true</dsVersionable>' in ds_profile)
    # content returned from fedora should be exactly what we started with
    r = self.rest_api.getDatastreamDissemination(self.pid, ds['id'])
    self.assertEqual(self.TEXT_CONTENT, r.text)

    # invalid checksum
    self.assertRaises(ChecksumMismatch, self.rest_api.addDatastream,
                      self.pid, "TEXT2", "text datastream",
                      mimeType="text/plain", logMessage="creating TEXT2",
                      content='<some> text content</some>',
                      checksum='totally-bogus-not-even-an-MD5',
                      checksumType='MD5')

    # invalid checksum without a checksum type - warning, but no checksum mismatch
    with warnings.catch_warnings(record=True) as w:
        self.rest_api.addDatastream(self.pid, "TEXT2", "text datastream",
                                    mimeType="text/plain",
                                    logMessage="creating TEXT2",
                                    content='<some> text content</some>',
                                    checksum='totally-bogus-not-even-an-MD5',
                                    checksumType=None)
        self.assertEqual(1, len(w),
                         'calling addDatastream with checksum but no checksum type should generate a warning')
        self.assert_('Fedora will ignore the checksum' in str(w[0].message))

    # attempt to add to a non-existent object
    FILE = tempfile.NamedTemporaryFile(mode="w", suffix=".txt")
    FILE.write("bogus")
    FILE.flush()
    with open(FILE.name) as textfile:
        self.assertRaises(RequestFailed, self.rest_api.addDatastream,
                          'bogus:pid', 'TEXT', 'text datastream',
                          mimeType='text/plain',
                          logMessage='creating new datastream',
                          controlGroup='M', content=textfile)
    FILE.close()
def test_getDissemination(self):
    """Request a built-in fedora-system dissemination and verify both the
    returned content and the response object type."""
    resp = self.rest_api.getDissemination(self.pid, "fedora-system:3",
                                          "viewItemIndex")
    self.assert_("<title>Object Items HTML Presentation</title>" in resp.text)
    self.assert_(self.pid in resp.text)

    # the call should hand back a requests.Response object
    resp = self.rest_api.getDissemination(self.pid, "fedora-system:3",
                                          "viewItemIndex")
    self.assert_(isinstance(resp, requests.Response),
                 "getDissemination should return a response object")
    # datastream content should still be accessible
    self.assert_(self.pid in force_text(resp.content))
def test_getDissemination(self):
    """Request a built-in fedora-system dissemination and verify both the
    returned content and the response object type."""
    resp = self.rest_api.getDissemination(self.pid, "fedora-system:3",
                                          "viewItemIndex")
    self.assert_('<title>Object Items HTML Presentation</title>' in resp.text)
    self.assert_(self.pid in resp.text)

    # the call should hand back a requests.Response object
    resp = self.rest_api.getDissemination(self.pid, "fedora-system:3",
                                          "viewItemIndex")
    self.assert_(isinstance(resp, requests.Response),
                 'getDissemination should return a response object')
    # datastream content should still be accessible
    self.assert_(self.pid in force_text(resp.content))
def testGetPredicates(self):
    """Query risearch predicates for the test object: all predicates,
    then restricted by resource object and by literal object."""
    # all predicates for the test object
    preds = list(self.risearch.get_predicates(self.object.uri, None))
    for expected in (force_text(modelns.hasModel),
                     self.rel_isMemberOf,
                     self.rel_owner):
        self.assertTrue(expected in preds)

    # restricted to a resource object
    preds = list(self.risearch.get_predicates(self.object.uri,
                                              self.related.uri))
    self.assertEqual(preds[0], self.rel_isMemberOf)
    self.assertEqual(len(preds), 1)

    # restricted to a literal object
    preds = list(self.risearch.get_predicates(self.object.uri,
                                              "'testuser'"))
    self.assertEqual(preds[0], self.rel_owner)
    self.assertEqual(len(preds), 1)
def test_index_data(self):
    """Exercise the index_data view: IP restriction, JSON response
    content, Basic Auth credential pass-through, and missing-pid 404."""
    # create a test object for testing index data view
    repo = Repository()
    testobj = repo.get_object(type=SimpleObject)
    testobj.label = 'test object'
    testobj.owner = 'tester'
    testobj.save()
    self.pids.append(testobj.pid)

    # test with request IP not allowed to access the service
    with override_settings(EUL_INDEXER_ALLOWED_IPS=['0.13.23.134']):
        response = index_data(self.request, testobj.pid)
        expected, got = 403, response.status_code
        self.assertEqual(expected, got,
                         'Expected %s but returned %s for index_data view with request IP not in configured list'
                         % (expected, got))

    # test with request IP allowed to hit the service
    with override_settings(EUL_INDEXER_ALLOWED_IPS=[self.request_ip]):
        response = index_data(self.request, testobj.pid)
        expected, got = 200, response.status_code
        self.assertEqual(expected, got,
                         'Expected %s but returned %s for index_data view'
                         % (expected, got))
        expected, got = 'application/json', response['Content-Type']
        self.assertEqual(expected, got,
                         'Expected %s but returned %s for mimetype on index_data view'
                         % (expected, got))
        response_data = json.loads(response.content.decode('utf-8'))
        self.assertEqual(
            testobj.index_data(), response_data,
            'Response content loaded from JSON should be equal to object indexdata'
        )

        # test with basic auth
        testuser, testpass = '******', 'testpass'
        token = base64.b64encode(
            force_bytes('%s:%s' % (testuser, testpass)))
        self.request.META['HTTP_AUTHORIZATION'] = 'Basic %s' % force_text(
            token)
        with patch('eulfedora.indexdata.views.TypeInferringRepository'
                   ) as typerepo:
            typerepo.return_value.get_object.return_value.index_data.return_value = {}
            index_data(self.request, testobj.pid)
            # credentials from the auth header should reach the repository
            typerepo.assert_called_with(username=testuser, password=testpass)

        # non-existent pid should generate a 404
        self.assertRaises(Http404, index_data, self.request, 'bogus:testpid')
def test_ingest(self):
    """Ingest a fixture object without and then with a log message."""
    foxml = self.loadFixtureData('basic-object.foxml')
    response = self.rest_api.ingest(foxml)
    pid = response.content
    self.assertTrue(pid)
    self.rest_api.purgeObject(force_text(pid))

    # ingest with a log message; the message is stored in the AUDIT
    # datastream, which can currently only be seen by retrieving the
    # entire object xml
    response = self.rest_api.ingest(foxml, "this is my test ingest message")
    pid = response.text
    response = self.rest_api.getObjectXML(pid)
    self.assertTrue("this is my test ingest message" in response.text)
    self.rest_api.purgeObject(pid, "removing test ingest object")
def test_ingest(self):
    """Ingest a fixture object without and then with a log message."""
    foxml = self.loadFixtureData("basic-object.foxml")
    response = self.rest_api.ingest(foxml)
    pid = response.content
    self.assertTrue(pid)
    self.rest_api.purgeObject(force_text(pid))

    # ingest with a log message; the message is stored in the AUDIT
    # datastream, which can currently only be seen by retrieving the
    # entire object xml
    response = self.rest_api.ingest(foxml, "this is my test ingest message")
    pid = response.text
    response = self.rest_api.getObjectXML(pid)
    self.assertTrue("this is my test ingest message" in response.text)
    self.rest_api.purgeObject(pid, "removing test ingest object")
def curl_download_file(pid, dsid):
    """Download a datastream's content via pycurl with a progress bar,
    writing it to a named temporary file that is kept on disk."""
    repo = Repository(testsettings.FEDORA_ROOT_NONSSL,
                      testsettings.FEDORA_USER, testsettings.FEDORA_PASSWORD)
    obj = repo.get_object(pid)
    ds = obj.getDatastreamObject(dsid)

    # delete=False so the downloaded content survives for inspection
    tmpfile = tempfile.NamedTemporaryFile(prefix='%s-%s_' % (pid, dsid),
                                          delete=False)
    print('writing to ', tmpfile.name)

    widgets = ['Download: ', progressbar.widgets.Percentage(), ' ',
               progressbar.widgets.Bar(), ' ', progressbar.widgets.ETA(),
               ' ', progressbar.widgets.FileTransferSpeed()]
    # set initial progressbar size based on file; will be slightly larger because
    # of multipart boundary content
    pbar = progressbar.ProgressBar(widgets=widgets, max_value=ds.size).start()

    def progress(dl_total, dl, up_total, up):
        # update current status
        pbar.update(dl)

    c = pycurl.Curl()
    # HTTP Basic auth header: base64-encoded user:password
    auth = base64.b64encode(
        force_bytes("%s:%s" % (testsettings.FEDORA_USER,
                               testsettings.FEDORA_PASSWORD)))
    headers = {'Authorization': 'Basic %s' % force_text(auth)}
    c.setopt(pycurl.VERBOSE, 1)
    c.setopt(pycurl.HTTPHEADER, ["%s: %s" % t for t in headers.items()])
    # /objects/{pid}/datastreams/{dsID}/content ? [asOfDateTime] [download]
    c.setopt(c.URL, '%sobjects/%s/datastreams/%s/content' % \
        (testsettings.FEDORA_ROOT_NONSSL, pid, dsid))
    # c.setopt(c.WRITEDATA, buffer)
    c.setopt(c.WRITEFUNCTION, tmpfile.write)
    c.setopt(c.XFERINFOFUNCTION, progress)
    c.setopt(c.NOPROGRESS, False)
    c.perform()

    # HTTP response code, e.g. 200.
    print('Status: %d' % c.getinfo(c.RESPONSE_CODE))
    # Elapsed time for the transfer.
    print('Time: %f' % c.getinfo(c.TOTAL_TIME))
    c.close()
def test_index_data(self):
    """Exercise the index_data view: IP restriction, JSON response
    content, Basic Auth credential pass-through, and missing-pid 404."""
    # create a test object for testing index data view
    repo = Repository()
    testobj = repo.get_object(type=SimpleObject)
    testobj.label = "test object"
    testobj.owner = "tester"
    testobj.save()
    self.pids.append(testobj.pid)

    # test with request IP not allowed to access the service
    with override_settings(EUL_INDEXER_ALLOWED_IPS=["0.13.23.134"]):
        response = index_data(self.request, testobj.pid)
        expected, got = 403, response.status_code
        self.assertEqual(
            expected,
            got,
            "Expected %s but returned %s for index_data view with request IP not in configured list"
            % (expected, got),
        )

    # test with request IP allowed to hit the service
    with override_settings(EUL_INDEXER_ALLOWED_IPS=[self.request_ip]):
        response = index_data(self.request, testobj.pid)
        expected, got = 200, response.status_code
        self.assertEqual(expected, got, "Expected %s but returned %s for index_data view" % (expected, got))
        expected, got = "application/json", response["Content-Type"]
        self.assertEqual(
            expected, got, "Expected %s but returned %s for mimetype on index_data view" % (expected, got)
        )
        response_data = json.loads(response.content.decode("utf-8"))
        self.assertEqual(
            testobj.index_data(),
            response_data,
            "Response content loaded from JSON should be equal to object indexdata",
        )

        # test with basic auth
        testuser, testpass = "******", "testpass"
        token = base64.b64encode(force_bytes("%s:%s" % (testuser, testpass)))
        self.request.META["HTTP_AUTHORIZATION"] = "Basic %s" % force_text(token)
        with patch("eulfedora.indexdata.views.TypeInferringRepository") as typerepo:
            typerepo.return_value.get_object.return_value.index_data.return_value = {}
            index_data(self.request, testobj.pid)
            # credentials from the auth header should reach the repository
            typerepo.assert_called_with(username=testuser, password=testpass)

        # non-existent pid should generate a 404
        self.assertRaises(Http404, index_data, self.request, "bogus:testpid")
def get_datastream_info(self, dsinfo):
    '''Use regular expressions to pull datastream [version]
    details (id, mimetype, size, and checksum) for binary content,
    in order to sanity check the decoded data.

    :param dsinfo: text content just before a binaryContent tag
    :returns: dict with keys for id, mimetype, size, type and digest,
        or None if no match is found
    '''
    # we only need to look at the end of this section of content;
    # 750 chars is enough to cover the datastream version attributes
    # (250 could truncate them and miss a match)
    dsinfo = dsinfo[-750:]
    # if not enough content is present, include the end of
    # the last read chunk, if available
    if len(dsinfo) < 750 and self.end_of_last_chunk is not None:
        dsinfo = self.end_of_last_chunk + dsinfo

    # force text needed for python 3 compatibility (in python 3
    # dsinfo is bytes instead of a string)
    try:
        text = force_text(dsinfo)
    except UnicodeDecodeError as err:
        # it's possible to see a unicode character split across
        # read blocks; if we get an "invalid start byte" unicode
        # decode error, try converting the text without the first
        # character; if that's the problem, it's not needed
        # for datastream context
        if 'invalid start byte' in force_text(err):
            text = force_text(dsinfo[1:])
        else:
            raise err
    infomatch = self.dsinfo_regex.search(text)
    if infomatch:
        return infomatch.groupdict()
def curl_download_file(pid, dsid):
    '''Download a datastream from Fedora via :mod:`pycurl` into a named
    temporary file (kept after close), showing a progress bar during
    the transfer and printing the HTTP status and elapsed time.'''
    repo = Repository(testsettings.FEDORA_ROOT_NONSSL, testsettings.FEDORA_USER,
                      testsettings.FEDORA_PASSWORD)
    obj = repo.get_object(pid)
    ds = obj.getDatastreamObject(dsid)
    tmpfile = tempfile.NamedTemporaryFile(
        prefix='%s-%s_' % (pid, dsid), delete=False)
    print('writing to ', tmpfile.name)
    widgets = ['Download: ', progressbar.widgets.Percentage(),
               ' ', progressbar.widgets.Bar(),
               ' ', progressbar.widgets.ETA(),
               ' ', progressbar.widgets.FileTransferSpeed()]
    # set initial progressbar size based on file; will be slightly larger because
    # of multipart boundary content
    pbar = progressbar.ProgressBar(widgets=widgets, max_value=ds.size).start()

    def progress(dl_total, dl, up_total, up):
        # update current status (pycurl transfer-info callback)
        pbar.update(dl)

    c = pycurl.Curl()
    # HTTP basic auth header, base64-encoded user:password
    auth = base64.b64encode(force_bytes("%s:%s" % (testsettings.FEDORA_USER,
                                                   testsettings.FEDORA_PASSWORD)))
    headers = {'Authorization' : 'Basic %s' % force_text(auth)}
    c.setopt(pycurl.VERBOSE, 1)
    c.setopt(pycurl.HTTPHEADER, ["%s: %s" % t for t in headers.items()])
    # /objects/{pid}/datastreams/{dsID}/content ? [asOfDateTime] [download]
    c.setopt(c.URL, '%sobjects/%s/datastreams/%s/content' % \
        (testsettings.FEDORA_ROOT_NONSSL, pid, dsid))
    # c.setopt(c.WRITEDATA, buffer)
    c.setopt(c.WRITEFUNCTION, tmpfile.write)
    c.setopt(c.XFERINFOFUNCTION, progress)
    c.setopt(c.NOPROGRESS, False)
    c.perform()
    # HTTP response code, e.g. 200.
    print('Status: %d' % c.getinfo(c.RESPONSE_CODE))
    # Elapsed time for the transfer.
    print('Time: %f' % c.getinfo(c.TOTAL_TIME))
    c.close()
def test_login_and_store_credentials_in_session(self):
    '''On successful POST login the user's password should be stored
    in the session encrypted; on failed login nothing should be stored.'''
    # only testing custom logic, which happens on POST
    # everything else is handled by django.contrib.auth
    mockrequest = Mock()
    mockrequest.method = 'POST'

    def not_logged_in(rqst):
        # side effect simulating a failed django login
        rqst.user.is_authenticated.return_value = False

    def set_logged_in(rqst):
        # side effect simulating a successful django login; also sets
        # the password value returned by the mock POST data
        rqst.user.is_authenticated.return_value = True
        rqst.POST.get.return_value = "TEST_PASSWORD"

    # failed login
    with patch('eulfedora.views.authviews.login',
               new=Mock(side_effect=not_logged_in)):
        mockrequest.session = dict()
        response = login_and_store_credentials_in_session(mockrequest)
        self.assert_(
            FEDORA_PASSWORD_SESSION_KEY not in mockrequest.session,
            'user password for fedora should not be stored in session on failed login'
        )

    # successful login
    with patch('eulfedora.views.authviews.login',
               new=Mock(side_effect=set_logged_in)):
        response = login_and_store_credentials_in_session(mockrequest)
        self.assert_(
            FEDORA_PASSWORD_SESSION_KEY in mockrequest.session,
            'user password for fedora should be stored in session on successful login'
        )
        # test password stored in the mock request
        pwd = mockrequest.POST.get()
        # encrypted password stored in session
        sessionpwd = mockrequest.session[FEDORA_PASSWORD_SESSION_KEY]
        self.assertNotEqual(
            pwd, sessionpwd,
            'password should not be stored in the session without encryption'
        )
        # decrypting the stored value should round-trip to the original
        self.assertEqual(pwd, force_text(cryptutil.decrypt(sessionpwd)),
                         'user password stored in session is encrypted')
def test_addRelationship(self):
    '''Add RELS-EXT relationships with both resource and literal
    objects, and confirm a bogus pid raises :class:`RequestFailed`.'''
    subject = 'info:fedora/%s' % self.pid

    # relation whose object is a resource URI
    added = self.rest_api.addRelationship(
        self.pid, subject, force_text(modelns.hasModel),
        'info:fedora/pid:123', False)
    self.assertTrue(added)
    rels = self.rest_api.getDatastreamDissemination(self.pid, 'RELS-EXT')
    self.assert_('<hasModel' in rels.text)
    self.assert_('rdf:resource="info:fedora/pid:123"' in rels.text)

    # relation whose object is a literal value
    added = self.rest_api.addRelationship(
        self.pid, subject, self.rel_owner, "johndoe", True)
    self.assertTrue(added)
    rels = self.rest_api.getDatastreamDissemination(self.pid, 'RELS-EXT')
    self.assert_('<owner' in rels.text)
    self.assert_('>johndoe<' in rels.text)

    # adding a relationship on a nonexistent pid should fail
    self.assertRaises(RequestFailed, self.rest_api.addRelationship,
                      'bogus:pid', 'info:fedora/bogus:pid',
                      self.rel_owner, 'johndoe', True)
def append_pid(self, pid):
    # record the ingested fixture pid (normalized to text) so that
    # test teardown can purge it later
    text_pid = force_text(pid)
    self.fedora_fixtures_ingested.append(text_pid)
def sync_object(
    src_obj, dest_repo, export_context="migrate", overwrite=False, show_progress=False, requires_auth=False
):
    """Copy an object from one repository to another using the Fedora
    export functionality.

    :param src_obj: source :class:`~eulfedora.models.DigitalObject` to
        be copied
    :param dest_repo: destination :class:`~eulfedora.server.Repository`
        where the object will be copied to
    :param export_context: Fedora export format to use, one of "migrate"
        or "archive"; migrate is generally faster, but requires access
        from destination repository to source and may result in checksum
        errors for some content; archive exports take longer to process
        (default: migrate)
    :param overwrite: if an object with the same pid is already present
        in the destination repository, it will be removed only if
        overwrite is set to true (default: false)
    :param show_progress: if True, displays a progress bar with content
        size, progress, speed, and ETA (only applicable to archive
        exports)
    :param requires_auth: content datastreams require authentication,
        and should have credentials patched in (currently only supported
        in archive-xml export mode) (default: False)
    :returns: result of Fedora ingest on the destination repository on
        success
    """
    # NOTE: currently exceptions are expected to be handled by the
    # calling method; see repo-cp script for an example
    if show_progress and progressbar:
        # calculate rough estimate of object size
        size_estimate = estimate_object_size(src_obj, archive=(export_context in ["archive", "archive-xml"]))
        # create a new progress bar with current pid and size
        widgets = [
            src_obj.pid,
            " Estimated size: %s // " % humanize_file_size(size_estimate),
            "Read: ",
            progressbar.widgets.DataSize(),
            " ",
            progressbar.widgets.AdaptiveTransferSpeed(),
            " ",
            "| Uploaded: ",
            progressbar.widgets.DataSize(value="upload"),
            " // ",
            # FileTransferSpeed('upload'), currently no way to track upload speed...
            progressbar.widgets.Timer(),
            " | ",
            progressbar.widgets.AdaptiveETA(),
        ]

        # progress bar subclass that also reports bytes uploaded, via
        # the custom "upload" value referenced in the widgets above
        class DownUpProgressBar(progressbar.ProgressBar):
            upload = 0

            def data(self):
                data = super(DownUpProgressBar, self).data()
                data["upload"] = self.upload
                return data

        pbar = DownUpProgressBar(widgets=widgets, max_value=size_estimate)
    else:
        pbar = None

    # migrate export can simply be read and uploaded to dest fedora
    if export_context == "migrate":
        response = src_obj.api.export(src_obj, context=export_context, stream=True)
        export_data = response.iter_content(4096 * 1024)
    # archive export needs additional processing to handle large binary content
    elif export_context in ["archive", "archive-xml"]:
        export = ArchiveExport(
            src_obj,
            dest_repo,
            progress_bar=pbar,
            requires_auth=requires_auth,
            xml_only=(export_context == "archive-xml"),
        )
        # NOTE: should be possible to pass BytesIO to be read, but that is failing
        export_data = export.object_data().getvalue()
    else:
        raise Exception("Unsupported export context %s", export_context)

    dest_obj = dest_repo.get_object(src_obj.pid)
    if dest_obj.exists:
        if overwrite:
            dest_repo.purge_object(src_obj.pid)
        else:
            # exception ?
            # NOTE(review): silently returns False when the object exists
            # and overwrite is not set; callers must check the return value
            return False
    result = dest_repo.ingest(export_data)
    if pbar:
        pbar.finish()
    return force_text(result)
def sync_object(src_obj, dest_repo, export_context='migrate',
                overwrite=False, show_progress=False,
                requires_auth=False):
    '''Copy an object from one repository to another using the Fedora
    export functionality.

    :param src_obj: source :class:`~eulfedora.models.DigitalObject` to
        be copied
    :param dest_repo: destination :class:`~eulfedora.server.Repository`
        where the object will be copied to
    :param export_context: Fedora export format to use, one of "migrate"
        or "archive"; migrate is generally faster, but requires access
        from destination repository to source and may result in checksum
        errors for some content; archive exports take longer to process
        (default: migrate)
    :param overwrite: if an object with the same pid is already present
        in the destination repository, it will be removed only if
        overwrite is set to true (default: false)
    :param show_progress: if True, displays a progress bar with content
        size, progress, speed, and ETA (only applicable to archive
        exports)
    :param requires_auth: content datastreams require authentication,
        and should have credentials patched in (currently only supported
        in archive-xml export mode) (default: False)
    :returns: result of Fedora ingest on the destination repository on
        success
    '''
    # NOTE: currently exceptions are expected to be handled by the
    # calling method; see repo-cp script for an example
    if show_progress and progressbar:
        # calculate rough estimate of object size
        size_estimate = estimate_object_size(
            src_obj, archive=(export_context in ['archive', 'archive-xml']))
        # create a new progress bar with current pid and size
        widgets = [
            src_obj.pid,
            ' Estimated size: %s // ' % humanize_file_size(size_estimate),
            'Read: ', progressbar.widgets.DataSize(),
            ' ', progressbar.widgets.AdaptiveTransferSpeed(), ' ',
            '| Uploaded: ', progressbar.widgets.DataSize(value='upload'),
            ' // ',
            # FileTransferSpeed('upload'), currently no way to track upload speed...
            progressbar.widgets.Timer(), ' | ',
            progressbar.widgets.AdaptiveETA()
        ]

        # progress bar subclass that also reports bytes uploaded, via
        # the custom "upload" value referenced in the widgets above
        class DownUpProgressBar(progressbar.ProgressBar):
            upload = 0

            def data(self):
                data = super(DownUpProgressBar, self).data()
                data['upload'] = self.upload
                return data

        pbar = DownUpProgressBar(widgets=widgets, max_value=size_estimate)
    else:
        pbar = None

    # migrate export can simply be read and uploaded to dest fedora
    if export_context == 'migrate':
        response = src_obj.api.export(src_obj, context=export_context,
                                      stream=True)
        export_data = response.iter_content(4096 * 1024)
    # archive export needs additional processing to handle large binary content
    elif export_context in ['archive', 'archive-xml']:
        export = ArchiveExport(src_obj, dest_repo,
                               progress_bar=pbar, requires_auth=requires_auth,
                               xml_only=(export_context == 'archive-xml'))
        # NOTE: should be possible to pass BytesIO to be read, but that is failing
        export_data = export.object_data().getvalue()
    else:
        raise Exception('Unsupported export context %s', export_context)

    dest_obj = dest_repo.get_object(src_obj.pid)
    if dest_obj.exists:
        if overwrite:
            dest_repo.purge_object(src_obj.pid)
        else:
            # exception ?
            # NOTE(review): silently returns False when the object exists
            # and overwrite is not set; callers must check the return value
            return False
    result = dest_repo.ingest(export_data)
    if pbar:
        pbar.finish()
    return force_text(result)
def sync_object(src_obj, dest_repo, export_context='migrate',
                overwrite=False, show_progress=False,
                requires_auth=False, omit_checksums=False):
    '''Copy an object from one repository to another using the Fedora
    export functionality.

    :param src_obj: source :class:`~eulfedora.models.DigitalObject` to
        be copied
    :param dest_repo: destination :class:`~eulfedora.server.Repository`
        where the object will be copied to
    :param export_context: Fedora export format to use, one of "migrate"
        or "archive"; migrate is generally faster, but requires access
        from destination repository to source and may result in checksum
        errors for some content; archive exports take longer to process
        (default: migrate)
    :param overwrite: if an object with the same pid is already present
        in the destination repository, it will be removed only if
        overwrite is set to true (default: false)
    :param show_progress: if True, displays a progress bar with content
        size, progress, speed, and ETA (only applicable to archive
        exports)
    :param requires_auth: content datastreams require authentication,
        and should have credentials patched in (currently only supported
        in archive-xml export mode) (default: False)
    :param omit_checksums: scrubs contentDigest -- aka checksums -- from
        datastreams; helpful for datastreams with Redirect (R) or
        External (E) contexts (default: False)
    :returns: result of Fedora ingest on the destination repository on
        success
    '''
    # NOTE: currently exceptions are expected to be handled by the
    # calling method; see repo-cp script for an example
    if show_progress and progressbar:
        # calculate rough estimate of object size
        size_estimate = estimate_object_size(
            src_obj, archive=(export_context in ['archive', 'archive-xml']))
        # create a new progress bar with current pid and size
        widgets = [
            src_obj.pid,
            ' Estimated size: %s // ' % humanize_file_size(size_estimate),
            'Read: ', progressbar.widgets.DataSize(),
            ' ', progressbar.widgets.AdaptiveTransferSpeed(), ' ',
            '| Uploaded: ', progressbar.widgets.DataSize(value='upload'),
            ' // ',
            # FileTransferSpeed('upload'), currently no way to track upload speed...
            progressbar.widgets.Timer(), ' | ',
            progressbar.widgets.AdaptiveETA()
        ]

        # progress bar subclass that also reports bytes uploaded, via
        # the custom "upload" value referenced in the widgets above
        class DownUpProgressBar(progressbar.ProgressBar):
            upload = 0

            def data(self):
                data = super(DownUpProgressBar, self).data()
                data['upload'] = self.upload
                return data

        pbar = DownUpProgressBar(widgets=widgets, max_value=size_estimate)
    else:
        pbar = None

    # migrate export can simply be read and uploaded to dest fedora
    if export_context == 'migrate':
        response = src_obj.api.export(src_obj, context=export_context,
                                      stream=True)
        export_data = response.iter_content(4096 * 1024)
    # archive export needs additional processing to handle large binary content
    elif export_context in ['archive', 'archive-xml']:
        export = ArchiveExport(src_obj, dest_repo,
                               progress_bar=pbar, requires_auth=requires_auth,
                               xml_only=(export_context == 'archive-xml'))
        # NOTE: should be possible to pass BytesIO to be read, but that is failing
        export_data = export.object_data().getvalue()
    else:
        raise Exception('Unsupported export context %s', export_context)

    # wipe checksums from FOXML if flagged in options
    if omit_checksums:
        # export data is bytes (archive getvalue), text, or an iterable
        # of byte chunks (migrate iter_content); match pattern type to
        # data type, since re.sub(str_pattern, ..., bytes) raises TypeError
        checksum_re = re.compile(br'<foxml:contentDigest.+?/>')
        if isinstance(export_data, bytes):
            export_data = checksum_re.sub(b'', export_data)
        elif isinstance(export_data, str):
            export_data = re.sub(r'<foxml:contentDigest.+?/>', '', export_data)
        else:
            # generator of byte chunks; scrub lazily per chunk
            # NOTE: a contentDigest element split across a chunk boundary
            # would be missed (same limitation as chunk-wise scrubbing
            # in general)
            export_data = (checksum_re.sub(b'', chunk)
                           for chunk in export_data)

    dest_obj = dest_repo.get_object(src_obj.pid)
    if dest_obj.exists:
        if overwrite:
            dest_repo.purge_object(src_obj.pid)
        else:
            # exception ?
            # NOTE(review): silently returns False when the object exists
            # and overwrite is not set; callers must check the return value
            return False
    result = dest_repo.ingest(export_data)
    if pbar:
        pbar.finish()
    return force_text(result)
def test_upload_string(self):
    '''Uploading a plain string should return an upload id in the
    expected ``uploaded://####`` format.'''
    data = "Here is some temporary content to upload to fedora."
    upload_id = self.rest_api.upload(data)
    # current format looks like uploaded://####
    self.assert_(re.match("uploaded://[0-9]+", force_text(upload_id)))
def test_raw_datastream_old(self):
    '''Exercise the deprecated ``raw_datastream_old`` view: status,
    mimetype, checksum/length headers for each fixture datastream;
    streaming mode; 404s; extra headers; and GET/HEAD-only handling.'''
    rqst = Mock()
    rqst.method = 'GET'
    # return empty headers for ETag condition check
    rqst.META = {}
    # rqst.META.get.return_value = None
    # DC
    response = raw_datastream_old(rqst, self.obj.pid, 'DC')
    expected, got = 200, response.status_code
    content = force_text(response.content)
    self.assertEqual(expected, got,
        'Expected %s but returned %s for raw_datastream_old view of DC' \
        % (expected, got))
    expected, got = 'text/xml', response['Content-Type']
    self.assertEqual(expected, got,
        'Expected %s but returned %s for mimetype on raw_datastream_old view of DC' \
        % (expected, got))
    self.assertEqual(self.obj.dc.checksum, response['ETag'],
        'datastream checksum should be set as ETag header in the response')
    self.assertEqual(self.obj.dc.checksum, response['Content-MD5'])
    self.assert_('<dc:title>%s</dc:title>' % self.obj.dc.content.title in content)
    # RELS-EXT
    response = raw_datastream_old(rqst, self.obj.pid, 'RELS-EXT')
    expected, got = 200, response.status_code
    self.assertEqual(expected, got,
        'Expected %s but returned %s for raw_datastream_old view of RELS-EXT' \
        % (expected, got))
    expected, got = 'application/rdf+xml', response['Content-Type']
    self.assertEqual(expected, got,
        'Expected %s but returned %s for mimetype on raw_datastream_old view of RELS-EXT' \
        % (expected, got))
    # TEXT (non-xml content)
    response = raw_datastream_old(rqst, self.obj.pid, 'TEXT')
    expected, got = 200, response.status_code
    self.assertEqual(expected, got,
        'Expected %s but returned %s for raw_datastream_old view of TEXT' \
        % (expected, got))
    expected, got = 'text/plain', response['Content-Type']
    self.assertEqual(expected, got,
        'Expected %s but returned %s for mimetype on raw_datastream_old view of TEXT' \
        % (expected, got))
    # non-xml datastreams should have content-md5 & content-length headers
    self.assertEqual(self.obj.text.checksum, response['Content-MD5'],
        'datastream checksum should be set as Content-MD5 header in the response')
    self.assertEqual(len(self.obj.text.content), int(response['Content-Length']))
    # IMAGE (binary content)
    response = raw_datastream_old(rqst, self.obj.pid, 'IMAGE')
    expected, got = 200, response.status_code
    self.assertEqual(expected, got,
        'Expected %s but returned %s for raw_datastream_old view of IMAGE' \
        % (expected, got))
    expected, got = 'image/png', response['Content-Type']
    self.assertEqual(expected, got,
        'Expected %s but returned %s for mimetype on raw_datastream_old view of IMAGE' \
        % (expected, got))
    # non-xml datastreams should have content-md5 & content-length headers
    self.assertEqual(self.obj.image.checksum, response['Content-MD5'],
        'datastream checksum should be set as Content-MD5 header in the response')
    self.assertTrue(response.has_header('Content-Length'),
        'content-length header should be set in the response for binary datastreams')
    # default (non-streaming) mode returns a plain HttpResponse
    self.assert_(isinstance(response, HttpResponse))
    # streaming
    response = raw_datastream_old(rqst, self.obj.pid, 'IMAGE', streaming=True)
    self.assert_(isinstance(response, StreamingHttpResponse))
    # non-existent datastream should 404
    self.assertRaises(Http404, raw_datastream_old, rqst, self.obj.pid, 'BOGUS-DSID')
    # non-existent record should 404
    self.assertRaises(Http404, raw_datastream_old, rqst, 'bogus-pid:1', 'DC')
    # check type handling?
    # set extra headers in the response
    extra_headers = {'Content-Disposition': 'attachment; filename=foo.txt'}
    response = raw_datastream_old(rqst, self.obj.pid, 'TEXT',
                                  headers=extra_headers)
    self.assertTrue(response.has_header('Content-Disposition'))
    self.assertEqual(response['Content-Disposition'], extra_headers['Content-Disposition'])
    # explicitly support GET and HEAD requests only
    rqst.method = 'POST'
    response = raw_datastream_old(rqst, self.obj.pid, 'DC')
    expected, got = 405, response.status_code
    self.assertEqual(expected, got,
        'Expected %s (Method not Allowed) but returned %s for POST to raw_datastream view' \
        % (expected, got))
    # HEAD request is handled internally, for efficiency
    rqst.method = 'HEAD'
    response = raw_datastream_old(rqst, self.obj.pid, 'DC')
    expected, got = 200, response.status_code
    self.assertEqual(expected, got,
        'Expected %s but returned %s for HEAD request on raw_datastream_old view' \
        % (expected, got))
    # HEAD response must carry no body
    self.assertEqual(b'', response.content)
def test_raw_datastream(self):
    '''Exercise the streaming ``raw_datastream`` view introduced in 1.5:
    status/mimetype/checksum headers, 404s, extra headers, GET/HEAD-only
    handling, and HTTP Range request pass-through.'''
    # tests for new version of raw_datastream introduced in 1.5,
    # based on old raw_datastream tests
    rqst = Mock()
    rqst.method = 'GET'
    # return empty headers for ETag condition check
    rqst.META = {}
    # DC
    response = raw_datastream(rqst, self.obj.pid, 'DC')
    expected, got = 200, response.status_code
    # streaming response: collect the chunks to inspect the body
    content = b''.join(c for c in response.streaming_content)
    self.assertEqual(expected, got,
        'Expected %s but returned %s for raw_datastream view of DC' \
        % (expected, got))
    expected, got = 'text/xml', response['Content-Type']
    self.assertEqual(expected, got,
        'Expected %s but returned %s for mimetype on raw_datastream view of DC' \
        % (expected, got))
    self.assertEqual(
        self.obj.dc.checksum, response['ETag'],
        'datastream checksum should be set as ETag header in the response')
    self.assertEqual(self.obj.dc.checksum, response['Content-MD5'])
    self.assert_('<dc:title>%s</dc:title>' % self.obj.dc.content.title
                 in force_text(content))
    # RELS-EXT
    response = raw_datastream(rqst, self.obj.pid, 'RELS-EXT')
    expected, got = 200, response.status_code
    self.assertEqual(expected, got,
        'Expected %s but returned %s for raw_datastream view of RELS-EXT' \
        % (expected, got))
    expected, got = 'application/rdf+xml', response['Content-Type']
    self.assertEqual(expected, got,
        'Expected %s but returned %s for mimetype on raw_datastream view of RELS-EXT' \
        % (expected, got))
    # TEXT (non-xml content)
    response = raw_datastream(rqst, self.obj.pid, 'TEXT')
    expected, got = 200, response.status_code
    self.assertEqual(expected, got,
        'Expected %s but returned %s for raw_datastream view of TEXT' \
        % (expected, got))
    expected, got = 'text/plain', response['Content-Type']
    self.assertEqual(expected, got,
        'Expected %s but returned %s for mimetype on raw_datastream view of TEXT' \
        % (expected, got))
    # non-xml datastreams should have content-md5 & content-length headers
    self.assertEqual(
        self.obj.text.checksum, response['Content-MD5'],
        'datastream checksum should be set as Content-MD5 header in the response'
    )
    self.assertEqual(len(self.obj.text.content), int(response['Content-Length']))
    # IMAGE (binary content)
    response = raw_datastream(rqst, self.obj.pid, 'IMAGE')
    expected, got = 200, response.status_code
    self.assertEqual(expected, got,
        'Expected %s but returned %s for raw_datastream view of IMAGE' \
        % (expected, got))
    expected, got = 'image/png', response['Content-Type']
    self.assertEqual(expected, got,
        'Expected %s but returned %s for mimetype on raw_datastream view of IMAGE' \
        % (expected, got))
    # non-xml datastreams should have content-md5 & content-length headers
    self.assertEqual(
        self.obj.image.checksum, response['Content-MD5'],
        'datastream checksum should be set as Content-MD5 header in the response'
    )
    self.assertTrue(
        response.has_header('Content-Length'),
        'content-length header should be set in the response for binary datastreams'
    )
    # new view always streams
    self.assert_(isinstance(response, StreamingHttpResponse))
    # non-existent datastream should 404
    self.assertRaises(Http404, raw_datastream, rqst, self.obj.pid, 'BOGUS-DSID')
    # non-existent record should 404
    self.assertRaises(Http404, raw_datastream, rqst, 'bogus-pid:1', 'DC')
    # set extra headers in the response
    extra_headers = {'Content-Disposition': 'attachment; filename=foo.txt'}
    # NOTE(review): this calls raw_datastream_old, not raw_datastream —
    # looks like a copy-paste leftover from the old tests; confirm intent
    response = raw_datastream_old(rqst, self.obj.pid, 'TEXT',
                                  headers=extra_headers)
    self.assertTrue(response.has_header('Content-Disposition'))
    self.assertEqual(response['Content-Disposition'], extra_headers['Content-Disposition'])
    # explicitly support GET and HEAD requests only
    rqst.method = 'POST'
    response = raw_datastream(rqst, self.obj.pid, 'DC')
    expected, got = 405, response.status_code
    self.assertEqual(expected, got,
        'Expected %s (Method not Allowed) but returned %s for POST to raw_datastream view' \
        % (expected, got))
    # test HEAD request
    rqst.method = 'HEAD'
    response = raw_datastream(rqst, self.obj.pid, 'DC')
    expected, got = 200, response.status_code
    self.assertEqual(expected, got,
        'Expected %s but returned %s for HEAD request on raw_datastream view' \
        % (expected, got))
    # HEAD returns a non-streaming response with an empty body
    self.assert_(isinstance(response, HttpResponse))
    self.assertEqual(b'', response.content)
    # test that range requests are passed through to fedora
    # use IMAGE for testing since it is binary content
    # set range header in the request; bytes=0- : entire datastream
    rqst.META['HTTP_RANGE'] = 'bytes=0-'
    rqst.method = 'GET'
    response = raw_datastream(rqst, self.obj.pid, 'IMAGE')
    expected, got = 206, response.status_code
    self.assertEqual(expected, got,
        'Expected %s but returned %s for raw_datastream range request' \
        % (expected, got))
    content = b''.join(c for c in response.streaming_content)
    self.assertEqual(self.obj.image.size, len(content),
        'range request of bytes=0- should return entire content (expected %d, got %d)' \
        % (self.obj.image.size, len(content)))
    self.assertEqual(self.obj.image.size, int(response['Content-Length']),
        'content-length header should be size of entire content (expected %d, got %d)' \
        % (self.obj.image.size, int(response['Content-Length'])))
    expected = 'bytes 0-%d/%d' % (self.obj.image.size - 1, self.obj.image.size)
    self.assertEqual(expected, response['Content-Range'],
        'content range response header should indicate bytes returned (expected %s, got %s)' \
        % (expected, response['Content-Range']))
    del response
    # set range request for partial beginning content; bytes=0-150
    bytes_requested = 'bytes=0-150'
    rqst.META['HTTP_RANGE'] = bytes_requested
    response = raw_datastream(rqst, self.obj.pid, 'IMAGE')
    expected, got = 206, response.status_code
    self.assertEqual(expected, got,
        'Expected %s but returned %s for raw_datastream range request' \
        % (expected, got))
    # range 0-150 is inclusive of both endpoints: 151 bytes
    content_len = 151
    content = b''.join(c for c in response.streaming_content)
    self.assertEqual(content_len, len(content),
        'range request of %s should return %d bytes, got %d' \
        % (bytes_requested, content_len, len(content)))
    self.assertEqual(content_len, int(response['Content-Length']),
        'content-length header should be set to partial size %d (got %d)' \
        % (content_len, int(response['Content-Length'])))
    expected = 'bytes 0-150/%d' % self.obj.image.size
    self.assertEqual(expected, response['Content-Range'],
        'content range response header should indicate bytes returned (expected %s, got %s)' \
        % (expected, response['Content-Range']))
    # complex ranges not yet supported
    bytes_requested = 'bytes=1-10,30-50'
    rqst.META['HTTP_RANGE'] = bytes_requested
    # NOTE(review): this calls raw_datastream_old, not raw_datastream —
    # confirm whether the new view was meant to be exercised here
    response = raw_datastream_old(rqst, self.obj.pid, 'IMAGE',
                                  accept_range_request=True)
    expected, got = 416, response.status_code
    self.assertEqual(expected, got,
        'Expected %s but returned %s for raw_datastream_old invalid range request %s' \
        % (expected, got, bytes_requested))
def test_raw_datastream_old(self):
    '''Exercise the deprecated ``raw_datastream_old`` view: status,
    mimetype, checksum/length headers for each fixture datastream;
    streaming mode; 404s; extra headers; and GET/HEAD-only handling.'''
    rqst = Mock()
    rqst.method = 'GET'
    # return empty headers for ETag condition check
    rqst.META = {}
    # rqst.META.get.return_value = None
    # DC
    response = raw_datastream_old(rqst, self.obj.pid, 'DC')
    expected, got = 200, response.status_code
    content = force_text(response.content)
    self.assertEqual(expected, got,
        'Expected %s but returned %s for raw_datastream_old view of DC' \
        % (expected, got))
    expected, got = 'text/xml', response['Content-Type']
    self.assertEqual(expected, got,
        'Expected %s but returned %s for mimetype on raw_datastream_old view of DC' \
        % (expected, got))
    self.assertEqual(
        self.obj.dc.checksum, response['ETag'],
        'datastream checksum should be set as ETag header in the response')
    self.assertEqual(self.obj.dc.checksum, response['Content-MD5'])
    self.assert_('<dc:title>%s</dc:title>' % self.obj.dc.content.title in content)
    # RELS-EXT
    response = raw_datastream_old(rqst, self.obj.pid, 'RELS-EXT')
    expected, got = 200, response.status_code
    self.assertEqual(expected, got,
        'Expected %s but returned %s for raw_datastream_old view of RELS-EXT' \
        % (expected, got))
    expected, got = 'application/rdf+xml', response['Content-Type']
    self.assertEqual(expected, got,
        'Expected %s but returned %s for mimetype on raw_datastream_old view of RELS-EXT' \
        % (expected, got))
    # TEXT (non-xml content)
    response = raw_datastream_old(rqst, self.obj.pid, 'TEXT')
    expected, got = 200, response.status_code
    self.assertEqual(expected, got,
        'Expected %s but returned %s for raw_datastream_old view of TEXT' \
        % (expected, got))
    expected, got = 'text/plain', response['Content-Type']
    self.assertEqual(expected, got,
        'Expected %s but returned %s for mimetype on raw_datastream_old view of TEXT' \
        % (expected, got))
    # non-xml datastreams should have content-md5 & content-length headers
    self.assertEqual(
        self.obj.text.checksum, response['Content-MD5'],
        'datastream checksum should be set as Content-MD5 header in the response'
    )
    self.assertEqual(len(self.obj.text.content), int(response['Content-Length']))
    # IMAGE (binary content)
    response = raw_datastream_old(rqst, self.obj.pid, 'IMAGE')
    expected, got = 200, response.status_code
    self.assertEqual(expected, got,
        'Expected %s but returned %s for raw_datastream_old view of IMAGE' \
        % (expected, got))
    expected, got = 'image/png', response['Content-Type']
    self.assertEqual(expected, got,
        'Expected %s but returned %s for mimetype on raw_datastream_old view of IMAGE' \
        % (expected, got))
    # non-xml datastreams should have content-md5 & content-length headers
    self.assertEqual(
        self.obj.image.checksum, response['Content-MD5'],
        'datastream checksum should be set as Content-MD5 header in the response'
    )
    self.assertTrue(
        response.has_header('Content-Length'),
        'content-length header should be set in the response for binary datastreams'
    )
    # default (non-streaming) mode returns a plain HttpResponse
    self.assert_(isinstance(response, HttpResponse))
    # streaming
    response = raw_datastream_old(rqst, self.obj.pid, 'IMAGE', streaming=True)
    self.assert_(isinstance(response, StreamingHttpResponse))
    # non-existent datastream should 404
    self.assertRaises(Http404, raw_datastream_old, rqst, self.obj.pid, 'BOGUS-DSID')
    # non-existent record should 404
    self.assertRaises(Http404, raw_datastream_old, rqst, 'bogus-pid:1', 'DC')
    # check type handling?
    # set extra headers in the response
    extra_headers = {'Content-Disposition': 'attachment; filename=foo.txt'}
    response = raw_datastream_old(rqst, self.obj.pid, 'TEXT',
                                  headers=extra_headers)
    self.assertTrue(response.has_header('Content-Disposition'))
    self.assertEqual(response['Content-Disposition'], extra_headers['Content-Disposition'])
    # explicitly support GET and HEAD requests only
    rqst.method = 'POST'
    response = raw_datastream_old(rqst, self.obj.pid, 'DC')
    expected, got = 405, response.status_code
    self.assertEqual(expected, got,
        'Expected %s (Method not Allowed) but returned %s for POST to raw_datastream view' \
        % (expected, got))
    # HEAD request is handled internally, for efficiency
    rqst.method = 'HEAD'
    response = raw_datastream_old(rqst, self.obj.pid, 'DC')
    expected, got = 200, response.status_code
    self.assertEqual(expected, got,
        'Expected %s but returned %s for HEAD request on raw_datastream_old view' \
        % (expected, got))
    # HEAD response must carry no body
    self.assertEqual(b'', response.content)
def test_upload_string(self):
    '''Upload a plain string to fedora and verify the id returned
    matches the expected ``uploaded://####`` pattern.'''
    upload_id = self.rest_api.upload(
        "Here is some temporary content to upload to fedora.")
    # current format looks like uploaded://####
    matched = re.compile('uploaded://[0-9]+').match(force_text(upload_id))
    self.assert_(matched)
def test_raw_datastream(self):
    """Tests for the new version of the raw_datastream view introduced
    in 1.5 (streaming response, ETag/Content-MD5 headers, HTTP range
    support), based on the old raw_datastream tests.

    Fix: several checks here previously called ``raw_datastream_old``
    (copy-paste residue from the old test), which left the new view's
    extra-header handling and complex-range rejection untested; they
    now exercise ``raw_datastream`` as intended.
    """
    rqst = Mock()
    rqst.method = 'GET'
    # return empty headers for ETag condition check
    rqst.META = {}

    # DC
    response = raw_datastream(rqst, self.obj.pid, 'DC')
    expected, got = 200, response.status_code
    # streaming response: content must be consumed from streaming_content
    content = b''.join(c for c in response.streaming_content)
    self.assertEqual(expected, got,
        'Expected %s but returned %s for raw_datastream view of DC' \
        % (expected, got))
    expected, got = 'text/xml', response['Content-Type']
    self.assertEqual(expected, got,
        'Expected %s but returned %s for mimetype on raw_datastream view of DC' \
        % (expected, got))
    self.assertEqual(self.obj.dc.checksum, response['ETag'],
        'datastream checksum should be set as ETag header in the response')
    self.assertEqual(self.obj.dc.checksum, response['Content-MD5'])
    self.assert_('<dc:title>%s</dc:title>' % self.obj.dc.content.title
                 in force_text(content))

    # RELS-EXT
    response = raw_datastream(rqst, self.obj.pid, 'RELS-EXT')
    expected, got = 200, response.status_code
    self.assertEqual(expected, got,
        'Expected %s but returned %s for raw_datastream view of RELS-EXT' \
        % (expected, got))
    expected, got = 'application/rdf+xml', response['Content-Type']
    self.assertEqual(expected, got,
        'Expected %s but returned %s for mimetype on raw_datastream view of RELS-EXT' \
        % (expected, got))

    # TEXT (non-xml content)
    response = raw_datastream(rqst, self.obj.pid, 'TEXT')
    expected, got = 200, response.status_code
    self.assertEqual(expected, got,
        'Expected %s but returned %s for raw_datastream view of TEXT' \
        % (expected, got))
    expected, got = 'text/plain', response['Content-Type']
    self.assertEqual(expected, got,
        'Expected %s but returned %s for mimetype on raw_datastream view of TEXT' \
        % (expected, got))
    # non-xml datastreams should have content-md5 & content-length headers
    self.assertEqual(self.obj.text.checksum, response['Content-MD5'],
        'datastream checksum should be set as Content-MD5 header in the response')
    self.assertEqual(len(self.obj.text.content),
                     int(response['Content-Length']))

    # IMAGE (binary content)
    response = raw_datastream(rqst, self.obj.pid, 'IMAGE')
    expected, got = 200, response.status_code
    self.assertEqual(expected, got,
        'Expected %s but returned %s for raw_datastream view of IMAGE' \
        % (expected, got))
    expected, got = 'image/png', response['Content-Type']
    self.assertEqual(expected, got,
        'Expected %s but returned %s for mimetype on raw_datastream view of IMAGE' \
        % (expected, got))
    # non-xml datastreams should have content-md5 & content-length headers
    self.assertEqual(self.obj.image.checksum, response['Content-MD5'],
        'datastream checksum should be set as Content-MD5 header in the response')
    self.assertTrue(response.has_header('Content-Length'),
        'content-length header should be set in the response for binary datastreams')
    self.assert_(isinstance(response, StreamingHttpResponse))

    # non-existent datastream should 404
    self.assertRaises(Http404, raw_datastream, rqst, self.obj.pid,
                      'BOGUS-DSID')
    # non-existent record should 404
    self.assertRaises(Http404, raw_datastream, rqst, 'bogus-pid:1', 'DC')

    # set extra headers in the response
    # (fixed: was calling raw_datastream_old, the view not under test)
    extra_headers = {'Content-Disposition': 'attachment; filename=foo.txt'}
    response = raw_datastream(rqst, self.obj.pid, 'TEXT',
                              headers=extra_headers)
    self.assertTrue(response.has_header('Content-Disposition'))
    self.assertEqual(response['Content-Disposition'],
                     extra_headers['Content-Disposition'])

    # explicitly support GET and HEAD requests only
    rqst.method = 'POST'
    response = raw_datastream(rqst, self.obj.pid, 'DC')
    expected, got = 405, response.status_code
    self.assertEqual(expected, got,
        'Expected %s (Method not Allowed) but returned %s for POST to raw_datastream view' \
        % (expected, got))

    # test HEAD request
    rqst.method = 'HEAD'
    response = raw_datastream(rqst, self.obj.pid, 'DC')
    expected, got = 200, response.status_code
    self.assertEqual(expected, got,
        'Expected %s but returned %s for HEAD request on raw_datastream view' \
        % (expected, got))
    # HEAD returns a plain (non-streaming) response with an empty body
    self.assert_(isinstance(response, HttpResponse))
    self.assertEqual(b'', response.content)

    # test that range requests are passed through to fedora
    # use IMAGE for testing since it is binary content
    # set range header in the request; bytes=0- : entire datastream
    rqst.META['HTTP_RANGE'] = 'bytes=0-'
    rqst.method = 'GET'
    response = raw_datastream(rqst, self.obj.pid, 'IMAGE')
    expected, got = 206, response.status_code
    self.assertEqual(expected, got,
        'Expected %s but returned %s for raw_datastream range request' \
        % (expected, got))
    content = b''.join(c for c in response.streaming_content)
    self.assertEqual(self.obj.image.size, len(content),
        'range request of bytes=0- should return entire content (expected %d, got %d)' \
        % (self.obj.image.size, len(content)))
    self.assertEqual(self.obj.image.size, int(response['Content-Length']),
        'content-length header should be size of entire content (expected %d, got %d)' \
        % (self.obj.image.size, int(response['Content-Length'])))
    expected = 'bytes 0-%d/%d' % (self.obj.image.size - 1,
                                  self.obj.image.size)
    self.assertEqual(expected, response['Content-Range'],
        'content range response header should indicate bytes returned (expected %s, got %s)' \
        % (expected, response['Content-Range']))
    del response

    # set range request for partial beginning content; bytes=0-150
    bytes_requested = 'bytes=0-150'
    rqst.META['HTTP_RANGE'] = bytes_requested
    response = raw_datastream(rqst, self.obj.pid, 'IMAGE')
    expected, got = 206, response.status_code
    self.assertEqual(expected, got,
        'Expected %s but returned %s for raw_datastream range request' \
        % (expected, got))
    # bytes=0-150 is an inclusive range: 151 bytes
    content_len = 151
    content = b''.join(c for c in response.streaming_content)
    self.assertEqual(content_len, len(content),
        'range request of %s should return %d bytes, got %d' \
        % (bytes_requested, content_len, len(content)))
    self.assertEqual(content_len, int(response['Content-Length']),
        'content-length header should be set to partial size %d (got %d)' \
        % (content_len, int(response['Content-Length'])))
    expected = 'bytes 0-150/%d' % self.obj.image.size
    self.assertEqual(expected, response['Content-Range'],
        'content range response header should indicate bytes returned (expected %s, got %s)' \
        % (expected, response['Content-Range']))

    # complex (multi-part) ranges not yet supported; expect 416
    # (fixed: was calling raw_datastream_old with its
    # accept_range_request option instead of the view under test)
    bytes_requested = 'bytes=1-10,30-50'
    rqst.META['HTTP_RANGE'] = bytes_requested
    response = raw_datastream(rqst, self.obj.pid, 'IMAGE')
    expected, got = 416, response.status_code
    self.assertEqual(expected, got,
        'Expected %s but returned %s for raw_datastream invalid range request %s' \
        % (expected, got, bytes_requested))