def _archive_one_file(self, bucket, key_name, replace, size):
    key = Key(bucket)
    key.name = key_name
    self._log.info("_archive_one_file ({0} {1} ...) versioning={2}".format(
        bucket.name, key.name, bucket.versioning))

    bucket_accounting = self._bucket_accounting[bucket.name]
    retry_count = 0
    force_error = random.randint(0, 99) < self._archive_failure_percent

    while not self._halt_event.is_set():
        bucket_accounting.increment_by("archive_request", 1)
        input_file = MockInputFile(size, force_error)
        try:
            key.set_contents_from_file(input_file, replace=replace)
        except MockInputFileError:
            bucket_accounting.increment_by("archive_error", 1)
            self._log.info("MockInputFileError")
            return
        except LumberyardRetryableHTTPError, instance:
            bucket_accounting.increment_by("archive_error", 1)
            if retry_count >= _max_archive_retries:
                raise
            self._log.warn("%s: retry in %s seconds" % (
                instance, instance.retry_after, ))
            self._halt_event.wait(timeout=instance.retry_after)
            retry_count += 1
            self._log.warn("retry #%s" % (retry_count, ))
            continue

        verification_key = (bucket.name, key.name, key.version_id, )
        self._log.info("archived {0}".format(verification_key))
        if verification_key in self.key_verification:
            self._log.error("_archive_one_file duplicate key %s" % (
                verification_key, ))
        bucket_accounting.increment_by("archive_success", 1)
        # we count this as 'bytes in' because that's what the server counts
        bucket_accounting.increment_by("success_bytes_in", size)
        self.key_verification[verification_key] = \
            (size, input_file.md5_digest, )
        break
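# A minimal sketch, assuming behavior not shown in this file, of the
# MockInputFile / MockInputFileError interface that _archive_one_file relies
# on: a file-like object that yields `size` bytes of pseudo-random data,
# tracks an md5 digest of what it produced, and raises MockInputFileError
# mid-read when force_error is set.  The real helper lives elsewhere in the
# project; everything below is illustrative only.
import hashlib
import os


class MockInputFileError(Exception):
    pass


class MockInputFile(object):
    def __init__(self, size, force_error=False):
        self._remaining = size
        self._force_error = force_error
        self._md5 = hashlib.md5()

    def read(self, read_size=64 * 1024):
        # simulate a failed archive by blowing up partway through the read loop
        if self._force_error:
            raise MockInputFileError("simulated archive failure")
        if self._remaining <= 0:
            return b""
        data = os.urandom(min(read_size, self._remaining))
        self._remaining -= len(data)
        self._md5.update(data)
        return data

    @property
    def md5_digest(self):
        return self._md5.digest()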
def _bucket_with_unauth_locations(self, access_control):
    log = logging.getLogger("_bucket_with_unauth_locations")
    access_control_json = json.dumps(access_control)

    s3_connection = motoboto.S3Emulator()

    # create the bucket
    bucket = s3_connection.create_unique_bucket(
        access_control=access_control_json)
    self.assertTrue(bucket is not None)

    # the bucket's authenticated connection should be able to list keys
    _ = bucket.get_all_keys()

    # even so, an unauthenticated connection should be denied list_access
    with self.assertRaises(LumberyardHTTPError) as context_manager:
        _ = _list_keys(bucket.name)
    self.assertEqual(context_manager.exception.status, 401)

    # the bucket's authenticated connection should be able to write
    auth_key_name = "authenticated_key"
    auth_test_string = "authenticated test string"
    write_key = Key(bucket)
    write_key.name = auth_key_name
    write_key.set_contents_from_string(auth_test_string)
    self.assertTrue(write_key.exists())

    # an unauthenticated connection should also be able to write
    unauth_key_name = "unauthenticated_key"
    unauth_test_string = "unauth test string"
    archive_result = _archive_key_from_string(bucket.name,
                                              unauth_key_name,
                                              unauth_test_string)
    self.assertTrue("version_identifier" in archive_result)
    head_result = _head_key(bucket.name, unauth_key_name)
    log.info("head_result = {0}".format(head_result))

    # the bucket's authenticated connection should be able to read
    read_key = Key(bucket, auth_key_name)
    returned_string = read_key.get_contents_as_string()
    self.assertEqual(returned_string.decode("utf-8"), auth_test_string)

    # an unauthenticated connection should also be able to read
    returned_string = _retrieve_key_to_string(bucket.name, unauth_key_name)
    self.assertEqual(returned_string.decode("utf-8"), unauth_test_string)

    # the bucket's authenticated connection should be able to delete
    read_key.delete()

    # an unauthenticated connection should also be able to delete
    delete_result = _delete_key(bucket.name, unauth_key_name)
    self.assertTrue(delete_result["success"])

    # delete the bucket
    s3_connection.delete_bucket(bucket.name)
    s3_connection.close()
def _bucket_without_unauth_access(self, access_control):
    if access_control is None:
        access_control_json = None
    else:
        access_control_json = json.dumps(access_control)

    s3_connection = motoboto.S3Emulator()

    # create the bucket
    bucket = s3_connection.create_unique_bucket(
        access_control=access_control_json)

    # the bucket's authenticated connection should be able to list keys
    _ = bucket.get_all_keys()

    # an unauthenticated connection should be denied list_access
    with self.assertRaises(LumberyardHTTPError) as context_manager:
        _ = _list_keys(bucket.name)
    self.assertEqual(context_manager.exception.status, 401)

    # the bucket's authenticated connection should be able to write
    auth_key_name = "authenticated_key"
    auth_test_string = "authenticated test string"
    write_key = Key(bucket)
    write_key.name = auth_key_name
    write_key.set_contents_from_string(auth_test_string)
    self.assertTrue(write_key.exists())

    # an unauthenticated connection should be denied write_access
    unauth_key_name = "unauthenticated_key"
    unauth_test_string = "unauth test string"
    with self.assertRaises(LumberyardHTTPError) as context_manager:
        _ = _archive_key_from_string(bucket.name,
                                     unauth_key_name,
                                     unauth_test_string)
    self.assertEqual(context_manager.exception.status, 401)

    # the bucket's authenticated connection should be able to read
    read_key = Key(bucket, auth_key_name)
    returned_string = read_key.get_contents_as_string()
    self.assertEqual(returned_string.decode("utf-8"), auth_test_string)

    # an unauthenticated connection should be denied read_access
    with self.assertRaises(LumberyardHTTPError) as context_manager:
        _ = _retrieve_key_to_string(bucket.name, unauth_key_name)
    self.assertEqual(context_manager.exception.status, 401)

    # the bucket's authenticated connection should be able to delete
    read_key.delete()

    # an unauthenticated connection should be denied delete_access
    with self.assertRaises(LumberyardHTTPError) as context_manager:
        _ = _delete_key(bucket.name, unauth_key_name)
    self.assertEqual(context_manager.exception.status, 401)

    # delete the bucket
    s3_connection.delete_bucket(bucket.name)
    s3_connection.close()
def test_fast_upload(self):
    """
    the fastest upload we can manage
    """
    key_name = "test-key"
    sequence_size = 3 * 10 * 1024 * 1024
    sequence_count = 1
    total_seconds = 0.0

    write_key = Key(self._s3_connection.default_bucket)
    write_key.name = key_name

    test_file = MockInputFile(sequence_size, sequence_count, total_seconds)
    write_key.set_contents_from_file(test_file)
    self.assertTrue(write_key.exists())
def test_well_behaved_upload(self):
    """
    an upload that sends sequences of 10mb
    """
    key_name = "test-key"
    sequence_size = 10 * 1024 * 1024
    sequence_count = 3
    total_seconds = 3 * 301

    write_key = Key(self._s3_connection.default_bucket)
    write_key.name = key_name

    test_file = MockInputFile(sequence_size, sequence_count, total_seconds)
    write_key.set_contents_from_file(test_file)
    self.assertTrue(write_key.exists())
def _set_up_single_archive(self):
    key_name = "test-key"
    test_file_path = os.path.join(test_dir_path, "test-orignal")
    test_file_size = 1024 ** 2

    test_data = os.urandom(test_file_size)
    with open(test_file_path, "wb") as output_file:
        output_file.write(test_data)

    # create the bucket
    bucket = self._s3_connection.create_unique_bucket()
    self.assertTrue(bucket is not None)

    # create an empty key
    write_key = Key(bucket)

    # set the name
    write_key.name = key_name
    self.assertFalse(write_key.exists())

    # upload some data
    with open(test_file_path, "rb") as archive_file:
        write_key.set_contents_from_file(archive_file)
    self.assertTrue(write_key.exists())

    return test_data, write_key
def test_slow_upload(self):
    """
    an upload that sends less than 10mb in 5 min
    """
    key_name = "test-key"
    sequence_size = 1024
    sequence_count = 3 * 10
    total_seconds = 3 * 10 * 60

    write_key = Key(self._s3_connection.default_bucket)
    write_key.name = key_name

    test_file = MockInputFile(sequence_size, sequence_count, total_seconds)
    with self.assertRaises(LumberyardHTTPError) as context_manager:
        write_key.set_contents_from_file(test_file)
    self.assertEqual(context_manager.exception.status, REQUEST_TIMEOUT)
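# A minimal sketch, as an assumption rather than the project's actual helper,
# of the paced MockInputFile variant used by test_fast_upload,
# test_well_behaved_upload and test_slow_upload above (a different signature
# from the failure-injecting variant used by _archive_one_file): it produces
# `sequence_count` chunks of `sequence_size` bytes spread over `total_seconds`,
# sleeping between reads so the server sees a throttled upload.
import os
import time


class MockInputFile(object):
    def __init__(self, sequence_size, sequence_count, total_seconds):
        self._sequence_size = sequence_size
        self._sequences_left = sequence_count
        if sequence_count > 0:
            self._pause = float(total_seconds) / float(sequence_count)
        else:
            self._pause = 0.0

    def read(self, _read_size=None):
        # ignore the requested read size; always hand back one full sequence
        if self._sequences_left <= 0:
            return b""
        if self._pause > 0.0:
            time.sleep(self._pause)
        self._sequences_left -= 1
        return os.urandom(self._sequence_size)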
def test_key_with_strings(self):
    """
    test simple key 'from_string' and 'as_string' functions
    """
    bucket_name = "com-dougfort-test-key-with-strings"
    key_name = u"test-key"
    test_string = _random_string(1024)

    # create the bucket
    bucket = self._s3_connection.create_bucket(bucket_name)
    self.assertTrue(bucket is not None)
    self.assertEqual(bucket.name, bucket_name)

    # create an empty key
    write_key = Key(bucket)

    # set the name
    write_key.name = key_name
    # self.assertFalse(write_key.exists())

    # upload some data
    write_key.set_contents_from_string(test_string)
    self.assertTrue(write_key.exists())

    # create another key with the same name
    read_key = Key(bucket, key_name)

    # read back the data
    returned_string = read_key.get_contents_as_string()
    self.assertEqual(returned_string, test_string)

    # delete the key
    read_key.delete()
    self.assertFalse(write_key.exists())

    # delete the bucket
    self._s3_connection.delete_bucket(bucket_name)
def archive_file(args, nimbusio_identity, file_name):
    log = logging.getLogger("archive_file")

    s3_emulator = motoboto.connect_s3(nimbusio_identity)

    if args.collection_name is None:
        bucket = s3_emulator.default_bucket
    else:
        bucket = s3_emulator.get_bucket(args.collection_name)

    key_name = "".join([args.prefix, file_name])
    archive_key = Key(bucket, key_name)

    log.info("archiving {0} as key {1} to collection {2}".format(
        file_name, archive_key, bucket))
    file_path = os.path.join(args.watch_path, file_name)
    with open(file_path, "rb") as input_file:
        archive_key.set_contents_from_file(input_file)
    log.info("archive successful version identifier = {0}".format(
        archive_key.version_id))

    s3_emulator.close()
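# A hypothetical caller for archive_file, sketched for illustration only.
# The argument names (watch_path, prefix, collection_name) mirror the
# attributes archive_file reads from `args`; how the nimbus.io identity is
# obtained is left out because that is not shown in this file.  The function
# and option names below are assumptions, not part of the original code.
import argparse
import os


def archive_directory(args, nimbusio_identity):
    # archive every file currently present in the watched directory
    for file_name in sorted(os.listdir(args.watch_path)):
        archive_file(args, nimbusio_identity, file_name)


def parse_commandline():
    parser = argparse.ArgumentParser(description="archive files to nimbus.io")
    parser.add_argument("--watch-path", dest="watch_path", required=True)
    parser.add_argument("--prefix", dest="prefix", default="")
    parser.add_argument("--collection-name", dest="collection_name",
                        default=None)
    return parser.parse_args()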
def test_interrupted_resumable(self):
    """
    test get_contents_to_file with a simulated interruption.
    """
    log = logging.getLogger("test_interrupted_resumable")
    key_name = "test-key"
    test_file_path = os.path.join(test_dir_path, "test-orignal")
    test_file_size = 1024 ** 2
    interrupted_size = 1024 * 42

    test_data = os.urandom(test_file_size)
    log.debug("writing {0} bytes to {1}".format(test_file_size,
                                                test_file_path))
    with open(test_file_path, "wb") as output_file:
        output_file.write(test_data)

    # create the bucket
    bucket = self._s3_connection.create_unique_bucket()
    self.assertTrue(bucket is not None)

    # create an empty key
    write_key = Key(bucket)

    # set the name
    write_key.name = key_name
    # self.assertFalse(write_key.exists())

    # upload some data
    with open(test_file_path, "rb") as archive_file:
        write_key.set_contents_from_file(archive_file)
    self.assertTrue(write_key.exists())

    # create a ResumableDownloadHandler
    tracker_file_path = os.path.join(test_dir_path, "tracker-file")
    download_handler = ResumableDownloadHandler(
        tracker_file_name=tracker_file_path)

    retrieve_file_path = os.path.join(test_dir_path,
                                      "test_key_with_files-orignal")

    # copy some of the data to the retrieve file to simulate an
    # interrupted retrieve
    with open(retrieve_file_path, "wb") as output_file:
        output_file.write(test_data[interrupted_size:])

    # spoof the resumable handler into thinking it has a retrieve
    # in progress
    download_handler._save_tracker_info(write_key)

    # resume the retrieve
    with open(retrieve_file_path, "wb") as retrieve_file:
        write_key.get_contents_to_file(
            retrieve_file, res_download_handler=download_handler)

    # read back the retrieved data
    with open(retrieve_file_path, "rb") as retrieve_file:
        retrieved_data = retrieve_file.read()
    self.assertEqual(len(retrieved_data), len(test_data))
    self.assertTrue(retrieved_data == test_data)

    # delete the key
    write_key.delete()
    self.assertFalse(write_key.exists())

    # delete the bucket
    self._s3_connection.delete_bucket(bucket.name)
def test_uninterrupted_resumable(self):
    """
    test get_contents_to_file without any interruption.
    """
    log = logging.getLogger("test_uninterrupted_resumable")
    key_name = "test-key"
    test_file_path = os.path.join(test_dir_path, "test-orignal")
    test_file_size = 1024 ** 2
    buffer_size = 1024

    log.debug("writing {0} bytes to {1}".format(test_file_size,
                                                test_file_path))
    bytes_written = 0
    with open(test_file_path, "wb") as output_file:
        while bytes_written < test_file_size:
            output_file.write(os.urandom(buffer_size))
            bytes_written += buffer_size

    # create the bucket
    bucket = self._s3_connection.create_unique_bucket()
    self.assertTrue(bucket is not None)

    # create an empty key
    write_key = Key(bucket)

    # set the name
    write_key.name = key_name
    self.assertFalse(write_key.exists())

    # upload some data
    with open(test_file_path, "rb") as archive_file:
        write_key.set_contents_from_file(archive_file)
    self.assertTrue(write_key.exists())

    # create a ResumableDownloadHandler
    tracker_file_path = os.path.join(test_dir_path, "tracker-file")
    download_handler = ResumableDownloadHandler(
        tracker_file_name=tracker_file_path)

    # read back the data
    retrieve_file_path = os.path.join(test_dir_path,
                                      "test_key_with_files-orignal")
    with open(retrieve_file_path, "wb") as retrieve_file:
        write_key.get_contents_to_file(
            retrieve_file, res_download_handler=download_handler)
    self.assertTrue(
        filecmp.cmp(test_file_path, retrieve_file_path, shallow=False))

    # delete the key
    write_key.delete()
    self.assertFalse(write_key.exists())

    # delete the bucket
    self._s3_connection.delete_bucket(bucket.name)
def test_key_with_files_and_callback(self):
    """
    test simple key 'from_file' and 'to_file' functions
    """
    def _archive_callback(bytes_sent, total_bytes):
        print >> sys.stderr, "archived", str(bytes_sent), "out of", \
            str(total_bytes)

    def _retrieve_callback(bytes_sent, total_bytes):
        print >> sys.stderr, "retrieved", str(bytes_sent), "out of", \
            str(total_bytes)

    log = logging.getLogger("test_key_with_files_and_callback")
    bucket_name = "com-dougfort-test-key-with-files-and-callback"
    key_name = "A" * 1024
    test_file_path = os.path.join(_test_dir_path,
                                  "test_key_with_files-orignal")
    test_file_size = 1024 ** 2
    buffer_size = 1024

    log.debug("writing %s bytes to %s" % (test_file_size, test_file_path, ))
    bytes_written = 0
    with open(test_file_path, "w") as output_file:
        while bytes_written < test_file_size:
            output_file.write(_random_string(buffer_size))
            bytes_written += buffer_size

    # create the bucket
    bucket = self._s3_connection.create_bucket(bucket_name)
    self.assertTrue(bucket is not None)
    self.assertEqual(bucket.name, bucket_name)

    # create an empty key
    write_key = Key(bucket)

    # set the name
    write_key.name = key_name
    self.assertFalse(write_key.exists())

    # upload some data
    with open(test_file_path, "r") as archive_file:
        write_key.set_contents_from_file(archive_file, cb=_archive_callback)
    self.assertTrue(write_key.exists())

    # create another key with the same name
    read_key = Key(bucket, key_name)

    # read back the data into a separate file so the comparison below
    # actually checks the retrieved contents against the original
    retrieve_file_path = os.path.join(_test_dir_path,
                                      "test_key_with_files-retrieved")

    # 2011-08-08 dougfort boto aborts if you don't tell it the size
    read_key.size = test_file_size

    with open(retrieve_file_path, "w") as retrieve_file:
        read_key.get_contents_to_file(retrieve_file, cb=_retrieve_callback)
    self.assertTrue(
        filecmp.cmp(test_file_path, retrieve_file_path, shallow=False))

    # delete the key
    read_key.delete()
    self.assertFalse(write_key.exists())

    # delete the bucket
    self._s3_connection.delete_bucket(bucket_name)
def test_key_with_files(self):
    """
    test simple key 'from_file' and 'to_file' functions
    """
    log = logging.getLogger("test_key_with_files")
    bucket_name = "com-dougfort-test-key-with-files"
    key_name = "A" * 1024
    test_file_path = os.path.join(_test_dir_path,
                                  "test_key_with_files-orignal")
    test_file_size = 1024 ** 2
    buffer_size = 1024

    log.debug("writing %s bytes to %s" % (test_file_size, test_file_path, ))
    bytes_written = 0
    with open(test_file_path, "w") as output_file:
        while bytes_written < test_file_size:
            output_file.write(_random_string(buffer_size))
            bytes_written += buffer_size

    # create the bucket
    bucket = self._s3_connection.create_bucket(bucket_name)
    self.assertTrue(bucket is not None)
    self.assertEqual(bucket.name, bucket_name)

    # create an empty key
    write_key = Key(bucket)

    # set the name
    write_key.name = key_name
    self.assertFalse(write_key.exists())

    # upload some data
    with open(test_file_path, "r") as archive_file:
        write_key.set_contents_from_file(archive_file)
    self.assertTrue(write_key.exists())

    # create another key with the same name
    read_key = Key(bucket, key_name)

    # read back the data into a separate file so the comparison below
    # actually checks the retrieved contents against the original
    retrieve_file_path = os.path.join(_test_dir_path,
                                      "test_key_with_files-retrieved")
    with open(retrieve_file_path, "w") as retrieve_file:
        read_key.get_contents_to_file(retrieve_file)
    self.assertTrue(
        filecmp.cmp(test_file_path, retrieve_file_path, shallow=False))

    # delete the key
    read_key.delete()
    self.assertFalse(write_key.exists())

    # delete the bucket
    self._s3_connection.delete_bucket(bucket_name)
def xxxtest_simple_multipart(self):
    """
    test a simple multipart upload
    """
    log = logging.getLogger("test_simple_multipart")
    bucket_name = "com-dougfort-test-simple-multipart"
    key_name = "test_key"
    test_file_path = os.path.join(_test_dir_path,
                                  "test_simple_multipart-orignal")
    part_count = 2
    # 5mb is the minimum size s3 will take
    test_file_size = 1024 ** 2 * 5 * part_count
    buffer_size = 1024

    log.debug("writing %s bytes to %s" % (test_file_size, test_file_path, ))
    bytes_written = 0
    with open(test_file_path, "w") as output_file:
        while bytes_written < test_file_size:
            output_file.write(_random_string(buffer_size))
            bytes_written += buffer_size

    # create the bucket
    bucket = self._s3_connection.create_bucket(bucket_name)
    self.assertTrue(bucket is not None)
    self.assertEqual(bucket.name, bucket_name)

    # assert that we have no uploads in progress
    upload_list = bucket.get_all_multipart_uploads()
    self.assertEqual(len(upload_list), 0)

    # start the multipart upload
    multipart_upload = bucket.initiate_multipart_upload(key_name)

    # assert that our upload is in progress
    upload_list = bucket.get_all_multipart_uploads()
    self.assertEqual(len(upload_list), 1)
    self.assertEqual(upload_list[0].id, multipart_upload.id)

    # upload the file in pieces, advancing the file position for each part
    current_pos = 0
    part_size = int(test_file_size / part_count)
    for index in range(part_count):
        with open(test_file_path, "r") as input_file:
            input_file.seek(current_pos)
            data = input_file.read(part_size)
        upload_file = StringIO(data)
        multipart_upload.upload_part_from_file(upload_file, index + 1)
        current_pos += part_size

    # complete the upload
    completed_upload = multipart_upload.complete_upload()
    print >> sys.stderr, dir(completed_upload)

    # delete the key
    key = Key(bucket, key_name)
    key.delete()

    # delete the bucket
    self._s3_connection.delete_bucket(bucket_name)
def xxxtest_key_with_meta(self):
    """
    test simple key with metadata added
    """
    bucket_name = "com-dougfort-test-key-with-meta"
    key_name = u"test-key"
    test_string = _random_string(1024)
    meta_key = u"meta_key"
    meta_value = "pork"

    # create the bucket
    bucket = self._s3_connection.create_bucket(bucket_name)
    self.assertTrue(bucket is not None)
    self.assertEqual(bucket.name, bucket_name)

    # create an empty key
    write_key = Key(bucket)

    # set the name
    write_key.name = key_name
    # self.assertFalse(write_key.exists())

    # set some metadata
    write_key.set_metadata(meta_key, meta_value)

    # upload some data
    write_key.set_contents_from_string(test_string)
    self.assertTrue(write_key.exists())

    # create another key with the same name
    read_key = Key(bucket, key_name)

    # read back the data
    returned_string = read_key.get_contents_as_string()
    self.assertEqual(returned_string, test_string)

    # get the metadata
    returned_meta_value = read_key.get_metadata(meta_key)
    self.assertEqual(returned_meta_value, meta_value)

    # delete the key
    read_key.delete()
    self.assertFalse(write_key.exists())

    # delete the bucket
    self._s3_connection.delete_bucket(bucket_name)