Example #1
    def _set_up_single_archive(self):
        key_name = "test-key"
        test_file_path = os.path.join(
            test_dir_path, "test-orignal"
        )
        test_file_size = 1024 ** 2

        test_data = os.urandom(test_file_size)

        with open(test_file_path, "wb") as output_file:
            output_file.write(test_data)

        # create the bucket
        bucket = self._s3_connection.create_unique_bucket()
        self.assertTrue(bucket is not None)

        # create an empty key
        write_key = Key(bucket)

        # set the name
        write_key.name = key_name
        self.assertFalse(write_key.exists())

        # upload some data
        with open(test_file_path, "rb") as archive_file:
            write_key.set_contents_from_file(archive_file)        
        self.assertTrue(write_key.exists())

        return test_data, write_key 
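
A minimal usage sketch (not from the source): a test on the same class could consume the helper above by reading the archived key back and checking the round trip. It assumes the motoboto Key mirrors boto's get_contents_as_string().

    def test_single_archive_roundtrip(self):
        # hypothetical test consuming the set-up helper shown above
        test_data, write_key = self._set_up_single_archive()
        # read the archived bytes back and verify they match what was uploaded
        retrieved_data = write_key.get_contents_as_string()
        self.assertEqual(retrieved_data, test_data)
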
Example #2
    def test_key_with_files(self):
        """
        test simple key 'from_file' and 'to_file' functions
        """
        log = logging.getLogger("test_key_with_files")
        bucket_name = "com-dougfort-test-key-with-files"
        key_name = "A" * 1024
        test_file_path = os.path.join(
            _test_dir_path, "test_key_with_files-orignal"
        )
        test_file_size = 1024 ** 2
        buffer_size = 1024

        log.debug("writing %s bytes to %s" % (
            test_file_size, test_file_path, 
        ))
        bytes_written = 0
        with open(test_file_path, "w") as output_file:
            while bytes_written < test_file_size:
                output_file.write(_random_string(buffer_size))
                bytes_written += buffer_size

        # create the bucket
        bucket = self._s3_connection.create_bucket(bucket_name)
        self.assertTrue(bucket is not None)
        self.assertEqual(bucket.name, bucket_name)

        # create an empty key
        write_key = Key(bucket)

        # set the name
        write_key.name = key_name
        self.assertFalse(write_key.exists())

        # upload some data
        with open(test_file_path, "r") as archive_file:
            write_key.set_contents_from_file(archive_file)        
        self.assertTrue(write_key.exists())

        # create another key with the same name 
        read_key = Key(bucket, key_name)

        # read back the data into a separate file; it must differ from
        # test_file_path or the comparison below compares a file to itself
        retrieve_file_path = os.path.join(
            _test_dir_path, "test_key_with_files-retrieve"
        )
        with open(retrieve_file_path, "w") as retrieve_file:
            read_key.get_contents_to_file(retrieve_file)      
        self.assertTrue(
            filecmp.cmp(test_file_path, retrieve_file_path, shallow=False)
        )

        # delete the key
        read_key.delete()
        self.assertFalse(write_key.exists())
        
        # delete the bucket
        self._s3_connection.delete_bucket(bucket_name)
Example #3
    def _archive_one_file(
        self,
        bucket,
        key_name,
        replace,
        size,
    ):
        key = Key(bucket)
        key.name = key_name
        self._log.info("_archive_one_file ({0} {1} ...) versioning={2}".format(
            bucket.name,
            key.name,
            bucket.versioning,
        ))
        bucket_accounting = self._bucket_accounting[bucket.name]

        retry_count = 0
        force_error = random.randint(0, 99) < self._archive_failure_percent
        while not self._halt_event.is_set():
            bucket_accounting.increment_by("archive_request", 1)

            input_file = MockInputFile(size, force_error)

            try:
                key.set_contents_from_file(input_file, replace=replace)
            except MockInputFileError:
                bucket_accounting.increment_by("archive_error", 1)
                self._log.info("MockInputFileError")
                return
            except LumberyardRetryableHTTPError as instance:
                bucket_accounting.increment_by("archive_error", 1)
                if retry_count >= _max_archive_retries:
                    raise
                self._log.warn("%s: retry in %s seconds" % (
                    instance,
                    instance.retry_after,
                ))
                self._halt_event.wait(timeout=instance.retry_after)
                retry_count += 1
                self._log.warn("retry #%s" % (retry_count, ))
                continue

            verification_key = (
                bucket.name,
                key.name,
                key.version_id,
            )
            self._log.info("archived {0}".format(verification_key))
            if verification_key in self.key_verification:
                self._log.error("_archive_one_file duplicate key %s" %
                                (verification_key, ))
            bucket_accounting.increment_by("archive_success", 1)
            # we count this as 'bytes in' because that's what the server counts
            bucket_accounting.increment_by("success_bytes_in", size)
            self.key_verification[verification_key] = \
                    (size, input_file.md5_digest, )

            break
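
For reference, a hypothetical sketch of the MockInputFile / MockInputFileError pair the method above depends on (not the project's own classes): a file-like object that serves `size` bytes through read(), tracks an md5 of what it served, and raises partway through the upload when force_error is set. The chunk size and the digest form exposed by md5_digest are assumptions.

import hashlib
import os


class MockInputFileError(Exception):
    """stand-in for the test module's own error class (sketch only)"""


class MockInputFile(object):
    """sketch of a file-like upload source: serves `size` random bytes via
    read(), keeps an md5 of the served data, and raises MockInputFileError
    halfway through when force_error is set"""
    def __init__(self, size, force_error=False):
        self._size = size
        self._remaining = size
        self._force_error = force_error
        self._md5 = hashlib.md5()

    def read(self, chunk_size=64 * 1024):
        if self._remaining == 0:
            return b""
        if self._force_error and self._remaining <= self._size // 2:
            raise MockInputFileError("simulated input failure")
        chunk = os.urandom(min(chunk_size, self._remaining))
        self._remaining -= len(chunk)
        self._md5.update(chunk)
        return chunk

    @property
    def md5_digest(self):
        # the real class may expose a different digest form
        return self._md5.digest()
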
Example #4
    def test_uninterrupted_resumable(self):
        """
        test get_contents_to_file without any interruption. 
        """
        log = logging.getLogger("test_uninterrupted_resumable")
        key_name = "test-key"
        test_file_path = os.path.join(test_dir_path, "test-orignal")
        test_file_size = 1024**2
        buffer_size = 1024

        log.debug("writing {0} bytes to {1}".format(test_file_size,
                                                    test_file_path))
        bytes_written = 0
        with open(test_file_path, "wb") as output_file:
            while bytes_written < test_file_size:
                output_file.write(os.urandom(buffer_size))
                bytes_written += buffer_size

        # create the bucket
        bucket = self._s3_connection.create_unique_bucket()
        self.assertTrue(bucket is not None)

        # create an empty key
        write_key = Key(bucket)

        # set the name
        write_key.name = key_name
        self.assertFalse(write_key.exists())

        # upload some data
        with open(test_file_path, "rb") as archive_file:
            write_key.set_contents_from_file(archive_file)
        self.assertTrue(write_key.exists())

        # create a ResumableDownloadHandler
        tracker_file_path = os.path.join(test_dir_path, "tracker-file")
        download_handler = ResumableDownloadHandler(
            tracker_file_name=tracker_file_path)

        # read back the data
        retrieve_file_path = os.path.join(test_dir_path,
                                          "test_key_with_files-orignal")
        with open(retrieve_file_path, "wb") as retrieve_file:
            write_key.get_contents_to_file(retrieve_file,
                                           res_download_handler=\
                                            download_handler)

        self.assertTrue(
            filecmp.cmp(test_file_path, retrieve_file_path, shallow=False))

        # delete the key
        write_key.delete()
        self.assertFalse(write_key.exists())

        # delete the bucket
        self._s3_connection.delete_bucket(bucket.name)
Example #5
    def test_well_behaved_upload(self):
        """
        an upload that sends sequences of 10mb 
        """
        key_name = "test-key"
        sequence_size = 10 * 1024 * 1024  
        sequence_count = 3 
        total_seconds = 3 * 301

        write_key = Key(self._s3_connection.default_bucket)
        write_key.name = key_name

        test_file = MockInputFile(sequence_size, sequence_count, total_seconds)

        write_key.set_contents_from_file(test_file)        
        self.assertTrue(write_key.exists())
Example #6
    def test_fast_upload(self):
        """
        the fastest upload we can manage
        """
        key_name = "test-key"
        sequence_size = 3 * 10 * 1024 * 1024  
        sequence_count = 1 
        total_seconds = 0.0

        write_key = Key(self._s3_connection.default_bucket)
        write_key.name = key_name

        test_file = MockInputFile(sequence_size, sequence_count, total_seconds)

        write_key.set_contents_from_file(test_file)        
        self.assertTrue(write_key.exists())
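
These timing tests use a different MockInputFile variant taking (sequence_size, sequence_count, total_seconds). A hypothetical sketch of that contract, not the project's implementation: read() hands out sequence_count chunks of sequence_size bytes, pausing between chunks so the upload spreads over roughly total_seconds.

import time


class MockInputFile(object):
    """sketch of a throttled upload source: sequence_count chunks of
    sequence_size bytes, spread over roughly total_seconds"""
    def __init__(self, sequence_size, sequence_count, total_seconds):
        self._sequence_size = sequence_size
        self._sequences_left = sequence_count
        self._pause = (
            float(total_seconds) / sequence_count if sequence_count else 0.0
        )

    def read(self, _requested_size=-1):
        if self._sequences_left == 0:
            return b""  # EOF: set_contents_from_file stops reading here
        time.sleep(self._pause)
        self._sequences_left -= 1
        return b"\x00" * self._sequence_size
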
Example #7
    def test_well_behaved_upload(self):
        """
        an upload that sends sequences of 10mb 
        """
        key_name = "test-key"
        sequence_size = 10 * 1024 * 1024
        sequence_count = 3
        total_seconds = 3 * 301

        write_key = Key(self._s3_connection.default_bucket)
        write_key.name = key_name

        test_file = MockInputFile(sequence_size, sequence_count, total_seconds)

        write_key.set_contents_from_file(test_file)
        self.assertTrue(write_key.exists())
Example #8
    def test_fast_upload(self):
        """
        the fastest upload we can manage
        """
        key_name = "test-key"
        sequence_size = 3 * 10 * 1024 * 1024
        sequence_count = 1
        total_seconds = 0.0

        write_key = Key(self._s3_connection.default_bucket)
        write_key.name = key_name

        test_file = MockInputFile(sequence_size, sequence_count, total_seconds)

        write_key.set_contents_from_file(test_file)
        self.assertTrue(write_key.exists())
Example #9
    def _archive_one_file(self, bucket, key_name, replace, size):
        key = Key(bucket)
        key.name = key_name
        self._log.info("_archive_one_file ({0} {1} ...) versioning={2}".format(
            bucket.name, 
            key.name, 
            bucket.versioning,
        ))
        bucket_accounting = self._bucket_accounting[bucket.name]

        retry_count = 0
        force_error = random.randint(0, 99) < self._archive_failure_percent
        while not self._halt_event.is_set():
            bucket_accounting.increment_by("archive_request", 1)

            input_file = MockInputFile(size, force_error)

            try:
                key.set_contents_from_file(input_file, replace=replace) 
            except MockInputFileError:
                bucket_accounting.increment_by("archive_error", 1)
                self._log.info("MockInputFileError")
                return
            except LumberyardRetryableHTTPError as instance:
                bucket_accounting.increment_by("archive_error", 1)
                if retry_count >= _max_archive_retries:
                    raise
                self._log.warn("%s: retry in %s seconds" % (
                    instance, instance.retry_after,
                ))
                self._halt_event.wait(timeout=instance.retry_after)
                retry_count += 1
                self._log.warn("retry #%s" % (retry_count, ))
                continue

            verification_key = (bucket.name, key.name, key.version_id, )
            self._log.info("archived {0}".format(verification_key))
            if verification_key in self.key_verification:
                self._log.error("_archive_one_file duplicate key %s" % (
                    verification_key, ))
            bucket_accounting.increment_by("archive_success", 1)
            # we count this as 'bytes in' because that's what the server counts
            bucket_accounting.increment_by("success_bytes_in", size)
            self.key_verification[verification_key] = \
                    (size, input_file.md5_digest, )

            break
Example #10
    def test_slow_upload(self):
        """
        an upload that sends less than 10mb in 5 min
        """
        key_name = "test-key"
        sequence_size = 1024  
        sequence_count = 3 * 10 
        total_seconds = 3 * 10 * 60

        write_key = Key(self._s3_connection.default_bucket)
        write_key.name = key_name

        test_file = MockInputFile(sequence_size, sequence_count, total_seconds)

        with self.assertRaises(LumberyardHTTPError) as context_manager:
            write_key.set_contents_from_file(test_file)

        self.assertEqual(context_manager.exception.status, REQUEST_TIMEOUT)
Example #11
    def test_slow_upload(self):
        """
        an upload that sends less than 10mb in 5 min
        """
        key_name = "test-key"
        sequence_size = 1024
        sequence_count = 3 * 10
        total_seconds = 3 * 10 * 60

        write_key = Key(self._s3_connection.default_bucket)
        write_key.name = key_name

        test_file = MockInputFile(sequence_size, sequence_count, total_seconds)

        with self.assertRaises(LumberyardHTTPError) as context_manager:
            write_key.set_contents_from_file(test_file)

        self.assertEqual(context_manager.exception.status, REQUEST_TIMEOUT)
Example #12
def archive_file(args, nimbusio_identity, file_name):
    log = logging.getLogger("archive_file")

    s3_emulator = motoboto.connect_s3(nimbusio_identity)

    if args.collection_name is None:
        bucket = s3_emulator.default_bucket
    else:
        bucket = s3_emulator.get_bucket(args.collection_name)

    key_name = "".join([args.prefix, file_name])  
    archive_key = Key(bucket, key_name)

    log.info("archiving {0} as key {1} to collection {2}".format(file_name,
                                                                 archive_key,
                                                                 bucket))

    file_path = os.path.join(args.watch_path, file_name)
    with open(file_path, "rb") as input_file:
        archive_key.set_contents_from_file(input_file)
    log.info("archive successful version identifier = {0}".format(
             archive_key.version_id))    

    s3_emulator.close()
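
A short hedged follow-up (not from the source): since archive_file reads the directory from args.watch_path, a caller could sweep the whole watch directory one file at a time. The helper name below is hypothetical.

import os


def archive_directory(args, nimbusio_identity):
    # hypothetical helper (not in the source): archive every file
    # currently sitting in the watch directory, one key per file
    for file_name in sorted(os.listdir(args.watch_path)):
        archive_file(args, nimbusio_identity, file_name)
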
Example #13
    def test_uninterrupted_resumable(self):
        """
        test get_contents_to_file without any interruption. 
        """
        log = logging.getLogger("test_uninterrupted_resumable")
        key_name = "test-key"
        test_file_path = os.path.join(
            test_dir_path, "test-orignal"
        )
        test_file_size = 1024 ** 2
        buffer_size = 1024

        log.debug("writing {0} bytes to {1}".format(test_file_size, 
                                                    test_file_path))
        bytes_written = 0
        with open(test_file_path, "wb") as output_file:
            while bytes_written < test_file_size:
                output_file.write(os.urandom(buffer_size))
                bytes_written += buffer_size

        # create the bucket
        bucket = self._s3_connection.create_unique_bucket()
        self.assertTrue(bucket is not None)

        # create an empty key
        write_key = Key(bucket)

        # set the name
        write_key.name = key_name
        self.assertFalse(write_key.exists())

        # upload some data
        with open(test_file_path, "rb") as archive_file:
            write_key.set_contents_from_file(archive_file)        
        self.assertTrue(write_key.exists())

        # create a ResumableDownloadHandler
        tracker_file_path = os.path.join(
            test_dir_path, "tracker-file"
        )
        download_handler = ResumableDownloadHandler(
            tracker_file_name=tracker_file_path
        )

        # read back the data
        retrieve_file_path = os.path.join(
            test_dir_path, "test_key_with_files-orignal"
        )
        with open(retrieve_file_path, "wb") as retrieve_file:
            write_key.get_contents_to_file(retrieve_file, 
                                           res_download_handler=\
                                            download_handler)      

        self.assertTrue(
            filecmp.cmp(test_file_path, retrieve_file_path, shallow=False)
        )

        # delete the key
        write_key.delete()
        self.assertFalse(write_key.exists())
        
        # delete the bucket
        self._s3_connection.delete_bucket(bucket.name)
Example #14
    def test_interrupted_resumable(self):
        """
        test get_contents_to_file with a simulated interruption. 
        """
        log = logging.getLogger("test_interrupted_resumable")
        key_name = "test-key"
        test_file_path = os.path.join(
            test_dir_path, "test-orignal"
        )
        test_file_size = 1024 ** 2
        interrupted_size = 1024 * 42

        test_data = os.urandom(test_file_size)

        log.debug("writing {0} bytes to {1}".format(test_file_size, 
                                                    test_file_path))
        with open(test_file_path, "wb") as output_file:
            output_file.write(test_data)

        # create the bucket
        bucket = self._s3_connection.create_unique_bucket()
        self.assertTrue(bucket is not None)

        # create an empty key
        write_key = Key(bucket)

        # set the name
        write_key.name = key_name
#        self.assertFalse(write_key.exists())

        # upload some data
        with open(test_file_path, "rb") as archive_file:
            write_key.set_contents_from_file(archive_file)        
        self.assertTrue(write_key.exists())

        # create a ResumableDownloadHandler
        tracker_file_path = os.path.join(
            test_dir_path, "tracker-file"
        )
        download_handler = ResumableDownloadHandler(
            tracker_file_name=tracker_file_path
        )

        retrieve_file_path = os.path.join(
            test_dir_path, "test_key_with_files-orignal"
        )

        # copy some of the data to the retrieve file to simulate an
        # interrupted retrieve
        with open(retrieve_file_path, "wb") as output_file:
            output_file.write(test_data[interrupted_size:])

        # spoof the resumable handler into thinking it has a retrieve
        # in progress
        download_handler._save_tracker_info(write_key)

        # resume the retrieve
        with open(retrieve_file_path, "wb") as retrieve_file:
            write_key.get_contents_to_file(retrieve_file, 
                                           res_download_handler=\
                                            download_handler)      

        # read back the retrieved data
        with open(retrieve_file_path, "rb") as retrieve_file:
            retrieved_data = retrieve_file.read()

        self.assertEqual(len(retrieved_data), len(test_data))
        self.assertTrue(retrieved_data == test_data)

        # delete the key
        write_key.delete()
        self.assertFalse(write_key.exists())
        
        # delete the bucket
        self._s3_connection.delete_bucket(bucket.name)
Example #15
    def test_key_with_files_and_callback(self):
        """
        test simple key 'from_file' and 'to_file' functions
        """
        def _archive_callback(bytes_sent, total_bytes):
            print >> sys.stderr, "archived", str(bytes_sent), "out of", \
                    str(total_bytes)

        def _retrieve_callback(bytes_sent, total_bytes):
            print >> sys.stderr, "retrieved", str(bytes_sent), "out of", \
                    str(total_bytes)

        log = logging.getLogger("test_key_with_files_and_callback")
        bucket_name = "com-dougfort-test-key-with-files-and-callback"
        key_name = "A" * 1024
        test_file_path = os.path.join(
            _test_dir_path, "test_key_with_files-orignal"
        )
        test_file_size = 1024 ** 2
        buffer_size = 1024

        log.debug("writing %s bytes to %s" % (
            test_file_size, test_file_path, 
        ))
        bytes_written = 0
        with open(test_file_path, "w") as output_file:
            while bytes_written < test_file_size:
                output_file.write(_random_string(buffer_size))
                bytes_written += buffer_size

        # create the bucket
        bucket = self._s3_connection.create_bucket(bucket_name)
        self.assertTrue(bucket is not None)
        self.assertEqual(bucket.name, bucket_name)

        # create an empty key
        write_key = Key(bucket)

        # set the name
        write_key.name = key_name
        self.assertFalse(write_key.exists())

        # upload some data
        with open(test_file_path, "r") as archive_file:
            write_key.set_contents_from_file(
                archive_file, cb=_archive_callback
            )        
        self.assertTrue(write_key.exists())

        # create another key with the same name 
        read_key = Key(bucket, key_name)

        # read back the data into a separate file; it must differ from
        # test_file_path or the comparison below compares a file to itself
        retrieve_file_path = os.path.join(
            _test_dir_path, "test_key_with_files_and_callback-retrieve"
        )
        # 2011-08-08 dougfort boto aborts if you don't tell it the size
        read_key.size = test_file_size
        with open(retrieve_file_path, "w") as retrieve_file:
            read_key.get_contents_to_file(
                retrieve_file, cb=_retrieve_callback
            )      
        self.assertTrue(
            filecmp.cmp(test_file_path, retrieve_file_path, shallow=False)
        )

        # delete the key
        read_key.delete()
        self.assertFalse(write_key.exists())
        
        # delete the bucket
        self._s3_connection.delete_bucket(bucket_name)
Example #16
    def test_interrupted_resumable(self):
        """
        test get_contents_to_file with a simulated interruption. 
        """
        log = logging.getLogger("test_interrupted_resumable")
        key_name = "test-key"
        test_file_path = os.path.join(test_dir_path, "test-orignal")
        test_file_size = 1024**2
        interrupted_size = 1024 * 42

        test_data = os.urandom(test_file_size)

        log.debug("writing {0} bytes to {1}".format(test_file_size,
                                                    test_file_path))
        with open(test_file_path, "wb") as output_file:
            output_file.write(test_data)

        # create the bucket
        bucket = self._s3_connection.create_unique_bucket()
        self.assertTrue(bucket is not None)

        # create an empty key
        write_key = Key(bucket)

        # set the name
        write_key.name = key_name
        #        self.assertFalse(write_key.exists())

        # upload some data
        with open(test_file_path, "rb") as archive_file:
            write_key.set_contents_from_file(archive_file)
        self.assertTrue(write_key.exists())

        # create a ResumableDownloadHandler
        tracker_file_path = os.path.join(test_dir_path, "tracker-file")
        download_handler = ResumableDownloadHandler(
            tracker_file_name=tracker_file_path)

        retrieve_file_path = os.path.join(test_dir_path,
                                          "test_key_with_files-orignal")

        # copy some of the data to the retrieve file to simulate an
        # interrupted retrieve
        with open(retrieve_file_path, "wb") as output_file:
            output_file.write(test_data[interrupted_size:])

        # spoof the resumable handler into thinking it has a retrieve
        # in progress
        download_handler._save_tracker_info(write_key)

        # resume the retrieve
        with open(retrieve_file_path, "wb") as retrieve_file:
            write_key.get_contents_to_file(retrieve_file,
                                           res_download_handler=\
                                            download_handler)

        # read back the retrieved data
        with open(retrieve_file_path, "rb") as retrieve_file:
            retrieved_data = retrieve_file.read()

        self.assertEqual(len(retrieved_data), len(test_data))
        self.assertTrue(retrieved_data == test_data)

        # delete the key
        write_key.delete()
        self.assertFalse(write_key.exists())

        # delete the bucket
        self._s3_connection.delete_bucket(bucket.name)