Esempio n. 1
0
class TestParamFileVisitor(unittest.TestCase):
    """Check how ParamFileVisitor treats ``file://``/``fileb://`` values.

    Every test passes the same file reference for both ``script`` and
    ``jobXml`` and asserts that only ``script`` comes back replaced by the
    contents of the referenced file.
    """

    def setUp(self):
        # Parse the model inside a ``with`` block so the file handle is
        # closed right away instead of being leaked until garbage collection.
        with open(os.path.join(MODEL_DIR, 'service.yaml')) as model_file:
            self.model = yaml.safe_load(model_file)
        self.service_model = ServiceModel(self.model, 'servicename')
        self.resolver = ShapeResolver(self.model['definitions'])
        self.files = FileCreator()

    def tearDown(self):
        self.files.remove_all()

    def test_visitor(self):
        contents = 'This is a test'
        filename = self.files.create_file('jobOne.hql', contents)
        # We have modified our test model to mark jobXml with x-no-paramfile.
        params = {'clusterName': u'foo',
                  'jobs': [{'hiveJob': {'script': 'file://' + filename,
                                        'jobXml': 'file://' + filename}}]}
        shape = self.resolver.get_shape_by_name(
            'submit-jobs-request', 'SubmitJobsRequest')
        visited = ParamFileVisitor().visit(params, shape)
        # Turn ``params`` into the expected result: only ``script`` is
        # swapped for the file contents.
        params['jobs'][0]['hiveJob']['script'] = contents
        self.assertEqual(params, visited)

    def test_ref_map_visitor(self):
        contents = 'This is a test'
        filename = self.files.create_file('jobOne.hql', contents)
        # We have modified our test model to mark jobXml with x-no-paramfile.
        params = {'jobs': {'job1': {'hiveJob': {
            'script': 'file://' + filename,
            'jobXml': 'file://' + filename}}}}
        shape = self.resolver.get_shape_by_name(
            'map-paramfile-test', 'RefMapParamFileTest')
        visited = ParamFileVisitor().visit(params, shape)
        # Only ``script`` is expected to be replaced by the file contents.
        params['jobs']['job1']['hiveJob']['script'] = contents
        self.assertEqual(params, visited)

    def test_explicit_map_visitor(self):
        contents = 'This is a test'
        filename = self.files.create_file('jobOne.hql', contents)
        # We have modified our test model to mark jobXml with x-no-paramfile.
        params = {'jobs': {'job1': {'script': 'file://' + filename,
                                    'jobXml': 'file://' + filename}}}
        shape = self.resolver.get_shape_by_name(
            'map-paramfile-test', 'ExplicitMapParamFileTest')
        visited = ParamFileVisitor().visit(params, shape)
        # Only ``script`` is expected to be replaced by the file contents.
        params['jobs']['job1']['script'] = contents
        self.assertEqual(params, visited)

    def test_blob_visitor(self):
        contents = b'This is a test'
        # Written in binary mode because the expected value is bytes.
        filename = self.files.create_file('jobOne.hql', contents, mode='wb')
        # We have modified our test model to mark jobXml with x-no-paramfile.
        params = {'jobs': {'job1': {'script': 'fileb://' + filename,
                                    'jobXml': 'fileb://' + filename}}}
        shape = self.resolver.get_shape_by_name(
            'blob-test', 'BlobParamFileTest')
        visited = ParamFileVisitor().visit(params, shape)
        # Only ``script`` is expected to be replaced by the raw bytes.
        params['jobs']['job1']['script'] = contents
        self.assertEqual(params, visited)
Esempio n. 2
0
class TestParamFile(unittest.TestCase):
    """Tests for ``get_paramfile`` with ``file://`` and ``fileb://`` URIs."""

    def setUp(self):
        self.files = FileCreator()

    def tearDown(self):
        self.files.remove_all()

    def test_text_file(self):
        # A file:// reference should come back as a text string.
        expected = 'This is a test'
        path = self.files.create_file('foo', expected)
        loaded = get_paramfile('file://' + path)
        self.assertEqual(loaded, expected)
        self.assertIsInstance(loaded, six.string_types)

    def test_binary_file(self):
        # A fileb:// reference should come back as a byte string.
        path = self.files.create_file('foo', 'This is a test')
        loaded = get_paramfile('fileb://' + path)
        self.assertEqual(loaded, b'This is a test')
        self.assertIsInstance(loaded, six.binary_type)

    @skip_if_windows('Binary content error only occurs '
                     'on non-Windows platforms.')
    def test_cannot_load_text_file(self):
        # Arbitrary bytes written in binary mode, then requested as text.
        path = self.files.create_file('foo', b'\xbfX\xac\xbe', mode='wb')
        self.assertRaises(
            ResourceLoadingError, get_paramfile, 'file://' + path)

    def test_file_does_not_exist_raises_error(self):
        missing_uri = 'file://file/does/not/existsasdf.txt'
        self.assertRaises(ResourceLoadingError, get_paramfile, missing_uri)

    def test_no_match_uris_returns_none(self):
        # An unrecognised scheme is not treated as a paramfile.
        result = get_paramfile('foobar://somewhere.bar')
        self.assertIsNone(result)

    def test_non_string_type_returns_none(self):
        result = get_paramfile(100)
        self.assertIsNone(result)
Esempio n. 3
0
class TestGetObjectWorker(StubbedClientTest):
    """Run a GetObjectWorker over queued jobs against a stubbed client."""

    def setUp(self):
        super(TestGetObjectWorker, self).setUp()
        self.files = FileCreator()

        # Default job parameters shared by the tests.
        self.bucket = 'bucket'
        self.key = 'key'
        self.extra_args = {}
        self.offset = 0
        self.remote_contents = b'my content'
        self.stream = six.BytesIO(self.remote_contents)
        self.temp_filename = self.files.create_file('tempfile', '')
        self.final_filename = self.files.full_path('final_filename')

        # Collaborators handed to the worker under test.
        self.queue = queue.Queue()
        self.osutil = OSUtils()
        self.transfer_monitor = TransferMonitor()
        self.client_factory = mock.Mock(ClientFactory)
        self.client_factory.create_client.return_value = self.client
        self.worker = GetObjectWorker(
            queue=self.queue,
            client_factory=self.client_factory,
            transfer_monitor=self.transfer_monitor,
            osutil=self.osutil,
        )

        # Register a transfer; individual tests may override the job count.
        self.transfer_id = self.transfer_monitor.notify_new_transfer()
        self.transfer_monitor.notify_expected_jobs_to_complete(
            self.transfer_id, 1000)

    def tearDown(self):
        super(TestGetObjectWorker, self).tearDown()
        self.files.remove_all()

    def add_get_object_job(self, **override_kwargs):
        # Queue a GetObjectJob built from the fixture defaults, with any
        # keyword overrides applied on top.
        job_kwargs = dict(
            transfer_id=self.transfer_id,
            bucket=self.bucket,
            key=self.key,
            temp_filename=self.temp_filename,
            extra_args=self.extra_args,
            offset=self.offset,
            filename=self.final_filename,
        )
        job_kwargs.update(override_kwargs)
        self.queue.put(GetObjectJob(**job_kwargs))

    def add_shutdown(self):
        self.queue.put(SHUTDOWN_SIGNAL)

    def add_stubbed_get_object_response(self, body=None, expected_params=None):
        # Stub one get_object response; the body defaults to the fixture
        # stream and the expected params to the fixture bucket/key.
        if expected_params is None:
            expected_params = {'Bucket': self.bucket, 'Key': self.key}
        response = {'Body': self.stream if body is None else body}
        self.stubber.add_response('get_object', response, expected_params)

    def assert_contents(self, filename, contents):
        self.assertTrue(os.path.exists(filename))
        with open(filename, 'rb') as f:
            actual = f.read()
        self.assertEqual(actual, contents)

    def assert_does_not_exist(self, filename):
        self.assertFalse(os.path.exists(filename))

    def test_run_is_final_job(self):
        self.add_get_object_job()
        self.add_shutdown()
        self.add_stubbed_get_object_response()
        # Only one job is expected, so this job completes the transfer.
        self.transfer_monitor.notify_expected_jobs_to_complete(
            self.transfer_id, 1)

        self.worker.run()

        self.stubber.assert_no_pending_responses()
        self.assert_does_not_exist(self.temp_filename)
        self.assert_contents(self.final_filename, self.remote_contents)

    def test_run_jobs_is_not_final_job(self):
        self.add_get_object_job()
        self.add_shutdown()
        self.add_stubbed_get_object_response()
        # Many more jobs are expected, so the transfer stays unfinished.
        self.transfer_monitor.notify_expected_jobs_to_complete(
            self.transfer_id, 1000)

        self.worker.run()

        self.stubber.assert_no_pending_responses()
        self.assert_contents(self.temp_filename, self.remote_contents)
        self.assert_does_not_exist(self.final_filename)

    def test_run_with_extra_args(self):
        self.add_get_object_job(extra_args={'VersionId': 'versionid'})
        self.add_shutdown()
        params_with_version = {
            'Bucket': self.bucket,
            'Key': self.key,
            'VersionId': 'versionid',
        }
        self.add_stubbed_get_object_response(
            expected_params=params_with_version)

        self.worker.run()

        self.stubber.assert_no_pending_responses()

    def test_run_with_offset(self):
        start = 1
        self.add_get_object_job(offset=start)
        self.add_shutdown()
        self.add_stubbed_get_object_response()

        self.worker.run()

        # The downloaded bytes are expected to begin at the job's offset.
        with open(self.temp_filename, 'rb') as f:
            f.seek(start)
            written = f.read()
            self.assertEqual(written, self.remote_contents)

    def test_run_error_in_get_object(self):
        self.add_get_object_job()
        self.add_shutdown()
        self.stubber.add_client_error('get_object', 'NoSuchKey', 404)
        self.add_stubbed_get_object_response()

        self.worker.run()

        recorded = self.transfer_monitor.get_exception(self.transfer_id)
        self.assertIsInstance(recorded, ClientError)

    def test_run_does_retries_for_get_object(self):
        self.add_get_object_job()
        self.add_shutdown()
        # First response has a body wrapped with a ReadTimeoutError; the
        # second is a plain response.
        failing_body = StreamWithError(
            self.stream, ReadTimeoutError(endpoint_url=''))
        self.add_stubbed_get_object_response(body=failing_body)
        self.add_stubbed_get_object_response()

        self.worker.run()

        self.stubber.assert_no_pending_responses()
        self.assert_contents(self.temp_filename, self.remote_contents)

    def test_run_can_exhaust_retries_for_get_object(self):
        self.add_get_object_job()
        self.add_shutdown()
        # 5 is the current setting for max number of GetObject attempts
        max_attempts = 5
        for _ in range(max_attempts):
            failing_body = StreamWithError(
                self.stream, ReadTimeoutError(endpoint_url=''))
            self.add_stubbed_get_object_response(body=failing_body)

        self.worker.run()

        self.stubber.assert_no_pending_responses()
        recorded = self.transfer_monitor.get_exception(self.transfer_id)
        self.assertIsInstance(recorded, RetriesExceededError)

    def test_run_skips_get_object_on_previous_exception(self):
        self.add_get_object_job()
        self.add_shutdown()
        self.transfer_monitor.notify_exception(self.transfer_id, Exception())

        self.worker.run()

        # Note we did not add a stubbed response for get_object
        self.stubber.assert_no_pending_responses()

    def test_run_final_job_removes_file_on_previous_exception(self):
        self.add_get_object_job()
        self.add_shutdown()
        self.transfer_monitor.notify_exception(self.transfer_id, Exception())
        self.transfer_monitor.notify_expected_jobs_to_complete(
            self.transfer_id, 1)

        self.worker.run()

        self.stubber.assert_no_pending_responses()
        self.assert_does_not_exist(self.temp_filename)
        self.assert_does_not_exist(self.final_filename)

    def test_run_fails_to_rename_file(self):
        rename_error = OSError()
        failing_osutil = RenameFailingOSUtils(rename_error)
        # Replace the fixture worker with one using the failing OS utils.
        self.worker = GetObjectWorker(
            queue=self.queue,
            client_factory=self.client_factory,
            transfer_monitor=self.transfer_monitor,
            osutil=failing_osutil,
        )
        self.add_get_object_job()
        self.add_shutdown()
        self.add_stubbed_get_object_response()
        self.transfer_monitor.notify_expected_jobs_to_complete(
            self.transfer_id, 1)

        self.worker.run()

        recorded = self.transfer_monitor.get_exception(self.transfer_id)
        self.assertEqual(recorded, rename_error)
        self.assert_does_not_exist(self.temp_filename)
        self.assert_does_not_exist(self.final_filename)

    @skip_if_windows('os.kill() with SIGINT not supported on Windows')
    def test_worker_cannot_be_killed(self):
        self.add_get_object_job()
        self.add_shutdown()
        self.transfer_monitor.notify_expected_jobs_to_complete(
            self.transfer_id, 1)

        def send_sigint(**kwargs):
            # Deliver SIGINT to this very process while get_object runs.
            os.kill(os.getpid(), signal.SIGINT)

        interrupting_client = mock.Mock()
        interrupting_client.get_object = send_sigint
        self.client_factory.create_client.return_value = interrupting_client

        try:
            self.worker.run()
        except KeyboardInterrupt:
            self.fail('The worker should have not been killed by the '
                      'KeyboardInterrupt')
Esempio n. 4
0
class TestBotocoreCRTRequestSerializer(unittest.TestCase):
    """Check the HTTP requests built by BotocoreCRTRequestSerializer."""

    def setUp(self):
        self.region = 'us-west-2'
        self.session = Session()
        self.session.set_config_variable('region', self.region)
        self.request_serializer = s3transfer.crt.BotocoreCRTRequestSerializer(
            self.session)
        self.bucket = "test_bucket"
        self.key = "test_key"
        self.files = FileCreator()
        self.filename = self.files.create_file('myfile', 'my content')
        self.expected_path = "/" + self.bucket + "/" + self.key
        self.expected_host = "s3.%s.amazonaws.com" % self.region

    def tearDown(self):
        self.files.remove_all()

    def _serialize(self, transfer_type, **call_kwargs):
        # Wrap the call arguments in a transfer future and serialize it.
        call_args = CallArgs(**call_kwargs)
        coordinator = s3transfer.crt.CRTTransferCoordinator()
        meta = s3transfer.crt.CRTTransferMeta(call_args=call_args)
        future = s3transfer.crt.CRTTransferFuture(meta, coordinator)
        return self.request_serializer.serialize_http_request(
            transfer_type, future)

    def _assert_request(self, crt_request, expected_method):
        # Common checks: method, path, host header, no Authorization header.
        self.assertEqual(expected_method, crt_request.method)
        self.assertEqual(self.expected_path, crt_request.path)
        self.assertEqual(self.expected_host, crt_request.headers.get("host"))
        self.assertIsNone(crt_request.headers.get("Authorization"))

    def test_upload_request(self):
        crt_request = self._serialize(
            "put_object",
            bucket=self.bucket,
            key=self.key,
            fileobj=self.filename,
            extra_args={},
            subscribers=[],
        )
        self._assert_request(crt_request, "PUT")

    def test_download_request(self):
        crt_request = self._serialize(
            "get_object",
            bucket=self.bucket,
            key=self.key,
            fileobj=self.filename,
            extra_args={},
            subscribers=[],
        )
        self._assert_request(crt_request, "GET")

    def test_delete_request(self):
        # Delete takes no fileobj.
        crt_request = self._serialize(
            "delete_object",
            bucket=self.bucket,
            key=self.key,
            extra_args={},
            subscribers=[],
        )
        self._assert_request(crt_request, "DELETE")
Esempio n. 5
0
class TestCRTTransferManager(unittest.TestCase):
    """Tests for CRTTransferManager driven through a mocked CRT S3 client.

    ``make_request`` on the mocked client is given a side effect in most
    tests that immediately invokes the ``on_done`` callback it was passed,
    so the returned future can be resolved without any real transfer.
    """

    def setUp(self):
        self.region = 'us-west-2'
        self.bucket = "test_bucket"
        self.key = "test_key"
        self.files = FileCreator()
        self.filename = self.files.create_file('myfile', 'my content')
        self.expected_path = "/" + self.bucket + "/" + self.key
        self.expected_host = "s3.%s.amazonaws.com" % (self.region)
        self.s3_request = mock.Mock(awscrt.s3.S3Request)
        self.s3_crt_client = mock.Mock(awscrt.s3.S3Client)
        self.s3_crt_client.make_request.return_value = self.s3_request
        self.session = Session()
        self.session.set_config_variable('region', self.region)
        self.request_serializer = s3transfer.crt.BotocoreCRTRequestSerializer(
            self.session)
        self.transfer_manager = s3transfer.crt.CRTTransferManager(
            crt_s3_client=self.s3_crt_client,
            crt_request_serializer=self.request_serializer)
        self.record_subscriber = RecordingSubscriber()

    def tearDown(self):
        self.files.remove_all()

    def _assert_subscribers_called(self, expected_future=None):
        """Assert the recording subscriber saw both queued and done events.

        When ``expected_future`` is given, also assert both events received
        that exact future object.
        """
        self.assertTrue(self.record_subscriber.on_queued_called)
        self.assertTrue(self.record_subscriber.on_done_called)
        if expected_future:
            self.assertIs(self.record_subscriber.on_queued_future,
                          expected_future)
            self.assertIs(self.record_subscriber.on_done_future,
                          expected_future)

    def _invoke_done_callbacks(self, **kwargs):
        """Call the ``on_done`` passed to the latest ``make_request`` call.

        ``kwargs`` is accepted but not used.
        """
        callargs = self.s3_crt_client.make_request.call_args
        callargs_kwargs = callargs[1]
        on_done = callargs_kwargs["on_done"]
        on_done(error=None)

    def _simulate_file_download(self, recv_filepath):
        """Create ``recv_filepath`` holding the fake response payload."""
        self.files.create_file(recv_filepath, "fake resopnse")

    def _simulate_make_request_side_effect(self, **kwargs):
        """Side effect for ``make_request`` that finishes the request at once.

        Writes the fake download when a ``recv_filepath`` was supplied, then
        fires the done callback. Returning ``mock.DEFAULT`` makes the mock
        hand back its configured ``return_value``.
        """
        if kwargs.get('recv_filepath'):
            self._simulate_file_download(kwargs['recv_filepath'])
        self._invoke_done_callbacks()
        return mock.DEFAULT

    def test_upload(self):
        self.s3_crt_client.make_request.side_effect = (
            self._simulate_make_request_side_effect)
        future = self.transfer_manager.upload(self.filename, self.bucket,
                                              self.key, {},
                                              [self.record_subscriber])
        future.result()

        callargs = self.s3_crt_client.make_request.call_args
        callargs_kwargs = callargs[1]
        self.assertEqual(callargs_kwargs["send_filepath"], self.filename)
        self.assertIsNone(callargs_kwargs["recv_filepath"])
        self.assertEqual(callargs_kwargs["type"],
                         awscrt.s3.S3RequestType.PUT_OBJECT)
        crt_request = callargs_kwargs["request"]
        self.assertEqual("PUT", crt_request.method)
        self.assertEqual(self.expected_path, crt_request.path)
        self.assertEqual(self.expected_host, crt_request.headers.get("host"))
        self._assert_subscribers_called(future)

    def test_download(self):
        self.s3_crt_client.make_request.side_effect = (
            self._simulate_make_request_side_effect)
        future = self.transfer_manager.download(self.bucket, self.key,
                                                self.filename, {},
                                                [self.record_subscriber])
        future.result()

        callargs = self.s3_crt_client.make_request.call_args
        callargs_kwargs = callargs[1]
        # the recv_filepath will be set to a temporary file path with some
        # random suffix
        # Escape the filename so regex metacharacters in the temporary path
        # (for example backslashes on Windows) are matched literally.
        self.assertTrue(
            re.match(re.escape(self.filename) + ".*",
                     callargs_kwargs["recv_filepath"]))
        self.assertIsNone(callargs_kwargs["send_filepath"])
        self.assertEqual(callargs_kwargs["type"],
                         awscrt.s3.S3RequestType.GET_OBJECT)
        crt_request = callargs_kwargs["request"]
        self.assertEqual("GET", crt_request.method)
        self.assertEqual(self.expected_path, crt_request.path)
        self.assertEqual(self.expected_host, crt_request.headers.get("host"))
        self._assert_subscribers_called(future)
        with open(self.filename, 'rb') as f:
            # Check the fake response overwrites the file because of download
            self.assertEqual(f.read(), b'fake resopnse')

    def test_delete(self):
        self.s3_crt_client.make_request.side_effect = (
            self._simulate_make_request_side_effect)
        future = self.transfer_manager.delete(self.bucket, self.key, {},
                                              [self.record_subscriber])
        future.result()

        callargs = self.s3_crt_client.make_request.call_args
        callargs_kwargs = callargs[1]
        self.assertIsNone(callargs_kwargs["send_filepath"])
        self.assertIsNone(callargs_kwargs["recv_filepath"])
        self.assertEqual(callargs_kwargs["type"],
                         awscrt.s3.S3RequestType.DEFAULT)
        crt_request = callargs_kwargs["request"]
        self.assertEqual("DELETE", crt_request.method)
        self.assertEqual(self.expected_path, crt_request.path)
        self.assertEqual(self.expected_host, crt_request.headers.get("host"))
        self._assert_subscribers_called(future)

    def test_blocks_when_max_requests_processes_reached(self):
        futures = []
        callargs = (self.bucket, self.key, self.filename, {}, [])
        max_request_processes = 128  # the hard coded max processes
        all_concurrent = max_request_processes + 1
        threads = []
        for _ in range(all_concurrent):
            thread = submitThread(self.transfer_manager, futures, callargs)
            thread.start()
            threads.append(thread)
        self.assertLessEqual(self.s3_crt_client.make_request.call_count,
                             max_request_processes)
        # Release lock
        self._invoke_done_callbacks()
        for thread in threads:
            thread.join()
        self.assertEqual(self.s3_crt_client.make_request.call_count,
                         all_concurrent)

    def _cancel_function(self):
        """Stand-in for the request's ``cancel``.

        Records that it was called, fails the request's finished future
        with a CRT exception, and fires the done callback.
        """
        self.cancel_called = True
        self.s3_request.finished_future.set_exception(
            awscrt.exceptions.from_code(0))
        self._invoke_done_callbacks()

    def test_cancel(self):
        self.s3_request.finished_future = Future()
        self.cancel_called = False
        self.s3_request.cancel = self._cancel_function
        try:
            with self.transfer_manager:
                future = self.transfer_manager.upload(self.filename,
                                                      self.bucket, self.key,
                                                      {}, [])
                # Interrupt while the upload is still outstanding.
                raise KeyboardInterrupt()
        except KeyboardInterrupt:
            pass

        with self.assertRaises(awscrt.exceptions.AwsCrtError):
            future.result()
        self.assertTrue(self.cancel_called)

    def test_serializer_error_handling(self):
        class SerializationException(Exception):
            pass

        class ExceptionRaisingSerializer(
                s3transfer.crt.BaseCRTRequestSerializer):
            def serialize_http_request(self, transfer_type, future):
                raise SerializationException()

        not_impl_serializer = ExceptionRaisingSerializer()
        transfer_manager = s3transfer.crt.CRTTransferManager(
            crt_s3_client=self.s3_crt_client,
            crt_request_serializer=not_impl_serializer)
        future = transfer_manager.upload(self.filename, self.bucket, self.key,
                                         {}, [])

        # The serializer's exception should surface through the future.
        with self.assertRaises(SerializationException):
            future.result()

    def test_crt_s3_client_error_handling(self):
        # Make the mocked client raise as soon as a request is attempted.
        self.s3_crt_client.make_request.side_effect = (
            awscrt.exceptions.from_code(0))
        future = self.transfer_manager.upload(self.filename, self.bucket,
                                              self.key, {}, [])
        with self.assertRaises(awscrt.exceptions.AwsCrtError):
            future.result()