def should_return_when_empty_file_checksum_matches(
    self, get_object_mock: MagicMock
) -> None:
    get_object_mock.return_value = {"Body": StreamingBody(BytesIO(), 0)}

    with patch("backend.check_files_checksums.utils.processing_assets_model_with_meta"):
        ChecksumValidator(
            any_table_name(), MockValidationResultFactory(), self.logger
        ).validate_url_multihash(any_s3_url(), EMPTY_FILE_MULTIHASH)
def test_streaming_s3_objects():
    # GH17135
    # botocore gained iteration support in 1.10.47, can now be used in read_*
    pytest.importorskip("botocore", minversion="1.10.47")
    from botocore.response import StreamingBody

    data = [b"foo,bar,baz\n1,2,3\n4,5,6\n", b"just,the,header\n"]
    for el in data:
        body = StreamingBody(BytesIO(el), content_length=len(el))
        read_csv(body)
def gen_s3_object_content(content: Union[Dict[str, Any], str]) -> StreamingBody:
    """Convert a string or dict to S3 object body.

    Args:
        content: S3 object body.

    """
    if isinstance(content, dict):
        content = json.dumps(content, default=json_serial)
    encoded_content = content.encode()
    return StreamingBody(io.BytesIO(encoded_content), len(encoded_content))
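# A minimal usage sketch, not part of the original module: wiring the helper above into a
# mocked get_object call. The MagicMock client and the bucket/key names are illustrative
# assumptions only.
def example_gen_s3_object_content_usage() -> None:
    from unittest.mock import MagicMock

    s3_client = MagicMock()
    s3_client.get_object.return_value = {"Body": gen_s3_object_content({"greeting": "hello"})}
    body = s3_client.get_object(Bucket="example-bucket", Key="example.json")["Body"].read()
    assert json.loads(body) == {"greeting": "hello"}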
def setUp(self):
    self.crawler = get_crawler(Spider)
    self.spider = self.crawler._create_spider(self.spider_name)
    self.tmpdir = tempfile.mkdtemp()
    self.request = Request('http://www.example.com', headers={'User-Agent': 'test'})
    self.response = Response(
        'http://www.example.com',
        headers={'Content-Type': 'text/html'},
        body=b'test body',
        status=202,
    )
    self.crawler.stats.open_spider(self.spider)
    self.cached_response = {
        'meta': {
            'url': self.request.url,
            'method': self.request.method,
            'status': self.response.status,
            'response_url': self.response.url,
            'timestamp': time.time(),
        },
        'response_headers': headers_dict_to_raw(self.response.headers),
        'response_body': self.response.body,
        'request_headers': headers_dict_to_raw(self.request.headers),
        'request_body': self.request.body,
    }
    self.pickled_cached_response = pickle.dumps(self.cached_response)
    self.get_object_response = {
        'Body': StreamingBody(
            io.BytesIO(self.pickled_cached_response),
            len(self.pickled_cached_response),
        )
    }
    self.gzipped_pickled_cached_response = gzip.compress(self.pickled_cached_response)
    self.get_object_response_gziped = {
        'Body': StreamingBody(
            io.BytesIO(self.gzipped_pickled_cached_response),
            len(self.gzipped_pickled_cached_response),
        )
    }
def retrieve_setup(arntask, creds, device_value, completed_value, results_json):
    """Retrieve test setup."""
    body = StreamingBody(StringIO(results_json), len(results_json))
    results_dict = {
        'ResponseMetadata': {
            'RequestId': 'CF4CAA48CC18836C',
            'HTTPHeaders': {},
        },
        'Body': body,
    }
    return arntask, creds, completed_value, device_value, results_dict
def test_read_only():
    encrypt_key_arn = 'arn:aws:kms:region:account_id:key/guid'
    bucket_name = 'bucket'
    file_name = 'conf.json'
    settings = {'setting_1': 'foo'}
    s3 = botocore.session.get_session().create_client('s3')
    conn = conf.conn(encrypt_key_arn, client=s3)
    expected_put_response = {
        'Expiration': 'string',
        'ETag': 'string',
        'ServerSideEncryption': 'AES256',
        'VersionId': 'string',
        'SSECustomerAlgorithm': 'string',
        'SSECustomerKeyMD5': 'string',
        'SSEKMSKeyId': 'string',
        'RequestCharged': 'requester',
    }
    put_parameters = {
        'Body': json.dumps(settings),
        'Bucket': bucket_name,
        'Key': file_name,
        'SSEKMSKeyId': 'arn:aws:kms:region:account_id:key/guid',
        'ServerSideEncryption': 'aws:kms',
    }
    data = BytesIO(json.dumps(settings).encode('utf-8'))
    # data.seek(0)
    expected_get_response = {
        'Body': StreamingBody(raw_stream=data, content_length=20)
    }
    get_parameters = {'Bucket': bucket_name, 'Key': file_name}
    with Stubber(s3) as stubber:
        # verify we can put data in
        stubber.add_response('put_object', expected_put_response, put_parameters)
        saved_settings = conf.save(conn, bucket_name, file_name, settings)
        assert saved_settings == settings
        # verify we can get data out
        stubber.add_response('get_object', expected_get_response, get_parameters)
        loaded_settings = conf.read_only(conn, bucket_name, file_name)
        assert loaded_settings == settings
        # verify cache has data
        assert conf.read_only_cache.currsize == 1
        # verify cache has the right data
        assert conf.read_only_cache[('bucket', 'conf.json')] == settings
def should_raise_exception_when_checksum_does_not_match(
    self, get_object_mock: MagicMock
) -> None:
    get_object_mock.return_value = {"Body": StreamingBody(BytesIO(), 0)}
    checksum = "0" * 64

    with raises(ChecksumMismatchError), patch(
        "backend.check_files_checksums.utils.processing_assets_model_with_meta"
    ):
        ChecksumValidator(
            any_table_name(), MockValidationResultFactory(), self.logger
        ).validate_url_multihash(
            any_s3_url(), f"{SHA2_256:x}{SHA256_CHECKSUM_BYTE_COUNT:x}{checksum}"
        )
def should_return_when_file_checksum_matches(self, get_object_mock: MagicMock) -> None:
    file_contents = b"x" * (CHUNK_SIZE + 1)
    get_object_mock.return_value = {
        "Body": StreamingBody(BytesIO(initial_bytes=file_contents), len(file_contents))
    }
    multihash = (
        f"{SHA2_256:x}{SHA256_CHECKSUM_BYTE_COUNT:x}"
        "c6d8e9905300876046729949cc95c2385221270d389176f7234fe7ac00c4e430"
    )

    with patch("backend.check_files_checksums.utils.processing_assets_model_with_meta"):
        ChecksumValidator(
            any_table_name(), MockValidationResultFactory(), self.logger
        ).validate_url_multihash(any_s3_url(), multihash)
def test_load_data_dump_to_dict_object(self):
    bucket = "codTest"
    key = "key"
    expected = {"test": 1}
    expected_json = json.dumps(expected)
    params = dict(Bucket=bucket, Key=key)
    file_obj = io.BytesIO(expected_json.encode('utf-8'))
    stream = StreamingBody(file_obj, content_length=len(expected_json))
    response = {'Body': stream}
    self.stubber.add_response('get_object', response, params)
    self.stubber.activate()
    out = self.helper.load_data_dump_to_dict_object(s3_bucket=bucket, file_key=key)
    self.assertEqual(expected, out)
def gen_s3_object_content(content):
    """Convert a string or dict to S3 object body.

    Args:
        content (Union[str, Dict[str, Any]]): S3 object body.

    Returns:
        botocore.response.StreamingBody: Used in the Body of a s3.get_object response.

    """
    if isinstance(content, dict):
        content = json.dumps(content, default=json_serial)
    encoded_content = content.encode()
    return StreamingBody(io.BytesIO(encoded_content), len(encoded_content))
def test_get_policy_content_s3(self):
    # setup
    expected_result = self.policy
    self.sut.policy_content = dict(s3=dict(bucket="my_bucket", key="my_key"))
    encoded_policy = json.dumps(self.policy).encode("utf-8")
    self.hub_client_mock.get_object.return_value = {
        "Body": StreamingBody(io.BytesIO(encoded_policy), len(encoded_policy))
    }

    # exercise
    actual_result = self.sut.get_unwrapped_policy()

    # verify
    self.assertEqual(expected_result, actual_result)
def _build_response(content, version, content_type):
    if content_type == "application/json":
        content_text = json.dumps(content).encode("utf-8")
    elif content_type == "application/x-yaml":
        content_text = str(yaml.dump(content)).encode("utf-8")
    else:
        content_text = content.encode("utf-8")
    return {
        "Content": StreamingBody(io.BytesIO(bytes(content_text)), len(content_text)),
        "ConfigurationVersion": version,
        "ContentType": content_type,
    }
def mock_get_object_response(raw_body: str) -> Dict[str, Any]:
    """
    Mock s3 client get_object() response object.

    See https://gist.github.com/grantcooksey/132ddc85274a50b94b821302649f9d7b

    Parameters
    ----------
    raw_body:
        Content of the 'Body' field to return
    """
    encoded_message = raw_body.encode("utf-8")
    raw_stream = StreamingBody(io.BytesIO(encoded_message), len(encoded_message))
    return {"Body": raw_stream}
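# A hedged usage sketch (assumption, not part of the original source): the helper above
# can stand in for get_object on a MagicMock client; "my-bucket" and "file.txt" are
# placeholder names.
def example_mock_get_object_response_usage() -> None:
    from unittest.mock import MagicMock

    s3_client = MagicMock()
    s3_client.get_object.return_value = mock_get_object_response("hello world")
    body = s3_client.get_object(Bucket="my-bucket", Key="file.txt")["Body"].read()
    assert body == b"hello world"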
def test_media_s3_valid_file(mocker, client):
    mock_client = mocker.patch("dataworkspace.apps.core.boto3_client.boto3.client")
    file_content = b"some file content stored on s3"
    mock_client().get_object.return_value = {
        "ContentType": "text/plain",
        "ContentLength": len(file_content),
        "Body": StreamingBody(io.BytesIO(file_content), len(file_content)),
    }
    response = client.get(reverse("uploaded-media") + "?path=uploaded-media/test.txt")
    assert response.status_code == 200
    assert list(response.streaming_content)[0] == b"some file content stored on s3"
    assert response["content-length"] == str(len(b"some file content stored on s3"))
def test_lambda_function(FunctionName, Payload):
    try:
        client = boto3.client('lambda')
        response = client.invoke(
            FunctionName=FunctionName,
            Payload=Payload
        )
    except ClientError as e:
        print("Unexpected error: {}".format(e))
        return {'Status': False}
    if response['ResponseMetadata']['HTTPStatusCode'] == 200:
        responsePayload = response['Payload']
        lambda_output = StreamingBody(
            responsePayload,
            response['ResponseMetadata']['HTTPHeaders']['content-length']
        ).read().decode('utf-8')
        return {'Status': True, 'lambda_output': lambda_output}
    else:
        return {'Status': False}
def test_preview_csv(self, mock_client):
    link = factories.SourceLinkFactory(url="s3://a/path/to/a/file.csv")
    mock_client().head_object.return_value = {"ContentType": "text/csv"}
    csv_content = b"col1,col2\nrow1-col1, row1-col2\nrow2-col1, row2-col2\ntrailing"
    mock_client().get_object.return_value = {
        "ContentType": "text/plain",
        "ContentLength": len(csv_content),
        "Body": StreamingBody(io.BytesIO(csv_content), len(csv_content)),
    }
    assert link.get_preview_data() == (
        ["col1", "col2"],
        [
            OrderedDict([("col1", "row1-col1"), ("col2", " row1-col2")]),
            OrderedDict([("col1", "row2-col1"), ("col2", " row2-col2")]),
        ],
    )
def incomplete_read_error(lambda_function, runtime_variables,
                          environment_variables, file_list, wrangler_name,
                          expected_message="Incomplete Lambda response"):
    """
    Function to trigger an incomplete read error in a given wrangler.

    Takes in valid file(s) so that the function runs until after the lambda invoke.
    The data that triggers the incomplete read is generic, so it is hardcoded as a variable.
    :param lambda_function: Lambda function to test - Type: Function
    :param runtime_variables: Runtime variables to send to function - Type: Dict
    :param environment_variables: Environment Vars to send to function - Type: Dict
    :param file_list: List of input files for the function - Type: List
    :param wrangler_name: Wrangler that is being tested, used in mocking boto3 - Type: String
    :param expected_message: Error message we are expecting - Type: String
                             (default matches current exception handling)
    :return: Test Pass/Fail
    """
    bucket_name = environment_variables["bucket_name"]
    client = create_bucket(bucket_name)
    upload_files(client, bucket_name, file_list)

    with mock.patch(wrangler_name + ".boto3.client") as mock_client:
        mock_client_object = mock.Mock()
        mock_client.return_value = mock_client_object

        test_data_bad = io.BytesIO(b'{"Bad Bytes": 999}')
        mock_client_object.invoke.return_value = {
            "Payload": StreamingBody(test_data_bad, 1)
        }

        with pytest.raises(exception_classes.LambdaFailure) as exc_info:
            if not environment_variables:
                lambda_function.lambda_handler(runtime_variables, context_object)
            else:
                with mock.patch.dict(lambda_function.os.environ, environment_variables):
                    lambda_function.lambda_handler(runtime_variables, context_object)

    assert expected_message in exc_info.value.error_message
def get_response_from_file(self, file_name):
    with open(file_name, 'r') as file:
        data = file.read()
    body_encoded = data.encode()
    body = StreamingBody(io.BytesIO(body_encoded), len(body_encoded))
    response = {'Body': body}
    return response
def _build_response(content, content_type, next_token="token5678", poll=30):
    if content_type == "application/json":
        content_text = json.dumps(content).encode("utf-8")
    elif content_type == "application/x-yaml":
        content_text = str(yaml.dump(content)).encode("utf-8")
    else:
        content_text = content.encode("utf-8")
    return {
        "Configuration": StreamingBody(io.BytesIO(bytes(content_text)), len(content_text)),
        "ContentType": content_type,
        "NextPollConfigurationToken": next_token,
        "NextPollIntervalInSeconds": poll,
    }
def test_make_boto_response_json_serializable_succeeds():
    test_string = "hello world"
    test_int = 1
    test_bool = True
    test_none = None
    test_float = 1.0
    streaming_body_bytes = bytes(test_string, "utf-8")
    test_streaming_body = StreamingBody(
        io.BytesIO(streaming_body_bytes), len(streaming_body_bytes)
    )
    test_list = [test_string, test_int, test_float]
    test_datetime = datetime(2016, 6, 23)
    test_dict = {
        "str": test_string,
        "int": test_int,
        "float": test_float,
        "bool": test_bool,
        "None": test_none,
        "datetime": test_datetime,
        "list": test_list,
        "dict": {
            "str": test_int,
            "int": test_int,
            "datetime": test_datetime,
            "StreamingBody": test_streaming_body,
        },
    }
    expected_output = {
        "str": test_string,
        "int": test_int,
        "float": test_float,
        "bool": test_bool,
        "None": test_none,
        "datetime": str(test_datetime),
        "list": test_list,
        "dict": {
            "str": test_int,
            "int": test_int,
            "datetime": str(test_datetime),
            "StreamingBody": test_string,
        },
    }
    assert make_boto_response_json_serializable(test_dict) == expected_output
def test_bad_json(appconfig_stub, mocker):
    client, stub, session = appconfig_stub
    content_text = """{"broken": "json",}""".encode("utf-8")
    _add_start_stub(stub)
    broken_response = _build_response({}, "application/json")
    broken_response["Configuration"] = StreamingBody(
        io.BytesIO(bytes(content_text)), len(content_text)
    )
    stub.add_response(
        "get_latest_configuration",
        broken_response,
        _build_request(),
    )
    mocker.patch.object(boto3, "client", return_value=client)
    a = AppConfigHelper("AppConfig-App", "AppConfig-Env", "AppConfig-Profile", 15)
    with pytest.raises(ValueError):
        a.update_config()
def test_get_bucket_item(s3_stub):
    key = 'default/cluster2/bank-app2/Deployment/apps-v1/podinfo.yaml'
    bucket_name = 'test-bucket'
    body = 'hello world'.encode()
    response_stream = StreamingBody(io.BytesIO(body), len(body))
    s3_stub.add_response(
        'get_object',
        expected_params={'Bucket': bucket_name, 'Key': key},
        service_response={'Body': response_stream},
    )
    s3_stub.activate()
    retrieve = Retrieve(client=s3_stub.client, bucket_name=bucket_name)
    result = retrieve.get_bucket_item(key)
    assert result == body
def test_download_local_file(self, mock_client):
    group = factories.DataGroupingFactory.create()
    dataset = factories.DataSetFactory.create(
        grouping=group,
        published=True,
        user_access_type='REQUIRES_AUTHENTICATION',
    )
    link = factories.SourceLinkFactory(
        id='158776ec-5c40-4c58-ba7c-a3425905ec45',
        dataset=dataset,
        link_type=SourceLink.TYPE_LOCAL,
        url='s3://sourcelink/158776ec-5c40-4c58-ba7c-a3425905ec45/test.txt',
    )
    log_count = EventLog.objects.count()
    download_count = dataset.number_of_downloads
    mock_client().get_object.return_value = {
        'ContentType': 'text/plain',
        'Body': StreamingBody(
            io.BytesIO(b'This is a test file'), len(b'This is a test file')
        ),
    }
    response = self._authenticated_get(
        reverse(
            'catalogue:dataset_source_link_download',
            kwargs={
                'group_slug': group.slug,
                'set_slug': dataset.slug,
                'source_link_id': link.id,
            },
        )
    )
    self.assertEqual(response.status_code, 200)
    self.assertEqual(list(response.streaming_content)[0], b'This is a test file')
    mock_client().get_object.assert_called_with(
        Bucket=settings.AWS_UPLOADS_BUCKET, Key=link.url
    )
    self.assertEqual(EventLog.objects.count(), log_count + 1)
    self.assertEqual(
        EventLog.objects.latest().event_type,
        EventLog.TYPE_DATASET_SOURCE_LINK_DOWNLOAD,
    )
    self.assertEqual(
        DataSet.objects.get(pk=dataset.id).number_of_downloads,
        download_count + 1,
    )
def test_bad_request(appconfig_stub_ignore_pendng, mocker):
    client, stub, session = appconfig_stub_ignore_pendng
    content_text = """hello world""".encode("utf-8")
    stub.add_response(
        "get_configuration",
        {
            "Content": StreamingBody(io.BytesIO(bytes(content_text)), len(content_text)),
            "ConfigurationVersion": "1",
            "ContentType": "image/jpeg",
        },
        _build_request("", "", ""),
    )
    mocker.patch.object(boto3, "client", return_value=client)
    with pytest.raises(botocore.exceptions.ParamValidationError):
        a = AppConfigHelper("", "", "", 15)
        a.update_config()
def load_response(self, service, operation):
    LOG.debug('load_response: %s.%s', service, operation)
    response_file = self.get_next_file_path(service, operation)
    LOG.debug('load_responses: %s', response_file)
    with open(response_file, 'r') as fp:
        response_data = json.load(fp, object_hook=deserialize)
    if 'Body' in response_data['data']:
        import cStringIO
        self.stream = cStringIO.StringIO()
        self.stream.write(response_data['data']['Body'])
        self.stream.seek(0)
        response_data['data']['ContentLength'] = \
            len(response_data['data']['Body'])
        response_data['data']['Body'] = \
            StreamingBody(self.stream, len(response_data['data']['Body']))
    return (FakeHttpResponse(response_data['status_code']),
            response_data['data'])
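# A hedged sketch, not from the original source: the body-rewrapping step above relies on
# the Python 2 cStringIO module. Under Python 3 an equivalent (assumed) helper would encode
# the stored string and wrap it with io.BytesIO before building the StreamingBody.
def rewrap_body_py3(response_data):
    import io
    from botocore.response import StreamingBody

    raw = response_data['data']['Body']
    encoded = raw.encode('utf-8') if isinstance(raw, str) else raw
    response_data['data']['ContentLength'] = len(encoded)
    response_data['data']['Body'] = StreamingBody(io.BytesIO(encoded), len(encoded))
    return response_data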
def test_render_and_upload_scheduler_plugin_template(self, mocker, cluster, template_url):
    scheduler_plugin_template = "Test"
    scheduler_plugin_template_encoded = scheduler_plugin_template.encode("utf-8")
    if template_url.startswith("s3://"):
        mocker.patch(
            "pcluster.aws.s3.S3Client.get_object",
            autospec=True,
            return_value={
                "Body": StreamingBody(
                    BytesIO(scheduler_plugin_template_encoded),
                    len(scheduler_plugin_template_encoded),
                )
            },
        )
    else:
        file_mock = mocker.MagicMock()
        file_mock.read.return_value.decode.return_value = scheduler_plugin_template
        mocker.patch(
            "pcluster.models.cluster.urlopen"
        ).return_value.__enter__.return_value = file_mock
    mocker.patch("pcluster.models.cluster.parse_config", return_value={"Test"})
    mocker.patch("pcluster.models.cluster.Cluster.source_config_text", new_callable=PropertyMock)
    cluster_config_mock = mocker.patch(
        "pcluster.models.cluster.Cluster.config", new_callable=PropertyMock
    )
    cluster_config_mock.return_value.scheduling.settings.scheduler_definition.cluster_infrastructure.cloud_formation.template = (  # noqa
        template_url
    )
    cluster_config_mock.return_value.get_instance_types_data.return_value = {
        "t2.micro": "instance_info"
    }
    upload_cfn_template_mock = mocker.patch.object(
        cluster.bucket, "upload_cfn_template", autospec=True
    )

    cluster._render_and_upload_scheduler_plugin_template()

    upload_cfn_template_mock.assert_called_with(
        scheduler_plugin_template,
        PCLUSTER_S3_ARTIFACTS_DICT["scheduler_plugin_template_name"],
        S3FileFormat.TEXT,
    )
def test_configuration_no_encrypt_key():
    bucket_name = 'bucket'
    file_name = 'conf.json'
    settings = {'setting_1': 'foo'}
    s3 = botocore.session.get_session().create_client('s3')
    conn = conf.conn(client=s3)
    expected_put_response = {
        'Expiration': 'string',
        'ETag': 'string',
        'ServerSideEncryption': 'AES256',
        'VersionId': 'string',
        'SSECustomerAlgorithm': 'string',
        'SSECustomerKeyMD5': 'string',
        'SSEKMSKeyId': 'string',
        'RequestCharged': 'requester',
    }
    put_parameters = {
        'Body': json.dumps(settings),
        'Bucket': bucket_name,
        'Key': file_name,
        'ServerSideEncryption': 'AES256',
    }
    data = BytesIO(json.dumps(settings).encode('utf-8'))
    # data.seek(0)
    expected_get_response = {
        'Body': StreamingBody(raw_stream=data, content_length=20)
    }
    get_parameters = {'Bucket': bucket_name, 'Key': file_name}
    with Stubber(s3) as stubber:
        stubber.add_response('put_object', expected_put_response, put_parameters)
        saved_settings = conf.save(conn, bucket_name, file_name, settings)
        stubber.add_response('get_object', expected_get_response, get_parameters)
        loaded_settings = conf.load(conn, bucket_name, file_name)
        assert saved_settings == settings
        assert loaded_settings == settings
def test_scheduler_plugin_settings_schema(
    mocker, scheduler_definition, grant_sudo_privileges, s3_error, https_error, yaml_load_error, failure_message
):
    scheduler_plugin_settings_schema = {}
    body_encoded = json.dumps(
        {"PluginInterfaceVersion": "1.0", "Events": {"HeadInit": {"ExecuteCommand": {"Command": "env"}}}}
    ).encode("utf8")
    if isinstance(scheduler_definition, str):
        if scheduler_definition.startswith("s3"):
            mocker.patch(
                "pcluster.aws.s3.S3Client.get_object",
                return_value={"Body": StreamingBody(BytesIO(body_encoded), len(body_encoded))},
                side_effect=s3_error,
            )
        else:
            file_mock = mocker.MagicMock()
            file_mock.read.return_value.decode.return_value = body_encoded
            mocker.patch(
                "pcluster.schemas.cluster_schema.urlopen", side_effect=https_error
            ).return_value.__enter__.return_value = file_mock
    if yaml_load_error:
        mocker.patch("pcluster.schemas.cluster_schema.yaml.safe_load", side_effect=yaml_load_error)
    if scheduler_definition:
        scheduler_plugin_settings_schema["SchedulerDefinition"] = scheduler_definition
    if grant_sudo_privileges:
        scheduler_plugin_settings_schema["GrantSudoPrivileges"] = grant_sudo_privileges
    if failure_message:
        with pytest.raises(ValidationError, match=failure_message):
            SchedulerPluginSettingsSchema().load(scheduler_plugin_settings_schema)
    else:
        scheduler_plugin_settings = SchedulerPluginSettingsSchema().load(scheduler_plugin_settings_schema)
        assert_that(scheduler_plugin_settings.scheduler_definition.plugin_interface_version).is_equal_to("1.0")
        assert_that(
            scheduler_plugin_settings.scheduler_definition.events.head_init.execute_command.command
        ).is_equal_to("env")
        if grant_sudo_privileges:
            assert_that(scheduler_plugin_settings.grant_sudo_privileges).is_equal_to(grant_sudo_privileges)
        else:
            assert_that(scheduler_plugin_settings.grant_sudo_privileges).is_equal_to(False)
def functional_setup(arntask, creds, s3_folder, search_value, device_value,
                     completed_value, results_json):
    """Functional test setup."""
    qtarntask = {'quantumTaskArn': arntask}
    body2 = StreamingBody(StringIO(results_json), len(results_json))
    results_dict = {
        'ResponseMetadata': {
            'RequestId': 'CF4CAA48CC18836C',
            'HTTPHeaders': {},
        },
        'Body': body2,
    }
    return (
        creds,
        s3_folder,
        search_value,
        device_value,
        qtarntask,
        completed_value,
        results_dict,
    )
def test_unknown_content_type(appconfig_stub, mocker):
    client, stub, session = appconfig_stub
    content_text = """hello world""".encode("utf-8")
    stub.add_response(
        "get_configuration",
        {
            "Content": StreamingBody(io.BytesIO(bytes(content_text)), len(content_text)),
            "ConfigurationVersion": "1",
            "ContentType": "image/jpeg",
        },
        _build_request(),
    )
    mocker.patch.object(boto3, "client", return_value=client)
    a = AppConfigHelper("AppConfig-App", "AppConfig-Env", "AppConfig-Profile", 15)
    a.update_config()
    assert a.config == b"hello world"
    assert a.content_type == "image/jpeg"
    assert a.raw_config == content_text