def test_returns_csv_row_as_dictionary():
    """read_gzip_csv should yield one dict per CSV row, keyed by the header."""
    s3_resource = boto3.resource("s3", region_name=MOTO_MOCK_REGION)
    test_bucket = s3_resource.create_bucket(Bucket="test_bucket")
    test_bucket.Object("test_object.csv.gz").put(
        Body=build_gzip_csv(
            header=["header1", "header2"],
            rows=[["row1-col1", "row1-col2"], ["row2-col1", "row2-col2"]],
        )
    )
    manager = S3DataManager(s3_resource)

    rows = manager.read_gzip_csv("s3://test_bucket/test_object.csv.gz")

    expected_rows = [
        {"header1": "row1-col1", "header2": "row1-col2"},
        {"header1": "row2-col1", "header2": "row2-col2"},
    ]
    assert list(rows) == expected_rows
def test_will_log_writing_file_events():
    """write_json should log an 'attempting' event followed by a 'success' event."""
    s3_resource = boto3.resource("s3", region_name=MOTO_MOCK_REGION)
    bucket_name = "test_bucket"
    s3_resource.create_bucket(Bucket=bucket_name)
    payload = {"fruit": "mango"}
    manager = S3DataManager(s3_resource)
    object_uri = f"s3://{bucket_name}/test_object.json"

    with mock.patch.object(logger, "info") as mock_log_info:
        manager.write_json(object_uri, payload, metadata=SOME_METADATA)

    expected_calls = [
        mock.call(
            f"Attempting to upload: {object_uri}",
            extra={
                "event": "ATTEMPTING_UPLOAD_JSON_TO_S3",
                "object_uri": object_uri,
            },
        ),
        mock.call(
            f"Successfully uploaded to: {object_uri}",
            extra={
                "event": "UPLOADED_JSON_TO_S3",
                "object_uri": object_uri,
            },
        ),
    ]
    mock_log_info.assert_has_calls(expected_calls)
def test_writes_correct_content_type():
    """Objects written by write_json should carry the JSON content type."""
    s3_resource = boto3.resource("s3", region_name=MOTO_MOCK_REGION)
    test_bucket = s3_resource.create_bucket(Bucket="test_bucket")
    manager = S3DataManager(s3_resource)

    manager.write_json(
        "s3://test_bucket/test_object.json",
        {"fruit": "mango"},
        metadata=SOME_METADATA,
    )

    stored_content_type = test_bucket.Object("test_object.json").get()["ContentType"]
    assert stored_content_type == "application/json"
def test_writes_dictionary_with_timestamp():
    """write_json should serialise datetime values as ISO-8601 strings."""
    s3_resource = boto3.resource("s3", region_name=MOTO_MOCK_REGION)
    test_bucket = s3_resource.create_bucket(Bucket="test_bucket")
    manager = S3DataManager(s3_resource)
    payload = {"timestamp": datetime(2020, 7, 23)}

    manager.write_json(
        "s3://test_bucket/test_object.json", payload, metadata=SOME_METADATA
    )

    stored_body = test_bucket.Object("test_object.json").get()["Body"].read()
    assert stored_body == b'{"timestamp": "2020-07-23T00:00:00"}'
def test_writes_dictionary():
    """write_json should store the dictionary as compact JSON bytes."""
    s3_resource = boto3.resource("s3", region_name=MOTO_MOCK_REGION)
    test_bucket = s3_resource.create_bucket(Bucket="test_bucket")
    manager = S3DataManager(s3_resource)

    manager.write_json(
        "s3://test_bucket/test_object.json",
        {"fruit": "mango"},
        metadata=SOME_METADATA,
    )

    stored_body = test_bucket.Object("test_object.json").get()["Body"].read()
    assert stored_body == b'{"fruit": "mango"}'
def test_write_json_will_write_metadata_when_supplied():
    """Metadata passed to write_json should be stored verbatim on the S3 object."""
    s3_resource = boto3.resource("s3", region_name=MOTO_MOCK_REGION)
    bucket_name = "test_bucket"
    test_bucket = s3_resource.create_bucket(Bucket=bucket_name)
    manager = S3DataManager(s3_resource)
    supplied_metadata = {
        "metadata_field": "metadata_field_value",
        "second_metadata_field": "metadata_field_second_value",
    }

    manager.write_json(
        object_uri=f"s3://{bucket_name}/test_object.json",
        data={"fruit": "mango"},
        metadata=supplied_metadata,
    )

    stored_metadata = test_bucket.Object("test_object.json").get()["Metadata"]
    assert stored_metadata == supplied_metadata
def __init__(self, config):
    """Wire up S3 access, URI resolution and the ODS metadata service from config.

    :param config: downloader configuration providing S3 endpoint/bucket names,
        the ODS portal search URL, the date anchor and the build tag.
    """
    self._config = config
    self._s3_client = boto3.resource("s3", endpoint_url=config.s3_endpoint_url)
    self._s3_manager = S3DataManager(self._s3_client)
    self._uris = OdsDownloaderS3UriResolver(
        asid_lookup_bucket=config.mapping_bucket,
        ods_metadata_bucket=config.output_bucket,
    )
    # Metadata service composed from the ODS portal client plus an
    # observability probe for logging/metrics.
    portal_client = OdsPortalClient(search_url=config.search_url)
    fetcher = OdsPortalDataFetcher(ods_client=portal_client)
    self._metadata_service = Gp2gpOrganisationMetadataService(
        data_fetcher=fetcher,
        observability_probe=MetadataServiceObservabilityProbe(),
    )
    # Attached to every output object so runs can be traced back to a build.
    self._output_metadata = {
        "date-anchor": config.date_anchor.isoformat(),
        "build-tag": config.build_tag,
    }
def test_will_log_reading_file_event():
    """Consuming read_gzip_csv should log exactly one 'reading file' event."""
    s3_resource = boto3.resource("s3", region_name=MOTO_MOCK_REGION)
    bucket_name = "test_bucket"
    test_bucket = s3_resource.create_bucket(Bucket=bucket_name)
    test_bucket.Object("test_object.csv.gz").put(
        Body=build_gzip_csv(
            header=["header1", "header2"],
            rows=[["row1-col1", "row1-col2"], ["row2-col1", "row2-col2"]],
        )
    )
    manager = S3DataManager(s3_resource)
    object_uri = f"s3://{bucket_name}/test_object.csv.gz"

    with mock.patch.object(logger, "info") as mock_log_info:
        # read_gzip_csv is lazy — force iteration so the log call happens.
        list(manager.read_gzip_csv(object_uri))

    mock_log_info.assert_called_once_with(
        f"Reading file from: {object_uri}",
        extra={
            "event": "READING_FILE_FROM_S3",
            "object_uri": object_uri,
        },
    )