Example #1
0
def test_returns_csv_row_as_dictionary():
    """Check that read_gzip_csv yields one dict per CSV row, keyed by header."""
    s3_resource = boto3.resource("s3", region_name=MOTO_MOCK_REGION)
    test_bucket = s3_resource.create_bucket(Bucket="test_bucket")

    # Seed the bucket with a gzipped CSV holding a header and two data rows.
    gzipped_csv = build_gzip_csv(
        header=["header1", "header2"],
        rows=[["row1-col1", "row1-col2"], ["row2-col1", "row2-col2"]],
    )
    test_bucket.Object("test_object.csv.gz").put(Body=gzipped_csv)

    manager = S3DataManager(s3_resource)
    rows = manager.read_gzip_csv("s3://test_bucket/test_object.csv.gz")

    assert list(rows) == [
        {"header1": "row1-col1", "header2": "row1-col2"},
        {"header1": "row2-col1", "header2": "row2-col2"},
    ]
Example #2
0
def test_will_log_writing_file_events():
    """Check that write_json logs an attempt event followed by a success event."""
    bucket_name = "test_bucket"
    s3_resource = boto3.resource("s3", region_name=MOTO_MOCK_REGION)
    s3_resource.create_bucket(Bucket=bucket_name)
    payload = {"fruit": "mango"}

    manager = S3DataManager(s3_resource)
    object_uri = f"s3://{bucket_name}/test_object.json"

    # The two info-level log calls expected from a single upload, in order.
    attempt_call = mock.call(
        f"Attempting to upload: {object_uri}",
        extra={
            "event": "ATTEMPTING_UPLOAD_JSON_TO_S3",
            "object_uri": object_uri,
        },
    )
    success_call = mock.call(
        f"Successfully uploaded to: {object_uri}",
        extra={
            "event": "UPLOADED_JSON_TO_S3",
            "object_uri": object_uri,
        },
    )

    with mock.patch.object(logger, "info") as mock_log_info:
        manager.write_json(object_uri, payload, metadata=SOME_METADATA)
        mock_log_info.assert_has_calls([attempt_call, success_call])
Example #3
0
def test_writes_correct_content_type():
    """Check that objects stored by write_json have the JSON content type."""
    s3_resource = boto3.resource("s3", region_name=MOTO_MOCK_REGION)
    test_bucket = s3_resource.create_bucket(Bucket="test_bucket")
    payload = {"fruit": "mango"}
    manager = S3DataManager(s3_resource)

    manager.write_json(
        "s3://test_bucket/test_object.json",
        payload,
        metadata=SOME_METADATA,
    )

    # Read the stored object back and inspect its response metadata.
    stored_object = test_bucket.Object("test_object.json").get()

    assert stored_object["ContentType"] == "application/json"
Example #4
0
def test_writes_dictionary_with_timestamp():
    """Check that write_json stores a datetime value as an ISO-format string."""
    s3_resource = boto3.resource("s3", region_name=MOTO_MOCK_REGION)
    test_bucket = s3_resource.create_bucket(Bucket="test_bucket")
    manager = S3DataManager(s3_resource)
    payload = {"timestamp": datetime(2020, 7, 23)}

    manager.write_json(
        "s3://test_bucket/test_object.json",
        payload,
        metadata=SOME_METADATA,
    )

    # Compare the raw stored bytes against the expected serialised form.
    stored_object = test_bucket.Object("test_object.json").get()
    stored_bytes = stored_object["Body"].read()

    assert stored_bytes == b'{"timestamp": "2020-07-23T00:00:00"}'
Example #5
0
def test_writes_dictionary():
    """Check that write_json stores a plain dictionary as JSON bytes."""
    s3_resource = boto3.resource("s3", region_name=MOTO_MOCK_REGION)
    test_bucket = s3_resource.create_bucket(Bucket="test_bucket")
    manager = S3DataManager(s3_resource)
    payload = {"fruit": "mango"}

    manager.write_json(
        "s3://test_bucket/test_object.json",
        payload,
        metadata=SOME_METADATA,
    )

    # Compare the raw stored bytes against the expected serialised form.
    stored_object = test_bucket.Object("test_object.json").get()
    stored_bytes = stored_object["Body"].read()

    assert stored_bytes == b'{"fruit": "mango"}'
Example #6
0
def test_write_json_will_write_metadata_when_supplied():
    """Check that metadata passed to write_json is stored on the S3 object."""
    bucket_name = "test_bucket"
    s3_resource = boto3.resource("s3", region_name=MOTO_MOCK_REGION)
    test_bucket = s3_resource.create_bucket(Bucket=bucket_name)
    payload = {"fruit": "mango"}
    manager = S3DataManager(s3_resource)

    metadata = {
        "metadata_field": "metadata_field_value",
        "second_metadata_field": "metadata_field_second_value",
    }

    manager.write_json(
        object_uri=f"s3://{bucket_name}/test_object.json",
        data=payload,
        metadata=metadata,
    )

    # The stored object's metadata should match what was supplied.
    stored_object = test_bucket.Object("test_object.json").get()

    assert stored_object["Metadata"] == metadata
    def __init__(self, config):
        """Build the S3 access objects, URI resolver and metadata service.

        Args:
            config: Settings object. This method reads ``s3_endpoint_url``,
                ``mapping_bucket``, ``output_bucket``, ``search_url``,
                ``build_tag`` and ``date_anchor`` (on which ``isoformat()``
                is called).
        """
        s3_resource = boto3.resource(
            "s3",
            endpoint_url=config.s3_endpoint_url,
        )
        self._s3_client = s3_resource
        self._s3_manager = S3DataManager(s3_resource)

        self._config = config
        self._uris = OdsDownloaderS3UriResolver(
            asid_lookup_bucket=config.mapping_bucket,
            ods_metadata_bucket=config.output_bucket,
        )

        # Collaborators are created innermost-first: portal client, then the
        # data fetcher wrapping it, then the observability probe.
        self._metadata_service = Gp2gpOrganisationMetadataService(
            data_fetcher=OdsPortalDataFetcher(
                ods_client=OdsPortalClient(search_url=config.search_url),
            ),
            observability_probe=MetadataServiceObservabilityProbe(),
        )

        # Metadata key/value pairs kept for later use with written output.
        self._output_metadata = {
            "date-anchor": config.date_anchor.isoformat(),
            "build-tag": config.build_tag,
        }
Example #8
0
def test_will_log_reading_file_event():
    """Check that reading a gzipped CSV logs exactly one "reading file" event."""
    bucket_name = "test_bucket"
    s3_resource = boto3.resource("s3", region_name=MOTO_MOCK_REGION)
    test_bucket = s3_resource.create_bucket(Bucket=bucket_name)

    # Seed the bucket with a gzipped CSV holding a header and two data rows.
    gzipped_csv = build_gzip_csv(
        header=["header1", "header2"],
        rows=[["row1-col1", "row1-col2"], ["row2-col1", "row2-col2"]],
    )
    test_bucket.Object("test_object.csv.gz").put(Body=gzipped_csv)

    manager = S3DataManager(s3_resource)
    object_uri = f"s3://{bucket_name}/test_object.csv.gz"

    expected_extra = {
        "event": "READING_FILE_FROM_S3",
        "object_uri": object_uri,
    }

    with mock.patch.object(logger, "info") as mock_log_info:
        # Fully consume the result while the logger is patched.
        # NOTE(review): presumably read_gzip_csv is lazy and only logs once
        # iterated - confirm against S3DataManager.
        list(manager.read_gzip_csv(object_uri))
        mock_log_info.assert_called_once_with(
            f"Reading file from: {object_uri}",
            extra=expected_extra,
        )