# Example 1
def test_returns_csv_row_as_dictionary():
    """Each row of a gzipped CSV read from S3 comes back as a dict keyed by header."""
    s3_resource = boto3.resource("s3", region_name=MOTO_MOCK_REGION)
    test_bucket = s3_resource.create_bucket(Bucket="test_bucket")
    test_bucket.Object("test_object.csv.gz").put(
        Body=build_gzip_csv(
            header=["id", "message", "comment"],
            rows=[
                ["123", "A message", "A comment"],
                ["321", "Another message", "Another comment"],
            ],
        )
    )

    manager = S3DataManager(s3_resource)

    rows_read = manager.read_gzip_csv("s3://test_bucket/test_object.csv.gz")

    expected_rows = [
        {"id": "123", "message": "A message", "comment": "A comment"},
        {"id": "321", "message": "Another message", "comment": "Another comment"},
    ]

    assert list(rows_read) == expected_rows
def test_will_log_writing_table_events():
    """write_parquet logs an upload-attempt event followed by a success event."""
    s3_resource = boto3.resource("s3", region_name=MOTO_MOCK_REGION)
    bucket_name = "test_bucket"
    s3_resource.create_bucket(Bucket=bucket_name)
    fruit_table = pa.table({"fruit": ["mango", "lemon"]})

    manager = S3DataManager(s3_resource)
    object_uri = f"s3://{bucket_name}/test_object.parquet"

    with mock.patch.object(logger, "info") as log_info_spy:
        manager.write_parquet(fruit_table, object_uri, SOME_METADATA)

        attempt_call = mock.call(
            f"Attempting to upload: {object_uri}",
            extra={
                "event": "ATTEMPTING_UPLOAD_PARQUET_TO_S3",
                "object_uri": object_uri,
            },
        )
        success_call = mock.call(
            f"Successfully uploaded to: {object_uri}",
            extra={
                "event": "SUCCESSFULLY_UPLOADED_PARQUET_TO_S3",
                "object_uri": object_uri,
            },
        )
        log_info_spy.assert_has_calls([attempt_call, success_call])
def test_read_spine_messages_reads_multiple_messages():
    """Messages split across several gzip CSV objects are read back in order."""
    csv_rows = [build_spine_item(guid=f"guid{i}") for i in range(10)]

    first_object = MockS3Object(
        bucket="test_bucket",
        key="data/1.csv.gz",
        contents=_spine_csv_gz(csv_rows[:4]),
    )
    second_object = MockS3Object(
        bucket="test_bucket",
        key="data/2.csv.gz",
        contents=_spine_csv_gz(csv_rows[4:]),
    )
    mock_s3_conn = MockS3(objects=[first_object, second_object])

    io = TransferClassifierIO(s3_data_manager=S3DataManager(mock_s3_conn))

    actual_messages = io.read_spine_messages(
        ["s3://test_bucket/data/1.csv.gz", "s3://test_bucket/data/2.csv.gz"]
    )
    actual_guids = [message.guid for message in actual_messages]

    assert actual_guids == [f"guid{i}" for i in range(10)]
# Example 4
def test_write_transfers_correctly_writes_all_fields():
    """Every field of a Transfer appears as a column in the written parquet table."""
    mock_s3 = MockS3()
    io = TransferClassifierIO(S3DataManager(mock_s3))

    requesting = Practice(asid="123", supplier="Supplier A", ods_code="A12")
    sending = Practice(asid="456", supplier="Supplier B", ods_code="B12")
    transfer = Transfer(
        conversation_id="1234",
        sla_duration=timedelta(days=1),
        requesting_practice=requesting,
        sending_practice=sending,
        sender_error_codes=[1, None],
        final_error_codes=[None, 32],
        intermediate_error_codes=[],
        outcome=TransferOutcome(
            status=TransferStatus.PROCESS_FAILURE,
            failure_reason=TransferFailureReason.FINAL_ERROR,
        ),
        date_requested=datetime(year=2021, month=3, day=5),
        date_completed=None,
        last_sender_message_timestamp=None,
    )

    io.write_transfers(
        transfers=[transfer],
        s3_uri="s3://a_bucket/some_data.parquet",
        metadata=_SOME_METADATA,
    )

    written_object = mock_s3.object("a_bucket", "some_data.parquet")
    actual_table = written_object.read_parquet().to_pydict()

    expected_table = {
        "conversation_id": ["1234"],
        "sla_duration": [86400],  # one day expressed in seconds
        "requesting_practice_asid": ["123"],
        "requesting_practice_ods_code": ["A12"],
        "sending_practice_asid": ["456"],
        "sending_practice_ods_code": ["B12"],
        "requesting_supplier": ["Supplier A"],
        "sending_supplier": ["Supplier B"],
        "sender_error_codes": [[1, None]],
        "final_error_codes": [[None, 32]],
        "intermediate_error_codes": [[]],
        "status": ["Process failure"],
        "failure_reason": ["Final error"],
        "date_requested": [datetime(year=2021, month=3, day=5)],
        "date_completed": [None],
        "last_sender_message_timestamp": [None],
    }

    assert actual_table == expected_table
def test_read_json_object_returns_dictionary():
    """read_json deserialises a JSON object stored in S3 into a dict."""
    s3_resource = boto3.resource("s3", region_name=MOTO_MOCK_REGION)
    test_bucket = s3_resource.create_bucket(Bucket="test_bucket")
    test_bucket.Object("test_object.json").put(Body=b'{"fruit": "mango"}')

    manager = S3DataManager(s3_resource)

    actual = manager.read_json("s3://test_bucket/test_object.json")

    assert actual == {"fruit": "mango"}
def test_writes_table_as_parquet():
    """A pyarrow table written via write_parquet round-trips through S3."""
    s3_resource = boto3.resource("s3", region_name=MOTO_MOCK_REGION)
    test_bucket = s3_resource.create_bucket(Bucket="test_bucket")

    data = {"fruit": ["mango", "lemon"]}

    manager = S3DataManager(s3_resource)
    manager.write_parquet(
        pa.table(data), "s3://test_bucket/fruits.parquet", SOME_METADATA
    )

    assert read_s3_parquet(test_bucket, "fruits.parquet") == data
def test_will_log_reading_file_event():
    """read_json emits exactly one READING_FILE_FROM_S3 log entry."""
    s3_resource = boto3.resource("s3", region_name=MOTO_MOCK_REGION)
    test_bucket = s3_resource.create_bucket(Bucket="test_bucket")
    test_bucket.Object("test_object.json").put(Body=b'{"fruit": "mango"}')

    manager = S3DataManager(s3_resource)
    object_uri = "s3://test_bucket/test_object.json"

    with mock.patch.object(logger, "info") as log_info_spy:
        manager.read_json(object_uri)
        log_info_spy.assert_called_once_with(
            f"Reading file from: {object_uri}",
            extra={"event": "READING_FILE_FROM_S3", "object_uri": object_uri},
        )
# Example 8
def test_write_transfers_writes_metadata():
    """Metadata passed to write_transfers is stored on the resulting S3 object."""
    mock_s3 = MockS3()
    io = TransferClassifierIO(S3DataManager(mock_s3))

    metadata = {a_string(): a_string()}

    io.write_transfers(
        transfers=[build_transfer()],
        s3_uri="s3://a_bucket/some_data.parquet",
        metadata=metadata,
    )

    written_object = mock_s3.object("a_bucket", "some_data.parquet")

    assert written_object.get_metadata() == metadata
    def __init__(self, config: TransferClassifierConfig):
        """Wire up the reporting window, S3 URI resolver and IO layer from config."""
        self._config = config

        self._reporting_window = ReportingWindow(
            config.start_datetime,
            config.end_datetime,
            config.conversation_cutoff,
        )

        self._uris = TransferClassifierS3UriResolver(
            gp2gp_spine_bucket=config.input_spine_data_bucket,
            transfers_bucket=config.output_transfer_data_bucket,
            ods_metadata_bucket=config.input_ods_metadata_bucket,
        )

        s3_resource = boto3.resource("s3", endpoint_url=config.s3_endpoint_url)
        self._io = TransferClassifierIO(S3DataManager(s3_resource))
def test_read_spine_messages_reads_single_message_correctly():
    """A single spine CSV row is parsed into a fully-populated Message."""
    csv_row = build_spine_item(
        time="2019-12-31T23:37:55.334+0000",
        conversation_id="abc",
        guid="message_a",
        interaction_id="an_interaction_id",
        message_sender="sender_x",
        message_recipient="recipient_y",
        message_ref="NotProvided",
        jdi_event="NONE",
        raw="",
        from_system="SupplierA",
        to_system="Unknown",
    )

    spine_object = MockS3Object(
        bucket="test_bucket",
        key="data/1.csv.gz",
        contents=_spine_csv_gz([csv_row]),
    )
    mock_s3_conn = MockS3(objects=[spine_object])

    io = TransferClassifierIO(s3_data_manager=S3DataManager(mock_s3_conn))

    actual = io.read_spine_messages(["s3://test_bucket/data/1.csv.gz"])

    expected_spine_message = Message(
        time=datetime(2019, 12, 31, 23, 37, 55, 334000, tzutc()),
        conversation_id="abc",
        guid="message_a",
        interaction_id="an_interaction_id",
        from_party_asid="sender_x",
        to_party_asid="recipient_y",
        message_ref=None,  # "NotProvided" in the CSV becomes None
        error_code=None,  # jdi_event "NONE" in the CSV becomes None
        from_system="SupplierA",
        to_system="Unknown",
    )

    assert list(actual) == [expected_spine_message]
# Example 11
def test_write_transfers_correctly_writes_multiple_rows():
    """Writing several transfers produces one parquet row per transfer, in order."""
    mock_s3 = MockS3()
    io = TransferClassifierIO(S3DataManager(mock_s3))

    expected_conversation_ids = ["a", "b", "c"]
    transfers = [
        build_transfer(conversation_id=cid) for cid in expected_conversation_ids
    ]

    io.write_transfers(
        transfers=transfers,
        s3_uri="s3://a_bucket/multi_row.parquet",
        metadata=_SOME_METADATA,
    )

    written_table = mock_s3.object("a_bucket", "multi_row.parquet").read_parquet()
    actual_conversation_ids = written_table.to_pydict().get("conversation_id")

    assert actual_conversation_ids == expected_conversation_ids
def test_will_write_metatdata():  # NOTE(review): "metatdata" typo kept — renaming would change the test id
    """write_parquet attaches the supplied metadata to the uploaded S3 object."""
    s3_resource = boto3.resource("s3", region_name=MOTO_MOCK_REGION)
    bucket_name = "test_bucket"
    test_bucket = s3_resource.create_bucket(Bucket=bucket_name)

    fruit_table = pa.table({"fruit": ["mango", "lemon"]})

    metadata = {
        "metadata_field": "metadata_field_value",
        "second_metadata_field": "metadata_field_second_value",
    }

    manager = S3DataManager(s3_resource)
    manager.write_parquet(
        table=fruit_table,
        object_uri=f"s3://{bucket_name}/test_object.parquet",
        metadata=metadata,
    )

    actual = test_bucket.Object("test_object.parquet").get()["Metadata"]

    assert actual == metadata
# Example 13
def test_will_log_reading_file_event():
    """Consuming read_gzip_csv emits exactly one READING_FILE_FROM_S3 log entry."""
    # NOTE(review): this name duplicates an earlier test in this file; if both live
    # in one module the later definition shadows the earlier one — confirm intent.
    s3_resource = boto3.resource("s3", region_name=MOTO_MOCK_REGION)
    bucket_name = "test_bucket"
    test_bucket = s3_resource.create_bucket(Bucket=bucket_name)
    test_bucket.Object("test_object.csv.gz").put(
        Body=build_gzip_csv(
            header=["id", "message", "comment"],
            rows=[
                ["123", "A message", "A comment"],
                ["321", "Another message", "Another comment"],
            ],
        )
    )

    manager = S3DataManager(s3_resource)
    object_uri = f"s3://{bucket_name}/test_object.csv.gz"

    with mock.patch.object(logger, "info") as log_info_spy:
        # materialise the result so any lazy read is actually performed
        list(manager.read_gzip_csv(object_uri))
        log_info_spy.assert_called_once_with(
            f"Reading file from: {object_uri}",
            extra={
                "event": "READING_FILE_FROM_S3",
                "object_uri": object_uri
            },
        )