def test_convert_one_organisation_metadata_file_to_organisation_lookup():
    s3_manager = Mock()

    transfer_classifier_io = TransferClassifierIO(s3_data_manager=s3_manager)

    s3_manager.read_json.return_value = _ORGANISATION_METADATA_DICT_FIRST_MONTH

    actual_metadatas = transfer_classifier_io.read_ods_metadata_files(
        s3_uris=[_S3_URI])
    actual_organisation_lookup = actual_metadatas.get_lookup(
        _DATE_ANCHOR_YEAR_MONTH)

    expected_first_month_practices = [
        PracticeDetails(ods_code="ABC", name="A Practice", asids=["123"])
    ]
    expected_first_month_ccgs = [
        CcgDetails(ods_code="XYZ", name="A CCG", practices=["ABC"])
    ]
    expected_organisation_lookup = OrganisationLookup(
        expected_first_month_practices, expected_first_month_ccgs)

    assert actual_organisation_lookup.practice_ods_code_from_asid(
        "123") == expected_organisation_lookup.practice_ods_code_from_asid(
            "123")

    s3_manager.read_json.assert_called_once_with(_S3_URI)
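
# The fixture is not shown above. A minimal _ORGANISATION_METADATA_DICT_FIRST_MONTH
# consistent with the assertions might look like the sketch below; this is a
# hypothetical reconstruction, and the real constant (plus the exact key names)
# lives alongside the tests.
_ORGANISATION_METADATA_DICT_FIRST_MONTH = {
    "practices": [{"ods_code": "ABC", "name": "A Practice", "asids": ["123"]}],
    "ccgs": [{"ods_code": "XYZ", "name": "A CCG", "practices": ["ABC"]}],
}
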
def test_read_spine_messages_reads_multiple_messages():
    csv_rows = [build_spine_item(guid=f"guid{i}") for i in range(10)]

    mock_s3_conn = MockS3(
        objects=[
            MockS3Object(
                bucket="test_bucket",
                key="data/1.csv.gz",
                contents=_spine_csv_gz(csv_rows[:4]),
            ),
            MockS3Object(
                bucket="test_bucket",
                key="data/2.csv.gz",
                contents=_spine_csv_gz(csv_rows[4:]),
            ),
        ]
    )

    io = TransferClassifierIO(s3_data_manager=S3DataManager(mock_s3_conn))

    expected_guids = [f"guid{i}" for i in range(10)]

    actual_messages = io.read_spine_messages(
        ["s3://test_bucket/data/1.csv.gz", "s3://test_bucket/data/2.csv.gz"]
    )

    actual_guids = [message.guid for message in actual_messages]

    assert actual_guids == expected_guids
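
# _spine_csv_gz is not shown above. A plausible sketch, assuming build_spine_item
# returns a dict mapping CSV column names to values, is a helper that renders the
# rows as CSV and gzip-compresses the result:
import csv
import gzip
import io


def _spine_csv_gz(rows):
    buffer = io.StringIO()
    writer = csv.DictWriter(buffer, fieldnames=list(rows[0].keys()))
    writer.writeheader()
    writer.writerows(rows)
    return gzip.compress(buffer.getvalue().encode("utf-8"))
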
def test_write_transfers_correctly_writes_all_fields():
    mock_s3 = MockS3()
    s3_data_manager = S3DataManager(mock_s3)
    io = TransferClassifierIO(s3_data_manager)

    transfer = Transfer(
        conversation_id="1234",
        sla_duration=timedelta(days=1),
        requesting_practice=Practice(asid="123",
                                     supplier="Supplier A",
                                     ods_code="A12"),
        sending_practice=Practice(asid="456",
                                  supplier="Supplier B",
                                  ods_code="B12"),
        sender_error_codes=[1, None],
        final_error_codes=[None, 32],
        intermediate_error_codes=[],
        outcome=TransferOutcome(
            status=TransferStatus.PROCESS_FAILURE,
            failure_reason=TransferFailureReason.FINAL_ERROR),
        date_requested=datetime(year=2021, month=3, day=5),
        date_completed=None,
        last_sender_message_timestamp=None,
    )

    io.write_transfers(transfers=[transfer],
                       s3_uri="s3://a_bucket/some_data.parquet",
                       metadata=_SOME_METADATA)

    expected_table = {
        "conversation_id": ["1234"],
        "sla_duration": [86400],
        "requesting_practice_asid": ["123"],
        "requesting_practice_ods_code": ["A12"],
        "sending_practice_asid": ["456"],
        "sending_practice_ods_code": ["B12"],
        "requesting_supplier": ["Supplier A"],
        "sending_supplier": ["Supplier B"],
        "sender_error_codes": [[1, None]],
        "final_error_codes": [[None, 32]],
        "intermediate_error_codes": [[]],
        "status": ["Process failure"],
        "failure_reason": ["Final error"],
        "date_requested": [datetime(year=2021, month=3, day=5)],
        "date_completed": [None],
        "last_sender_message_timestamp": [None],
    }

    actual_table = mock_s3.object(
        "a_bucket", "some_data.parquet").read_parquet().to_pydict()

    assert actual_table == expected_table
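
# The expected table shows how the Transfer model is flattened for parquet:
# sla_duration becomes whole seconds (timedelta(days=1) -> 86400), each nested
# Practice contributes asid, ods_code and supplier columns, and the outcome
# enums are written as display strings. As read_parquet().to_pydict() suggests,
# the parquet layer is presumably pyarrow; a hedged sketch of the sla_duration
# conversion alone (helper name hypothetical):
import pyarrow as pa


def _sla_duration_seconds_column(transfers):
    return pa.array(
        [
            int(t.sla_duration.total_seconds()) if t.sla_duration is not None else None
            for t in transfers
        ],
        type=pa.uint64(),
    )
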
def test_convert_two_organisation_metadata_files_to_organisation_lookup_mapping():
    s3_manager = Mock()

    transfer_classifier_io = TransferClassifierIO(s3_data_manager=s3_manager)

    s3_manager.read_json.side_effect = [
        _ORGANISATION_METADATA_DICT_FIRST_MONTH,
        _ORGANISATION_METADATA_DICT_ADDITIONAL_MONTH,
    ]

    actual_metadatas = transfer_classifier_io.read_ods_metadata_files(
        s3_uris=[_S3_URI, _S3_URI_ADDITIONAL_MONTH])
    expected_first_month_practices = [
        PracticeDetails(ods_code="ABC", name="A Practice", asids=["123"])
    ]
    expected_first_month_ccgs = [
        CcgDetails(ods_code="XYZ", name="A CCG", practices=["ABC"])
    ]
    expected_first_organisation_lookup = OrganisationLookup(
        expected_first_month_practices, expected_first_month_ccgs)

    actual_first_organisation_lookup = actual_metadatas.get_lookup(
        _DATE_ANCHOR_YEAR_MONTH)

    assert actual_first_organisation_lookup.practice_ods_code_from_asid(
        "123"
    ) == expected_first_organisation_lookup.practice_ods_code_from_asid("123")

    expected_second_month_practices = [
        PracticeDetails(ods_code="A12345",
                        name="GP Practice",
                        asids=["123456789123"])
    ]
    expected_second_month_ccgs = [
        CcgDetails(ods_code="22A", name="CCG", practices=["A12345"])
    ]
    expected_second_organisation_lookup = OrganisationLookup(
        expected_second_month_practices, expected_second_month_ccgs)

    actual_second_organisation_lookup = actual_metadatas.get_lookup(
        _DATE_ANCHOR_ADDITIONAL_YEAR_MONTH)

    assert actual_second_organisation_lookup.practice_ods_code_from_asid(
        "123456789123"
    ) == expected_second_organisation_lookup.practice_ods_code_from_asid(
        "123456789123")

    expected_s3_manager_read_json_calls = [
        call(_S3_URI), call(_S3_URI_ADDITIONAL_MONTH)
    ]
    s3_manager.read_json.assert_has_calls(expected_s3_manager_read_json_calls)
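
# read_ods_metadata_files returns one organisation lookup per month. A minimal
# sketch of the container that behaviour implies, assuming lookups are keyed by
# a (year, month) tuple as the get_lookup calls above and below suggest:
from typing import Dict, Tuple


class OrganisationMetadataMonthly:
    def __init__(self, lookups: Dict[Tuple[int, int], OrganisationLookup]):
        self._lookups = lookups

    def get_lookup(self, year_month: Tuple[int, int]) -> OrganisationLookup:
        # Raises KeyError if metadata for the requested month was never read
        return self._lookups[year_month]
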
def test_write_transfers_writes_metadata():
    mock_s3 = MockS3()
    s3_data_manager = S3DataManager(mock_s3)

    metadata = {a_string(): a_string()}

    io = TransferClassifierIO(s3_data_manager)

    io.write_transfers(transfers=[build_transfer()],
                       s3_uri="s3://a_bucket/some_data.parquet",
                       metadata=metadata)

    actual_meta_data = mock_s3.object("a_bucket",
                                      "some_data.parquet").get_metadata()

    assert actual_meta_data == metadata
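
# MockS3 and MockS3Object are in-memory test doubles rather than real AWS
# clients. A rough sketch of the metadata behaviour exercised here, with shapes
# inferred from usage (the real doubles also capture written parquet bodies):
class MockS3Object:
    def __init__(self, bucket, key, contents=b"", metadata=None):
        self.bucket = bucket
        self.key = key
        self.contents = contents
        self._metadata = metadata or {}

    def get_metadata(self):
        # Returns the string-to-string metadata stored alongside the object
        return self._metadata
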
def test_read_spine_messages_reads_single_message_correctly():
    csv_row = build_spine_item(
        time="2019-12-31T23:37:55.334+0000",
        conversation_id="abc",
        guid="message_a",
        interaction_id="an_interaction_id",
        message_sender="sender_x",
        message_recipient="recipient_y",
        message_ref="NotProvided",
        jdi_event="NONE",
        raw="",
        from_system="SupplierA",
        to_system="Unknown",
    )

    mock_s3_conn = MockS3(
        objects=[
            MockS3Object(
                bucket="test_bucket", key="data/1.csv.gz", contents=_spine_csv_gz([csv_row])
            )
        ]
    )

    io = TransferClassifierIO(s3_data_manager=S3DataManager(mock_s3_conn))

    expected_spine_message = Message(
        time=datetime(2019, 12, 31, 23, 37, 55, 334000, tzutc()),
        conversation_id="abc",
        guid="message_a",
        interaction_id="an_interaction_id",
        from_party_asid="sender_x",
        to_party_asid="recipient_y",
        message_ref=None,
        error_code=None,
        from_system="SupplierA",
        to_system="Unknown",
    )

    actual = io.read_spine_messages(["s3://test_bucket/data/1.csv.gz"])

    assert list(actual) == [expected_spine_message]
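
# Parsing normalises several sentinel values from the raw CSV: a message_ref of
# "NotProvided" becomes None, a jdi_event of "NONE" becomes a None error code,
# and the +0000 offset in the time column is read as a tzutc() timezone. A
# hedged sketch of that normalisation (helper names hypothetical):
def _parse_message_ref(message_ref: str):
    return None if message_ref == "NotProvided" else message_ref


def _parse_error_code(jdi_event: str):
    return None if jdi_event == "NONE" else int(jdi_event)
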
def test_write_transfers_correctly_writes_multiple_rows():
    mock_s3 = MockS3()
    s3_data_manager = S3DataManager(mock_s3)
    io = TransferClassifierIO(s3_data_manager)

    transfers = [
        build_transfer(conversation_id="a"),
        build_transfer(conversation_id="b"),
        build_transfer(conversation_id="c"),
    ]

    io.write_transfers(transfers=transfers,
                       s3_uri="s3://a_bucket/multi_row.parquet",
                       metadata=_SOME_METADATA)

    expected_conversation_ids = ["a", "b", "c"]

    actual_conversation_ids = (mock_s3.object(
        "a_bucket",
        "multi_row.parquet").read_parquet().to_pydict().get("conversation_id"))

    assert actual_conversation_ids == expected_conversation_ids
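
# build_transfer is a test-data builder. A plausible sketch, assuming it fills
# every Transfer field with a neutral default and lets callers override any of
# them via keyword arguments:
def build_transfer(**overrides):
    defaults = dict(
        conversation_id=a_string(),
        sla_duration=timedelta(seconds=1),
        requesting_practice=Practice(asid="123", supplier="Supplier A", ods_code="A12"),
        sending_practice=Practice(asid="456", supplier="Supplier B", ods_code="B12"),
        sender_error_codes=[],
        final_error_codes=[],
        intermediate_error_codes=[],
        outcome=TransferOutcome(
            status=TransferStatus.PROCESS_FAILURE,
            failure_reason=TransferFailureReason.FINAL_ERROR,
        ),
        date_requested=datetime(year=2021, month=1, day=1),
        date_completed=None,
        last_sender_message_timestamp=None,
    )
    defaults.update(overrides)
    return Transfer(**defaults)
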
class TransferClassifier:
    def __init__(self, config: TransferClassifierConfig):
        s3 = boto3.resource("s3", endpoint_url=config.s3_endpoint_url)
        s3_manager = S3DataManager(s3)

        self._reporting_window = ReportingWindow(config.start_datetime,
                                                 config.end_datetime,
                                                 config.conversation_cutoff)

        self._config = config

        self._uris = TransferClassifierS3UriResolver(
            gp2gp_spine_bucket=config.input_spine_data_bucket,
            transfers_bucket=config.output_transfer_data_bucket,
            ods_metadata_bucket=config.input_ods_metadata_bucket,
        )

        self._io = TransferClassifierIO(s3_manager)

    def _read_spine_messages(self) -> Iterator[Message]:
        input_paths = self._uris.spine_messages(self._reporting_window)
        return self._io.read_spine_messages(input_paths)

    def _read_ods_metadata(self) -> OrganisationMetadataMonthly:
        input_paths = self._uris.ods_metadata(self._reporting_window)
        return self._io.read_ods_metadata_files(input_paths)

    def _write_transfers(
        self,
        transfers: Iterator[Transfer],
        daily_start_datetime: datetime,
        cutoff: timedelta,
        metadata: Dict[str, str],
    ):
        output_path = self._uris.gp2gp_transfers(
            daily_start_datetime=daily_start_datetime, cutoff=cutoff)
        self._io.write_transfers(transfers, output_path, metadata)

    def _construct_json_log_date_range_info(self) -> dict:
        reporting_window_dates = self._reporting_window.get_dates()
        reporting_window_overflow_dates = self._reporting_window.get_overflow_dates()
        return {
            "config_start_datetime": convert_to_datetime_string(self._config.start_datetime),
            "config_end_datetime": convert_to_datetime_string(self._config.end_datetime),
            "conversation_cutoff": str(self._config.conversation_cutoff),
            "reporting_window_dates": convert_to_datetimes_string(reporting_window_dates),
            "reporting_window_overflow_dates": convert_to_datetimes_string(
                reporting_window_overflow_dates
            ),
        }

    def run(self):
        transfer_observability_probe = TransferObservabilityProbe(
            logger=module_logger)

        log_date_range_info = self._construct_json_log_date_range_info()
        logger.info(
            "Attempting to classify conversations for a date range",
            extra={
                "event": "ATTEMPTING_CLASSIFY_CONVERSATIONS_FOR_A_DATE_RANGE",
                **log_date_range_info,
            },
        )

        spine_messages = self._read_spine_messages()
        ods_metadata_monthly = self._read_ods_metadata()

        transfer_service = TransferService(
            message_stream=spine_messages,
            cutoff=self._config.conversation_cutoff,
            observability_probe=transfer_observability_probe,
        )

        conversations = transfer_service.group_into_conversations()
        gp2gp_conversations = transfer_service.parse_conversations_into_gp2gp_conversations(
            conversations)

        for daily_start_datetime in self._reporting_window.get_dates():
            metadata = {
                "cutoff-days": str(self._config.conversation_cutoff.days),
                "build-tag": self._config.build_tag,
                "start-datetime": convert_to_datetime_string(daily_start_datetime),
                "end-datetime": convert_to_datetime_string(
                    daily_start_datetime + timedelta(days=1)
                ),
                "ods-metadata-month": f"{daily_start_datetime.year}-{daily_start_datetime.month}",
            }

            conversations_started_in_reporting_window = filter_conversations_by_day(
                gp2gp_conversations, daily_start_datetime)
            organisation_lookup = ods_metadata_monthly.get_lookup(
                (daily_start_datetime.year, daily_start_datetime.month))
            transfers = transfer_service.convert_to_transfers(
                conversations_started_in_reporting_window,
                organisation_lookup=organisation_lookup)
            self._write_transfers(
                transfers=transfers,
                daily_start_datetime=daily_start_datetime,
                cutoff=self._config.conversation_cutoff,
                metadata=metadata,
            )

        logger.info(
            "Successfully classified conversations for a date range",
            extra={
                "event": "CLASSIFIED_CONVERSATIONS_FOR_A_DATE_RANGE",
                **log_date_range_info,
            },
        )
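
# A hedged end-to-end usage sketch. TransferClassifierConfig's construction is
# not shown in these examples; the field names below are taken from the
# attribute accesses in the class above, and the values are illustrative only:
config = TransferClassifierConfig(
    start_datetime=datetime(2021, 3, 5, tzinfo=tzutc()),
    end_datetime=datetime(2021, 3, 6, tzinfo=tzutc()),
    conversation_cutoff=timedelta(days=14),
    s3_endpoint_url=None,
    input_spine_data_bucket="spine-bucket",
    output_transfer_data_bucket="transfers-bucket",
    input_ods_metadata_bucket="ods-metadata-bucket",
    build_tag="abc123",
)
TransferClassifier(config).run()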