Beispiel #1
0
def test_returns_dates_list_when_today_time_is_bst():
    """A default window with a zero cutoff yields a single UTC-midnight date.

    NOTE(review): the expected date is hard-coded (2021-04-30), so this test
    presumably relies on the clock being frozen outside this function -- confirm.
    """
    window = ReportingWindow(
        start_datetime=None,
        end_datetime=None,
        conversation_cutoff=timedelta(days=0),
    )
    midnight_utc = datetime(year=2021, month=4, day=30, hour=0, minute=0, second=0, tzinfo=UTC)

    assert window.get_dates() == [midnight_utc]
Beispiel #2
0
def test_returns_list_of_overflow_dates_depending_on_cutoff_when_start_and_end_datetime_are_none(
    cutoff_days, expected_overflow_dates
):
    """Overflow dates of a window built without start/end match the expected list.

    Both arguments are supplied from outside this function (presumably by
    parametrisation that is not visible here).
    """
    cutoff = timedelta(days=cutoff_days)
    window = ReportingWindow(start_datetime=None, end_datetime=None, conversation_cutoff=cutoff)

    assert window.get_overflow_dates() == expected_overflow_dates
Beispiel #3
0
def test_returns_dates_list_from_yesterday_midnight_minus_cutoff_when_datetimes_are_none(
    cutoff_days, expected_dates
):
    """Dates of a window built without start/end match the expected list for the cutoff.

    Both arguments are supplied from outside this function (presumably by
    parametrisation that is not visible here).
    """
    cutoff = timedelta(days=cutoff_days)
    window = ReportingWindow(start_datetime=None, end_datetime=None, conversation_cutoff=cutoff)

    assert window.get_dates() == expected_dates
Beispiel #4
0
def test_returns_empty_list_given_cutoff_of_0():
    """A zero-day conversation cutoff produces no overflow dates."""
    window_start = datetime(year=2022, month=1, day=12, hour=0, minute=0, second=0, tzinfo=UTC)
    window_end = datetime(year=2022, month=1, day=13, hour=0, minute=0, second=0, tzinfo=UTC)
    zero_cutoff = timedelta(days=0)

    window = ReportingWindow(window_start, window_end, conversation_cutoff=zero_cutoff)

    no_overflow: List = []
    assert window.get_overflow_dates() == no_overflow
Beispiel #5
0
 def spine_messages(self, reporting_window: ReportingWindow) -> List[str]:
     """Return one spine-message S3 path per date of the reporting window.

     The window's own dates come first, followed by its overflow dates; the
     path components are the year, month and day passed through
     ``add_leading_zero`` plus the per-date message filename.
     """

     def path_for(date):
         # One path: bucket / version / year / month / day / filename.
         return self._s3_path(
             self._gp2gp_spine_bucket,
             self._SPINE_MESSAGES_VERSION,
             f"{add_leading_zero(date.year)}",
             f"{add_leading_zero(date.month)}",
             f"{add_leading_zero(date.day)}",
             self._spine_message_filename(date),
         )

     window_dates = reporting_window.get_dates()
     overflow_dates = reporting_window.get_overflow_dates()
     return [path_for(date) for date in window_dates + overflow_dates]
Beispiel #6
0
def test_get_overflow_dates_returns_list_of_datetimes_within_cutoff_period():
    """A 3-day cutoff yields three consecutive overflow days starting at the window end."""
    window_start = datetime(year=2019, month=12, day=30, hour=0, minute=0, second=0, tzinfo=UTC)
    window_end = datetime(year=2019, month=12, day=31, hour=0, minute=0, second=0, tzinfo=UTC)

    window = ReportingWindow(window_start, window_end, timedelta(days=3))

    # The overflow crosses the year boundary: 31 Dec 2019, then 1 and 2 Jan 2020.
    overflow_days = [
        datetime(2019, 12, 31, tzinfo=UTC),
        datetime(2020, 1, 1, tzinfo=UTC),
        datetime(2020, 1, 2, tzinfo=UTC),
    ]

    assert window.get_overflow_dates() == overflow_days
    def __init__(self, config: TransferClassifierConfig):
        """Build the reporting window, S3 URI resolver and S3-backed IO helper from ``config``."""
        # The S3 resource is created first, pointed at the configured endpoint.
        s3_manager = S3DataManager(boto3.resource("s3", endpoint_url=config.s3_endpoint_url))

        self._reporting_window = ReportingWindow(
            start_datetime=config.start_datetime,
            end_datetime=config.end_datetime,
            conversation_cutoff=config.conversation_cutoff,
        )
        self._config = config
        self._uris = TransferClassifierS3UriResolver(
            gp2gp_spine_bucket=config.input_spine_data_bucket,
            transfers_bucket=config.output_transfer_data_bucket,
            ods_metadata_bucket=config.input_ods_metadata_bucket,
        )
        self._io = TransferClassifierIO(s3_manager)
Beispiel #8
0
def test_get_dates_returns_list_of_datetimes_within_reporting_window():
    """get_dates lists each day from the start up to, but not including, the end datetime."""
    window_start = datetime(year=2021, month=12, day=30, tzinfo=UTC)
    window_end = datetime(year=2022, month=1, day=3, tzinfo=UTC)

    window = ReportingWindow(window_start, window_end, timedelta(days=14))

    # 30 Dec 2021 through 2 Jan 2022; the end date (3 Jan) is not expected.
    days_in_window = [
        datetime(2021, 12, 30, tzinfo=UTC),
        datetime(2021, 12, 31, tzinfo=UTC),
        datetime(2022, 1, 1, tzinfo=UTC),
        datetime(2022, 1, 2, tzinfo=UTC),
    ]

    assert window.get_dates() == days_in_window
Beispiel #9
0
def test_throws_value_error_given_end_datetime_but_no_start_datetime():
    """An end datetime without a start datetime is rejected with a ValueError."""
    end_only = datetime(year=2019, month=12, day=31, hour=0, minute=0, second=0, tzinfo=UTC)
    three_day_cutoff = timedelta(days=3)

    with pytest.raises(ValueError) as raised:
        ReportingWindow(
            start_datetime=None,
            end_datetime=end_only,
            conversation_cutoff=three_day_cutoff,
        )

    assert str(raised.value) == "Start datetime must be provided if end datetime is provided"
Beispiel #10
0
 def ods_metadata(self, reporting_window: ReportingWindow) -> List[str]:
     """Return the S3 paths of the monthly ODS metadata files for the reporting window.

     The path is built from the year and month only, so every day within the
     same month resolves to the same file. Each distinct (year, month) is
     therefore listed once, in the order it first appears in the window's
     dates, rather than once per day.

     Args:
         reporting_window: Window whose ``get_dates()`` supplies the days to cover.

     Returns:
         One path per distinct month touched by the window; empty if the
         window has no dates.
     """
     # dict.fromkeys drops repeated months while keeping first-seen order.
     year_months = dict.fromkeys(
         (date.year, date.month) for date in reporting_window.get_dates()
     )
     return [
         self._s3_path(
             self._ods_metadata_bucket,
             self._ODS_METADATA_VERSION,
             f"{year}/{month}",
             "organisationMetadata.json",
         ) for year, month in year_months
     ]
Beispiel #11
0
def test_throws_value_error_given_start_datetime_is_after_end_datetime():
    """A start datetime later than the end datetime is rejected with a ValueError."""
    later_start = datetime(year=2019, month=12, day=2, hour=0, minute=0, second=0, tzinfo=UTC)
    earlier_end = datetime(year=2019, month=12, day=1, hour=0, minute=0, second=0, tzinfo=UTC)
    three_day_cutoff = timedelta(days=3)

    with pytest.raises(ValueError) as raised:
        ReportingWindow(
            start_datetime=later_start,
            end_datetime=earlier_end,
            conversation_cutoff=three_day_cutoff,
        )

    assert str(raised.value) == "Start datetime must be before end datetime"
Beispiel #12
0
def test_throws_value_error_given_datetimes_that_are_not_midnight(start_hour, end_hour):
    """Window bounds built with the supplied hours are rejected as not being at midnight.

    ``start_hour`` and ``end_hour`` are provided from outside this function
    (presumably by parametrisation that is not visible here).
    """
    window_start = datetime(2019, 12, 1, hour=start_hour, minute=0, second=0, tzinfo=UTC)
    window_end = datetime(2019, 12, 2, hour=end_hour, minute=0, second=0, tzinfo=UTC)
    three_day_cutoff = timedelta(days=3)

    with pytest.raises(ValueError) as raised:
        ReportingWindow(
            start_datetime=window_start,
            end_datetime=window_end,
            conversation_cutoff=three_day_cutoff,
        )

    assert str(raised.value) == "Datetime must be at midnight"
class TransferClassifier:
    """Reads spine messages and ODS metadata, then writes classified transfers per day."""

    def __init__(self, config: TransferClassifierConfig):
        """Build the reporting window, S3 URI resolver and S3-backed IO helper from ``config``."""
        # The S3 resource is created first, pointed at the configured endpoint.
        s3_manager = S3DataManager(boto3.resource("s3", endpoint_url=config.s3_endpoint_url))

        self._reporting_window = ReportingWindow(
            start_datetime=config.start_datetime,
            end_datetime=config.end_datetime,
            conversation_cutoff=config.conversation_cutoff,
        )
        self._config = config
        self._uris = TransferClassifierS3UriResolver(
            gp2gp_spine_bucket=config.input_spine_data_bucket,
            transfers_bucket=config.output_transfer_data_bucket,
            ods_metadata_bucket=config.input_ods_metadata_bucket,
        )
        self._io = TransferClassifierIO(s3_manager)

    def _read_spine_messages(self) -> Iterator[Message]:
        """Read spine messages from the paths the URI resolver gives for the reporting window."""
        return self._io.read_spine_messages(self._uris.spine_messages(self._reporting_window))

    def _read_ods_metadata(self) -> OrganisationMetadataMonthly:
        """Read ODS metadata from the paths the URI resolver gives for the reporting window."""
        return self._io.read_ods_metadata_files(self._uris.ods_metadata(self._reporting_window))

    def _write_transfers(
        self,
        transfers: Iterator[Transfer],
        daily_start_datetime: datetime,
        cutoff: timedelta,
        metadata: Dict[str, str],
    ):
        """Write one day's transfers, with ``metadata``, to the path resolved for that day and cutoff."""
        destination = self._uris.gp2gp_transfers(
            daily_start_datetime=daily_start_datetime,
            cutoff=cutoff,
        )
        self._io.write_transfers(transfers, destination, metadata)

    def _construct_json_log_date_range_info(self) -> dict:
        """Return the configured and derived date-range details as log-friendly strings."""
        window = self._reporting_window
        window_dates = window.get_dates()
        overflow_dates = window.get_overflow_dates()

        config = self._config
        return {
            "config_start_datetime": convert_to_datetime_string(config.start_datetime),
            "config_end_datetime": convert_to_datetime_string(config.end_datetime),
            "conversation_cutoff": str(config.conversation_cutoff),
            "reporting_window_dates": convert_to_datetimes_string(window_dates),
            "reporting_window_overflow_dates": convert_to_datetimes_string(overflow_dates),
        }

    def run(self):
        """Classify conversations for each day of the reporting window and write the results.

        Logs the date range before and after the run, reads the inputs once,
        then converts and writes one set of transfers per day in the window.
        """
        probe = TransferObservabilityProbe(logger=module_logger)

        date_range_fields = self._construct_json_log_date_range_info()
        logger.info(
            "Attempting to classify conversations for a date range",
            extra={
                "event": "ATTEMPTING_CLASSIFY_CONVERSATIONS_FOR_A_DATE_RANGE",
                **date_range_fields,
            },
        )

        spine_messages = self._read_spine_messages()
        ods_metadata_monthly = self._read_ods_metadata()

        cutoff = self._config.conversation_cutoff
        transfer_service = TransferService(
            message_stream=spine_messages,
            cutoff=cutoff,
            observability_probe=probe,
        )

        # NOTE(review): gp2gp_conversations is passed to the day filter once per
        # day below; this presumably requires it to be re-iterable (not a
        # one-shot iterator) -- confirm against TransferService.
        gp2gp_conversations = transfer_service.parse_conversations_into_gp2gp_conversations(
            transfer_service.group_into_conversations()
        )

        for daily_start_datetime in self._reporting_window.get_dates():
            daily_end_datetime = daily_start_datetime + timedelta(days=1)
            metadata = {
                "cutoff-days": str(cutoff.days),
                "build-tag": self._config.build_tag,
                "start-datetime": convert_to_datetime_string(daily_start_datetime),
                "end-datetime": convert_to_datetime_string(daily_end_datetime),
                "ods-metadata-month": f"{daily_start_datetime.year}-{daily_start_datetime.month}",
            }

            conversations_for_day = filter_conversations_by_day(
                gp2gp_conversations, daily_start_datetime
            )
            # ODS metadata is looked up by the (year, month) of the day being processed.
            year_month = (daily_start_datetime.year, daily_start_datetime.month)
            organisation_lookup = ods_metadata_monthly.get_lookup(year_month)
            transfers = transfer_service.convert_to_transfers(
                conversations_for_day,
                organisation_lookup=organisation_lookup,
            )
            self._write_transfers(
                transfers=transfers,
                daily_start_datetime=daily_start_datetime,
                cutoff=cutoff,
                metadata=metadata,
            )

        logger.info(
            "Successfully classified conversations for a date range",
            extra={
                "event": "CLASSIFIED_CONVERSATIONS_FOR_A_DATE_RANGE",
                **date_range_fields,
            },
        )