def test_invalid_arguments():
    """Check that HourInterval rejects inconsistent `freq` / `weekday` arguments.

    Each case pairs the extra keyword arguments passed on top of a fixed
    start/stop hour with the error message pattern expected from the
    resulting ValueError.
    """
    fixed_hours = {"start_hour": "00:00", "stop_hour": "08:00"}
    invalid_cases = [
        # Unknown frequency (no `weekday` keyword is passed at all here).
        (
            {"freq": "foobar"},
            "Argument `freq` must be one of: 'day', 'week'.",
        ),
        # Weekly frequency with an explicit `weekday=None`.
        (
            {"freq": "week", "weekday": None},
            "If freq='week' then the `weekday` argument must be provided.",
        ),
        # Weekly frequency with an unrecognised weekday name.
        (
            {"freq": "week", "weekday": "foobar"},
            "Invalid value for `weekday`.",
        ),
        # Daily frequency combined with a weekday.
        (
            {"freq": "day", "weekday": "Monday"},
            "If freq='day' then the `weekday` argument must not be provided.",
        ),
    ]

    for extra_kwargs, message_pattern in invalid_cases:
        with pytest.raises(ValueError, match=message_pattern):
            HourInterval(**fixed_hours, **extra_kwargs)
def test_daily_hour_slice_without_stop_hour():
    """A daily interval with `stop_hour=None` filters on the start hour only."""
    interval = HourInterval(start_hour="07:20", stop_hour=None, freq="day")

    assert interval.start_hour == "07:20"
    assert interval.stop_hour.is_missing
    assert interval.period.freq == "day"

    timestamp_column = EventsCallsTable.datetime
    condition = interval.filter_timestamp_column(timestamp_column)
    expected = "to_char(events.calls.datetime, 'HH24:MI') >= '07:20'"
    assert expected == get_string_representation(condition)
def test_daily_hour_slice():
    """A daily interval with both bounds yields `>= start AND < stop`."""
    interval = HourInterval(start_hour="00:00", stop_hour="06:30", freq="day")

    assert interval.start_hour == "00:00"
    assert interval.stop_hour == "06:30"
    assert interval.period.freq == "day"

    timestamp_column = EventsCallsTable.datetime
    condition = interval.filter_timestamp_column(timestamp_column)
    # Adjacent literals concatenate into the single expected SQL string.
    expected = (
        "to_char(events.calls.datetime, 'HH24:MI') >= '00:00' AND "
        "to_char(events.calls.datetime, 'HH24:MI') < '06:30'"
    )
    assert expected == get_string_representation(condition)
def test_weekly_hour_slice_without_stop_value():
    """A weekly interval without a stop hour filters on start hour and weekday."""
    interval = HourInterval(
        start_hour="10:00", stop_hour=None, freq="week", weekday="Saturday"
    )

    assert interval.start_hour == "10:00"
    assert interval.stop_hour.is_missing
    assert interval.period.freq == "week"
    assert interval.period.weekday == "Saturday"

    condition = interval.filter_timestamp_column(EventsCallsTable.datetime)
    expected_clauses = [
        "to_char(events.calls.datetime, 'HH24:MI') >= '10:00'",
        "EXTRACT(isodow FROM events.calls.datetime) = 6",
    ]
    expected = " AND ".join(expected_clauses)
    assert expected == get_string_representation(condition)
def test_weekly_hour_slice():
    """A weekly interval with both bounds filters on start, stop and weekday.

    The weekday is passed in lower case and is expected to be exposed
    capitalised on the interval's period.
    """
    interval = HourInterval(
        start_hour="04:00", stop_hour="07:45", freq="week", weekday="tuesday"
    )

    assert interval.start_hour == "04:00"
    assert interval.stop_hour == "07:45"
    assert interval.period.freq == "week"
    assert interval.period.weekday == "Tuesday"

    condition = interval.filter_timestamp_column(EventsCallsTable.datetime)
    expected_clauses = [
        "to_char(events.calls.datetime, 'HH24:MI') >= '04:00'",
        "to_char(events.calls.datetime, 'HH24:MI') < '07:45'",
        "EXTRACT(isodow FROM events.calls.datetime) = 2",
    ]
    expected = " AND ".join(expected_clauses)
    assert expected == get_string_representation(condition)
def test_multiple_our_slices():
    """An HourSlice built from several intervals OR-combines their conditions."""
    daily_interval = HourInterval(start_hour="08:00", stop_hour="16:30", freq="day")
    weekly_interval = HourInterval(
        start_hour="10:00", stop_hour="18:45", freq="week", weekday="Thursday"
    )
    combined = HourSlice(hour_intervals=[daily_interval, weekly_interval])

    condition = combined.get_subsetting_condition(EventsCallsTable.datetime)

    # Expected clause groups, one per interval, in the order they were passed.
    daily_sql = " AND ".join(
        [
            "to_char(events.calls.datetime, 'HH24:MI') >= '08:00'",
            "to_char(events.calls.datetime, 'HH24:MI') < '16:30'",
        ]
    )
    weekly_sql = " AND ".join(
        [
            "to_char(events.calls.datetime, 'HH24:MI') >= '10:00'",
            "to_char(events.calls.datetime, 'HH24:MI') < '18:45'",
            "EXTRACT(isodow FROM events.calls.datetime) = 4",
        ]
    )
    expected = f"{daily_sql} OR {weekly_sql}"
    assert expected == get_string_representation(condition)
def __init__(
    self,
    *,
    start=None,
    stop=None,
    hours="all",
    hour_slices=None,
    table="events.calls",
    subscriber_subset=None,
    columns=["*"],
    subscriber_identifier="msisdn",
):
    """Set up the subset: hour filter, date bounds, columns and table handles.

    Parameters
    ----------
    start, stop
        Date bounds; each is passed through `standardise_date` and stored.
    hours
        Either the string "all" (no hour intervals) or a tuple of two ints
        `(start_hour, stop_hour)`. If `start_hour > stop_hour` the range is
        treated as spanning midnight and is split into two daily intervals.
    hour_slices
        Mutually exclusive with a non-"all" `hours`.
        NOTE(review): apart from that check, the value passed here is not
        used anywhere in this method — confirm whether that is intended.
    table
        Table name handed to `Table`.
    subscriber_subset
        Passed to `make_subscriber_subsetter`.
    columns
        Column names to select; `["*"]` means all columns of the table.
        The default list is never mutated here (the name is only rebound).
    subscriber_identifier
        Column to expose as `subscriber`; stored lower-cased on the instance.

    Raises
    ------
    ValueError
        If both `hours` and `hour_slices` are given, or if the standardised
        start and stop compare equal.
    AssertionError
        If `hours` is neither "all" nor a 2-tuple of ints.
    """
    hours_given = hours != "all"

    if hours_given and hour_slices is not None:
        raise ValueError(
            "The arguments `hours` and `hour_slice` are mutually exclusive."
        )

    if hours_given:
        # sanity check
        assert (
            isinstance(hours, tuple)
            and len(hours) == 2
            and all(isinstance(hours[idx], int) for idx in (0, 1))
        )
        first_hour = hours[0]
        last_hour = hours[1]
        first_hour_label = f"{first_hour:02d}:00"
        last_hour_label = f"{last_hour:02d}:00"

        if first_hour <= last_hour:
            intervals = [
                HourInterval(
                    start_hour=first_hour_label,
                    stop_hour=last_hour_label,
                    freq="day",
                )
            ]
        else:
            # Backwards hours are interpreted as spanning midnight, so the
            # range is split into one slice for the beginning of the day
            # and one for the end of the day.
            until_stop = HourInterval(
                start_hour=None, stop_hour=last_hour_label, freq="day"
            )
            from_start = HourInterval(
                start_hour=first_hour_label, stop_hour=None, freq="day"
            )
            intervals = [until_stop, from_start]
    else:
        intervals = []

    self.hour_slices = HourSlice(hour_intervals=intervals)

    self.start = standardise_date(start)
    self.stop = standardise_date(stop)
    self.hours = hours
    self.subscriber_subsetter = make_subscriber_subsetter(subscriber_subset)
    self.subscriber_identifier = subscriber_identifier.lower()

    if columns == ["*"]:
        # Wildcard: take every column the table reports.
        self.table_ORIG = Table(table)
        columns = self.table_ORIG.column_names
    else:
        self.table_ORIG = Table(table, columns=columns)

    self.columns = set(columns)
    try:
        self.columns.remove(subscriber_identifier)
    except KeyError:
        # The subscriber column was not requested; only worth a warning
        # when the caller is actually subsetting by subscriber.
        if self.subscriber_subsetter.is_proper_subset:
            message = (
                f"No subscriber column requested, did you mean to include {subscriber_identifier} in columns? "
                "Since you passed a subscriber_subset the data will still be subset by your subscriber subset, "
                "but the subscriber column will not be present in the output."
            )
            warnings.warn(message, stacklevel=2)
    else:
        # Re-add the identifier column under the alias `subscriber`.
        self.columns.add(f"{subscriber_identifier} AS subscriber")
    self.columns = sorted(self.columns)

    qualified_name = self.table_ORIG.fully_qualified_table_name
    self.sqlalchemy_table = get_sqlalchemy_table_definition(
        qualified_name,
        engine=get_db().engine,
    )

    if self.start == self.stop:
        raise ValueError("Start and stop are the same.")

    super().__init__()

    # This needs to happen after the parent class's init method has been
    # called, as it relies upon the connection object existing.
    self._check_dates()