Esempio n. 1
0
def test_invalid_arguments():
    """
    Check that HourInterval rejects invalid `freq` / `weekday` combinations.

    Each case constructs an HourInterval with the same start and stop hour
    plus the listed extra keyword arguments, and expects a ValueError whose
    message matches the given pattern (pytest applies `match` as a regular
    expression search).
    """
    shared_kwargs = {"start_hour": "00:00", "stop_hour": "08:00"}
    invalid_cases = [
        # Unknown frequency (no `weekday` argument passed at all).
        (
            {"freq": "foobar"},
            "Argument `freq` must be one of: 'day', 'week'.",
        ),
        # Weekly frequency without a weekday.
        (
            {"freq": "week", "weekday": None},
            "If freq='week' then the `weekday` argument must be provided.",
        ),
        # Weekly frequency with an unrecognised weekday.
        (
            {"freq": "week", "weekday": "foobar"},
            "Invalid value for `weekday`.",
        ),
        # Daily frequency combined with a weekday.
        (
            {"freq": "day", "weekday": "Monday"},
            "If freq='day' then the `weekday` argument must not be provided.",
        ),
    ]

    for extra_kwargs, message_pattern in invalid_cases:
        with pytest.raises(ValueError, match=message_pattern):
            HourInterval(**shared_kwargs, **extra_kwargs)
Esempio n. 2
0
def test_daily_hour_slice_without_stop_hour():
    """
    A daily HourInterval created with `stop_hour=None` reports a missing stop
    hour and produces a filter containing only the start-hour condition.
    """
    interval = HourInterval(freq="day", start_hour="07:20", stop_hour=None)

    # Attributes reflect the constructor arguments.
    assert interval.start_hour == "07:20"
    assert interval.stop_hour.is_missing
    assert interval.period.freq == "day"

    # The generated condition has a lower bound only.
    expected_sql = "to_char(events.calls.datetime, 'HH24:MI') >= '07:20'"
    condition = interval.filter_timestamp_column(EventsCallsTable.datetime)
    assert expected_sql == get_string_representation(condition)
Esempio n. 3
0
def test_daily_hour_slice():
    """
    A daily HourInterval with both bounds exposes them as attributes and
    produces a filter with a lower (>=) and an upper (<) time-of-day condition.
    """
    interval = HourInterval(freq="day", start_hour="00:00", stop_hour="06:30")

    # Attributes reflect the constructor arguments.
    assert interval.start_hour == "00:00"
    assert interval.stop_hour == "06:30"
    assert interval.period.freq == "day"

    # Adjacent literals are concatenated at compile time into one string.
    expected_sql = (
        "to_char(events.calls.datetime, 'HH24:MI') >= '00:00' AND "
        "to_char(events.calls.datetime, 'HH24:MI') < '06:30'"
    )
    condition = interval.filter_timestamp_column(EventsCallsTable.datetime)
    assert expected_sql == get_string_representation(condition)
Esempio n. 4
0
def test_weekly_hour_slice_without_stop_value():
    """
    A weekly HourInterval created with `stop_hour=None` reports a missing stop
    hour and produces a filter combining the start-hour condition with an
    ISO day-of-week condition (6 is expected for "Saturday").
    """
    interval_kwargs = {
        "start_hour": "10:00",
        "stop_hour": None,
        "freq": "week",
        "weekday": "Saturday",
    }
    interval = HourInterval(**interval_kwargs)

    # Attributes reflect the constructor arguments.
    assert interval.start_hour == "10:00"
    assert interval.stop_hour.is_missing
    assert interval.period.freq == "week"
    assert interval.period.weekday == "Saturday"

    condition = interval.filter_timestamp_column(EventsCallsTable.datetime)
    expected_sql = (
        "to_char(events.calls.datetime, 'HH24:MI') >= '10:00' AND "
        "EXTRACT(isodow FROM events.calls.datetime) = 6"
    )
    assert expected_sql == get_string_representation(condition)
Esempio n. 5
0
def test_weekly_hour_slice():
    """
    A weekly HourInterval with both bounds produces a filter with start-hour,
    stop-hour and ISO day-of-week conditions.

    The weekday is passed in lowercase ("tuesday") and is expected to be
    reported back as "Tuesday".
    """
    interval_kwargs = {
        "start_hour": "04:00",
        "stop_hour": "07:45",
        "freq": "week",
        "weekday": "tuesday",
    }
    interval = HourInterval(**interval_kwargs)

    # Attributes reflect the constructor arguments (weekday capitalised).
    assert interval.start_hour == "04:00"
    assert interval.stop_hour == "07:45"
    assert interval.period.freq == "week"
    assert interval.period.weekday == "Tuesday"

    condition = interval.filter_timestamp_column(EventsCallsTable.datetime)
    expected_sql = (
        "to_char(events.calls.datetime, 'HH24:MI') >= '04:00' AND "
        "to_char(events.calls.datetime, 'HH24:MI') < '07:45' AND "
        "EXTRACT(isodow FROM events.calls.datetime) = 2"
    )
    assert expected_sql == get_string_representation(condition)
Esempio n. 6
0
def test_multiple_our_slices():
    """
    An HourSlice built from a daily and a weekly interval produces a
    subsetting condition in which the two interval conditions are joined
    with OR.

    NOTE(review): the function name presumably intends "hour" rather than
    "our"; it is left unchanged here.
    """
    every_day = HourInterval(freq="day", start_hour="08:00", stop_hour="16:30")
    thursdays = HourInterval(
        freq="week",
        weekday="Thursday",
        start_hour="10:00",
        stop_hour="18:45",
    )
    combined = HourSlice(hour_intervals=[every_day, thursdays])

    condition = combined.get_subsetting_condition(EventsCallsTable.datetime)
    expected_sql = (
        "to_char(events.calls.datetime, 'HH24:MI') >= '08:00' AND "
        "to_char(events.calls.datetime, 'HH24:MI') < '16:30' OR "
        "to_char(events.calls.datetime, 'HH24:MI') >= '10:00' AND "
        "to_char(events.calls.datetime, 'HH24:MI') < '18:45' AND "
        "EXTRACT(isodow FROM events.calls.datetime) = 4"
    )
    assert expected_sql == get_string_representation(condition)
Esempio n. 7
0
    def __init__(
        self,
        *,
        start=None,
        stop=None,
        hours="all",
        hour_slices=None,
        table="events.calls",
        subscriber_subset=None,
        columns=["*"],
        subscriber_identifier="msisdn",
    ):
        """
        Store the hour filter, date bounds, subscriber subsetter and column
        selection, then initialise the parent class and check the dates.

        Parameters
        ----------
        start, stop
            Date bounds; each is passed through `standardise_date` before
            being stored. Equal standardised values are rejected.
        hours : "all" or tuple of two ints
            "all" results in an HourSlice with no intervals. A tuple
            (start_hour, stop_hour) is turned into one daily interval, or two
            when start_hour > stop_hour (interpreted as spanning midnight).
        hour_slices
            Must be None whenever `hours` is not "all".
            NOTE(review): apart from that check, this argument is not used in
            this method - confirm whether it should populate
            `self.hour_slices`.
        table : str
            Name passed to `Table`.
        subscriber_subset
            Passed to `make_subscriber_subsetter`.
        columns : list
            Columns to select; `["*"]` means the column names of the table.
        subscriber_identifier : str
            Column which, if present in `columns`, is replaced by
            "<subscriber_identifier> AS subscriber".

        Raises
        ------
        ValueError
            If `hours` and `hour_slices` are both given, or if the
            standardised start and stop are equal.
        """
        hours_given = hours != "all"
        if hours_given and hour_slices is not None:
            raise ValueError(
                "The arguments `hours` and `hour_slice` are mutually exclusive."
            )

        if not hours_given:
            intervals = []
        else:
            # sanity check
            assert isinstance(hours, tuple) and len(hours) == 2
            assert isinstance(hours[0], int) and isinstance(hours[1], int)

            first_hour, last_hour = hours[0], hours[1]
            first_label = f"{first_hour:02d}:00"
            last_label = f"{last_hour:02d}:00"
            if first_hour > last_hour:
                # If hours are backwards, then this is interpreted as spanning
                # midnight, so we split it into two time slices for the
                # beginning/end of the day.
                intervals = [
                    HourInterval(start_hour=None,
                                 stop_hour=last_label,
                                 freq="day"),
                    HourInterval(start_hour=first_label,
                                 stop_hour=None,
                                 freq="day"),
                ]
            else:
                intervals = [
                    HourInterval(start_hour=first_label,
                                 stop_hour=last_label,
                                 freq="day")
                ]
        self.hour_slices = HourSlice(hour_intervals=intervals)

        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.hours = hours
        self.subscriber_subsetter = make_subscriber_subsetter(
            subscriber_subset)
        self.subscriber_identifier = subscriber_identifier.lower()

        # `["*"]` means "every column of the table"; otherwise restrict the
        # table to the requested columns.
        select_everything = columns == ["*"]
        self.table_ORIG = (Table(table) if select_everything else Table(
            table, columns=columns))
        if select_everything:
            columns = self.table_ORIG.column_names

        # Expose the identifier column under the alias "subscriber".
        # NOTE(review): the lookup uses the identifier as passed in, not the
        # lowercased value stored on `self.subscriber_identifier` - confirm
        # this is intended.
        requested = set(columns)
        if subscriber_identifier in requested:
            requested.remove(subscriber_identifier)
            requested.add(f"{subscriber_identifier} AS subscriber")
        elif self.subscriber_subsetter.is_proper_subset:
            warnings.warn(
                f"No subscriber column requested, did you mean to include {subscriber_identifier} in columns? "
                "Since you passed a subscriber_subset the data will still be subset by your subscriber subset, "
                "but the subscriber column will not be present in the output.",
                stacklevel=2,
            )
        self.columns = sorted(requested)

        self.sqlalchemy_table = get_sqlalchemy_table_definition(
            self.table_ORIG.fully_qualified_table_name,
            engine=get_db().engine,
        )

        if self.start == self.stop:
            raise ValueError("Start and stop are the same.")

        super().__init__()

        # This needs to happen after the parent classes init method has been
        # called as it relies upon the connection object existing
        self._check_dates()