def __init__(
        self,
        start=None,
        stop=None,
        *,
        hours="all",
        table="events.calls",
        subscriber_subset=None,
        columns=["*"],
        subscriber_identifier="msisdn",
    ):
        """
        Record the parameters of a subset of a single events table.

        Parameters
        ----------
        start, stop : str, datetime.date or None
            Bounds of the subset. ``datetime.date`` objects are converted
            to ``"%Y-%m-%d"`` strings; anything else is stored unchanged.
        hours : default "all"
            Stored unchanged on ``self.hours``.
        table : str, default "events.calls"
            Name handed to ``Table``.
        subscriber_subset : default None
            Passed to ``make_subscriber_subsetter``.
        columns : list of str, default ["*"]
            Columns to select. ``["*"]`` selects every column reported by
            the table's ``column_names``. The default list is only compared
            and rebound here, never mutated.
        subscriber_identifier : str, default "msisdn"
            Column that identifies the subscriber. If it is among the
            selected columns it is replaced by
            ``"<identifier> AS subscriber"``.

        Raises
        ------
        ValueError
            If ``start`` and ``stop`` compare equal after conversion.
        """

        # Temporary band-aid; marshmallow deserialises date strings
        # to date objects, so we convert it back here because the
        # lower-level classes still assume we are passing date strings.
        # NOTE(review): datetime.datetime is a subclass of datetime.date, so
        # datetime values also take this branch and lose their time of day.
        # Confirm whether callers ever pass datetimes here.
        if isinstance(start, datetime.date):
            start = start.strftime("%Y-%m-%d")
        if isinstance(stop, datetime.date):
            stop = stop.strftime("%Y-%m-%d")

        self.start = start
        self.stop = stop
        self.hours = hours
        self.subscriber_subset_ORIG = subscriber_subset
        self.subscriber_subsetter = make_subscriber_subsetter(
            subscriber_subset)
        self.subscriber_identifier = subscriber_identifier.lower()
        if columns == ["*"]:
            self.table_ORIG = Table(table)
            columns = self.table_ORIG.column_names
        else:
            self.table_ORIG = Table(table, columns=columns)
        self.columns = set(columns)
        # Try the identifier exactly as the caller spelled it first, then
        # fall back to the lower-cased form kept on
        # self.subscriber_identifier. Without the fallback a mixed-case
        # identifier (e.g. "MSISDN") never matches lower-case column names
        # and the subscriber column is silently left un-aliased.
        for candidate in (subscriber_identifier, self.subscriber_identifier):
            if candidate in self.columns:
                self.columns.remove(candidate)
                self.columns.add(f"{candidate} AS subscriber")
                break
        else:
            # No subscriber column selected; only worth a warning when a
            # real subset was requested.
            if self.subscriber_subsetter.is_proper_subset:
                warnings.warn(
                    f"No subscriber column requested, did you mean to include {subscriber_identifier} in columns? "
                    "Since you passed a subscriber_subset the data will still be subset by your subscriber subset, "
                    "but the subscriber column will not be present in the output.",
                    stacklevel=2,
                )
        # Sort so the column order is deterministic.
        self.columns = sorted(self.columns)

        self.sqlalchemy_table = get_sqlalchemy_table_definition(
            self.table_ORIG.fully_qualified_table_name,
            engine=Query.connection.engine)

        if self.start == self.stop:
            raise ValueError("Start and stop are the same.")

        super().__init__()

        # This needs to happen after the parent classes init method has been
        # called as it relies upon the connection object existing
        self._check_dates()
Exemple #2
0
def test_print_dependency_tree():
    """
    Check the dependency tree printed for a "most-common" daily location
    query that was built with an explicit subscriber subset.
    """
    expected_output = textwrap.dedent("""\
        <Query of type: MostFrequentLocation, query_id: 'xxxxx'>
          - <Query of type: PolygonSpatialUnit, query_id: 'xxxxx'>
             - <Table: 'geography.admin3', query_id: 'xxxxx'>
          - <Query of type: SubscriberLocations, query_id: 'xxxxx'>
             - <Query of type: JoinToLocation, query_id: 'xxxxx'>
                - <Query of type: PolygonSpatialUnit, query_id: 'xxxxx'>
                   - <Table: 'geography.admin3', query_id: 'xxxxx'>
                - <Query of type: EventsTablesUnion, query_id: 'xxxxx'>
                   - <Query of type: EventTableSubset, query_id: 'xxxxx'>
                      - <Query of type: CustomQuery, query_id: 'xxxxx'>
                      - <Table: 'events.sms', query_id: 'xxxxx'>
                         - <Table: 'events.sms', query_id: 'xxxxx'>
                   - <Query of type: EventTableSubset, query_id: 'xxxxx'>
                      - <Query of type: CustomQuery, query_id: 'xxxxx'>
                      - <Table: 'events.calls', query_id: 'xxxxx'>
                         - <Table: 'events.calls', query_id: 'xxxxx'>
             - <Query of type: PolygonSpatialUnit, query_id: 'xxxxx'>
                - <Table: 'geography.admin3', query_id: 'xxxxx'>
        """)

    # Build the subset from a custom query, then the query under test.
    subset_query = CustomQuery(
        "SELECT duration, msisdn as subscriber FROM events.calls WHERE duration < 10",
        ["duration", "subscriber"],
    )
    location_query = daily_location(
        date="2016-01-02",
        method="most-common",
        subscriber_subset=make_subscriber_subsetter(subset_query),
    )

    buffer = StringIO()
    print_dependency_tree(location_query, stream=buffer)

    # Replace every whole word made only of hex characters (the query ids)
    # with a fixed placeholder so the output can be compared literally.
    output_with_masked_ids = re.sub(
        r"\b[0-9a-f]+\b", "xxxxx", buffer.getvalue())

    assert expected_output == output_with_masked_ids
Exemple #3
0
    def __init__(
        self,
        *,
        start=None,
        stop=None,
        hours="all",
        hour_slices=None,
        table="events.calls",
        subscriber_subset=None,
        columns=["*"],
        subscriber_identifier="msisdn",
    ):
        """
        Record the parameters of a subset of a single events table.

        Parameters
        ----------
        start, stop
            Bounds of the subset; each is passed through
            ``standardise_date`` before being stored.
        hours : "all" or tuple of (int, int), default "all"
            ``(start_hour, stop_hour)``. When ``start_hour <= stop_hour``
            a single daily interval is built; otherwise the range is taken
            to span midnight and is split into two daily intervals.
            ``"all"`` produces an ``HourSlice`` with no intervals.
        hour_slices : default None
            Mutually exclusive with ``hours``.
            NOTE(review): apart from that check the value is not used in
            this method, so a supplied ``hour_slices`` is currently
            ignored. Confirm the intended behaviour.
        table : str, default "events.calls"
            Name handed to ``Table``.
        subscriber_subset : default None
            Passed to ``make_subscriber_subsetter``.
        columns : list of str, default ["*"]
            Columns to select. ``["*"]`` selects every column reported by
            the table's ``column_names``. The default list is only compared
            and rebound here, never mutated.
        subscriber_identifier : str, default "msisdn"
            Column that identifies the subscriber. If it is among the
            selected columns it is replaced by
            ``"<identifier> AS subscriber"``.

        Raises
        ------
        ValueError
            If both ``hours`` and ``hour_slices`` are given, or if
            ``start`` and ``stop`` compare equal after standardisation.
        """

        if hours != "all" and hour_slices is not None:
            raise ValueError(
                "The arguments `hours` and `hour_slices` are mutually exclusive."
            )
        if hours != "all":
            # NOTE(review): assert statements are removed under `python -O`,
            # so this check is not a reliable input guard. It is kept as an
            # assert so the exception type callers see does not change.
            assert (isinstance(hours, tuple) and len(hours) == 2
                    and isinstance(hours[0], int)
                    and isinstance(hours[1], int))  # sanity check

            start_hour = hours[0]
            stop_hour = hours[1]
            start_hour_str = f"{start_hour:02d}:00"
            stop_hour_str = f"{stop_hour:02d}:00"
            if start_hour <= stop_hour:
                hs = HourInterval(start_hour=start_hour_str,
                                  stop_hour=stop_hour_str,
                                  freq="day")
                self.hour_slices = HourSlice(hour_intervals=[hs])
            else:
                # If hours are backwards, then this is interpreted as spanning midnight,
                # so we split it into two time slices for the beginning/end of the day.
                hs1 = HourInterval(start_hour=None,
                                   stop_hour=stop_hour_str,
                                   freq="day")
                hs2 = HourInterval(start_hour=start_hour_str,
                                   stop_hour=None,
                                   freq="day")
                self.hour_slices = HourSlice(hour_intervals=[hs1, hs2])
        else:
            self.hour_slices = HourSlice(hour_intervals=[])

        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.hours = hours
        self.subscriber_subsetter = make_subscriber_subsetter(
            subscriber_subset)
        self.subscriber_identifier = subscriber_identifier.lower()
        if columns == ["*"]:
            self.table_ORIG = Table(table)
            columns = self.table_ORIG.column_names
        else:
            self.table_ORIG = Table(table, columns=columns)
        self.columns = set(columns)
        # Try the identifier exactly as the caller spelled it first, then
        # fall back to the lower-cased form kept on
        # self.subscriber_identifier. Without the fallback a mixed-case
        # identifier (e.g. "MSISDN") never matches lower-case column names
        # and the subscriber column is silently left un-aliased.
        for candidate in (subscriber_identifier, self.subscriber_identifier):
            if candidate in self.columns:
                self.columns.remove(candidate)
                self.columns.add(f"{candidate} AS subscriber")
                break
        else:
            # No subscriber column selected; only worth a warning when a
            # real subset was requested.
            if self.subscriber_subsetter.is_proper_subset:
                warnings.warn(
                    f"No subscriber column requested, did you mean to include {subscriber_identifier} in columns? "
                    "Since you passed a subscriber_subset the data will still be subset by your subscriber subset, "
                    "but the subscriber column will not be present in the output.",
                    stacklevel=2,
                )
        # Sort so the column order is deterministic.
        self.columns = sorted(self.columns)

        self.sqlalchemy_table = get_sqlalchemy_table_definition(
            self.table_ORIG.fully_qualified_table_name,
            engine=get_db().engine,
        )

        if self.start == self.stop:
            raise ValueError("Start and stop are the same.")

        super().__init__()

        # This needs to happen after the parent classes init method has been
        # called as it relies upon the connection object existing
        self._check_dates()