Esempio n. 1
0
    def __init__(
        self,
        start,
        stop,
        unit="km",
        hours: Optional[Tuple[int, int]] = None,
        table="all",
        subscriber_identifier="msisdn",
        ignore_nulls=True,
        subscriber_subset=None,
    ):
        """
        Validate the unit and build the underlying lon-lat
        ``SubscriberLocations`` query.

        Parameters
        ----------
        start, stop
            Bounds of the period; normalised with ``standardise_date``.
        unit : str, default "km"
            Unit name. It is lower-cased before being stored and validated
            against ``self.allowed_units``, so the check is case-insensitive.
        hours, table, subscriber_identifier, ignore_nulls, subscriber_subset
            Forwarded unchanged to ``SubscriberLocations``.

        Raises
        ------
        ValueError
            If the lower-cased unit is not one of ``self.allowed_units``.
        """
        # Validate the normalised value rather than the raw argument, so that
        # e.g. "KM" is accepted and stored as "km".
        # NOTE(review): assumes allowed_units holds lower-case names - confirm.
        self.unit = unit.lower()
        if self.unit not in self.allowed_units:
            raise ValueError(
                f"Unrecognised unit {unit}, use one of {self.allowed_units}"
            )

        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.ul = SubscriberLocations(
            self.start,
            self.stop,
            spatial_unit=make_spatial_unit("lon-lat"),
            hours=hours,
            table=table,
            subscriber_subset=subscriber_subset,
            subscriber_identifier=subscriber_identifier,
            ignore_nulls=ignore_nulls,
        )

        super().__init__()
Esempio n. 2
0
    def __init__(
        self,
        start,
        stop,
        *,
        hours: Optional[Tuple[int, int]] = None,
        table="all",
        subscriber_identifier="msisdn",
        method="most-common",
        subscriber_subset=None,
    ):
        """
        Build a ``SubscriberTAC`` query and left-join it to the
        ``infrastructure.tacs`` table.

        Parameters
        ----------
        start, stop
            Bounds of the period; stored after ``standardise_date``.
        hours, table, subscriber_identifier, method
            Stored on the instance and forwarded to ``SubscriberTAC``.
        subscriber_subset
            Forwarded to ``SubscriberTAC`` only.
        """
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.hours, self.table = hours, table
        self.subscriber_identifier = subscriber_identifier
        self.method = method

        # The sub-query receives the dates exactly as the caller supplied them.
        tac_options = dict(
            hours=hours,
            table=table,
            subscriber_identifier=subscriber_identifier,
            method=method,
            subscriber_subset=subscriber_subset,
        )
        self.subscriber_tac = SubscriberTAC(start, stop, **tac_options)

        self.tacs = Table("infrastructure.tacs")
        # Join subscriber "tac" values to the "id" column of the TAC table.
        self.joined = self.subscriber_tac.join(self.tacs, "tac", "id", how="left")
        super().__init__()
Esempio n. 3
0
    def __init__(
        self,
        start: str,
        stop: str,
        *,
        hours: Optional[Tuple[int, int]] = None,
        table: Union[str, List[str]] = "all",
        subscriber_identifier: str = "msisdn",
        subscriber_subset: Optional[Query] = None,
    ):
        """
        Build an ``EventsTablesUnion`` selecting only the subscriber
        identifier column.

        Parameters
        ----------
        start, stop
            Bounds of the period; stored after ``standardise_date``.
        hours
            Stored and forwarded to ``EventsTablesUnion``.
        table
            Stored as ``self.tables`` and forwarded as ``tables``.
        subscriber_identifier
            Stored; also the single column requested from the union.
        subscriber_subset
            Forwarded to ``EventsTablesUnion``.
        """
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.hours = hours
        self.tables = table
        self.subscriber_identifier = subscriber_identifier

        union_options = dict(
            columns=[self.subscriber_identifier],
            tables=self.tables,
            hours=hours,
            subscriber_subset=subscriber_subset,
            subscriber_identifier=self.subscriber_identifier,
        )
        self.unioned = EventsTablesUnion(self.start, self.stop, **union_options)

        super().__init__()
Esempio n. 4
0
    def __init__(
        self,
        start,
        stop,
        spatial_unit: AnySpatialUnit = make_spatial_unit("cell"),
        hours: Optional[Tuple[int, int]] = None,
        table="all",
        subscriber_subset=None,
    ):
        """
        Wrap a ``SubscriberLocations`` query in ``UniqueLocations``.

        Parameters
        ----------
        start, stop
            Bounds of the period; stored after ``standardise_date``.
        spatial_unit, hours, table
            Stored on the instance and forwarded to ``SubscriberLocations``.
        subscriber_subset
            Forwarded to ``SubscriberLocations`` only.
        """
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.spatial_unit, self.hours, self.table = spatial_unit, hours, table

        subscriber_locations = SubscriberLocations(
            start=self.start,
            stop=self.stop,
            spatial_unit=self.spatial_unit,
            hours=self.hours,
            table=self.table,
            subscriber_subset=subscriber_subset,
        )
        self.ul = UniqueLocations(subscriber_locations)

        super().__init__()
Esempio n. 5
0
    def __init__(
        self,
        start,
        stop,
        statistic="avg",
        *,
        subscriber_identifier="msisdn",
        hours: Optional[Tuple[int, int]] = None,
        subscriber_subset=None,
    ):
        """
        Validate the statistic and build an ``EventsTablesUnion`` over
        ``events.topups``.

        Parameters
        ----------
        start, stop
            Bounds of the period; stored after ``standardise_date``.
        statistic : str, default "avg"
            Lower-cased, stored, and checked against ``valid_stats``.
        subscriber_identifier, hours
            Stored and forwarded to ``EventsTablesUnion``.
        subscriber_subset
            Forwarded to ``EventsTablesUnion`` only.

        Raises
        ------
        ValueError
            If the lower-cased statistic is not in ``valid_stats``.
        """
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.subscriber_identifier = subscriber_identifier
        self.hours = hours
        self.tables = "events.topups"

        self.statistic = statistic.lower()
        if self.statistic not in valid_stats:
            message = "{} is not a valid statistic. Use one of {}".format(
                self.statistic, valid_stats
            )
            raise ValueError(message)

        # Only the identifier and the recharge amount are read from the table.
        self.unioned_query = EventsTablesUnion(
            self.start,
            self.stop,
            tables=self.tables,
            columns=[self.subscriber_identifier, "recharge_amount"],
            hours=hours,
            subscriber_identifier=subscriber_identifier,
            subscriber_subset=subscriber_subset,
        )

        super().__init__()
Esempio n. 6
0
    def __init__(
        self,
        start,
        stop,
        *,
        hours="all",
        table="all",
        subscriber_identifier="msisdn",
        subscriber_subset=None,
    ):
        """
        Build a ``SubscriberTACs`` query and left-join it to the
        ``infrastructure.tacs`` table.

        Parameters
        ----------
        start, stop
            Bounds of the period; stored after ``standardise_date``.
        hours, table, subscriber_identifier
            Stored on the instance and forwarded to ``SubscriberTACs``.
        subscriber_subset
            Forwarded to ``SubscriberTACs`` only.
        """
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.hours, self.table = hours, table
        self.subscriber_identifier = subscriber_identifier

        # The sub-query receives the dates exactly as the caller supplied them.
        tacs_options = dict(
            hours=hours,
            table=table,
            subscriber_identifier=subscriber_identifier,
            subscriber_subset=subscriber_subset,
        )
        self.subscriber_tacs = SubscriberTACs(start, stop, **tacs_options)

        self.tacs = Table("infrastructure.tacs")
        # Join subscriber "tac" values to the "id" column of the TAC table.
        self.joined = self.subscriber_tacs.join(self.tacs, "tac", "id", how="left")
        super().__init__()
Esempio n. 7
0
    def __init__(
        self,
        start,
        stop,
        spatial_unit: Optional[AnySpatialUnit] = None,
        hours: Optional[Tuple[int, int]] = None,
        table="all",
        subscriber_identifier="msisdn",
        *,
        ignore_nulls=True,
        subscriber_subset=None,
    ):
        """
        Store the query parameters and build the underlying
        ``SubscriberLocations`` query.

        Parameters
        ----------
        start, stop
            Bounds of the period; stored after ``standardise_date``.
        spatial_unit
            Spatial unit to use. When ``None``, an "admin" unit at level 3 is
            created with ``make_spatial_unit``.
        hours, table, subscriber_identifier
            Stored on the instance and forwarded to ``SubscriberLocations``.
        ignore_nulls, subscriber_subset
            Forwarded to ``SubscriberLocations`` only.
        """
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.spatial_unit = (
            make_spatial_unit("admin", level=3)
            if spatial_unit is None
            else spatial_unit
        )
        self.hours, self.table = hours, table
        self.subscriber_identifier = subscriber_identifier

        location_options = dict(
            start=self.start,
            stop=self.stop,
            spatial_unit=self.spatial_unit,
            hours=self.hours,
            table=self.table,
            subscriber_identifier=self.subscriber_identifier,
            ignore_nulls=ignore_nulls,
            subscriber_subset=subscriber_subset,
        )
        self.subscriber_locs = SubscriberLocations(**location_options)

        super().__init__()
Esempio n. 8
0
    def __init__(
        self,
        start: str,
        stop: str,
        reference_location: BaseLocation,
        statistic: str = "avg",
        unit: str = "km",
        hours: Union[str, Tuple[int, int]] = "all",
        table: Union[str, List[str]] = "all",
        subscriber_identifier: str = "msisdn",
        ignore_nulls: bool = True,
        return_subscribers_not_seen: bool = False,
        subscriber_subset: Optional[Query] = None,
    ):
        """
        Validate the arguments, then join the reference location to the
        subscriber locations and to a ``DistanceMatrix``.

        Parameters
        ----------
        start, stop
            Bounds of the period; stored after ``standardise_date``.
        reference_location : BaseLocation
            Reference location query. Its ``spatial_unit`` is used for the
            subscriber locations and for the distance matrix.
        statistic : str, default "avg"
            Lower-cased, stored, and checked against ``valid_stats``.
        unit : str, default "km"
            Stored as given.
        hours, table, subscriber_identifier, ignore_nulls, subscriber_subset
            Forwarded unchanged to ``SubscriberLocations``.
        return_subscribers_not_seen : bool, default False
            Stored as given.

        Raises
        ------
        ValueError
            If ``reference_location`` is not a ``BaseLocation`` or the
            statistic is not in ``valid_stats``.
        """
        # Validate before touching reference_location.spatial_unit or building
        # any sub-query: otherwise a wrong argument type surfaces as an
        # AttributeError instead of the intended ValueError.
        if not isinstance(reference_location, BaseLocation):
            raise ValueError(
                "Argument 'reference_location' should be an instance of BaseLocation class. "
                f"Got: {type(reference_location)}")

        self.statistic = statistic.lower()
        if self.statistic not in valid_stats:
            raise ValueError(
                "{} is not a valid statistic. Use one of {}".format(
                    self.statistic, valid_stats))

        self.return_subscribers_not_seen = return_subscribers_not_seen
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.spatial_unit = reference_location.spatial_unit
        self.unit = unit
        self.reference_location = reference_location

        subscriber_locations = SubscriberLocations(
            self.start,
            self.stop,
            spatial_unit=self.spatial_unit,
            hours=hours,
            table=table,
            subscriber_identifier=subscriber_identifier,
            ignore_nulls=ignore_nulls,
            subscriber_subset=subscriber_subset,
        )

        # Reference columns get the "_from" suffix, subscriber location columns
        # the "_to" suffix; the suffixed location id columns are then used as
        # the left-hand join keys against the distance matrix.
        self.joined = reference_location.join(
            other=subscriber_locations,
            on_left=["subscriber"],
            left_append="_from",
            right_append="_to",
        ).join(
            DistanceMatrix(spatial_unit=self.spatial_unit),
            on_left=[
                f"{col}_{direction}" for direction in ("from", "to")
                for col in self.spatial_unit.location_id_columns
            ],
            right_append="_dist",
            how="left outer",
        )

        super().__init__()
Esempio n. 9
0
    def __init__(
        self,
        start,
        stop,
        *,
        hours: Optional[Tuple[int, int]] = None,
        table="all",
        subscriber_subset=None,
        subscriber_identifier="msisdn",
    ):
        """
        Build an ``EventsTablesUnion`` selecting the subscriber identifier,
        ``tac`` and ``datetime`` columns.

        Parameters
        ----------
        start, stop
            Bounds of the period; stored after ``standardise_date``.
        hours, table, subscriber_identifier
            Stored on the instance and forwarded to ``EventsTablesUnion``
            (``table`` is passed as ``tables``).
        subscriber_subset
            Forwarded to ``EventsTablesUnion`` only.
        """
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.hours, self.table = hours, table
        self.subscriber_identifier = subscriber_identifier

        wanted_columns = [subscriber_identifier, "tac", "datetime"]
        # The union receives the dates exactly as the caller supplied them.
        self.tbl = EventsTablesUnion(
            start,
            stop,
            columns=wanted_columns,
            tables=table,
            hours=hours,
            subscriber_subset=subscriber_subset,
            subscriber_identifier=self.subscriber_identifier,
        )

        super().__init__()
Esempio n. 10
0
    def __init__(
        self,
        start,
        stop,
        hours=(20, 4),
        *,
        subscriber_identifier="msisdn",
        direction: Union[str, Direction] = Direction.BOTH,
        subscriber_subset=None,
        tables="all",
    ):
        """
        Store the query parameters and build the underlying
        ``EventsTablesUnion``.

        Parameters
        ----------
        start, stop
            Bounds of the period; stored after ``standardise_date``.
        hours : tuple, default (20, 4)
            Stored on the instance; not forwarded to the union.
        subscriber_identifier, tables
            Stored and forwarded to ``EventsTablesUnion``.
        direction
            Converted with ``Direction(...)``; its ``required_columns`` are
            added to the requested columns.
        subscriber_subset
            Forwarded to ``EventsTablesUnion`` only.
        """
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.subscriber_identifier = subscriber_identifier
        self.direction = Direction(direction)
        self.hours, self.tables = hours, tables

        wanted_columns = [self.subscriber_identifier, "datetime"]
        wanted_columns.extend(self.direction.required_columns)

        # NOTE(review): the union is always built with hours="all" rather than
        # self.hours - presumably the hour window is applied elsewhere; confirm.
        self.unioned_query = EventsTablesUnion(
            self.start,
            self.stop,
            tables=self.tables,
            columns=wanted_columns,
            hours="all",
            subscriber_identifier=subscriber_identifier,
            subscriber_subset=subscriber_subset,
        )
        super().__init__()
Esempio n. 11
0
    def __init__(
        self,
        start,
        stop,
        *,
        columns,
        tables=None,
        hours="all",
        subscriber_subset=None,
        subscriber_identifier="msisdn",
    ):
        """
        Normalise the arguments and build the per-table subsets.

        Parameters
        ----------
        start, stop
            Bounds of the period; stored after ``standardise_date``.
        columns
            Columns to select; stored as given.
        tables
            Tables to combine, parsed with ``self._parse_tables``. The string
            "all" (any case) is still accepted but logs a warning and is
            treated as ``None``.
        hours, subscriber_subset, subscriber_identifier
            Forwarded to ``self._make_table_list``.

        Raises
        ------
        ValueError
            If ``"*"`` is in ``columns`` and the parsed tables do not consist
            of exactly one table.
        """
        if isinstance(tables, str) and tables.lower() == "all":
            # `warn` is a deprecated alias in the standard logging API;
            # `warning` is the supported spelling.
            logger.warning(
                "EventsTablesUnion will soon stop accepting the argument tables='all'. Use tables=None instead."
            )
            tables = None

        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.columns = columns
        self.tables = self._parse_tables(tables)
        if "*" in columns and len(self.tables) != 1:
            raise ValueError(
                "Must give named tables when combining multiple event type tables."
            )
        self.date_subsets = self._make_table_list(
            hours=hours,
            subscriber_subset=subscriber_subset,
            subscriber_identifier=subscriber_identifier,
        )

        super().__init__()
Esempio n. 12
0
    def __init__(
        self,
        start,
        stop,
        *,
        hours: Optional[Tuple[int, int]] = None,
        table="all",
        subscriber_subset=None,
        subscriber_identifier="msisdn",
        method="most-common",
    ):
        """
        Build the underlying ``SubscriberTACs`` query and validate ``method``.

        Parameters
        ----------
        start, stop
            Bounds of the period; stored after ``standardise_date``.
        hours, table, subscriber_identifier
            Stored on the instance and forwarded to ``SubscriberTACs``.
            A warning is issued when ``subscriber_identifier`` is "imei".
        subscriber_subset
            Forwarded to ``SubscriberTACs`` only.
        method : str, default "most-common"
            Either "most-common" or "last".

        Raises
        ------
        ValueError
            If ``method`` is neither "most-common" nor "last".
        """
        if subscriber_identifier == "imei":
            warnings.warn("IMEI has a one to one mapping to TAC number.")

        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.hours, self.table = hours, table
        self.subscriber_identifier = subscriber_identifier

        # The sub-query receives the dates exactly as the caller supplied them.
        tacs_options = dict(
            hours=hours,
            table=table,
            subscriber_identifier=subscriber_identifier,
            subscriber_subset=subscriber_subset,
        )
        self.subscriber_tacs = SubscriberTACs(start, stop, **tacs_options)

        self.method = method
        method_is_known = self.method in ("most-common", "last")
        if not method_is_known:
            raise ValueError("{} is not a valid method.".format(method))
        super().__init__()
Esempio n. 13
0
    def __init__(
        self,
        start,
        stop,
        *,
        subscriber_identifier="msisdn",
        statistic="sum",
        spatial_unit: Optional[AnySpatialUnit] = None,
        hours="all",
        subscriber_subset=None,
    ):
        """
        Build a location-joined union of ``events.calls`` and self-join its
        outgoing rows to its non-outgoing rows on ``id``.

        Parameters
        ----------
        start, stop
            Bounds of the period; stored after ``standardise_date``.
        subscriber_identifier
            Stored, requested as a column and forwarded to the union.
        statistic : str, default "sum"
            Lower-cased, stored, and checked against ``valid_stats``.
        spatial_unit
            Spatial unit to use. When ``None``, an "admin" unit at level 3 is
            created with ``make_spatial_unit``.
        hours, subscriber_subset
            Forwarded to ``EventsTablesUnion``.

        Raises
        ------
        ValueError
            If the lower-cased statistic is not in ``valid_stats``.
        """
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.subscriber_identifier = subscriber_identifier
        self.spatial_unit = (
            make_spatial_unit("admin", level=3)
            if spatial_unit is None
            else spatial_unit
        )

        self.statistic = statistic.lower()
        if self.statistic not in valid_stats:
            message = "{} is not a valid statistic. Use one of {}".format(
                self.statistic, valid_stats
            )
            raise ValueError(message)

        wanted_columns = [
            "id",
            self.subscriber_identifier,
            "msisdn_counterpart",
            "outgoing",
            "duration",
            "location_id",
            "datetime",
        ]
        calls = EventsTablesUnion(
            self.start,
            self.stop,
            tables="events.calls",
            columns=wanted_columns,
            hours=hours,
            subscriber_subset=subscriber_subset,
            subscriber_identifier=self.subscriber_identifier,
        )
        located_calls = location_joined_query(
            calls,
            spatial_unit=self.spatial_unit,
            time_col="datetime",
        )

        # Pair each outgoing ("t") row with the non-outgoing ("f") row that
        # shares its id; columns from the latter get a "_counterpart" suffix.
        outgoing_side = located_calls.subset("outgoing", "t")
        counterpart_side = located_calls.subset("outgoing", "f")
        self.joined = outgoing_side.join(
            counterpart_side,
            on_left="id",
            on_right="id",
            right_append="_counterpart",
            how="left",
        )
        warnings.warn("This query is considerably slower than the other variants.")
        super().__init__()
Esempio n. 14
0
    def __init__(
        self,
        start: str,
        stop: str,
        *,
        spatial_unit: Optional[LonLatSpatialUnit] = None,
        departure_rate: Union[pd.DataFrame, float] = 0.1,
        hours: Union[str, Tuple[int, int]] = "all",
        method: str = "last",
        table: Union[str, List[str]] = "all",
        subscriber_identifier: str = "msisdn",
        subscriber_subset: Optional[Query] = None,
    ):
        """
        Warn that the model is experimental, normalise ``departure_rate`` and
        build the distance matrix, population and population-buffer queries.

        Parameters
        ----------
        start, stop
            Bounds of the period; stored after ``standardise_date``. One
            ``daily_location`` is built per entry of
            ``list_of_dates(self.start, self.stop)``.
        spatial_unit
            Spatial unit to use. When ``None``, a "versioned-site" unit is
            created with ``make_spatial_unit``.
        departure_rate : float or pandas.DataFrame, default 0.1
            A float is stored as given. For a dataframe, every column except
            "rate" gets a "_from" suffix, each column is sorted, and the
            columns are put in sorted order.
        hours, method, table, subscriber_identifier, subscriber_subset
            Forwarded to ``daily_location``.

        Raises
        ------
        TypeError
            If ``departure_rate`` is neither a dataframe nor a float.
        """
        warnings.warn(
            "The PopulationWeightedOpportunities model is currently **experimental**. "
            "Please review Yan X-Y et al. "
            "(http://dx.doi.org/10.1098/rsif.2014.0834) "
            "before using this model in production."
        )

        if isinstance(departure_rate, pd.DataFrame):
            # Rename the columns to match what we'll join to
            renamed = departure_rate.rename(
                columns=lambda name: name if name == "rate" else f"{name}_from"
            )
            # sort the dataframe so we'll have a consistent md5
            # NOTE(review): each column is sorted on its own, so values in one
            # row need not stay together - confirm this is intended.
            value_sorted = renamed.apply(lambda column: column.sort_values().values)
            self.departure_rate = value_sorted.reindex(
                columns=sorted(value_sorted.columns)
            )
        elif isinstance(departure_rate, float):
            self.departure_rate = departure_rate
        else:
            raise TypeError(f"{departure_rate} must be a float or dataframe")

        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.spatial_unit = (
            make_spatial_unit("versioned-site")
            if spatial_unit is None
            else spatial_unit
        )
        self.distance_matrix = DistanceMatrix(
            spatial_unit=self.spatial_unit, return_geometry=True
        )

        # One daily location per date, combined through ModalLocation and
        # then aggregated.
        daily_locations = [
            daily_location(
                day,
                spatial_unit=self.spatial_unit,
                hours=hours,
                method=method,
                table=table,
                subscriber_identifier=subscriber_identifier,
                ignore_nulls=True,
                subscriber_subset=subscriber_subset,
            )
            for day in list_of_dates(self.start, self.stop)
        ]
        self.population_object = ModalLocation(*daily_locations).aggregate()

        self.population_buffer_object = _PopulationBuffer(
            population_object=self.population_object,
            distance_matrix=self.distance_matrix,
        )
Esempio n. 15
0
    def __init__(
        self,
        start=None,
        stop=None,
        *,
        table="all",
        total_by="day",
        network_object: AnySpatialUnit = make_spatial_unit("cell"),
        spatial_unit: Optional[AnySpatialUnit] = None,
        hours: Optional[Tuple[int, int]] = None,
        subscriber_subset=None,
        subscriber_identifier="msisdn",
    ):
        """
        Resolve the date range and table name, check the spatial units and
        build the doubly location-joined events query.

        Parameters
        ----------
        start, stop
            Bounds of the period. When ``None``, ``get_db().min_date`` /
            ``get_db().max_date`` for ``table`` is used instead. Stored after
            ``standardise_date``.
        table
            A string is lower-cased and, unless it is "all" or already starts
            with "events", prefixed with "events."; other values are stored
            as given.
        total_by : str, default "day"
            Lower-cased, stored, and checked against ``valid_periods``.
        network_object
            Must satisfy the "is_network_object" criterion.
        spatial_unit
            Must not satisfy the "is_network_object" criterion. When ``None``,
            an "admin" unit at level 0 is created with ``make_spatial_unit``.
        hours, subscriber_subset, subscriber_identifier
            Forwarded to ``EventsTablesUnion``.

        Raises
        ------
        ValueError
            If the lower-cased ``total_by`` is not in ``valid_periods``.
        """
        if start is None:
            start = get_db().min_date(table=table)
        self.start = standardise_date(start)
        if stop is None:
            stop = get_db().max_date(table=table)
        self.stop = standardise_date(stop)

        if isinstance(table, str):
            lowered = table.lower()
            needs_prefix = lowered != "all" and not lowered.startswith("events")
            self.table = "events.{}".format(lowered) if needs_prefix else lowered
        else:
            self.table = table

        network_object.verify_criterion("is_network_object")
        self.network_object = network_object

        self.spatial_unit = (
            make_spatial_unit("admin", level=0)
            if spatial_unit is None
            else spatial_unit
        )
        # No sense in aggregating network object to network object
        self.spatial_unit.verify_criterion("is_network_object", negate=True)

        raw_events = EventsTablesUnion(
            self.start,
            self.stop,
            tables=self.table,
            columns=["location_id", "datetime"],
            hours=hours,
            subscriber_subset=subscriber_subset,
            subscriber_identifier=subscriber_identifier,
        )
        # First attach the network object, then the aggregation spatial unit.
        events = location_joined_query(
            raw_events,
            spatial_unit=self.network_object,
            time_col="datetime",
        )
        self.joined = location_joined_query(
            events,
            spatial_unit=self.spatial_unit,
            time_col="datetime",
        )

        self.total_by = total_by.lower()
        if self.total_by not in valid_periods:
            message = "{} is not a valid total_by value.".format(self.total_by)
            raise ValueError(message)

        super().__init__()
Esempio n. 16
0
    def __init__(self, start_date, end_date):
        """
        Store both dates as parsed values and as standardised strings,
        together with the day after the end date.

        Parameters
        ----------
        start_date, end_date
            Each is passed to both ``parse_datestring`` and
            ``standardise_date``.
        """
        parsed_start = parse_datestring(start_date)
        parsed_end = parse_datestring(end_date)
        self.start_date, self.end_date = parsed_start, parsed_end
        self.start_date_as_str = standardise_date(start_date)
        self.end_date_as_str = standardise_date(end_date)

        # Parsed end date plus one day, in both representations.
        day_after_end = parsed_end + dt.timedelta(days=1)
        self.one_day_past_end_date = day_after_end
        self.one_day_past_end_date_as_str = standardise_date(day_after_end)
Esempio n. 17
0
    def __init__(
        self,
        *,
        subscriber_locations: SubscriberLocations,
        reference_location: Union[BaseLocation, Tuple[float, float]] = (0, 0),
        statistic: str = "avg",
        time_bucket: str = "day",
    ):
        """
        Validate the arguments and prepare the query the distances are
        computed from.

        Parameters
        ----------
        subscriber_locations : SubscriberLocations
            Its spatial unit must satisfy the "has_geography" and
            "has_lon_lat_columns" criteria. Its ``start`` and ``stop`` are
            stored after ``standardise_date``.
        reference_location : BaseLocation or tuple, default (0, 0)
            For a tuple, ``subscriber_locations`` is used directly. For a
            ``BaseLocation`` with the same spatial unit, it is joined to
            ``subscriber_locations`` and to a ``DistanceMatrix``.
        statistic : str, default "avg"
            Lower-cased, checked against ``valid_stats``, and stored.
        time_bucket : str, default "day"
            Lower-cased, checked against ``valid_time_buckets``, and stored
            as ``self.aggregate_by``.

        Raises
        ------
        ValueError
            If ``time_bucket`` or ``statistic`` is not recognised, if
            ``reference_location`` has an unsupported type, or if its spatial
            unit differs from that of ``subscriber_locations``.
        """
        subscriber_locations.spatial_unit.verify_criterion("has_geography")
        subscriber_locations.spatial_unit.verify_criterion("has_lon_lat_columns")
        self.spatial_unit = subscriber_locations.spatial_unit

        bucket = time_bucket.lower()
        if bucket not in valid_time_buckets:
            raise ValueError(
                f"'{time_bucket}' is not a valid value for time_bucket. Use one of {valid_time_buckets}"
            )
        self.aggregate_by = bucket

        chosen_statistic = statistic.lower()
        if chosen_statistic not in valid_stats:
            raise ValueError(
                f"'{statistic}' is not a valid statistic. Use one of {valid_stats}"
            )
        self.statistic = chosen_statistic

        self.start = standardise_date(subscriber_locations.start)
        self.stop = standardise_date(subscriber_locations.stop)

        if not isinstance(reference_location, (tuple, BaseLocation)):
            raise ValueError(
                "Argument 'reference_location' should be an instance of BaseLocation class or a tuple of two floats. "
                f"Got: {type(reference_location).__name__}"
            )

        if isinstance(reference_location, tuple):
            # Fixed point: no join is needed.
            self.reference_location = reference_location
            self.joined = subscriber_locations
        else:
            if reference_location.spatial_unit != subscriber_locations.spatial_unit:
                raise ValueError(
                    "reference_location must have the same spatial unit as subscriber_locations."
                )
            self.reference_location = reference_location
            # Reference columns get "_from", subscriber location columns
            # "_to"; the suffixed location id columns are the left-hand join
            # keys against the distance matrix.
            distance_keys = [
                f"{col}_{direction}"
                for direction in ("from", "to")
                for col in self.spatial_unit.location_id_columns
            ]
            paired_locations = reference_location.join(
                other=subscriber_locations,
                on_left=["subscriber"],
                left_append="_from",
                right_append="_to",
            )
            self.joined = paired_locations.join(
                DistanceMatrix(spatial_unit=self.spatial_unit),
                on_left=distance_keys,
                right_append="_dist",
                how="left outer",
            )

        super().__init__()
    def __init__(
        self,
        start,
        stop,
        statistic="avg",
        *,
        hours="all",
        tables="all",
        direction: Union[str, Direction] = Direction.BOTH,
        subscriber_subset=None,
        exclude_self_calls=True,
    ):
        """
        Validate the statistic and build two ``EventsTablesUnion`` queries,
        one keyed on ``msisdn`` and one on ``msisdn_counterpart``, plus a
        ``DistanceMatrix``.

        Parameters
        ----------
        start, stop
            Bounds of the period; stored after ``standardise_date``.
        statistic : str, default "avg"
            Lower-cased, stored, and checked against ``valid_stats``.
        hours, tables
            Stored and forwarded to both unions.
        direction
            Converted with ``Direction(...)`` and stored.
        subscriber_subset
            Forwarded to both unions.
        exclude_self_calls : bool, default True
            Stored as given.

        Raises
        ------
        ValueError
            If the lower-cased statistic is not in ``valid_stats``.
        """
        self.tables = tables
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.hours = hours
        self.direction = Direction(direction)
        self.exclude_self_calls = exclude_self_calls

        self.statistic = statistic.lower()
        if self.statistic not in valid_stats:
            message = "{} is not a valid statistic. Use one of {}".format(
                self.statistic, valid_stats
            )
            raise ValueError(message)

        wanted_columns = [
            "msisdn",
            "msisdn_counterpart",
            "id",
            "location_id",
            "outgoing",
        ]

        # EventsTablesUnion will only subset on the subscriber identifier,
        # which means that we need to query for a unioned table twice. That has
        # a considerable negative impact on execution time.
        shared_options = dict(
            columns=wanted_columns,
            tables=self.tables,
            hours=hours,
            subscriber_subset=subscriber_subset,
        )
        self.unioned_from_query = EventsTablesUnion(
            self.start,
            self.stop,
            subscriber_identifier="msisdn",
            **shared_options,
        )
        self.unioned_to_query = EventsTablesUnion(
            self.start,
            self.stop,
            subscriber_identifier="msisdn_counterpart",
            **shared_options,
        )

        self.distance_matrix = DistanceMatrix()

        super().__init__()
Esempio n. 19
0
    def __init__(
        self,
        start,
        stop,
        proportion=0.8,
        *,
        direction: Union[str, Direction] = Direction.BOTH,
        tables="all",
        subscriber_identifier="msisdn",
        hours: Optional[Tuple[int, int]] = None,
        exclude_self_calls=False,
        subscriber_subset=None,
    ):
        """
        Validate ``proportion`` and build the ``ContactBalance`` and
        ``SubscriberDegree`` sub-queries.

        Parameters
        ----------
        start, stop
            Bounds of the period; stored after ``standardise_date``.
        proportion : float, default 0.8
            Must lie strictly between 0 and 1.
        direction
            Converted with ``Direction(...)`` and stored.
        tables, subscriber_identifier, hours, exclude_self_calls
            Stored and forwarded to both sub-queries.
        subscriber_subset
            Forwarded to both sub-queries.

        Raises
        ------
        ValueError
            If ``proportion`` is not strictly between 0 and 1.
        """
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.hours = hours
        self.direction = Direction(direction)
        self.tables = tables
        self.subscriber_identifier = subscriber_identifier
        self.exclude_self_calls = exclude_self_calls

        if not 0 < proportion < 1:
            raise ValueError(
                "{} is not a valid proportion.".format(proportion))
        self.proportion = proportion

        # Both sub-queries are built from the same set of options.
        shared_options = dict(
            hours=self.hours,
            tables=self.tables,
            subscriber_identifier=self.subscriber_identifier,
            direction=self.direction,
            exclude_self_calls=self.exclude_self_calls,
            subscriber_subset=subscriber_subset,
        )
        self.contact_balance = ContactBalance(
            self.start, self.stop, **shared_options
        )
        self.subscriber_degree = SubscriberDegree(
            self.start, self.stop, **shared_options
        )

        self._cols = ["subscriber", "pareto"]

        super().__init__()
Esempio n. 20
0
    def __init__(
        self,
        start: str,
        stop: str,
        *,
        table: Union[None, List[str]] = None,
        spatial_unit: AnySpatialUnit = make_spatial_unit("cell"),
        interval: str = "hour",
        direction: Union[str, Direction] = Direction.BOTH,
        hours: Optional[Tuple[int, int]] = None,
        subscriber_subset=None,
        subscriber_identifier="msisdn",
    ):
        """
        Validate ``interval``, prepare the time columns and build the
        location-joined events union.

        Parameters
        ----------
        start, stop
            Bounds of the period; stored after ``standardise_date``.
        table
            Stored and forwarded to ``EventsTablesUnion`` as ``tables``.
        spatial_unit
            Stored and used for ``location_joined_query``.
        interval : str, default "hour"
            Must be one of ``self.allowed_intervals``. "hour" and "min" add
            an hour column to ``self.time_cols``; "min" also adds a minute
            column.
        direction
            Converted with ``Direction(...)``; its ``required_columns`` are
            added to the requested columns.
        hours, subscriber_subset, subscriber_identifier
            Forwarded to ``EventsTablesUnion``.

        Raises
        ------
        ValueError
            If ``interval`` is not in ``self.allowed_intervals``.
        """
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.table = table
        self.spatial_unit = spatial_unit
        self.interval = interval
        self.direction = Direction(direction)

        if self.interval not in self.allowed_intervals:
            message = "'Interval must be one of: {} got: {}".format(
                self.allowed_intervals, self.interval
            )
            raise ValueError(message)

        # The date is always selected; finer intervals add further parts.
        time_columns = ["(datetime::date)::text AS date"]
        if self.interval in ("hour", "min"):
            time_columns.append("extract(hour FROM datetime) AS hour")
        if self.interval == "min":
            time_columns.append("extract(minute FROM datetime) AS min")
        self.time_cols = time_columns

        # if we need to filter on outgoing/incoming calls, we will also fetch this
        # column. Don't fetch it if it is not needed for both efficiency and the
        # possibility that we might want to do pass another data type which does not
        # have this information.
        wanted_columns = [
            "location_id",
            "datetime",
            subscriber_identifier,
            *self.direction.required_columns,
        ]

        events = EventsTablesUnion(
            self.start,
            self.stop,
            tables=self.table,
            columns=wanted_columns,
            hours=hours,
            subscriber_subset=subscriber_subset,
            subscriber_identifier=subscriber_identifier,
        )
        self.unioned = location_joined_query(
            events,
            spatial_unit=self.spatial_unit,
            time_col="datetime",
        )
        super().__init__()
Esempio n. 21
0
    def __init__(
        self,
        start,
        stop,
        *,
        subscriber_identifier="msisdn",
        direction: Union[str, Direction] = Direction.OUT,
        statistic="sum",
        spatial_unit: Optional[AnySpatialUnit] = None,
        hours="all",
        subscriber_subset=None,
    ):
        """
        Validate the statistic and build a location-joined union of
        ``events.calls``.

        Parameters
        ----------
        start, stop
            Bounds of the period; stored after ``standardise_date``.
        subscriber_identifier
            Stored, requested as a column and forwarded to the union.
        direction
            Converted with ``Direction(...)``; its ``required_columns`` are
            added to the requested columns.
        statistic : str, default "sum"
            Lower-cased, stored, and checked against ``valid_stats``.
        spatial_unit
            Spatial unit to use. When ``None``, an "admin" unit at level 3 is
            created with ``make_spatial_unit``.
        hours, subscriber_subset
            Forwarded to ``EventsTablesUnion``.

        Raises
        ------
        ValueError
            If the lower-cased statistic is not in ``valid_stats``.
        """
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.subscriber_identifier = subscriber_identifier
        self.direction = Direction(direction)
        self.spatial_unit = (
            make_spatial_unit("admin", level=3)
            if spatial_unit is None
            else spatial_unit
        )

        self.statistic = statistic.lower()
        if self.statistic not in valid_stats:
            message = "{} is not a valid statistic. Use one of {}".format(
                self.statistic, valid_stats
            )
            raise ValueError(message)

        wanted_columns = [
            self.subscriber_identifier,
            "msisdn_counterpart",
            "duration",
            "location_id",
            "datetime",
        ]
        wanted_columns.extend(self.direction.required_columns)

        calls = EventsTablesUnion(
            self.start,
            self.stop,
            tables="events.calls",
            columns=wanted_columns,
            hours=hours,
            subscriber_subset=subscriber_subset,
            subscriber_identifier=self.subscriber_identifier,
        )
        self.unioned_query = location_joined_query(
            calls,
            spatial_unit=self.spatial_unit,
            time_col="datetime",
        )
        super().__init__()
Esempio n. 22
0
    def __init__(
        self,
        start,
        stop,
        statistic="avg",
        *,
        spatial_unit: AnySpatialUnit = make_spatial_unit("cell"),
        hours="all",
        tables="all",
        subscriber_identifier="msisdn",
        direction: Union[str, Direction] = Direction.BOTH,
        subscriber_subset=None,
    ):
        """
        Validate the statistic and build the location-joined events union.

        Parameters
        ----------
        start, stop
            Bounds of the period; stored after ``standardise_date``.
        statistic : str, default "avg"
            Lower-cased, stored, and checked against ``valid_stats``, so the
            check is case-insensitive.
        spatial_unit
            Stored and used for ``location_joined_query``.
        hours, tables, subscriber_identifier
            Stored and forwarded to ``EventsTablesUnion``.
        direction
            Converted with ``Direction(...)``; its ``required_columns`` are
            added to the requested columns.
        subscriber_subset
            Forwarded to ``EventsTablesUnion`` only.

        Raises
        ------
        ValueError
            If the lower-cased statistic is not in ``valid_stats``.
        """
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.spatial_unit = spatial_unit
        self.hours = hours
        self.tables = tables
        self.subscriber_identifier = subscriber_identifier
        self.direction = Direction(direction)
        # Normalise case before validating, so that e.g. "AVG" is accepted.
        self.statistic = statistic.lower()

        if self.statistic not in valid_stats:
            raise ValueError(
                "{} is not a valid statistic. Use one of {}".format(
                    self.statistic, valid_stats))

        column_list = [
            self.subscriber_identifier,
            "location_id",
            "datetime",
            *self.direction.required_columns,
        ]

        self.unioned_query = location_joined_query(
            EventsTablesUnion(
                self.start,
                self.stop,
                tables=self.tables,
                columns=column_list,
                hours=hours,
                subscriber_identifier=subscriber_identifier,
                subscriber_subset=subscriber_subset,
            ),
            spatial_unit=self.spatial_unit,
            time_col="datetime",
        )

        super().__init__()
Esempio n. 23
0
    def __init__(
        self,
        start: str,
        total_periods: int,
        period_length: int = 1,
        period_unit: str = "days",
        hours: Union[str, Tuple[int, int]] = "all",
        table: Union[str, List[str]] = "all",
        subscriber_identifier: str = "msisdn",
        subscriber_subset: Optional[Query] = None,
    ):
        """
        Store the period definition, derive the period boundaries and build
        the table of unique subscribers per period.

        Parameters
        ----------
        start
            Start of the first period; stored after ``standardise_date``.
        total_periods, period_length
            Stored as given; their product is the offset passed to
            ``time_period_add`` to compute ``self.stop_date``.
        period_unit : str, default "days"
            Must be one of ``self.allowed_units``.
        hours, table, subscriber_identifier, subscriber_subset
            Forwarded to ``self._get_unioned_subscribers_list``.

        Raises
        ------
        ValueError
            If ``period_unit`` is not in ``self.allowed_units``.
        """
        self.start = standardise_date(start)
        self.total_periods = total_periods
        self.period_length = period_length

        unit_is_allowed = period_unit in self.allowed_units
        if not unit_is_allowed:
            message = "`period_unit` must be one of {}".format(self.allowed_units)
            raise ValueError(message)
        self.period_unit = period_unit

        period_bounds = self._get_start_stops()
        self.starts, self.stops = period_bounds

        # For convenience also store when the whole thing ends
        # NOTE(review): period_unit is not passed to time_period_add here -
        # confirm the helper's default unit is right for non-"days" periods.
        total_span = self.total_periods * self.period_length
        self.stop_date = time_period_add(self.start, total_span)

        # This will be a long form table of unique subscribers in each time period
        # i.e. a subscriber can appear more than once in this list, up to a maximum
        # of the total time periods.
        self.unique_subscribers_table = self._get_unioned_subscribers_list(
            hours=hours,
            table=table,
            subscriber_identifier=subscriber_identifier,
            subscriber_subset=subscriber_subset,
        )

        super().__init__()
    def __init__(
        self,
        start,
        stop,
        numerator,
        *,
        numerator_direction: Union[str, Direction] = Direction.BOTH,
        subscriber_identifier="msisdn",
        direction: Union[str, Direction] = Direction.BOTH,
        hours: Optional[Tuple[int, int]] = None,
        subscriber_subset=None,
        tables="all",
    ):
        """
        Build the numerator and denominator `EventCount` queries.

        Parameters
        ----------
        start, stop
            Bounds of the period; both are stored after `standardise_date`.
        numerator
            Table, or list of tables, counted by the numerator query. A
            non-list value is wrapped in a one-element list.
        numerator_direction : str or Direction, default Direction.BOTH
            Direction used by the numerator query.
        subscriber_identifier : default "msisdn"
            Identifier passed to both event counts.
        direction : str or Direction, default Direction.BOTH
            Direction used by the denominator query.
        hours : tuple of int, optional
            Passed to both event counts.
        subscriber_subset : optional
            Passed to both event counts.
        tables : default "all"
            Tables counted by the denominator query.
        """
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.subscriber_identifier = subscriber_identifier
        self.direction = Direction(direction)
        self.numerator_direction = Direction(numerator_direction)
        self.hours = hours
        self.tables = tables

        # Always hold the numerator tables as a list.
        if isinstance(numerator, list):
            self.numerator = numerator
        else:
            self.numerator = [numerator]

        # Arguments common to the two event counts.
        common_options = dict(
            subscriber_identifier=self.subscriber_identifier,
            hours=self.hours,
            subscriber_subset=subscriber_subset,
        )

        self.numerator_query = EventCount(
            self.start,
            self.stop,
            direction=self.numerator_direction,
            tables=self.numerator,
            **common_options,
        )
        self.denominator_query = EventCount(
            self.start,
            self.stop,
            direction=self.direction,
            tables=self.tables,
            **common_options,
        )

        super().__init__()
Esempio n. 25
0
    def __init__(
        self,
        start,
        stop,
        characteristic,
        hours="all",
        table="all",
        subscriber_identifier="msisdn",
        method="most-common",
        subscriber_subset=None,
    ):
        """
        Validate the characteristic and build the underlying handset query.

        Parameters
        ----------
        start, stop
            Bounds of the period; stored after `standardise_date`, while the
            values as given are what is passed to the handset query.
        characteristic
            Must be a member of `valid_characteristics`.
        hours, table, subscriber_identifier, subscriber_subset
            Passed on to the handset query.
        method : default "most-common"
            "most-common" selects `SubscriberHandsets`; any other value
            selects `SubscriberHandset`, which also receives `method`.

        Raises
        ------
        ValueError
            If `characteristic` is not in `valid_characteristics`.
        """
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.hours = hours
        self.table = table
        self.subscriber_identifier = subscriber_identifier
        self.characteristic = characteristic

        characteristic_ok = self.characteristic in valid_characteristics
        if not characteristic_ok:
            raise ValueError("{} is not a valid characteristic.".format(characteristic))

        # Keyword arguments that both handset query classes accept.
        handset_options = dict(
            hours=hours,
            table=table,
            subscriber_identifier=subscriber_identifier,
            subscriber_subset=subscriber_subset,
        )
        if method != "most-common":
            self.subscriber_handsets = SubscriberHandset(
                start, stop, method=method, **handset_options)
        else:
            self.subscriber_handsets = SubscriberHandsets(
                start, stop, **handset_options)

        self.method = method
        super().__init__()
Esempio n. 26
0
    def __init__(
        self,
        start,
        stop,
        volume="total",
        statistic="sum",
        *,
        subscriber_identifier="msisdn",
        hours: Optional[Tuple[int, int]] = None,
        subscriber_subset=None,
    ):
        """
        Validate the inputs and build the `events.mds` union query.

        Parameters
        ----------
        start, stop
            Bounds of the period; both are stored after `standardise_date`.
        volume : {"total", "upload", "download"}, default "total"
            Selects the `volume_<volume>` column.
        statistic : str, default "sum"
            Lower-cased, then checked against `valid_stats`.
        subscriber_identifier : default "msisdn"
            Identifier column requested from the events table.
        hours : tuple of int, optional
            Passed to `EventsTablesUnion`.
        subscriber_subset : optional
            Passed to `EventsTablesUnion`.

        Raises
        ------
        ValueError
            If the statistic is not in `valid_stats`, or the volume is not one
            of "total", "upload" or "download".
        """
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.subscriber_identifier = subscriber_identifier
        self.hours = hours
        self.volume = volume
        self.statistic = statistic.lower()
        # This query only ever reads from the MDS events table.
        self.tables = "events.mds"

        statistic_ok = self.statistic in valid_stats
        if not statistic_ok:
            message = "{} is not a valid statistic. Use one of {}".format(
                self.statistic, valid_stats
            )
            raise ValueError(message)

        permitted_volumes = {"total", "upload", "download"}
        if self.volume not in permitted_volumes:
            raise ValueError(f"{self.volume} is not a valid volume.")

        volume_column = f"volume_{self.volume}"
        union_options = dict(
            tables=self.tables,
            columns=[self.subscriber_identifier, volume_column],
            hours=hours,
            subscriber_identifier=subscriber_identifier,
            subscriber_subset=subscriber_subset,
        )
        self.unioned_query = EventsTablesUnion(
            self.start, self.stop, **union_options)

        super().__init__()
Esempio n. 27
0
    def __init__(
        self,
        start: str,
        stop: str,
        statistic: str = "avg",
        *,
        hours: Union[str, Tuple[int, int]] = "all",
        tables: Union[str, List[str]] = "all",
        subscriber_identifier: str = "msisdn",
        subscriber_subset: Optional[Query] = None,
        direction: Union[str, Direction] = Direction.OUT,
    ):
        """
        Validate the statistic and build the events union query.

        Parameters
        ----------
        start, stop : str
            Bounds of the period; both are stored after `standardise_date`.
        statistic : str, default "avg"
            Lower-cased, then checked against `valid_stats`.
        hours : str or tuple of int, default "all"
            Passed to `EventsTablesUnion`.
        tables : str or list of str, default "all"
            Passed to `EventsTablesUnion`.
        subscriber_identifier : str, default "msisdn"
            Identifier column requested from the events tables.
        subscriber_subset : Query, optional
            Passed to `EventsTablesUnion`.
        direction : str or Direction, default Direction.OUT
            Converted to a `Direction`; its `required_columns` are added to
            the requested columns.

        Raises
        ------
        ValueError
            If the lower-cased statistic is not in `valid_stats`.
        """
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.hours = hours
        self.tables = tables
        self.subscriber_identifier = subscriber_identifier
        self.direction = Direction(direction)

        # Identifier and timestamp, followed by whatever the direction needs.
        wanted_columns = [self.subscriber_identifier, "datetime"]
        wanted_columns.extend(self.direction.required_columns)

        self.statistic = statistic.lower()
        statistic_ok = self.statistic in valid_stats
        if not statistic_ok:
            message = "{} is not a valid statistic. Use one of {}".format(
                self.statistic, valid_stats)
            raise ValueError(message)

        union_options = dict(
            tables=self.tables,
            columns=wanted_columns,
            hours=self.hours,
            subscriber_identifier=self.subscriber_identifier,
            subscriber_subset=subscriber_subset,
        )
        self.unioned_query = EventsTablesUnion(
            self.start, self.stop, **union_options)
        super().__init__()
Esempio n. 28
0
    def __init__(
        self,
        start,
        stop,
        *,
        location,
        spatial_unit: AnySpatialUnit = make_spatial_unit("cell"),
        hours="all",
        table="all",
        subscriber_identifier="msisdn",
        ignore_nulls=True,
        subscriber_subset=None,
    ):
        """
        Check the location/spatial unit pairing and build the locations query.

        Parameters
        ----------
        start, stop
            Bounds of the period; both are stored after `standardise_date`.
        location
            Location of interest. The value "any" is only accepted together
            with the cell spatial unit.
        spatial_unit : AnySpatialUnit, default make_spatial_unit("cell")
            Spatial unit passed to `SubscriberLocations`.
        hours, table, subscriber_identifier, ignore_nulls, subscriber_subset
            Passed on to `SubscriberLocations`.

        Raises
        ------
        ValueError
            If `location` is "any" and `spatial_unit` is not the cell unit.
        """
        if location == "any":
            # The cell unit is only constructed when it is needed for the check.
            if spatial_unit != make_spatial_unit("cell"):
                raise ValueError(
                    "Invalid parameter combination: location='any' can only be used with cell spatial unit."
                )

        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.location = location

        location_options = dict(
            spatial_unit=spatial_unit,
            hours=hours,
            table=table,
            subscriber_identifier=subscriber_identifier,
            ignore_nulls=ignore_nulls,
            subscriber_subset=subscriber_subset,
        )
        self.ul = SubscriberLocations(
            self.start, self.stop, **location_options)

        # Mirror these two settings from the locations query.
        locations_query = self.ul
        self.table = locations_query.table
        self.subscriber_identifier = locations_query.subscriber_identifier

        super().__init__()
Esempio n. 29
0
    def __init__(
        self,
        start,
        stop,
        *,
        min_calls,
        subscriber_identifier="msisdn",
        direction: Union[str, Direction] = Direction.BOTH,
        spatial_unit: Optional[AnySpatialUnit] = None,
        hours: Optional[Tuple[int, int]] = None,
        subscriber_subset=None,
    ):
        """
        Build a per-location count query and subset it on `min_calls`.

        Parameters
        ----------
        start, stop
            Bounds of the period; both are stored after `standardise_date`.
        min_calls
            Lower bound applied to the "value" column of the count query.
        subscriber_identifier : default "msisdn"
            Passed to `PerLocationSubscriberCallDurations`.
        direction : str or Direction, default Direction.BOTH
            Converted to a `Direction` before use.
        spatial_unit : AnySpatialUnit, optional
            When None, `make_spatial_unit("admin", level=3)` is used.
        hours : tuple of int, optional
            Passed to `PerLocationSubscriberCallDurations`.
        subscriber_subset : optional
            Passed to `PerLocationSubscriberCallDurations`.
        """
        # Imported inside the method rather than at module level.
        from ...features import PerLocationSubscriberCallDurations

        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.min_calls = min_calls
        self.subscriber_identifier = subscriber_identifier
        self.direction = Direction(direction)
        self.spatial_unit = (
            make_spatial_unit("admin", level=3)
            if spatial_unit is None
            else spatial_unit
        )

        count_options = dict(
            start=self.start,
            stop=self.stop,
            subscriber_identifier=self.subscriber_identifier,
            direction=self.direction,
            spatial_unit=self.spatial_unit,
            statistic="count",
            hours=hours,
            subscriber_subset=subscriber_subset,
        )
        self.pslds = PerLocationSubscriberCallDurations(**count_options)

        # Keep rows whose "value" lies between `min_calls` and infinity.
        self.pslds_subset = self.pslds.numeric_subset(
            "value", low=self.min_calls, high=inf)

        super().__init__()
Esempio n. 30
0
    def __init__(
        self,
        start,
        stop,
        *,
        hours: Optional[Tuple[int, int]] = None,
        tables="all",
        exclude_self_calls=True,
        subscriber_subset=None,
    ):
        """
        Build the incoming and outgoing `ContactBalance` queries.

        Parameters
        ----------
        start, stop
            Bounds of the period; both are stored after `standardise_date`.
        hours : tuple of int, optional
            Passed to both contact balance queries.
        tables : default "all"
            Passed to both contact balance queries.
        exclude_self_calls : bool, default True
            Passed to both contact balance queries.
        subscriber_subset : optional
            Passed to both contact balance queries.
        """
        self.start = standardise_date(start)
        self.stop = standardise_date(stop)
        self.hours = hours
        self.exclude_self_calls = exclude_self_calls
        # Assigned once; the original set `self.tables` twice to the same value.
        self.tables = tables

        # Both queries are fixed to the "msisdn" identifier and differ only in
        # direction.
        self.contact_in_query = ContactBalance(
            self.start,
            self.stop,
            hours=self.hours,
            tables=self.tables,
            subscriber_identifier="msisdn",
            direction=Direction.IN,
            exclude_self_calls=self.exclude_self_calls,
            subscriber_subset=subscriber_subset,
        )

        self.contact_out_query = ContactBalance(
            self.start,
            self.stop,
            hours=self.hours,
            tables=self.tables,
            subscriber_identifier="msisdn",
            direction=Direction.OUT,
            exclude_self_calls=self.exclude_self_calls,
            subscriber_subset=subscriber_subset,
        )

        super().__init__()