def test_total_location_events_column_names(exemplar_level_param, interval,
                                            direction):
    """The column_names property of TotalLocationEvents must agree with head(0)."""
    query = TotalLocationEvents(
        "2016-01-01",
        "2016-01-04",
        **exemplar_level_param,
        interval=interval,
        direction=direction,
    )
    # head(0) is used purely for its column labels.
    fetched_columns = query.head(0).columns.tolist()
    assert fetched_columns == query.column_names
 def test_only_incoming(self):
     """
     TotalLocationEvents() with direction="in" yields the expected total
     for one known site, date and hour.
     """
     query = TotalLocationEvents(
         "2016-01-01",
         "2016-01-04",
         level="versioned-site",
         direction="in",
     )
     frame = query.get_dataframe()
     self.assertIs(type(frame), DataFrame)
     # Dates are converted to strings so they can be matched against a literal.
     frame.date = frame.date.astype(str)
     selector = (
         (frame.date == "2016-01-01")
         & (frame.site_id == "6qpN0p")
         & (frame.hour == 0)
     )
     # Spot-check the first matching row only.
     first_total = list(frame[selector].total)[0]
     self.assertEqual(first_total, 2)
 def test_ignore_texts(self):
     """
     TotalLocationEvents() restricted to the "events.calls" table yields the
     expected total for one known site, date and hour.
     """
     query = TotalLocationEvents(
         "2016-01-01",
         "2016-01-04",
         level="versioned-site",
         table="events.calls",
     )
     frame = query.get_dataframe()
     self.assertIs(type(frame), DataFrame)
     # Dates are converted to strings so they can be matched against a literal.
     frame.date = frame.date.astype(str)
     selector = (
         (frame.date == "2016-01-01")
         & (frame.site_id == "0xqNDj")
         & (frame.hour == 3)
     )
     # Spot-check the first matching row only.
     first_total = list(frame[selector].total)[0]
     self.assertEqual(first_total, 3)
def test_bad_interval_raises_error():
    """Constructing TotalLocationEvents with an unknown interval raises ValueError."""
    bad_kwargs = dict(level="versioned-site", interval="BAD_INTERVAL")
    with pytest.raises(ValueError):
        TotalLocationEvents("2016-01-01", "2016-01-04", **bad_kwargs)
    def test_events_at_cell_level(self):
        """
        TotalLocationEvents() at the "versioned-site" level yields the
        expected total for one known site, date and hour.
        """
        query = TotalLocationEvents(
            "2016-01-01",
            "2016-01-04",
            level="versioned-site",
        )
        frame = query.get_dataframe()
        self.assertIs(type(frame), DataFrame)

        # Dates are converted to strings so they can be matched against a literal.
        frame.date = frame.date.astype(str)
        selector = (
            (frame.date == "2016-01-03")
            & (frame.site_id == "zArRjg")
            & (frame.hour == 17)
        )
        # Spot-check the first matching row only.
        first_total = list(frame[selector].total)[0]
        self.assertEqual(first_total, 3)
 def test_events_daily(self):
     """
     TotalLocationEvents() with interval="day" returns the expected columns
     and the expected total for one known site and date.
     """
     query = TotalLocationEvents(
         "2016-01-01",
         "2016-01-04",
         level="versioned-site",
         interval="day",
     )
     frame = query.get_dataframe()
     self.assertIs(type(frame), DataFrame)
     expected_columns = ["site_id", "version", "lon", "lat", "date", "total"]
     self.assertEqual(list(frame.columns), expected_columns)
     # Dates are converted to strings so they can be matched against a literal.
     frame.date = frame.date.astype(str)
     selector = (frame.date == "2016-01-03") & (frame.site_id == "B8OaG5")
     # Spot-check the first matching row only.
     first_total = list(frame[selector].total)[0]
     self.assertEqual(first_total, 95)
Exemple #7
0
def test_bad_direction_raises_error():
    """Constructing TotalLocationEvents with an unknown direction raises ValueError."""
    with pytest.raises(ValueError):
        # The spatial unit is built inside the context manager, exactly where
        # the constructor arguments are evaluated.
        unit = make_spatial_unit("versioned-site")
        TotalLocationEvents(
            "2016-01-01",
            "2016-01-04",
            spatial_unit=unit,
            interval="min",
            direction="BAD_DIRECTION",
        )
def test_events_daily(get_dataframe):
    """
    TotalLocationEvents() with interval="day" yields the expected total for
    one known site and date.
    """
    query = TotalLocationEvents(
        "2016-01-01",
        "2016-01-04",
        level="versioned-site",
        interval="day",
    )
    frame = get_dataframe(query)

    # Dates are converted to strings so they can be matched against a literal.
    frame.date = frame.date.astype(str)
    selector = (frame.date == "2016-01-03") & (frame.site_id == "B8OaG5")
    # Spot-check the first matching row only.
    first_total = list(frame[selector].total)[0]
    assert first_total == 95
def test_events_at_cell_level(get_dataframe):
    """
    TotalLocationEvents() at the "versioned-site" level yields the expected
    total for one known site, date and hour.
    """
    query = TotalLocationEvents(
        "2016-01-01",
        "2016-01-04",
        level="versioned-site",
    )
    frame = get_dataframe(query)

    # Dates are converted to strings so they can be matched against a literal.
    frame.date = frame.date.astype(str)
    selector = (
        (frame.date == "2016-01-03")
        & (frame.site_id == "zArRjg")
        & (frame.hour == 17)
    )
    # Spot-check the first matching row only.
    first_total = list(frame[selector].total)[0]
    assert first_total == 3
 def test_events_min(self):
     """
     TotalLocationEvents() with interval="min" returns the expected columns
     and the expected total for one known site, date, hour and minute.
     """
     query = TotalLocationEvents(
         "2016-01-01",
         "2016-01-04",
         level="versioned-site",
         interval="min",
     )
     frame = query.get_dataframe()
     self.assertIs(type(frame), DataFrame)
     expected_columns = [
         "site_id",
         "version",
         "lon",
         "lat",
         "date",
         "hour",
         "min",
         "total",
     ]
     self.assertEqual(list(frame.columns), expected_columns)
     # Dates are converted to strings so they can be matched against a literal.
     frame.date = frame.date.astype(str)
     # "min" is accessed by key because attribute access would resolve to the
     # DataFrame.min method rather than the column.
     selector = (
         (frame.date == "2016-01-03")
         & (frame.site_id == "zdNQx2")
         & (frame.hour == 15)
         & (frame["min"] == 20)
     )
     # Spot-check the first matching row only.
     first_total = list(frame[selector].total)[0]
     self.assertEqual(first_total, 1)
Exemple #11
0
def test_events_at_cell_level(get_dataframe):
    """
    TotalLocationEvents() with a "cell" spatial unit yields the expected value
    for one known location, date and hour.
    """
    query = TotalLocationEvents(
        "2016-01-01",
        "2016-01-04",
        spatial_unit=make_spatial_unit("cell"),
    )
    frame = get_dataframe(query)

    # Dates are converted to strings so they can be matched against a literal.
    frame.date = frame.date.astype(str)
    selector = (
        (frame.date == "2016-01-03")
        & (frame.location_id == "1Gc6RSfZ")
        & (frame.hour == 17)
    )
    # Spot-check the first matching row only.
    first_value = list(frame[selector].value)[0]
    assert first_value == 4
Exemple #12
0
    def _flowmachine_query_obj(self):
        """
        Build the flowmachine TotalLocationEvents query described by this
        object's attributes.

        Returns
        -------
        Query
        """
        # NOTE(review): no interval is forwarded here, so whatever default the
        # TotalLocationEvents constructor uses applies — confirm this is intended.
        query_kwargs = dict(
            start=self.start_date,
            stop=self.end_date,
            direction=self.direction,
            table=self.event_types,
            level=self.aggregation_unit,
            subscriber_subset=self.subscriber_subset,
        )
        return TotalLocationEvents(**query_kwargs)
Exemple #13
0
def test_only_incoming(get_dataframe):
    """
    TotalLocationEvents() with direction="in" yields the expected value for
    one known site, date and hour.
    """
    query = TotalLocationEvents(
        "2016-01-01",
        "2016-01-04",
        spatial_unit=make_spatial_unit("versioned-site"),
        direction="in",
    )
    frame = get_dataframe(query)
    # Dates are converted to strings so they can be matched against a literal.
    frame.date = frame.date.astype(str)
    selector = (
        (frame.date == "2016-01-01")
        & (frame.site_id == "6qpN0p")
        & (frame.hour == 0)
    )
    # Spot-check the first matching row only.
    first_value = list(frame[selector].value)[0]
    assert first_value == 2
Exemple #14
0
    def _flowmachine_query_obj(self):
        """
        Build the flowmachine query described by this object's attributes:
        a TotalLocationEvents query wrapped in RedactedTotalEvents.

        Returns
        -------
        Query
        """
        unredacted = TotalLocationEvents(
            start=self.start_date,
            stop=self.end_date,
            interval=self.interval,
            direction=self.direction,
            table=self.event_types,
            spatial_unit=self.aggregation_unit,
            subscriber_subset=self.subscriber_subset,
        )
        return RedactedTotalEvents(total_events=unredacted)
def test_events_min(get_dataframe):
    """
    TotalLocationEvents() with interval="min" yields the expected total for
    one known site, date, hour and minute.
    """
    query = TotalLocationEvents(
        "2016-01-01",
        "2016-01-04",
        level="versioned-site",
        interval="min",
    )
    frame = get_dataframe(query)

    # Dates are converted to strings so they can be matched against a literal.
    frame.date = frame.date.astype(str)
    # "min" is accessed by key because attribute access would resolve to the
    # DataFrame.min method rather than the column.
    selector = (
        (frame.date == "2016-01-03")
        & (frame.site_id == "zdNQx2")
        & (frame.hour == 15)
        & (frame["min"] == 20)
    )
    # Spot-check the first matching row only.
    first_total = list(frame[selector].total)[0]
    assert first_total == 1
Exemple #16
0
def test_ignore_texts(get_dataframe):
    """
    TotalLocationEvents() restricted to the "events.calls" table yields the
    expected value for one known site, date and hour.
    """
    query = TotalLocationEvents(
        "2016-01-01",
        "2016-01-04",
        spatial_unit=make_spatial_unit("versioned-site"),
        table="events.calls",
    )
    frame = get_dataframe(query)

    # Dates are converted to strings so they can be matched against a literal.
    frame.date = frame.date.astype(str)
    selector = (
        (frame.date == "2016-01-01")
        & (frame.site_id == "0xqNDj")
        & (frame.hour == 3)
    )
    # Spot-check the first matching row only.
    first_value = list(frame[selector].value)[0]
    assert first_value == 3
def test_all_above_threshold(get_dataframe):
    """
    RedactedTotalEvents over daily TotalLocationEvents reports only values
    greater than 15, for the same locations as the redacted subscriber counts.
    """
    daily_events = TotalLocationEvents(
        "2016-01-01",
        "2016-01-02",
        spatial_unit=make_spatial_unit("cell"),
        interval="day",
        table=["events.calls"],
    )
    redacted_events = RedactedTotalEvents(total_events=daily_events)

    subscriber_counts = UniqueSubscriberCounts(
        "2016-01-01", "2016-01-02", table=["events.calls"]
    )
    redacted_counts = RedactedUniqueSubscriberCounts(
        unique_subscriber_counts=subscriber_counts
    )
    # The subscriber counts are fetched before the event totals.
    counts_df = get_dataframe(redacted_counts)
    events_df = get_dataframe(redacted_events)

    above_threshold = events_df.value > 15
    assert all(above_threshold)
    assert set(counts_df.location_id) == set(events_df.location_id)
Exemple #18
0
def construct_query_object(query_kind, params):  # pragma: no cover
    """
    Create an instance of the appropriate subclass of flowmachine.core.query.Query

    Parameters
    ----------
    query_kind : str
        The kind of query to be constructed. Example: "daily_location".

    params : dict
        Parameters to use in the query construction. The dict is deep-copied
        before use, so the object passed in by the caller is never mutated.

    Returns
    -------
    flowmachine.core.query.Query

    Raises
    ------
    QueryProxyError
        If the query kind is unsupported, a parameter fails validation, or the
        underlying query constructor raises.
    InvalidGeographyError
        If a "geography" query is requested with an unrecognised aggregation unit.
    NotImplementedError
        If "subscriber_subset" is given as a dict.
    KeyError
        If a parameter that is looked up outside a try block is missing.
    """
    params = deepcopy(
        params
    )  # Operate on a copy to avoid mutating the passed in dict, which might change the redis lookup
    error_msg_prefix = (
        f"Error when constructing query of kind {query_kind} with parameters {params}"
    )
    try:
        subscriber_subset = params["subscriber_subset"]
        if subscriber_subset == "all":
            # "all" is translated to None before being handed to the query classes.
            params["subscriber_subset"] = None
        else:
            if isinstance(subscriber_subset, dict):
                raise NotImplementedError(
                    "Proper subsetting not implemented yet.")
            else:
                raise QueryProxyError(
                    f"{error_msg_prefix}: 'Cannot construct {query_kind} subset from given input: {subscriber_subset}'"
                )
    except KeyError:
        pass  # No subset param

    if "daily_location" == query_kind:
        date = params["date"]
        method = params["daily_location_method"]
        level = params["aggregation_unit"]
        subscriber_subset = params["subscriber_subset"]

        allowed_methods = ["last", "most-common"]
        allowed_levels = ["admin0", "admin1", "admin2", "admin3", "admin4"]

        if method not in allowed_methods:
            raise QueryProxyError(
                f"{error_msg_prefix}: 'Unrecognised method '{method}', must be one of: {allowed_methods}'"
            )

        if level not in allowed_levels:
            raise QueryProxyError(
                f"{error_msg_prefix}: 'Unrecognised level '{level}', must be one of: {allowed_levels}'"
            )

        try:
            q = daily_location(
                date=date,
                method=method,
                level=level,
                subscriber_subset=subscriber_subset,
            )
        except Exception as e:
            raise QueryProxyError(f"{error_msg_prefix}: '{e}'") from e
    elif "location_event_counts" == query_kind:
        start_date = params["start_date"]
        end_date = params["end_date"]
        interval = params["interval"]
        level = params["aggregation_unit"]
        subscriber_subset = params["subscriber_subset"]
        direction = params["direction"]
        event_types = params["event_types"]

        allowed_intervals = TotalLocationEvents.allowed_intervals
        allowed_directions = ["in", "out", "all"]
        allowed_levels = [
            "admin0",
            "admin1",
            "admin2",
            "admin3",
            "admin4",
            "site",
            "cell",
        ]

        if interval not in allowed_intervals:
            raise QueryProxyError(
                f"{error_msg_prefix}: 'Unrecognised interval '{interval}', must be one of: {allowed_intervals}'"
            )

        if level not in allowed_levels:
            raise QueryProxyError(
                f"{error_msg_prefix}: 'Unrecognised level '{level}', must be one of: {allowed_levels}'"
            )

        # The API-facing names "cell"/"site" are passed on as "versioned-cell"/"versioned-site".
        if level in ["cell", "site"]:
            level = f"versioned-{level}"

        if direction not in allowed_directions:
            raise QueryProxyError(
                f"{error_msg_prefix}: 'Unrecognised direction '{direction}', must be one of: {allowed_directions}'"
            )
        # The API-facing "all" is passed on as "both".
        if direction == "all":
            direction = "both"

        try:
            q = TotalLocationEvents(
                start=start_date,
                stop=end_date,
                # Forward the validated interval; previously it was checked
                # above but never passed on, so the request's value was ignored.
                interval=interval,
                direction=direction,
                table=event_types,
                level=level,
                subscriber_subset=subscriber_subset,
            )
            logger.debug(f"Made TotalLocationEvents query. {q.__dict__}")
        except Exception as e:
            raise QueryProxyError(f"{error_msg_prefix}: '{e}'") from e
    elif "modal_location" == query_kind:
        locations = params["locations"]
        aggregation_unit = params["aggregation_unit"]
        try:
            location_objects = []
            for loc in locations:
                # Use loop-local names so the outer query_kind/params stay
                # intact for the final debug log below.
                loc_kind = loc["query_kind"]
                if loc_kind != "daily_location":
                    raise QueryProxyError(
                        f"{error_msg_prefix}: Currently modal location takes only daily locations as input."
                    )
                loc_params = loc["params"]
                if aggregation_unit != loc_params["aggregation_unit"]:
                    raise QueryProxyError(
                        f"{error_msg_prefix}: Modal location aggregation unit must be the same as the ones of all input locations."
                    )
                dl = construct_query_object(loc_kind, loc_params)
                location_objects.append(dl)
            q = ModalLocation(*location_objects)
        except Exception as e:
            raise QueryProxyError(f"{error_msg_prefix}: '{e}'") from e

    elif "flows" == query_kind:
        aggregation_unit = params["aggregation_unit"]
        try:
            from_location = params["from_location"]
            to_location = params["to_location"]
            if (aggregation_unit != from_location["params"]["aggregation_unit"]
                    or aggregation_unit !=
                    to_location["params"]["aggregation_unit"]):
                raise QueryProxyError(
                    f"{error_msg_prefix}: Flow aggregation unit must be the same as the ones for from_location and to_location."
                )
            from_location_object = construct_query_object(
                from_location["query_kind"], from_location["params"])
            to_location_object = construct_query_object(
                to_location["query_kind"], to_location["params"])
            q = Flows(from_location_object, to_location_object)
        except Exception as e:
            raise QueryProxyError(f"FIXME (flows): {e}") from e

    elif "meaningful_locations_aggregate" == query_kind:
        aggregation_unit = params["aggregation_unit"]
        mfl = params["meaningful_locations"]
        try:
            # The nested spec is unpacked as keyword arguments, so it must
            # carry "query_kind" and "params" keys.
            q = MeaningfulLocationsAggregate(
                meaningful_locations=construct_query_object(**mfl),
                level=aggregation_unit,
            )
        except Exception as e:
            raise QueryProxyError(
                f"FIXME (meaningful_location_aggregate): {e}") from e

    elif "meaningful_locations_od_matrix" == query_kind:
        aggregation_unit = params["aggregation_unit"]
        mfl_a = params["meaningful_locations_a"]
        mfl_b = params["meaningful_locations_b"]
        try:
            q = MeaningfulLocationsOD(
                meaningful_locations_a=construct_query_object(**mfl_a),
                meaningful_locations_b=construct_query_object(**mfl_b),
                level=aggregation_unit,
            )
        except Exception as e:
            raise QueryProxyError(
                f"FIXME (meaningful_location_od_matrix): {e}") from e

    elif "meaningful_locations" == query_kind:
        label = params["label"]
        scores = params["scores"]
        labels = params["labels"]
        clusters = params["clusters"]
        try:
            q = MeaningfulLocations(
                clusters=construct_query_object(**clusters),
                labels=labels,
                scores=construct_query_object(**scores),
                label=label,
            )
        except Exception as e:
            raise QueryProxyError(f"FIXME (meaningful_locations): {e}") from e
    elif "event_score" == query_kind:
        try:
            q = EventScore(**params)
        except Exception as e:
            raise QueryProxyError(f"FIXME (event_score): {e}") from e

    elif "hartigan_cluster" == query_kind:
        # Popped from the local copy so the remaining params can be passed
        # straight through as keyword arguments.
        call_days = params.pop("call_days")
        try:
            q = HartiganCluster(calldays=construct_query_object(**call_days),
                                **params)
        except Exception as e:
            raise QueryProxyError(f"FIXME (hartigan_cluster): {e}") from e

    elif "call_days" == query_kind:
        sls = params.pop("subscriber_locations")
        try:
            q = CallDays(subscriber_locations=construct_query_object(**sls))
        except Exception as e:
            raise QueryProxyError(f"FIXME (call_days): {e}") from e
    elif "subscriber_locations" == query_kind:
        try:
            q = subscriber_locations(**params)
        except Exception as e:
            raise QueryProxyError(f"FIXME (subscriber_locations): {e}") from e
    elif "geography" == query_kind:
        aggregation_unit = params["aggregation_unit"]

        allowed_aggregation_units = [
            "admin0", "admin1", "admin2", "admin3", "admin4"
        ]

        if aggregation_unit not in allowed_aggregation_units:
            raise InvalidGeographyError(
                f"{error_msg_prefix}: 'Unrecognised aggregation unit '{aggregation_unit}', "
                f"must be one of: {allowed_aggregation_units}'")

        try:
            q = GeoTable(
                name=aggregation_unit,
                schema="geography",
                columns=[
                    f"{aggregation_unit}name", f"{aggregation_unit}pcod",
                    "geom"
                ],
            )
        except Exception as e:
            raise QueryProxyError(f"{error_msg_prefix}: '{e}'") from e

    else:
        error_msg = f"Unsupported query kind: '{query_kind}'"
        logger.error(error_msg)
        raise QueryProxyError(error_msg)

    logger.debug(f"Made {query_kind}: {params}")
    return q