Exemple #1
0
def test_dl_count_sum_equal_or_less_than_period(get_dataframe):
    """
    Sum of LocationVisits per subscriber should not be more than total
    number of days between 'start_date' and 'stop_date'.

    The bound is checked for every subscriber in the result (not only the
    first row's subscriber), over two periods of different length.
    """
    # (start_date, stop_date, number of days the period is expected to span)
    periods = [
        ("2016-01-01", "2016-01-07", 7),
        ("2016-01-01", "2016-01-03", 3),
    ]
    for start_date, stop_date, days in periods:
        lv = LocationVisits(
            DayTrajectories(*[
                daily_location(d, level="admin3", method="last")
                for d in list_of_dates(start_date, stop_date)
            ]))
        df = get_dataframe(lv)
        # An empty result would make the bound below pass vacuously.
        assert not df.empty
        visits_per_subscriber = df.groupby("subscriber")["dl_count"].sum()
        assert (visits_per_subscriber <= days).all()
Exemple #2
0
    def __init__(self,
                 start,
                 stop,
                 method="home-location",
                 level="versioned-site",
                 **kwargs):
        """
        Store the model parameters and build the sub-queries the model uses.

        Parameters
        ----------
        start, stop
            Bounds of the period; one daily location is built for each
            entry of ``list_of_dates(start, stop)``. ``stop`` is also passed
            as the ``date`` of the distance matrix.
        method : str, default "home-location"
            How the population is derived. Only "home-location" is
            implemented here.
        level : str, default "versioned-site"
            Passed to the daily locations, the distance matrix and the
            population buffer.
        **kwargs
            Forwarded to both ``DistanceMatrix`` and ``daily_location``.

        Raises
        ------
        ValueError
            If ``method`` is not "home-location".
        """

        warnings.warn("The PWO model is currently **experimental**. " +
                      "Please review Yan X-Y et al. " +
                      "(http://dx.doi.org/10.1098/rsif.2014.0834) " +
                      "before using this model in production.",
                      stacklevel=2)

        # Without this check an unsupported method would leave
        # ``population_object`` unset and fail later with an opaque
        # AttributeError.
        if method != "home-location":
            raise ValueError(
                f"Unsupported method '{method}'. Only 'home-location' is available."
            )

        self.start = start
        self.stop = stop
        self.method = method
        self.level = level
        self.distance_matrix = DistanceMatrix(date=self.stop,
                                              level=level,
                                              return_geometry=True,
                                              **kwargs)

        self.population_object = ModalLocation(*[
            daily_location(d, level=self.level, **kwargs)
            for d in list_of_dates(self.start, self.stop)
        ]).aggregate()

        self.population_buffer_object = _populationBuffer(
            level=self.level,
            population_object=self.population_object,
            distance_matrix=self.distance_matrix,
        )
    def setUp(self):
        """
        Build a HomeLocation over 2016-01-01 to 2016-01-03 and keep its
        dataframe, indexed by subscriber, for the tests to use.
        """
        self.hl = HomeLocation(*[
            daily_location(d)
            for d in list_of_dates("2016-01-01", "2016-01-03")
        ])
        # Fetch the dataframe once; previously it was fetched twice and the
        # first result was overwritten straight away.
        self.hdf = self.hl.get_dataframe().set_index("subscriber")
Exemple #4
0
    def _check_dates(self):
        """
        Check that the database holds data for the dates this query covers.

        When ``self.start`` or ``self.stop`` is None, the earliest or latest
        date reported by the database for the table is used instead.

        Raises
        ------
        MissingDateError
            If the table has no available dates at all, or if none of the
            requested calendar dates are present.

        Warns
        -----
        A warning is issued (not an error) when only some of the requested
        calendar dates are present.
        """

        # Resolve the first and last calendar day of the requested period,
        # dropping any time-of-day part from user-supplied strings.
        if self.start is None:
            d1 = (
                get_db()
                .min_date(self.table_ORIG.fully_qualified_table_name.split(".")[1])
                .strftime("%Y-%m-%d")
            )
        else:
            d1 = self.start.split()[0]

        if self.stop is None:
            d2 = (
                get_db()
                .max_date(self.table_ORIG.fully_qualified_table_name.split(".")[1])
                .strftime("%Y-%m-%d")
            )
        else:
            d2 = self.stop.split()[0]

        all_dates = list_of_dates(d1, d2)
        # A stop such as '2016-01-02' is interpreted as midnight, so that
        # final day contributes no data and must not be counted.
        stop_is_midnight = (self.stop is not None) and (
            len(self.stop) == 10 or self.stop.endswith("00:00:00")
        )
        # Guard against an empty list so we fall through to MissingDateError
        # below instead of failing with IndexError on pop.
        if stop_is_midnight and all_dates:
            all_dates.pop()

        # Only the lookup can raise KeyError, so keep the try minimal.
        try:
            available = get_db().available_dates[self.table_ORIG.name]
        except KeyError as exc:  # No dates at all for this table
            raise MissingDateError from exc
        # A set gives constant-time membership checks below.
        db_dates = {d.strftime("%Y-%m-%d") for d in available}

        # One boolean per requested date: is it present in the database?
        dates_present = [d in db_dates for d in all_dates]
        logger.debug(
            f"Data for {sum(dates_present)}/{len(dates_present)} calendar dates."
        )
        # All dates are missing
        if not any(dates_present):
            raise MissingDateError
        # Some dates are missing, others are present
        if not all(dates_present):
            present_dates = [d for p, d in zip(dates_present, all_dates) if p]
            warnings.warn(
                f"{len(dates_present) - sum(dates_present)} of {len(dates_present)} calendar dates missing. "
                f"Earliest date is {present_dates[0]}, latest is {present_dates[-1]}.",
                stacklevel=2,
            )
Exemple #5
0
    def __init__(
        self,
        start: str,
        stop: str,
        *,
        spatial_unit: Optional[LonLatSpatialUnit] = None,
        departure_rate: Union[pd.DataFrame, float] = 0.1,
        hours: Union[str, Tuple[int, int]] = "all",
        method: str = "last",
        table: Union[str, List[str]] = "all",
        subscriber_identifier: str = "msisdn",
        subscriber_subset: Optional[Query] = None,
    ):
        """
        Store the model parameters and build the sub-queries the model uses.

        Parameters
        ----------
        start, stop
            Bounds of the period; one daily location is built for each
            entry of ``list_of_dates(start, stop)``.
        spatial_unit
            Spatial unit for the distance matrix and the daily locations.
            Defaults to ``make_spatial_unit("versioned-site")``.
        departure_rate
            Either a single number, or a dataframe with a ``rate`` column.
            Every other dataframe column is renamed with a ``_from`` suffix.
        hours, method, table, subscriber_identifier, subscriber_subset
            Forwarded unchanged to ``daily_location``.

        Raises
        ------
        TypeError
            If ``departure_rate`` is neither a number nor a dataframe.
        """

        warnings.warn(
            "The PopulationWeightedOpportunities model is currently **experimental**. "
            + "Please review Yan X-Y et al. " +
            "(http://dx.doi.org/10.1098/rsif.2014.0834) " +
            "before using this model in production.",
            stacklevel=2)

        if isinstance(departure_rate, pd.DataFrame):
            # Rename the columns to match what we'll join to.
            renamed = departure_rate.rename(
                columns=lambda x: x if x == "rate" else f"{x}_from")
            ordered_columns = sorted(renamed.columns)
            canonical = renamed.reindex(columns=ordered_columns)
            # Sort whole rows so we have a consistent md5. Sorting each
            # column on its own would detach rates from their locations.
            if ordered_columns:
                canonical = canonical.sort_values(by=ordered_columns)
            self.departure_rate = canonical.reset_index(drop=True)
        elif isinstance(departure_rate,
                        (int, float)) and not isinstance(departure_rate, bool):
            # Accept ints as well (e.g. a rate of 1) and store a plain float.
            self.departure_rate = float(departure_rate)
        else:
            raise TypeError(f"{departure_rate} must be a float or dataframe")
        self.start = start
        self.stop = stop
        if spatial_unit is None:
            self.spatial_unit = make_spatial_unit("versioned-site")
        else:
            self.spatial_unit = spatial_unit
        self.distance_matrix = DistanceMatrix(spatial_unit=self.spatial_unit,
                                              return_geometry=True)

        self.population_object = ModalLocation(*[
            daily_location(
                d,
                spatial_unit=self.spatial_unit,
                hours=hours,
                method=method,
                table=table,
                subscriber_identifier=subscriber_identifier,
                ignore_nulls=True,
                subscriber_subset=subscriber_subset,
            ) for d in list_of_dates(self.start, self.stop)
        ]).aggregate()

        self.population_buffer_object = _PopulationBuffer(
            population_object=self.population_object,
            distance_matrix=self.distance_matrix,
        )
 def test_subset_correct(self):
     """Test that pushed in subsetting matches .subset result"""

     def canonical(df):
         # Row order of a query result is assumed not to be guaranteed, so
         # sort on every column and renumber before comparing.
         return df.sort_values(by=list(df.columns)).reset_index(drop=True)

     def same_rows(query_a, query_b):
         # ``all(df_a == df_b)`` iterates column labels and is always true;
         # ``DataFrame.equals`` compares the actual contents.
         return canonical(query_a.get_dataframe()).equals(
             canonical(query_b.get_dataframe()))

     su = EventTableSubset("2016-01-01",
                           "2016-01-03",
                           subscriber_subset=self.subscriber_list)
     subsu = EventTableSubset("2016-01-01",
                              "2016-01-03").subset("subscriber",
                                                   self.subscriber_list)
     self.assertTrue(same_rows(su, subsu))
     # Both sides must cover the same period for the comparison to be
     # meaningful.
     su = HomeLocation(*[
         daily_location(d, subscriber_subset=self.subscriber_list)
         for d in list_of_dates("2016-01-01", "2016-01-03")
     ])
     subsu = HomeLocation(*[
         daily_location(d)
         for d in list_of_dates("2016-01-01", "2016-01-03")
     ]).subset("subscriber", self.subscriber_list)
     self.assertTrue(same_rows(su, subsu))
Exemple #7
0
def test_subset_correct(subscriber_list, get_dataframe):
    """Test that pushed in subsetting matches .subset result"""

    def canonical(df):
        # Row order of a query result is assumed not to be guaranteed, so
        # sort on every column and renumber before comparing.
        return df.sort_values(by=list(df.columns)).reset_index(drop=True)

    def same_rows(query_a, query_b):
        # ``all(df_a == df_b)`` iterates column labels and is always true;
        # ``DataFrame.equals`` compares the actual contents.
        return canonical(get_dataframe(query_a)).equals(
            canonical(get_dataframe(query_b)))

    su = EventTableSubset(start="2016-01-01",
                          stop="2016-01-03",
                          subscriber_subset=subscriber_list)
    subsu = EventTableSubset(start="2016-01-01",
                             stop="2016-01-03").subset("subscriber",
                                                       subscriber_list)
    assert same_rows(su, subsu)
    # Both sides must cover the same period for the comparison to be
    # meaningful.
    su = ModalLocation(*[
        daily_location(d, subscriber_subset=subscriber_list)
        for d in list_of_dates("2016-01-01", "2016-01-03")
    ])
    subsu = ModalLocation(*[
        daily_location(d)
        for d in list_of_dates("2016-01-01", "2016-01-03")
    ]).subset("subscriber", subscriber_list)
    assert same_rows(su, subsu)
Exemple #8
0
def test_error_when_home_location_not_latlong():
    """
    Displacement should raise ValueError when handed a home location
    object that is not using level lat-lon.
    """
    per_day = [
        daily_location(day) for day in list_of_dates("2016-01-01", "2016-01-02")
    ]
    home = HomeLocation(*per_day)

    with pytest.raises(ValueError):
        Displacement("2016-01-01", "2016-01-02", home_locations=home, statistic="avg")
def test_can_be_aggregated_lon_lat(get_dataframe):
    """
    Aggregating a ModalLocation built on lon-lat daily locations gives
    the columns lon, lat and value.
    """
    per_day = [
        daily_location(
            day, spatial_unit=make_spatial_unit("lon-lat"), method="last")
        for day in list_of_dates("2016-01-01", "2016-01-03")
    ]
    aggregated = ModalLocation(*per_day).aggregate()
    columns = list(get_dataframe(aggregated).columns)
    assert ["lon", "lat", "value"] == columns
 def test_can_be_aggregated_latlong(self):
     """
     Aggregating a HomeLocation built on lat-lon daily locations gives a
     pandas DataFrame with the columns lat, lon and total.
     """
     per_day = [
         daily_location(day, level="lat-lon", method="last")
         for day in list_of_dates("2016-01-01", "2016-01-03")
     ]
     aggregated = HomeLocation(*per_day).aggregate()
     frame = aggregated.get_dataframe()
     self.assertIs(type(frame), pd.DataFrame)
     self.assertEqual(list(frame.columns), ["lat", "lon", "total"])
def test_error_when_modal_location_not_lon_lat():
    """
    Displacement should raise ValueError when handed a modal location
    object that is not using a lon-lat spatial unit.
    """
    per_day = [
        daily_location(day) for day in list_of_dates("2016-01-01", "2016-01-02")
    ]
    modal = ModalLocation(*per_day)

    with pytest.raises(ValueError):
        Displacement("2016-01-01", "2016-01-02", modal_locations=modal, statistic="avg")
Exemple #12
0
def test_contact_reference_location_false_level_raises():
    """ Test ValueError is raised for contact_location with non-compliant level. """
    cb = ContactBalance("2016-01-01", "2016-01-03")
    ml = ModalLocation(*[
        daily_location(
            d,
            level="admin3",
            subscriber_subset=cb.counterparts_subset(include_subscribers=True),
        ) for d in list_of_dates("2016-01-01", "2016-01-03")
    ])
    # Construction alone is expected to raise, so the result is not bound.
    with pytest.raises(ValueError):
        ContactReferenceLocationStats(cb, ml)
Exemple #13
0
def test_contact_reference_location_stats_false_statistic_raises():
    """ Test ValueError is raised for non-compliant statistics parameter. """
    cb = ContactBalance("2016-01-01", "2016-01-03")
    ml = ModalLocation(*[
        daily_location(
            d,
            spatial_unit=make_spatial_unit("versioned-cell"),
            subscriber_subset=cb.counterparts_subset(include_subscribers=True),
        ) for d in list_of_dates("2016-01-01", "2016-01-03")
    ])
    # Construction alone is expected to raise, so the result is not bound.
    with pytest.raises(ValueError):
        ContactReferenceLocationStats(cb, ml, statistic="error")
Exemple #14
0
    def __init__(self,
                 start,
                 stop,
                 modal_locations=None,
                 statistic="avg",
                 unit="km",
                 **kwargs):
        """
        Validate the arguments and join modal locations to subscriber
        locations on subscriber.

        Parameters
        ----------
        start, stop
            Bounds of the period. Subscriber locations run from ``start``
            to ``stop``; default modal locations run from ``start`` to one
            day before ``stop``.
        modal_locations : ModalLocation, optional
            Pre-built modal locations, whose ``level`` must be one of
            "lat-lon", "versioned-cell" or "versioned-site". When None, a
            lat-lon ModalLocation is built from daily locations.
        statistic : str, default "avg"
            Lower-cased and checked against ``valid_stats``.
        unit : str, default "km"
            Stored on the instance as ``self.unit``.
        **kwargs
            Forwarded to ``daily_location`` and ``subscriber_locations``.

        Raises
        ------
        ValueError
            If ``statistic`` is not valid, or ``modal_locations`` is not a
            ModalLocation with an allowed level.
        """

        # Check the cheap argument first so a bad statistic fails before
        # any sub-query is constructed.
        self.statistic = statistic.lower()
        if self.statistic not in valid_stats:
            raise ValueError(
                "{} is not a valid statistic. Use one of {}".format(
                    self.statistic, valid_stats))

        # need to subtract one day from hl end in order to be
        # comparing over same period...
        self.stop_sl = stop
        self.stop_hl = str(parse_datestring(stop) - relativedelta(days=1))

        self.start = start

        allowed_levels = ["lat-lon", "versioned-cell", "versioned-site"]
        # Compare with None explicitly: truthiness of an arbitrary object
        # may go through __bool__/__len__, which is not what is meant here.
        if modal_locations is not None:
            if not (isinstance(modal_locations, ModalLocation)
                    and modal_locations.level in allowed_levels):
                raise ValueError(
                    f"Argument 'modal_locations' should be an instance of ModalLocation class with level in {allowed_levels}"
                )
            hl = modal_locations
        else:
            hl = ModalLocation(*[
                daily_location(date, level="lat-lon", **kwargs)
                for date in list_of_dates(self.start, self.stop_hl)
            ])

        sl = subscriber_locations(self.start,
                                  self.stop_sl,
                                  level="lat-lon",
                                  **kwargs)

        # Left join keeps every subscriber that has a modal location.
        self.joined = hl.join(
            sl,
            on_left="subscriber",
            on_right="subscriber",
            how="left",
            left_append="_home_loc",
            right_append="",
        )

        self.unit = unit

        super().__init__()
def test_can_be_aggregated_latlong(get_dataframe):
    """
    Aggregating a ModalLocation built on lat-lon daily locations gives
    the columns lat, lon and total.
    """
    per_day = [
        daily_location(day, level="lat-lon", method="last")
        for day in list_of_dates("2016-01-01", "2016-01-03")
    ]
    aggregated = ModalLocation(*per_day).aggregate()
    columns = list(get_dataframe(aggregated).columns)
    assert ["lat", "lon", "total"] == columns
Exemple #16
0
def test_selected_values(get_dataframe):
    """
    ModalLocation() values are correct.

    Checks the first column of the row for a handful of subscribers.
    """
    hdf = get_dataframe(
        ModalLocation(
            *[daily_location(d) for d in list_of_dates("2016-01-01", "2016-01-03")]
        )
    ).set_index("subscriber")

    expected = {
        "038OVABN11Ak4W5P": "Dolpa",
        "E1n7JoqxPBjvR5Ve": "Baglung",
        "gkBLe0mN5j3qmRpX": "Myagdi",
        "5Kgwy8Gp6DlN3Eq9": "Kapilbastu",
    }
    # ``DataFrame.ix`` has been removed from pandas: look the row up by
    # label with ``.loc`` and take its first value by position.
    for subscriber, location in expected.items():
        assert location == hdf.loc[subscriber].iloc[0]
Exemple #17
0
def test_contact_reference_location_bad_spatial_unit_raises():
    """
    Test InvalidSpatialUnitError is raised for contact_location with
    non-compliant spatial unit.
    """
    cb = ContactBalance("2016-01-01", "2016-01-03")
    ml = ModalLocation(*[
        daily_location(
            d,
            spatial_unit=make_spatial_unit("admin", level=3),
            subscriber_subset=cb.counterparts_subset(include_subscribers=True),
        ) for d in list_of_dates("2016-01-01", "2016-01-03")
    ])
    # Construction alone is expected to raise, so the result is not bound.
    with pytest.raises(InvalidSpatialUnitError):
        ContactReferenceLocationStats(cb, ml)
Exemple #18
0
def test_contact_reference_location_no_spatial_unit_raises():
    """ Test ValueError is raised for contact_location without spatial_unit attribute. """
    cb = ContactBalance("2016-01-01", "2016-01-03")
    # by encapsulating ModalLocations in a CustomQuery we remove the spatial_unit
    # attribute from it which should raise an error
    ml = ModalLocation(*[
        daily_location(
            d,
            spatial_unit=make_spatial_unit("versioned-cell"),
            subscriber_subset=cb.counterparts_subset(include_subscribers=True),
        ) for d in list_of_dates("2016-01-01", "2016-01-03")
    ])
    ml = CustomQuery(ml.get_query(), ml.column_names)
    # Construction alone is expected to raise, so the result is not bound.
    with pytest.raises(ValueError):
        ContactReferenceLocationStats(cb, ml)
Exemple #19
0
def test_contact_reference_location_stats(get_dataframe, statistic, msisdn,
                                          spatial_unit_type, want):
    """ Check ContactReferenceLocationStats against hand-picked expected values. """
    balance = ContactBalance("2016-01-01", "2016-01-03")
    per_day = [
        daily_location(
            day,
            spatial_unit=make_spatial_unit(spatial_unit_type),
            subscriber_subset=balance.counterparts_subset(
                include_subscribers=True),
        ) for day in list_of_dates("2016-01-01", "2016-01-03")
    ]
    modal = ModalLocation(*per_day)
    # Store both inputs before building the stats query on top of them.
    balance.store()
    modal.store()
    stats = ContactReferenceLocationStats(balance, modal, statistic=statistic)
    by_subscriber = get_dataframe(stats).set_index("subscriber")
    assert by_subscriber.value[msisdn] == pytest.approx(want)
Exemple #20
0
def test_get_all_users_in_modal_location(get_dataframe):
    """
    Every subscriber present in the modal location object should also
    appear in the displacement result.
    """
    modal_start, modal_stop = "2016-01-02 10:00:00", "2016-01-02 12:00:00"
    disp_start, disp_stop = "2016-01-01 12:01:00", "2016-01-01 15:20:00"

    modal = ModalLocation(*[
        daily_location(day, level="lat-lon", hours=(12, 13))
        for day in list_of_dates(modal_start, modal_stop)
    ])
    displacement = Displacement(disp_start, disp_stop, modal_locations=modal)

    modal_subscribers = set(get_dataframe(modal).subscriber)
    displaced_subscribers = set(get_dataframe(displacement).subscriber)

    # No modal-location subscriber may be missing from the displacement.
    assert modal_subscribers <= displaced_subscribers
Exemple #21
0
def test_pass_modal_location(get_dataframe):
    """
    A pre-built modal location object can be handed to Displacement, and
    the result for a known subscriber matches the expected value.
    """
    per_day = [
        daily_location(day, level="lat-lon")
        for day in list_of_dates("2016-01-01", "2016-01-06")
    ]
    modal = ModalLocation(*per_day)

    displacement = Displacement("2016-01-01",
                                "2016-01-07",
                                modal_locations=modal,
                                statistic="avg")
    by_subscriber = get_dataframe(displacement).set_index("subscriber")

    observed = by_subscriber.loc["j6QYNbMJgAwlVORP"].statistic
    assert observed == pytest.approx(169.926194)
Exemple #22
0
def test_subscriber_with_home_loc_but_no_calls_is_nan(get_dataframe):
    """
    A subscriber with a home location but no activity between start and
    stop should get a nan statistic.
    """
    modal_start, modal_stop = "2016-01-02 10:00:00", "2016-01-02 12:00:00"
    disp_start, disp_stop = "2016-01-01 12:01:00", "2016-01-01 15:20:00"
    subscriber = "OdM7np8LYEp1mkvP"

    modal = ModalLocation(*[
        daily_location(day, level="lat-lon", hours=(12, 13))
        for day in list_of_dates(modal_start, modal_stop)
    ])
    displacement = Displacement(disp_start, disp_stop, modal_locations=modal)

    by_subscriber = get_dataframe(displacement).set_index("subscriber")
    observed = by_subscriber.loc[subscriber].statistic

    assert isnan(observed)
def test_pass_modal_location(get_dataframe):
    """
    A pre-built modal location object can be handed to Displacement, and
    the result for a known subscriber matches the expected value.
    """
    per_day = [
        daily_location(day, spatial_unit=make_spatial_unit("lon-lat"))
        for day in list_of_dates("2016-01-01", "2016-01-06")
    ]
    modal = ModalLocation(*per_day)

    displacement = Displacement(
        "2016-01-01", "2016-01-07", modal_locations=modal, statistic="avg"
    )
    by_subscriber = get_dataframe(displacement).set_index("subscriber")

    observed = by_subscriber.loc["j6QYNbMJgAwlVORP"].statistic
    assert observed == pytest.approx(176.903620)
Exemple #24
0
    def __init__(
        self,
        start,
        stop,
        method="home-location",
        spatial_unit: Optional[LonLatSpatialUnit] = None,
        **kwargs,
    ):
        """
        Store the model parameters and build the sub-queries the model uses.

        Parameters
        ----------
        start, stop
            Bounds of the period; one daily location is built for each
            entry of ``list_of_dates(start, stop)``.
        method : str, default "home-location"
            How the population is derived. Only "home-location" is
            implemented here.
        spatial_unit
            Spatial unit for the distance matrix and the daily locations.
            Defaults to ``make_spatial_unit("versioned-site")``.
        **kwargs
            Forwarded to ``daily_location``.

        Raises
        ------
        ValueError
            If ``method`` is not "home-location".
        """

        warnings.warn(
            "The PWO model is currently **experimental**. "
            + "Please review Yan X-Y et al. "
            + "(http://dx.doi.org/10.1098/rsif.2014.0834) "
            + "before using this model in production.",
            stacklevel=2,
        )

        # Without this check an unsupported method would leave
        # ``population_object`` unset and fail later with an opaque
        # AttributeError.
        if method != "home-location":
            raise ValueError(
                f"Unsupported method '{method}'. Only 'home-location' is available."
            )

        self.start = start
        self.stop = stop
        self.method = method
        if spatial_unit is None:
            self.spatial_unit = make_spatial_unit("versioned-site")
        else:
            self.spatial_unit = spatial_unit
        self.distance_matrix = DistanceMatrix(
            spatial_unit=self.spatial_unit, return_geometry=True
        )

        self.population_object = ModalLocation(
            *[
                daily_location(d, spatial_unit=self.spatial_unit, **kwargs)
                for d in list_of_dates(self.start, self.stop)
            ]
        ).aggregate()

        self.population_buffer_object = _populationBuffer(
            population_object=self.population_object,
            distance_matrix=self.distance_matrix,
        )
Exemple #25
0
def test_contact_reference_location_stats_custom_geometry(get_dataframe):
    """ ContactReferenceLocationStats works with a custom geometry column. """
    balance = ContactBalance("2016-01-01", "2016-01-03")
    per_day = [
        daily_location(
            day,
            spatial_unit=make_spatial_unit("versioned-cell"),
            subscriber_subset=balance.counterparts_subset(
                include_subscribers=True),
        ) for day in list_of_dates("2016-01-01", "2016-01-03")
    ]
    modal = ModalLocation(*per_day)
    balance.store()
    modal.store()
    # Wrap the modal locations so each location is exposed as one point
    # column named "loc".
    point_sql = f"SELECT subscriber, ST_POINT(lon, lat) AS loc FROM ({modal.get_query()}) _"
    points = CustomQuery(point_sql, ["subscriber", "loc"])
    stats = ContactReferenceLocationStats(balance,
                                          points,
                                          statistic="avg",
                                          geom_column="loc")
    by_subscriber = get_dataframe(stats).set_index("subscriber")
    assert by_subscriber.value["gwAynWXp4eWvxGP7"] == pytest.approx(298.7215)
Exemple #26
0
def test_query_can_be_subscriber_set_restricted(subscriber_list_table,
                                                subscriber_list,
                                                get_dataframe):
    """Queries given a subscriber subset return exactly those subscribers."""
    expected_subscribers = set(subscriber_list)

    gyration = RadiusOfGyration("2016-01-01",
                                "2016-01-03",
                                subscriber_subset=subscriber_list_table)
    per_day = [
        daily_location(day, subscriber_subset=subscriber_list_table)
        for day in list_of_dates("2016-01-01", "2016-01-03")
    ]
    modal = ModalLocation(*per_day)
    gyration_df = get_dataframe(gyration)
    modal_df = get_dataframe(modal)

    # Each result should contain the restricted subscribers and no others.
    assert set(gyration_df.subscriber) == expected_subscribers
    assert set(modal_df.subscriber) == expected_subscribers
    def test_query_can_be_subscriber_set_restricted(self):
        """Test that some queries can be limited to only a subset of subscribers."""

        # Create a temporary table in the DB
        con = Table.connection.engine

        sql = "DROP TABLE IF EXISTS subscriber_list"
        con.execute(sql)

        sql = """CREATE TABLE subscriber_list (subscriber TEXT)"""
        con.execute(sql)

        # try/finally so the table is dropped even when a query fails;
        # otherwise a failure would leave it behind in the database.
        try:
            # NOTE: values are interpolated straight into the SQL. That is
            # tolerable only because they come from the test's own fixture.
            formatted_subscribers = ",".join("('{}')".format(u)
                                             for u in self.subscriber_list)
            sql = """INSERT INTO subscriber_list (subscriber) VALUES {}""".format(
                formatted_subscribers)
            con.execute(sql)
            rog = RadiusOfGyration("2016-01-01",
                                   "2016-01-03",
                                   subscriber_subset=Table("subscriber_list"))
            hl = HomeLocation(*[
                daily_location(d, subscriber_subset=Table("subscriber_list"))
                for d in list_of_dates("2016-01-01", "2016-01-03")
            ])
            rog_df = rog.get_dataframe()
            hl_df = hl.get_dataframe()
        finally:
            con.execute("DROP TABLE IF EXISTS subscriber_list")

        # Each result should contain exactly the restricted subscribers.
        expected_subscribers = set(self.subscriber_list)
        self.assertEqual(set(rog_df.subscriber), expected_subscribers)
        self.assertEqual(set(hl_df.subscriber), expected_subscribers)
    def __init__(
        self,
        start,
        stop,
        modal_locations=None,
        statistic="avg",
        unit="km",
        hours="all",
        method="last",
        table="all",
        subscriber_identifier="msisdn",
        ignore_nulls=True,
        subscriber_subset=None,
    ):
        """
        Validate the arguments and join modal locations to subscriber
        locations on subscriber.

        Parameters
        ----------
        start, stop
            Bounds of the period. Subscriber locations run from ``start``
            to ``stop``; default modal locations run from ``start`` to one
            day before ``stop``.
        modal_locations : ModalLocation, optional
            Pre-built modal locations; their spatial unit must satisfy the
            "has_lon_lat_columns" criterion. When None, a lon-lat
            ModalLocation is built from daily locations.
        statistic : str, default "avg"
            Lower-cased and checked against ``valid_stats``.
        unit : str, default "km"
            Stored on the instance as ``self.unit``.
        hours, table, subscriber_identifier, ignore_nulls, subscriber_subset
            Forwarded to both ``daily_location`` and ``SubscriberLocations``.
        method : str, default "last"
            Forwarded to ``daily_location`` only.

        Raises
        ------
        ValueError
            If ``statistic`` is not valid, or ``modal_locations`` is not a
            ModalLocation instance.
        """

        # Check the cheap argument first so a bad statistic fails before
        # any sub-query is constructed.
        self.statistic = statistic.lower()
        if self.statistic not in valid_stats:
            raise ValueError(
                "{} is not a valid statistic. Use one of {}".format(
                    self.statistic, valid_stats))

        # need to subtract one day from hl end in order to be
        # comparing over same period...
        self.stop_sl = stop
        self.stop_hl = str(parse_datestring(stop) - relativedelta(days=1))

        self.start = start

        # Compare with None explicitly: truthiness of an arbitrary object
        # may go through __bool__/__len__, which is not what is meant here.
        if modal_locations is not None:
            if not isinstance(modal_locations, ModalLocation):
                raise ValueError(
                    "Argument 'modal_locations' should be an instance of ModalLocation class"
                )
            hl = modal_locations
            hl.spatial_unit.verify_criterion("has_lon_lat_columns")
        else:
            hl = ModalLocation(*[
                daily_location(
                    date,
                    spatial_unit=make_spatial_unit("lon-lat"),
                    hours=hours,
                    method=method,
                    table=table,
                    subscriber_identifier=subscriber_identifier,
                    ignore_nulls=ignore_nulls,
                    subscriber_subset=subscriber_subset,
                ) for date in list_of_dates(self.start, self.stop_hl)
            ])

        sl = SubscriberLocations(
            self.start,
            self.stop_sl,
            spatial_unit=make_spatial_unit("lon-lat"),
            hours=hours,
            table=table,
            subscriber_identifier=subscriber_identifier,
            ignore_nulls=ignore_nulls,
            subscriber_subset=subscriber_subset,
        )

        # Left join keeps every subscriber that has a modal location.
        self.joined = hl.join(
            sl,
            on_left="subscriber",
            on_right="subscriber",
            how="left",
            left_append="_home_loc",
            right_append="",
        )

        self.unit = unit

        super().__init__()