Beispiel #1
0
 def setUp(self):
     """
     Prepare the shared fixtures: a radius of gyration query, a pair of
     bounds, the query's dataframe filtered to those bounds with pandas,
     and the equivalent numeric subset query.
     """
     self.rog = RadiusOfGyration("2016-01-01", "2016-01-02")
     self.low, self.high = 150, 155
     bounds_filter = "{low} <= rog <= {high}".format(
         low=self.low, high=self.high)
     unfiltered = self.rog.get_dataframe()
     self.rog_df = unfiltered.query(bounds_filter)
     self.sub = self.rog.numeric_subset(
         col="rog", low=self.low, high=self.high)
Beispiel #2
0
def test_joined_agg_date_mismatch():
    """
    Joining an aggregate whose date range differs from that of the
    locations should emit a UserWarning, whether the difference is in the
    start date or in the stop date.
    """
    mfl = MostFrequentLocation("2016-01-01", "2016-01-04", level="admin3")
    mismatched_periods = (
        ("2016-01-02", "2016-01-04"),  # start differs from the locations
        ("2016-01-01", "2016-01-05"),  # stop differs from the locations
    )
    for start, stop in mismatched_periods:
        with pytest.warns(UserWarning):
            mfl.join_aggregate(RadiusOfGyration(start, stop))
def test_can_numsubset_with_inf(get_dataframe):
    """
    A numeric subset of flowmachine.RadiusOfGyration bounded by -Inf and Inf
    equals the same filter applied to the dataframe with pandas.
    """
    rog = RadiusOfGyration("2016-01-01", "2016-01-02")
    high = float("Infinity")
    low = -high
    subset_query = rog.numeric_subset(col="value", low=low, high=high)
    sub = get_dataframe(subset_query)
    bounds_filter = "{low} <= value <= {high}".format(low=low, high=high)
    df = get_dataframe(rog).query(bounds_filter)
    pd.testing.assert_frame_equal(sub, df)
 def test_joined_median_aggregate(self):
     """
     Test join with median aggregate.

     The value reported by the joined aggregate for the "Rasuwa" location
     is compared with the median computed in pandas from the per-subscriber
     radius of gyration joined to each subscriber's location.
     """
     mfl = MostFrequentLocation("2016-01-01", "2016-01-04", level="admin3")
     rog = RadiusOfGyration("2016-01-01", "2016-01-04")
     joined = mfl.join_aggregate(rog, method="median")
     # Label lookups use .loc: the .ix indexer originally used here was
     # deprecated and then removed from pandas.
     rasuwa_median = (rog.get_dataframe().set_index("subscriber").join(
         mfl.get_dataframe().set_index("subscriber")).set_index(
             "name").loc["Rasuwa"].rog.median())
     self.assertAlmostEqual(
         joined.get_dataframe().set_index("name").loc["Rasuwa"].rog,
         rasuwa_median)
def test_can_numsubset_with_low_and_high(get_dataframe):
    """
    A numeric subset of flowmachine.RadiusOfGyration between two finite
    bounds equals the same filter applied to the dataframe with pandas.
    """
    rog = RadiusOfGyration("2016-01-01", "2016-01-02")
    low, high = 150, 155
    bounds_filter = "{low} <= value <= {high}".format(low=low, high=high)
    expected = get_dataframe(rog).query(bounds_filter).set_index("subscriber")
    subset_query = rog.numeric_subset(col="value", low=low, high=high)
    actual = get_dataframe(subset_query).set_index("subscriber")

    pd.testing.assert_frame_equal(actual, expected)
def test_num_subset_can_be_stored(get_dataframe):
    """
    A flowmachine.NumericSubset can be stored, and an equivalent subset
    built afterwards has as many rows as the pandas-filtered dataframe.
    """
    rog = RadiusOfGyration("2016-01-01", "2016-01-02")
    low, high = 150, 155
    bounds_filter = "{low} <= value <= {high}".format(low=low, high=high)
    expected_rows = get_dataframe(rog).query(bounds_filter)

    stored_subset = rog.numeric_subset(col="value", low=low, high=high)
    stored_subset.store().result()
    assert stored_subset.is_stored

    # Build the same subset again and check it has the right length.
    fresh_subset = rog.numeric_subset(col="value", low=low, high=high)
    assert len(get_dataframe(fresh_subset)) == len(expected_rows)
def test_dropna(get_length):
    """
    A feature collection built with dropna=False keeps rows with NA values.
    """
    msisdn = "1vGR8kp342yxEpwY"
    single_row_sql = """
    select 
        msisdn as subscriber,
        2 as val 
    from 
        events.calls 
    where 
        msisdn = '{}' 
    limit 1
    """.format(msisdn)

    start = "2016-01-01"
    stop = "2016-01-03"
    single_row_metric = CustomQuery(single_row_sql, ["subscriber"])
    metrics = [single_row_metric, RadiusOfGyration(start, stop)]
    fc = feature_collection(metrics, dropna=False)

    # Usually, without dropna=False, this query would only return a single
    # row. We check that this is not the case.
    assert get_length(fc) > 1
Beispiel #8
0
def test_joined_agg_hours_mismatch():
    """
    Joining an aggregate whose dates match but whose start hour differs
    should not produce any recorded warning.
    """
    mfl = MostFrequentLocation("2016-01-01 10:00", "2016-01-04", level="admin3")
    rog = RadiusOfGyration("2016-01-01", "2016-01-04")
    with warnings.catch_warnings(record=True) as recorded:
        mfl.join_aggregate(rog)
    # The recorded list is still available after the context exits.
    assert not recorded
Beispiel #9
0
def test_histogram_param_value_errors(param_name, param_value,
                                      expected_exception):
    """
    Constructing a HistogramAggregation with ``param_name`` set to
    ``param_value`` raises a ValueError whose message matches
    ``expected_exception``.
    """
    histogram_kwargs = {
        "bins": 10,
        "metric": RadiusOfGyration("2016-01-01", "2016-01-02"),
    }
    # The parameter under test replaces (or adds to) the valid defaults.
    histogram_kwargs[param_name] = param_value
    with pytest.raises(ValueError, match=expected_exception):
        HistogramAggregation(**histogram_kwargs)
def test_can_get_item_subscriber_metric(get_dataframe):
    """
    flowmachine.SubscriberFeature allows for getting items: indexing the
    query with one subscriber yields rows for that subscriber only.
    """
    rog = RadiusOfGyration("2016-01-01", "2016-01-03")
    dl = daily_location("2016-01-03")
    first_subscribers = list(get_dataframe(dl).head(8).subscriber)
    chosen = first_subscribers[3]
    rows_for_chosen = get_dataframe(rog[chosen])
    assert set(rows_for_chosen.subscriber) == {chosen}
Beispiel #11
0
def test_joined_aggregate(get_dataframe):
    """
    Test join aggregate: the joined value for pcod "524 2 05 29" is
    approximately the expected reference number.
    """
    mfl = MostFrequentLocation("2016-01-01", "2016-01-04", level="admin3")
    rog = RadiusOfGyration("2016-01-01", "2016-01-04")
    joined = mfl.join_aggregate(rog)
    by_pcod = get_dataframe(joined).set_index("pcod")
    expected = pytest.approx(203.12391560786)
    assert expected == by_pcod.loc["524 2 05 29"].rog
Beispiel #12
0
def test_joined_aggregate(get_dataframe):
    """
    Test join aggregate.

    The joined value for the location named "Rasuwa" should be
    approximately the expected reference number.
    """
    mfl = MostFrequentLocation("2016-01-01", "2016-01-04", level="admin3")
    joined = mfl.join_aggregate(RadiusOfGyration("2016-01-01", "2016-01-04"))
    # Label lookup uses .loc: the .ix indexer originally used here was
    # deprecated and then removed from pandas.
    assert (
        pytest.approx(199.956021886114)
        == get_dataframe(joined).set_index("name").loc["Rasuwa"].rog
    )
 def test_joined_aggregate(self):
     """
     Test join aggregate.

     The joined value for the location named "Rasuwa" should be almost
     equal to the expected reference number.
     """
     mfl = MostFrequentLocation("2016-01-01", "2016-01-04", level="admin3")
     joined = mfl.join_aggregate(
         RadiusOfGyration("2016-01-01", "2016-01-04"))
     # Label lookup uses .loc: the .ix indexer originally used here was
     # deprecated and then removed from pandas.
     self.assertAlmostEqual(
         joined.get_dataframe().set_index("name").loc["Rasuwa"].rog,
         199.956021886114)
    def _flowmachine_query_obj(self):
        """
        Build the flowmachine radius_of_gyration object from this
        instance's start date, end date and subscriber subset.

        Returns
        -------
        Query
        """
        query_kwargs = dict(
            start=self.start_date,
            stop=self.end_date,
            subscriber_subset=self.subscriber_subset,
        )
        return RadiusOfGyration(**query_kwargs)
Beispiel #15
0
def test_create_histogram_using_int_bins_value(get_dataframe):
    """
    A histogram built with an integer number of bins agrees with
    numpy.histogram on counts and bin edges, and its counts sum to the
    number of rows in the metric.
    """
    rog = RadiusOfGyration("2016-01-01", "2016-01-02")

    histogram_query = HistogramAggregation(metric=rog, bins=5, censor=False)
    histogram_df = get_dataframe(histogram_query)
    expected_counts, expected_edges = np.histogram(
        get_dataframe(rog).value, bins=5)
    edges = expected_edges.tolist()

    assert histogram_df.value.sum() == len(get_dataframe(rog))
    assert expected_counts.tolist() == histogram_df.value.tolist()
    assert edges[:-1] == pytest.approx(histogram_df.lower_edge.tolist())
    assert edges[1:] == pytest.approx(histogram_df.upper_edge.tolist())
    def test_query_can_be_subscriber_set_restricted(self):
        """Test that some queries can be limited to only a subset of subscribers."""

        con = Table.connection.engine
        drop_sql = "DROP TABLE IF EXISTS subscriber_list"

        # Make sure no table is left over from an earlier run.
        con.execute(drop_sql)
        try:
            # Create a temporary table in the DB holding the subscribers.
            con.execute("""CREATE TABLE subscriber_list (subscriber TEXT)""")
            formatted_subscribers = ",".join(
                "('{}')".format(u) for u in self.subscriber_list)
            con.execute(
                """INSERT INTO subscriber_list (subscriber) VALUES {}""".format(
                    formatted_subscribers))

            rog = RadiusOfGyration("2016-01-01",
                                   "2016-01-03",
                                   subscriber_subset=Table("subscriber_list"))
            hl = HomeLocation(*[
                daily_location(d, subscriber_subset=Table("subscriber_list"))
                for d in list_of_dates("2016-01-01", "2016-01-03")
            ])
            rog_df = rog.get_dataframe()
            hl_df = hl.get_dataframe()
        finally:
            # Drop the table even when a query above fails, so a failing
            # run does not leave it behind in the database.
            con.execute(drop_sql)

        # Both results should contain exactly the subscribers in the table.
        expected_subscribers = set(self.subscriber_list)
        self.assertEqual(set(rog_df.subscriber), expected_subscribers)
        self.assertEqual(set(hl_df.subscriber), expected_subscribers)
Beispiel #17
0
    def _unsampled_query_obj(self):
        """
        Build the flowmachine radius_of_gyration object from this
        instance's dates, event types, subscriber subset and hours.

        Returns
        -------
        Query
        """
        query_kwargs = dict(
            start=self.start_date,
            stop=self.end_date,
            table=self.event_types,
            subscriber_subset=self.subscriber_subset,
            hours=self.hours,
        )
        return RadiusOfGyration(**query_kwargs)
Beispiel #18
0
def test_create_histogram_using_list_of_bins_values(get_dataframe):
    """
    A histogram built from an explicit list of bin edges agrees with
    numpy.histogram on counts and bin edges.
    """
    rog = RadiusOfGyration("2016-01-01", "2016-01-02")
    bin_edges = [10, 20, 30, 40, 50, 60]

    histogram_query = HistogramAggregation(
        metric=rog, bins=bin_edges, censor=False)
    histogram_df = get_dataframe(histogram_query)
    expected_counts, expected_edges = np.histogram(
        get_dataframe(rog).value, bins=bin_edges)
    edges = expected_edges.tolist()

    assert expected_counts.tolist() == histogram_df.value.tolist()
    assert edges[:-1] == pytest.approx(histogram_df.lower_edge.tolist())
    assert edges[1:] == pytest.approx(histogram_df.upper_edge.tolist())
def test_call_with_str_raises_error():
    """
    Numeric subset raises a TypeError when either the low or the high
    argument is a string.
    """
    rog = RadiusOfGyration("2016-01-01", "2016-01-02")

    invalid_bounds = (
        {"low": "foo", "high": 1},
        {"low": 1, "high": "bar"},
    )
    for bounds in invalid_bounds:
        with pytest.raises(TypeError):
            rog.numeric_subset(col="value", **bounds)
def test_all_above_threshold(get_dataframe):
    """
    Test that values are not returned where there are not enough people in the aggregate.
    """
    joined = JoinedSpatialAggregate(
        locations=daily_location("2016-01-01"),
        metric=RadiusOfGyration("2016-01-01", "2016-01-02"),
    )
    redacted = RedactedJoinedSpatialAggregate(joined_spatial_aggregate=joined)
    in_agg = get_dataframe(redacted).pcod
    assert len(in_agg) > 0

    # Locations whose aggregated daily location value lies between 0 and 15
    # must not appear in the redacted result.
    small_counts = daily_location("2016-01-01").aggregate().numeric_subset(
        col="value", low=0, high=15)
    under_15 = get_dataframe(small_counts).pcod
    assert set(under_15).isdisjoint(in_agg)
Beispiel #21
0
def test_create_histogram_using_bins_and_range_values(get_dataframe):
    """
    A histogram built with an integer number of bins and an explicit range
    agrees with numpy.histogram on counts and bin edges.
    """
    rog = RadiusOfGyration("2016-01-01", "2016-01-02")
    value_range = (130.00, 230.00)

    histogram_query = HistogramAggregation(
        metric=rog, bins=5, range=value_range, censor=False)
    histogram_df = get_dataframe(histogram_query)
    expected_counts, expected_edges = np.histogram(
        get_dataframe(rog).value, bins=5, range=value_range)
    edges = expected_edges.tolist()

    assert expected_counts.tolist() == histogram_df.value.tolist()
    assert edges[:-1] == pytest.approx(histogram_df.lower_edge.tolist())
    assert edges[1:] == pytest.approx(histogram_df.upper_edge.tolist())
Beispiel #22
0
def test_joined_median_aggregate(get_dataframe):
    """
    Test join with median aggregate: the joined value for one pcod matches
    the median computed in pandas from the per-subscriber results.
    """
    mfl = MostFrequentLocation("2016-01-01", "2016-01-04", level="admin3")
    rog = RadiusOfGyration("2016-01-01", "2016-01-04")
    joined = mfl.join_aggregate(rog, method="median")

    target_pcod = "524 2 05 29"
    rog_by_subscriber = get_dataframe(rog).set_index("subscriber")
    mfl_by_subscriber = get_dataframe(mfl).set_index("subscriber")
    by_pcod = rog_by_subscriber.join(mfl_by_subscriber).set_index("pcod")
    expected_median = by_pcod.loc[target_pcod].rog.median()

    observed = get_dataframe(joined).set_index("pcod").loc[target_pcod].rog
    assert pytest.approx(expected_median) == observed, expected_median
Beispiel #23
0
def test_collects_metrics():
    """
    flowmachine.feature_collection can be instantiated with a list of
    metric objects, and names one column per metric after the subscriber
    column.
    """
    start = "2016-01-01"
    stop = "2016-01-03"
    metric_classes = (RadiusOfGyration, NocturnalEvents, SubscriberDegree)
    metrics = [metric(start, stop) for metric in metric_classes]

    fc = feature_collection(metrics)

    expected_columns = [
        "subscriber",
        "value_radiusofgyration_0",
        "value_nocturnalevents_1",
        "value_subscriberdegree_2",
    ]
    assert expected_columns == fc.column_names
Beispiel #24
0
def test_create_histogram_using_bins_list_and_range_values(get_dataframe):
    """
    A histogram built with both a list of bin edges and a range agrees
    with numpy.histogram, given the same arguments, on counts and bin edges.
    """
    rog = RadiusOfGyration("2016-01-01", "2016-01-02")
    bin_edges = [10, 20, 30, 40, 50, 60]
    value_range = (130.00, 230.00)

    histogram_query = HistogramAggregation(
        metric=rog, bins=bin_edges, range=value_range, censor=False)
    histogram_df = get_dataframe(histogram_query)
    expected_counts, expected_edges = np.histogram(
        get_dataframe(rog).value, bins=bin_edges, range=value_range)
    edges = expected_edges.tolist()

    assert expected_counts.tolist() == histogram_df.value.tolist()
    assert edges[:-1] == pytest.approx(histogram_df.lower_edge.tolist())
    assert edges[1:] == pytest.approx(histogram_df.upper_edge.tolist())
Beispiel #25
0
def test_query_can_be_subscriber_set_restricted(subscriber_list_table,
                                                subscriber_list,
                                                get_dataframe):
    """Test that some queries can be limited to only a subset of subscribers."""

    rog = RadiusOfGyration(
        "2016-01-01", "2016-01-03", subscriber_subset=subscriber_list_table)
    daily_locations = [
        daily_location(day, subscriber_subset=subscriber_list_table)
        for day in list_of_dates("2016-01-01", "2016-01-03")
    ]
    hl = ModalLocation(*daily_locations)
    restricted_dfs = [get_dataframe(rog), get_dataframe(hl)]

    # Each restricted query should return exactly the requested subscribers.
    expected_subscribers = set(subscriber_list)
    for restricted_df in restricted_dfs:
        assert set(restricted_df.subscriber) == expected_subscribers
Beispiel #26
0
    def setUp(self):
        """
        Build a daily location and a radius of gyration query, and pick
        subscribers from the first eight rows of the daily location.
        """
        self.dl = daily_location("2016-01-03")
        self.rog = RadiusOfGyration("2016-01-01", "2016-01-03")
        first_eight = self.dl.head(8)
        self.subscriber_list = list(first_eight.subscriber)
        self.single_subscriber = self.subscriber_list[3]
Beispiel #27
0
class TestNumericSubsetting(TestCase):
    """
    Tests for numeric subsetting of a radius of gyration query on its
    "rog" column.
    """

    def setUp(self):
        """
        Prepare a query, a pair of bounds, the pandas-filtered dataframe
        for those bounds, and the equivalent numeric subset query.
        """
        self.rog = RadiusOfGyration("2016-01-01", "2016-01-02")
        self.low, self.high = 150, 155
        bounds_filter = "{low} <= rog <= {high}".format(
            low=self.low, high=self.high)
        self.rog_df = self.rog.get_dataframe().query(bounds_filter)
        self.sub = self.rog.numeric_subset(
            col="rog", low=self.low, high=self.high)

    def _query_has_values(self, Q, expected_df):
        """
        Assert that the dataframe produced by query ``Q`` holds the same
        values as ``expected_df``.
        """
        actual_values = Q.get_dataframe().values
        assert_array_equal(expected_df.values, actual_values)

    def test_can_numsubset_with_low_and_high(self):
        """
        flowmachine.RadiusOfGyration can be subset within a range
        """
        self._query_has_values(self.sub, self.rog_df)

    def test_can_numsubset_with_inf(self):
        """
        flowmachine.RadiusOfGyration can be subset between -Inf and Inf
        """
        high = float("Infinity")
        low = -high
        unbounded_subset = self.rog.numeric_subset(
            col="rog", low=low, high=high)
        bounds_filter = "{low} <= rog <= {high}".format(low=low, high=high)
        expected = self.rog.get_dataframe().query(bounds_filter)
        self._query_has_values(unbounded_subset, expected)

    def test_call_with_str_raises_error(self):
        """
        Numeric subset can't be called with a string in arguments low and high
        """
        invalid_bounds = (
            {"low": "foo", "high": self.high},
            {"low": self.low, "high": "bar"},
        )
        for bounds in invalid_bounds:
            with self.assertRaises(TypeError):
                self.rog.numeric_subset(col="rog", **bounds)

    def test_can_be_stored(self):
        """
        Test that flowmachine.NumericSubset can be stored.
        """
        self.sub.store().result()
        self.assertTrue(self.sub.is_stored)
        # An equivalent subset built afterwards should have the right length.
        fresh_subset = self.rog.numeric_subset(
            col="rog", low=self.low, high=self.high)
        self.assertEqual(len(fresh_subset.get_dataframe()), len(self.rog_df))

    def tearDown(self):
        """
        Remove stored table from "can_be_stored" test.
        """
        self.sub.invalidate_db_cache()