Ejemplo n.º 1
0
def test_join_returns_the_same_clusters():
    """
    Joining to a table whose start and stop times are identical should leave
    the clusters unchanged.
    """
    call_days = CallDays(
        SubscriberLocations("2016-01-01",
                            "2016-01-04",
                            spatial_unit=make_spatial_unit("versioned-site")))

    clusters = HartiganCluster(calldays=call_days, radius=50)
    expected = clusters.to_geopandas()
    scores = EventScore(
        start="2016-01-01",
        stop="2016-01-04",
        spatial_unit=make_spatial_unit("versioned-site"),
    )

    sort_cols = ["subscriber", "rank", "calldays"]
    joined = (clusters.join_to_cluster_components(scores)
              .to_geopandas()
              .sort_values(sort_cols)
              .reset_index(drop=True))

    expected = expected.sort_values(sort_cols).reset_index(drop=True)

    # Compare only the columns common to both results.
    check_cols = ["subscriber", "geometry", "rank", "calldays"]
    assert (joined[check_cols] == expected[check_cols]).all().all()
Ejemplo n.º 2
0
def test_joined_hartigan_column_names():
    """Verify that a joined Hartigan query's column_names matches head(0)."""
    calldays = CallDays("2016-01-01", "2016-01-04", level="versioned-site")
    clustered = HartiganCluster(calldays, 50)
    scores = EventScore(start="2016-01-01",
                        stop="2016-01-05",
                        level="versioned-site")
    joined = clustered.join_to_cluster_components(scores)
    assert joined.column_names == joined.head(0).columns.tolist()
Ejemplo n.º 3
0
def test_joined_hartigan_type_error():
    """Joining a HartiganCluster to a non-query object must raise TypeError."""
    calldays = CallDays(
        SubscriberLocations("2016-01-01",
                            "2016-01-04",
                            spatial_unit=make_spatial_unit("versioned-site")))
    clustered = HartiganCluster(calldays=calldays, radius=50)
    with pytest.raises(TypeError):
        clustered.join_to_cluster_components("banana")
Ejemplo n.º 4
0
def test_joined_hartigan_column_names(get_column_names_from_run):
    """Check the column_names property of a joined Hartigan cluster query."""
    calldays = CallDays(
        subscriber_locations("2016-01-01",
                             "2016-01-04",
                             level="versioned-site"))
    clustered = HartiganCluster(calldays=calldays, radius=50)
    scores = EventScore(start="2016-01-01",
                        stop="2016-01-05",
                        level="versioned-site")
    joined = clustered.join_to_cluster_components(scores)
    assert joined.column_names == get_column_names_from_run(joined)
def test_meaningful_locations_od_results(get_dataframe):
    """
    Test that OD on MeaningfulLocations returns expected results and counts clusters per subscriber correctly.
    """
    # FIXME: Because of the nature of the test data, we can't actually test much for most admin levels because
    # the counts will always be below 15, and hence get redacted!
    mfl_a = MeaningfulLocations(
        clusters=HartiganCluster(
            calldays=CallDays(subscriber_locations=SubscriberLocations(
                start="2016-01-01",
                stop="2016-01-02",
                spatial_unit=make_spatial_unit("versioned-site"),
            )),
            radius=1,
        ),
        scores=EventScore(
            start="2016-01-01",
            stop="2016-01-02",
            spatial_unit=make_spatial_unit("versioned-site"),
        ),
        labels=labels,
        label="unknown",
    )

    mfl_b = MeaningfulLocations(
        clusters=HartiganCluster(
            calldays=CallDays(subscriber_locations=SubscriberLocations(
                start="2016-01-02",
                stop="2016-01-03",
                spatial_unit=make_spatial_unit("versioned-site"),
            )),
            radius=1,
        ),
        scores=EventScore(
            start="2016-01-02",
            stop="2016-01-03",
            spatial_unit=make_spatial_unit("versioned-site"),
        ),
        labels=labels,
        label="unknown",
    )
    mfl_od = MeaningfulLocationsOD(
        meaningful_locations_a=mfl_a,
        meaningful_locations_b=mfl_b,
        spatial_unit=make_spatial_unit("admin", level=1),
    )
    mfl_od_df = get_dataframe(mfl_od)
    # Aggregate should not include any counts below 15
    assert all(mfl_od_df.total > 15)
    # Smoke test one admin1 region gets the expected result.
    # Use positional access (.iloc[0]) rather than label-based .total[0]:
    # after boolean filtering, the surviving index labels are arbitrary, so
    # label 0 may not exist and would raise a KeyError.
    assert mfl_od_df[(mfl_od_df.pcod_from == "524 1") & (
        mfl_od_df.pcod_to == "524 4")].total.iloc[0] == pytest.approx(
            16.490_807)
    assert mfl_od_df.total.sum() == pytest.approx(350.806_012)
Ejemplo n.º 6
0
def test_meaningful_locations_od_results(get_dataframe,
                                         meaningful_locations_labels):
    """
    An OD between two MeaningfulLocations should produce the expected flow
    values.
    """

    def make_locations(start, stop):
        # Build an "unknown"-labelled MeaningfulLocations for the given window.
        return MeaningfulLocations(
            clusters=HartiganCluster(
                calldays=CallDays(subscriber_locations=SubscriberLocations(
                    start=start,
                    stop=stop,
                    spatial_unit=make_spatial_unit("versioned-site"),
                )),
                radius=1,
            ),
            scores=EventScore(
                start=start,
                stop=stop,
                spatial_unit=make_spatial_unit("versioned-site"),
            ),
            labels=meaningful_locations_labels,
            label="unknown",
        )

    mfl_od = MeaningfulLocationsOD(
        meaningful_locations_a=make_locations("2016-01-01", "2016-01-02"),
        meaningful_locations_b=make_locations("2016-01-02", "2016-01-03"),
        spatial_unit=make_spatial_unit("admin", level=1),
    )
    mfl_od_df = get_dataframe(mfl_od)
    # Smoke test: one admin1-to-admin1 flow has the expected value.
    flow_mask = (mfl_od_df.pcod_from == "524 1") & (mfl_od_df.pcod_to ==
                                                    "524 4")
    regional_flow = mfl_od_df[flow_mask].value.tolist()[0]
    assert regional_flow == pytest.approx(16.490_807)
    assert mfl_od_df.value.sum() == pytest.approx(454.0)
Ejemplo n.º 7
0
def test_joined_hartigan_cluster_bad_query_column_names_raises_error():
    """
    A join target without 'site_id' and 'version' columns must be rejected
    with a ValueError.
    """
    calldays = CallDays(
        SubscriberLocations("2016-01-01",
                            "2016-01-04",
                            spatial_unit=make_spatial_unit("versioned-site")))
    clustered = HartiganCluster(calldays=calldays, radius=50)
    # lon-lat scores lack the site_id/version columns the join requires.
    bad_scores = EventScore(start="2016-01-01",
                            stop="2016-01-04",
                            spatial_unit=make_spatial_unit("lon-lat"))
    with pytest.raises(ValueError):
        clustered.join_to_cluster_components(bad_scores)
Ejemplo n.º 8
0
def test_column_names_meaningful_locations_od(exemplar_spatial_unit_param,
                                              get_column_names_from_run,
                                              meaningful_locations_labels):
    """column_names must agree with head(0) for an OD between meaningful locations."""
    if not exemplar_spatial_unit_param.is_polygon:
        pytest.xfail(
            f"The spatial unit {exemplar_spatial_unit_param} is not supported as an aggregation unit for ODs between MeaningfulLocations."
        )

    def make_locations(label):
        # MeaningfulLocations over the same day, varying only the label.
        return MeaningfulLocations(
            clusters=HartiganCluster(
                calldays=CallDays(subscriber_locations=SubscriberLocations(
                    start="2016-01-01",
                    stop="2016-01-02",
                    spatial_unit=make_spatial_unit("versioned-site"),
                )),
                radius=1,
            ),
            scores=EventScore(
                start="2016-01-01",
                stop="2016-01-02",
                spatial_unit=make_spatial_unit("versioned-site"),
            ),
            labels=meaningful_locations_labels,
            label=label,
        )

    mfl_od = MeaningfulLocationsOD(
        meaningful_locations_a=make_locations("evening"),
        meaningful_locations_b=make_locations("unknown"),
        spatial_unit=exemplar_spatial_unit_param,
    )

    assert mfl_od.column_names == get_column_names_from_run(mfl_od)
def test_meaningful_locations_od_redaction(get_dataframe,
                                           meaningful_locations_labels):
    """
    A redacted OD between meaningful locations should contain no value of 15
    or below.
    """

    def make_locations(start, stop):
        # "unknown"-labelled MeaningfulLocations for the given date window.
        return MeaningfulLocations(
            clusters=HartiganCluster(
                calldays=CallDays(subscriber_locations=SubscriberLocations(
                    start=start,
                    stop=stop,
                    spatial_unit=make_spatial_unit("versioned-site"),
                )),
                radius=1,
            ),
            scores=EventScore(
                start=start,
                stop=stop,
                spatial_unit=make_spatial_unit("versioned-site"),
            ),
            labels=meaningful_locations_labels,
            label="unknown",
        )

    redacted_od = RedactedMeaningfulLocationsOD(
        meaningful_locations_od=MeaningfulLocationsOD(
            meaningful_locations_a=make_locations("2016-01-01", "2016-01-02"),
            meaningful_locations_b=make_locations("2016-01-02", "2016-01-03"),
            spatial_unit=make_spatial_unit("admin", level=1),
        ))
    redacted_df = get_dataframe(redacted_od)
    # Redaction guarantee: nothing at or below 15 remains.
    assert (redacted_df.value > 15).all()
Ejemplo n.º 10
0
def test_call_threshold_works(get_dataframe):
    """
    A call threshold above one should reduce the number of clusters.
    """
    calldays = CallDays(
        SubscriberLocations("2016-01-01",
                            "2016-01-04",
                            spatial_unit=make_spatial_unit("versioned-site")))

    baseline = HartiganCluster(calldays=calldays, radius=50).to_geopandas()
    # The unthresholded result must contain single-call days to threshold away.
    assert (baseline.calldays == 1).any()
    thresholded = get_dataframe(
        HartiganCluster(calldays=calldays, radius=50, call_threshold=2))
    assert len(baseline) > len(thresholded)
Ejemplo n.º 11
0
def test_cluster_is_within_envelope(get_dataframe):
    """
    Every cluster should intersect the envelope formed by its member towers.
    """
    calldays = CallDays(
        SubscriberLocations("2016-01-01",
                            "2016-01-04",
                            spatial_unit=make_spatial_unit("versioned-site")))

    clusters_df = HartiganCluster(calldays=calldays, radius=50).to_geopandas()
    sites = Sites().to_geopandas().set_index(["site_id", "version"])
    # Look up each cluster row's tower geometry from the sites table.
    towers = GeoSeries(
        clusters_df.apply(lambda row: get_geom_point(row, sites), 1))
    assert clusters_df.intersects(towers).all()
def test_meaningful_locations_od_raises_for_bad_spatial_unit(
        exemplar_spatial_unit_param, get_dataframe):
    """
    An OD between meaningful locations with an unsupported spatial unit must
    raise InvalidSpatialUnitError.
    """
    locations = MeaningfulLocations(
        clusters=HartiganCluster(
            calldays=CallDays(subscriber_locations=SubscriberLocations(
                start="2016-01-01",
                stop="2016-01-02",
                spatial_unit=make_spatial_unit("versioned-site"),
            )),
            radius=1,
        ),
        scores=EventScore(
            start="2016-01-01",
            stop="2016-01-02",
            spatial_unit=make_spatial_unit("versioned-site"),
        ),
        labels=labels,
        label="evening",
    )

    with pytest.raises(InvalidSpatialUnitError):
        # lon-lat is not a polygonal aggregation unit, so this must fail.
        MeaningfulLocationsOD(
            meaningful_locations_a=locations,
            meaningful_locations_b=locations,
            spatial_unit=make_spatial_unit("lon-lat"),
        )
Ejemplo n.º 13
0
 def test_call_threshold_works(self):
     """
     Test whether a call threshold above 1 limits the number of clusters.

     Fix: the original computed ``har`` but never used it — both assertions
     tested ``self.har_df`` (the second being a mere restatement of the
     first). The thresholded result is now actually checked.
     """
     # The unthresholded clustering contains days with exactly one call...
     self.assertTrue(any(self.har_df.calldays == 1))
     har = HartiganCluster(self.cd, 50, call_threshold=2).get_dataframe()
     # ...so the thresholded clustering must be strictly smaller.
     self.assertTrue(len(self.har_df) > len(har))
def test_meaningful_locations_aggregate_disallowed_spatial_unit_raises(
    meaningful_locations_labels, ):
    """Aggregating to a disallowed spatial unit must raise InvalidSpatialUnitError."""

    with pytest.raises(InvalidSpatialUnitError):
        # lon-lat is not an allowed aggregation unit for MeaningfulLocations.
        MeaningfulLocationsAggregate(
            meaningful_locations=MeaningfulLocations(
                clusters=HartiganCluster(
                    calldays=CallDays(subscriber_locations=SubscriberLocations(
                        start="2016-01-01",
                        stop="2016-01-02",
                        spatial_unit=make_spatial_unit("versioned-site"),
                    )),
                    radius=1,
                ),
                scores=EventScore(
                    start="2016-01-01",
                    stop="2016-01-02",
                    spatial_unit=make_spatial_unit("versioned-site"),
                ),
                labels=meaningful_locations_labels,
                label="evening",
            ),
            spatial_unit=make_spatial_unit("lon-lat"),
        )
def test_meaningful_locations_results(label, expected_number_of_clusters,
                                      get_dataframe):
    """
    MeaningfulLocations should return the expected number of clusters, with a
    correct per-subscriber cluster count.
    """
    mfl_df = get_dataframe(
        MeaningfulLocations(
            clusters=HartiganCluster(
                calldays=CallDays(subscriber_locations=SubscriberLocations(
                    start="2016-01-01",
                    stop="2016-01-02",
                    spatial_unit=make_spatial_unit("versioned-site"),
                )),
                radius=1,
            ),
            scores=EventScore(
                start="2016-01-01",
                stop="2016-01-02",
                spatial_unit=make_spatial_unit("versioned-site"),
            ),
            labels=labels,
            label=label,
        ))
    assert expected_number_of_clusters == len(mfl_df)
    # n_clusters should equal the actual number of cluster rows per subscriber.
    grouped = mfl_df.groupby(["subscriber", "label", "n_clusters"],
                             as_index=False).count()
    assert (grouped.n_clusters == grouped.cluster).all()
Ejemplo n.º 16
0
def test_meaningful_locations_aggregation_results(exemplar_level_param,
                                                  get_dataframe):
    """
    Aggregated MeaningfulLocations should redact counts of 15 or fewer and
    total less than the number of distinct subscribers.
    """
    if exemplar_level_param[
            "level"] not in MeaningfulLocationsAggregate.allowed_levels:
        pytest.xfail(
            f'The level "{exemplar_level_param["level"]}" is not supported as an aggregation unit for MeaningfulLocations.'
        )
    mfl = MeaningfulLocations(
        clusters=HartiganCluster(
            calldays=CallDays(subscriber_locations=subscriber_locations(
                start="2016-01-01", stop="2016-01-02",
                level="versioned-site")),
            radius=1,
        ),
        scores=EventScore(start="2016-01-01",
                          stop="2016-01-02",
                          level="versioned-site"),
        labels=labels,
        label="evening",
    )
    agg_df = get_dataframe(
        MeaningfulLocationsAggregate(meaningful_locations=mfl,
                                     **exemplar_level_param))
    mfl_df = get_dataframe(mfl)
    # Redaction: no aggregated count of 15 or below may appear.
    assert (agg_df.total > 15).all()
    # The aggregate total cannot exceed the unique subscriber count.
    assert agg_df.total.sum() < mfl_df.subscriber.nunique()
Ejemplo n.º 17
0
def test_column_names_meaningful_locations_aggregate(
        exemplar_level_param, get_column_names_from_run):
    """column_names should match head(0) for aggregated meaningful locations."""
    if exemplar_level_param[
            "level"] not in MeaningfulLocationsAggregate.allowed_levels:
        pytest.xfail(
            f'The level "{exemplar_level_param["level"]}" is not supported as an aggregation unit for MeaningfulLocations.'
        )
    locations = MeaningfulLocations(
        clusters=HartiganCluster(
            calldays=CallDays(subscriber_locations=subscriber_locations(
                start="2016-01-01",
                stop="2016-01-02",
                level="versioned-site")),
            radius=1,
        ),
        scores=EventScore(start="2016-01-01",
                          stop="2016-01-02",
                          level="versioned-site"),
        labels=labels,
        label="evening",
    )
    aggregated = MeaningfulLocationsAggregate(meaningful_locations=locations,
                                              **exemplar_level_param)

    assert aggregated.column_names == get_column_names_from_run(aggregated)
Ejemplo n.º 18
0
    def test_different_call_days_format(self):
        """
        HartiganCluster should accept call days as a CallDays query, a stored
        table, or a raw SQL query.
        """
        cd = CallDays("2016-01-01", "2016-01-04", level="versioned-site")
        self.assertIsInstance(
            HartiganCluster(cd, 50).get_dataframe(), pd.DataFrame)

        # Materialise the call days so they can be referenced as a table.
        cd.store().result()

        self.assertIsInstance(
            HartiganCluster(Table(cd.table_name), 50).get_dataframe(),
            pd.DataFrame)

        self.assertIsInstance(
            HartiganCluster(CustomQuery(cd.get_query()), 50).get_dataframe(),
            pd.DataFrame)
def test_meaningful_locations_aggregation_results(exemplar_spatial_unit_param,
                                                  get_dataframe):
    """
    Aggregating MeaningfulLocations should redact counts of 15 or fewer and
    total less than the number of distinct subscribers.
    """
    if not exemplar_spatial_unit_param.is_polygon:
        pytest.xfail(
            f"The spatial unit {exemplar_spatial_unit_param} is not supported as an aggregation unit for MeaningfulLocations."
        )
    mfl = MeaningfulLocations(
        clusters=HartiganCluster(
            calldays=CallDays(subscriber_locations=SubscriberLocations(
                start="2016-01-01",
                stop="2016-01-02",
                spatial_unit=make_spatial_unit("versioned-site"),
            )),
            radius=1,
        ),
        scores=EventScore(
            start="2016-01-01",
            stop="2016-01-02",
            spatial_unit=make_spatial_unit("versioned-site"),
        ),
        labels=labels,
        label="evening",
    )
    agg_df = get_dataframe(
        MeaningfulLocationsAggregate(
            meaningful_locations=mfl,
            spatial_unit=exemplar_spatial_unit_param))
    mfl_df = get_dataframe(mfl)
    # Redaction: no aggregated count of 15 or below may appear.
    assert (agg_df.total > 15).all()
    # The aggregate total cannot exceed the unique subscriber count.
    assert agg_df.total.sum() < mfl_df.subscriber.nunique()
Ejemplo n.º 20
0
def test_meaningful_locations_aggregation_results(exemplar_spatial_unit_param,
                                                  get_dataframe,
                                                  meaningful_locations_labels):
    """
    Redacted aggregation of MeaningfulLocations should drop every value of 15
    or below.
    """
    locations = MeaningfulLocations(
        clusters=HartiganCluster(
            calldays=CallDays(subscriber_locations=SubscriberLocations(
                start="2016-01-01",
                stop="2016-01-02",
                spatial_unit=make_spatial_unit("versioned-site"),
            )),
            radius=1,
        ),
        scores=EventScore(
            start="2016-01-01",
            stop="2016-01-02",
            spatial_unit=make_spatial_unit("versioned-site"),
        ),
        labels=meaningful_locations_labels,
        label="evening",
    )
    redacted = RedactedMeaningfulLocationsAggregate(
        meaningful_locations_aggregate=MeaningfulLocationsAggregate(
            meaningful_locations=locations,
            spatial_unit=make_spatial_unit("admin", level=3)))
    redacted_df = get_dataframe(redacted)
    # Redaction guarantee: nothing at or below 15 remains.
    assert (redacted_df.value > 15).all()
Ejemplo n.º 21
0
def test_hartigan_column_names(get_column_names_from_run):
    """HartiganCluster's column_names should match a real run's columns."""
    clustered = HartiganCluster(
        calldays=CallDays(
            SubscriberLocations(
                "2016-01-01",
                "2016-01-04",
                spatial_unit=make_spatial_unit("versioned-site"))),
        radius=50)
    assert clustered.column_names == get_column_names_from_run(clustered)
Ejemplo n.º 22
0
def test_bigger_radius_yields_fewer_clusters(get_dataframe):
    """
    Increasing the clustering radius should never increase the per-subscriber
    cluster count.
    """
    radii = [1, 2, 5, 10, 50]
    calldays = CallDays(
        SubscriberLocations("2016-01-01",
                            "2016-01-04",
                            spatial_unit=make_spatial_unit("versioned-site")))

    def clusters_per_subscriber(radius):
        # Number of clusters each subscriber gets at this radius.
        df = get_dataframe(HartiganCluster(calldays=calldays, radius=radius))
        return df.groupby("subscriber").size()

    previous = clusters_per_subscriber(radii[0])
    for radius in radii[1:]:
        current = clusters_per_subscriber(radius)
        assert (previous >= current).all()
        previous = current
Ejemplo n.º 23
0
def test_hartigan_cluster_bad_calldays_column_names_raises_error():
    """
    Call days lacking 'site_id' and 'version' columns must raise a ValueError.
    """
    # lon-lat call days do not carry the site_id/version columns required.
    bad_calldays = CallDays(
        SubscriberLocations("2016-01-01",
                            "2016-01-04",
                            spatial_unit=make_spatial_unit("lon-lat")))
    with pytest.raises(ValueError):
        HartiganCluster(calldays=bad_calldays, radius=50)
Ejemplo n.º 24
0
 def test_buffered_hartigan(self):
     """
     Test whether Hartigan produces buffered clusters when buffer is larger than 0.
     """
     har = HartiganCluster(self.cd, 50, buffer=2).to_geopandas()
     areas = har.geometry.area
     # since the mock data does not have geom_area in the site table we either
     # get the clusters with area equivalent to 2km2 (areas below are in degrees) or None.
     min_area = areas.min()
     max_area = areas.max()
     # assertAlmostEquals is a deprecated alias (removed in Python 3.12);
     # use the canonical assertAlmostEqual instead.
     self.assertAlmostEqual(min_area, 0.001, 3)
     self.assertAlmostEqual(max_area, 0.001, 3)
Ejemplo n.º 25
0
def test_different_call_days_format(get_dataframe):
    """
    HartiganCluster should accept call days as a CallDays query, a stored
    table, or a custom SQL query.
    """
    cd = CallDays(
        SubscriberLocations("2016-01-01",
                            "2016-01-04",
                            spatial_unit=make_spatial_unit("versioned-site")))
    assert isinstance(
        get_dataframe(HartiganCluster(calldays=cd, radius=50)), pd.DataFrame)

    # Materialise the call days so they can be passed as a Table.
    cd.store().result()

    from_table = HartiganCluster(
        calldays=Table(cd.fully_qualified_table_name), radius=50)
    assert isinstance(get_dataframe(from_table), pd.DataFrame)

    from_sql = HartiganCluster(
        calldays=CustomQuery(cd.get_query(), cd.column_names), radius=50)
    assert isinstance(get_dataframe(from_sql), pd.DataFrame)
Ejemplo n.º 26
0
def test_all_options_hartigan():
    """
    Test whether Hartigan works when changing all options.

    Builds a cluster query with a non-default radius, buffer and call
    threshold and checks the resulting frame has the expected columns.
    """
    cd = CallDays(
        SubscriberLocations("2016-01-01",
                            "2016-01-04",
                            spatial_unit=make_spatial_unit("versioned-site")))

    har = HartiganCluster(calldays=cd, radius=50, buffer=2,
                          call_threshold=2).to_geopandas()
    # Set literal rather than set([...]) (flake8-comprehensions C405).
    assert set(har.columns) == {
        "subscriber", "geometry", "rank", "calldays", "site_id", "version",
        "centroid"
    }
Ejemplo n.º 27
0
def test_first_call_day_in_first_cluster(get_dataframe):
    """
    Each subscriber's top-ranked call day should appear in their first cluster.
    """
    calldays = CallDays(
        SubscriberLocations("2016-01-01",
                            "2016-01-04",
                            spatial_unit=make_spatial_unit("versioned-site")))
    calldays_df = get_dataframe(calldays)

    clusters_df = HartiganCluster(calldays=calldays, radius=50).to_geopandas()
    keep = ["subscriber", "site_id", "version"]
    first_calldays = calldays_df[keep].groupby("subscriber").first()
    first_clusters = clusters_df[keep].groupby("subscriber").first()

    merged = first_calldays.join(first_clusters, lsuffix="_cd", rsuffix="_har")
    # Membership (`in`) rather than equality: the cluster-side columns appear
    # to hold the collection of sites making up the cluster — TODO confirm.
    matched = merged.apply(
        lambda row: (row.site_id_cd in row.site_id_har) and
        (row.version_cd in row.version_har),
        axis=1,
    )
    assert matched.all()
Ejemplo n.º 28
0
def _make_meaningful_locations_object(
    *,
    start_date,
    end_date,
    label,
    labels,
    event_types,
    subscriber_subset,
    tower_cluster_call_threshold,
    tower_cluster_radius,
    tower_day_of_week_scores,
    tower_hour_of_day_scores,
):
    """
    Build a MeaningfulLocations query from exposed API parameters.

    Constructs the intermediate SubscriberLocations, CallDays, HartiganCluster
    and EventScore queries for the given date window at versioned-site
    resolution, then combines them with the provided labels into a
    MeaningfulLocations query for the requested label.

    Parameters
    ----------
    start_date, end_date : str
        Date window passed as start/stop to the underlying queries.
    label : str
        The meaningful-locations label to select.
    labels : dict
        Label definitions passed through to MeaningfulLocations.
    event_types
        Forwarded as ``table`` to SubscriberLocations and EventScore.
    subscriber_subset
        Forwarded to SubscriberLocations and EventScore.
    tower_cluster_call_threshold, tower_cluster_radius
        Forwarded to HartiganCluster as call_threshold and radius.
    tower_day_of_week_scores, tower_hour_of_day_scores
        Forwarded to EventScore as score_dow and score_hour.

    Returns
    -------
    MeaningfulLocations
        The assembled query object (not yet executed).
    """
    q_subscriber_locations = SubscriberLocations(
        start=start_date,
        stop=end_date,
        spatial_unit=make_spatial_unit(
            "versioned-site"
        ),  # note this 'spatial_unit' is not the same as the exposed parameter 'aggregation_unit'
        table=event_types,
        subscriber_subset=subscriber_subset,
    )
    q_call_days = CallDays(subscriber_locations=q_subscriber_locations)
    q_hartigan_cluster = HartiganCluster(
        calldays=q_call_days,
        radius=tower_cluster_radius,
        call_threshold=tower_cluster_call_threshold,
        buffer=
        0,  # we're not exposing 'buffer', apparently, so we're hard-coding it
    )
    q_event_score = EventScore(
        start=start_date,
        stop=end_date,
        score_hour=tower_hour_of_day_scores,
        score_dow=tower_day_of_week_scores,
        spatial_unit=make_spatial_unit(
            "versioned-site"
        ),  # note this 'spatial_unit' is not the same as the exposed parameter 'aggregation_unit'
        table=event_types,
        subscriber_subset=subscriber_subset,
    )
    q_meaningful_locations = MeaningfulLocations(clusters=q_hartigan_cluster,
                                                 labels=labels,
                                                 scores=q_event_score,
                                                 label=label)
    return q_meaningful_locations
Ejemplo n.º 29
0
def test_column_names_meaningful_locations(get_column_names_from_run):
    """column_names should match head(0) for MeaningfulLocations."""
    locations = MeaningfulLocations(
        clusters=HartiganCluster(
            calldays=CallDays(subscriber_locations=subscriber_locations(
                start="2016-01-01", stop="2016-01-02",
                level="versioned-site")),
            radius=1,
        ),
        scores=EventScore(start="2016-01-01",
                          stop="2016-01-02",
                          level="versioned-site"),
        labels=labels,
        label="evening",
    )

    assert locations.column_names == get_column_names_from_run(locations)
Ejemplo n.º 30
0
def test_buffered_hartigan():
    """
    Hartigan clustering with a positive buffer should produce buffered
    (non-degenerate) cluster geometries.
    """
    calldays = CallDays(
        SubscriberLocations("2016-01-01",
                            "2016-01-04",
                            spatial_unit=make_spatial_unit("versioned-site")))

    buffered = HartiganCluster(calldays=calldays, radius=50,
                               buffer=2).to_geopandas()
    # The mock site table has no geom_area, so every buffered cluster ends up
    # with roughly the same area (values below are in degrees).
    areas = buffered.geometry.area
    assert areas.min() == pytest.approx(0.0011327683603873115)
    assert areas.max() == pytest.approx(0.001166624454009738)