def test_meaningful_locations_od_results(get_dataframe):
    """
    Test that an OD matrix between two sets of MeaningfulLocations returns expected results.

    Builds "unknown"-labelled meaningful locations for the windows
    2016-01-01..2016-01-02 and 2016-01-02..2016-01-03, computes the OD matrix
    between them at admin level 1, and checks the totals in the result.
    """
    # FIXME: Because of the nature of the test data, we can't actually test much for most admin levels because
    # the counts will always be below 15, and hence get redacted!
    mfl_a = MeaningfulLocations(
        clusters=HartiganCluster(
            calldays=CallDays(subscriber_locations=SubscriberLocations(
                start="2016-01-01",
                stop="2016-01-02",
                spatial_unit=make_spatial_unit("versioned-site"),
            )),
            radius=1,
        ),
        scores=EventScore(
            start="2016-01-01",
            stop="2016-01-02",
            spatial_unit=make_spatial_unit("versioned-site"),
        ),
        labels=labels,
        label="unknown",
    )

    mfl_b = MeaningfulLocations(
        clusters=HartiganCluster(
            calldays=CallDays(subscriber_locations=SubscriberLocations(
                start="2016-01-02",
                stop="2016-01-03",
                spatial_unit=make_spatial_unit("versioned-site"),
            )),
            radius=1,
        ),
        scores=EventScore(
            start="2016-01-02",
            stop="2016-01-03",
            spatial_unit=make_spatial_unit("versioned-site"),
        ),
        labels=labels,
        label="unknown",
    )
    mfl_od = MeaningfulLocationsOD(
        meaningful_locations_a=mfl_a,
        meaningful_locations_b=mfl_b,
        spatial_unit=make_spatial_unit("admin", level=1),
    )
    mfl_od_df = get_dataframe(mfl_od)
    # Every total that survives redaction must be strictly greater than 15
    assert all(mfl_od_df.total > 15)
    # Smoke test one admin1 region gets the expected result
    flow_524_1_to_524_4 = mfl_od_df[
        (mfl_od_df.pcod_from == "524 1") & (mfl_od_df.pcod_to == "524 4")
    ]
    # Use positional access: the filtered frame keeps the row labels of the
    # full result, so label 0 is only present if the match is the first row.
    assert flow_524_1_to_524_4.total.iloc[0] == pytest.approx(16.490_807)
    assert mfl_od_df.total.sum() == pytest.approx(350.806_012)
def test_column_names_meaningful_locations_od(exemplar_spatial_unit_param,
                                              get_column_names_from_run):
    """
    Test that MeaningfulLocationsOD.column_names agrees with the column names
    obtained from get_column_names_from_run for the same query.

    Spatial units that are not polygons are marked as expected failures.
    """
    if not exemplar_spatial_unit_param.is_polygon:
        pytest.xfail(
            f"The spatial unit {exemplar_spatial_unit_param} is not supported as an aggregation unit for ODs between MeaningfulLocations."
        )

    # "From" side of the OD matrix: evening locations
    origin_subscriber_locations = SubscriberLocations(
        start="2016-01-01",
        stop="2016-01-02",
        spatial_unit=make_spatial_unit("versioned-site"),
    )
    origin_clusters = HartiganCluster(
        calldays=CallDays(subscriber_locations=origin_subscriber_locations),
        radius=1,
    )
    origin_scores = EventScore(
        start="2016-01-01",
        stop="2016-01-02",
        spatial_unit=make_spatial_unit("versioned-site"),
    )
    origin = MeaningfulLocations(
        clusters=origin_clusters,
        scores=origin_scores,
        labels=labels,
        label="evening",
    )

    # "To" side of the OD matrix: same window, unknown locations
    destination_subscriber_locations = SubscriberLocations(
        start="2016-01-01",
        stop="2016-01-02",
        spatial_unit=make_spatial_unit("versioned-site"),
    )
    destination_clusters = HartiganCluster(
        calldays=CallDays(
            subscriber_locations=destination_subscriber_locations),
        radius=1,
    )
    destination_scores = EventScore(
        start="2016-01-01",
        stop="2016-01-02",
        spatial_unit=make_spatial_unit("versioned-site"),
    )
    destination = MeaningfulLocations(
        clusters=destination_clusters,
        scores=destination_scores,
        labels=labels,
        label="unknown",
    )

    od_query = MeaningfulLocationsOD(
        meaningful_locations_a=origin,
        meaningful_locations_b=destination,
        spatial_unit=exemplar_spatial_unit_param,
    )

    columns_from_run = get_column_names_from_run(od_query)
    declared_columns = od_query.column_names
    assert columns_from_run == declared_columns
def test_meaningful_locations_od_raises_for_bad_spatial_unit(
        exemplar_spatial_unit_param, get_dataframe):
    """
    Test that od on meaningful locations raises an InvalidSpatialUnitError for a bad spatial unit.

    The OD query is constructed with a "lon-lat" spatial unit, which is
    expected to be rejected at construction time.
    """
    # NOTE(review): the exemplar_spatial_unit_param and get_dataframe fixtures
    # are requested but never used in this body; they are kept so the test's
    # signature is unchanged.
    mfl = MeaningfulLocations(
        clusters=HartiganCluster(
            calldays=CallDays(subscriber_locations=SubscriberLocations(
                start="2016-01-01",
                stop="2016-01-02",
                spatial_unit=make_spatial_unit("versioned-site"),
            )),
            radius=1,
        ),
        scores=EventScore(
            start="2016-01-01",
            stop="2016-01-02",
            spatial_unit=make_spatial_unit("versioned-site"),
        ),
        labels=labels,
        label="evening",
    )

    # The constructor is expected to raise, so its result is not bound to a name
    with pytest.raises(InvalidSpatialUnitError):
        MeaningfulLocationsOD(
            meaningful_locations_a=mfl,
            meaningful_locations_b=mfl,
            spatial_unit=make_spatial_unit("lon-lat"),
        )
def test_meaningful_locations_aggregation_results(exemplar_spatial_unit_param,
                                                  get_dataframe):
    """
    Test that MeaningfulLocationsAggregate only reports totals above 15, and
    that the reported totals sum to less than the number of distinct subscribers.

    Spatial units that are not polygons are marked as expected failures.
    """
    # NOTE(review): a second function with this exact name is defined later in
    # this file; the later definition rebinds the name, so this one is shadowed.
    if not exemplar_spatial_unit_param.is_polygon:
        pytest.xfail(
            f"The spatial unit {exemplar_spatial_unit_param} is not supported as an aggregation unit for MeaningfulLocations."
        )

    located_subscribers = SubscriberLocations(
        start="2016-01-01",
        stop="2016-01-02",
        spatial_unit=make_spatial_unit("versioned-site"),
    )
    clusters = HartiganCluster(
        calldays=CallDays(subscriber_locations=located_subscribers),
        radius=1,
    )
    scores = EventScore(
        start="2016-01-01",
        stop="2016-01-02",
        spatial_unit=make_spatial_unit("versioned-site"),
    )
    evening_locations = MeaningfulLocations(
        clusters=clusters,
        scores=scores,
        labels=labels,
        label="evening",
    )
    aggregate = MeaningfulLocationsAggregate(
        meaningful_locations=evening_locations,
        spatial_unit=exemplar_spatial_unit_param,
    )

    locations_df = get_dataframe(evening_locations)
    aggregate_df = get_dataframe(aggregate)

    # No total at or below the redaction threshold may appear in the aggregate
    assert (aggregate_df.total > 15).all()

    # Redaction drops rows, so the aggregate must sum to fewer than the distinct subscribers
    aggregate_sum = aggregate_df.total.sum()
    distinct_subscribers = locations_df.subscriber.nunique()
    assert aggregate_sum < distinct_subscribers
def test_meaningful_locations_results(label, expected_number_of_clusters,
                                      get_dataframe):
    """
    Test that MeaningfulLocations yields the expected number of rows for a
    label, and that each row's n_clusters equals the number of clusters
    actually present for that subscriber.
    """
    located_subscribers = SubscriberLocations(
        start="2016-01-01",
        stop="2016-01-02",
        spatial_unit=make_spatial_unit("versioned-site"),
    )
    clusters = HartiganCluster(
        calldays=CallDays(subscriber_locations=located_subscribers),
        radius=1,
    )
    scores = EventScore(
        start="2016-01-01",
        stop="2016-01-02",
        spatial_unit=make_spatial_unit("versioned-site"),
    )
    query = MeaningfulLocations(
        clusters=clusters,
        scores=scores,
        labels=labels,
        label=label,
    )

    result = get_dataframe(query)
    assert len(result) == expected_number_of_clusters

    # Count rows per (subscriber, label, n_clusters); the count in the
    # "cluster" column should equal the n_clusters the query reported.
    grouping_keys = ["subscriber", "label", "n_clusters"]
    per_subscriber = result.groupby(grouping_keys, as_index=False).count()
    assert (per_subscriber.n_clusters == per_subscriber.cluster).all()
def test_column_names_meaningful_locations_aggregate(
        exemplar_level_param, get_column_names_from_run):
    """
    Test that MeaningfulLocationsAggregate.column_names agrees with the column
    names obtained from get_column_names_from_run for the same query.

    Levels outside MeaningfulLocationsAggregate.allowed_levels are marked as
    expected failures.
    """
    if exemplar_level_param[
            "level"] not in MeaningfulLocationsAggregate.allowed_levels:
        pytest.xfail(
            f'The level "{exemplar_level_param["level"]}" is not supported as an aggregation unit for MeaningfulLocations.'
        )

    located_subscribers = subscriber_locations(
        start="2016-01-01",
        stop="2016-01-02",
        level="versioned-site",
    )
    clusters = HartiganCluster(
        calldays=CallDays(subscriber_locations=located_subscribers),
        radius=1,
    )
    scores = EventScore(
        start="2016-01-01",
        stop="2016-01-02",
        level="versioned-site",
    )
    evening_locations = MeaningfulLocations(
        clusters=clusters,
        scores=scores,
        labels=labels,
        label="evening",
    )
    aggregate = MeaningfulLocationsAggregate(
        meaningful_locations=evening_locations,
        **exemplar_level_param,
    )

    columns_from_run = get_column_names_from_run(aggregate)
    declared_columns = aggregate.column_names
    assert columns_from_run == declared_columns
def test_meaningful_locations_aggregation_results(exemplar_level_param,
                                                  get_dataframe):
    """
    Test that MeaningfulLocationsAggregate only reports totals above 15, and
    that the reported totals sum to less than the number of distinct subscribers.

    Levels outside MeaningfulLocationsAggregate.allowed_levels are marked as
    expected failures.
    """
    # NOTE(review): an earlier function in this file has the same name; this
    # definition rebinds it, so only one of the two is visible at module level.
    if exemplar_level_param[
            "level"] not in MeaningfulLocationsAggregate.allowed_levels:
        pytest.xfail(
            f'The level "{exemplar_level_param["level"]}" is not supported as an aggregation unit for MeaningfulLocations.'
        )

    located_subscribers = subscriber_locations(
        start="2016-01-01",
        stop="2016-01-02",
        level="versioned-site",
    )
    clusters = HartiganCluster(
        calldays=CallDays(subscriber_locations=located_subscribers),
        radius=1,
    )
    scores = EventScore(
        start="2016-01-01",
        stop="2016-01-02",
        level="versioned-site",
    )
    evening_locations = MeaningfulLocations(
        clusters=clusters,
        scores=scores,
        labels=labels,
        label="evening",
    )
    aggregate = MeaningfulLocationsAggregate(
        meaningful_locations=evening_locations,
        **exemplar_level_param,
    )

    locations_df = get_dataframe(evening_locations)
    aggregate_df = get_dataframe(aggregate)

    # No total at or below the redaction threshold may appear in the aggregate
    assert (aggregate_df.total > 15).all()

    # Redaction drops rows, so the aggregate must sum to fewer than the distinct subscribers
    aggregate_sum = aggregate_df.total.sum()
    distinct_subscribers = locations_df.subscriber.nunique()
    assert aggregate_sum < distinct_subscribers
def test_column_names_meaningful_locations(get_column_names_from_run):
    """
    Test that MeaningfulLocations.column_names agrees with the column names
    obtained from get_column_names_from_run for the same query.
    """
    located_subscribers = subscriber_locations(
        start="2016-01-01",
        stop="2016-01-02",
        level="versioned-site",
    )
    clusters = HartiganCluster(
        calldays=CallDays(subscriber_locations=located_subscribers),
        radius=1,
    )
    scores = EventScore(
        start="2016-01-01",
        stop="2016-01-02",
        level="versioned-site",
    )
    evening_locations = MeaningfulLocations(
        clusters=clusters,
        scores=scores,
        labels=labels,
        label="evening",
    )

    columns_from_run = get_column_names_from_run(evening_locations)
    declared_columns = evening_locations.column_names
    assert columns_from_run == declared_columns
def test_meaningful_locations_aggregate_disallowed_level_raises():
    """
    Test that a bad level raises a BadLevelError.

    The MeaningfulLocations input is built outside the ``pytest.raises``
    block so that only the MeaningfulLocationsAggregate constructor, called
    with level "NOT_A_LEVEL", can satisfy the expectation.
    """
    mfl = MeaningfulLocations(
        clusters=HartiganCluster(
            calldays=CallDays(
                subscriber_locations=subscriber_locations(
                    start="2016-01-01",
                    stop="2016-01-02",
                    level="versioned-site",
                )),
            radius=1,
        ),
        scores=EventScore(start="2016-01-01",
                          stop="2016-01-02",
                          level="versioned-site"),
        labels=labels,
        label="evening",
    )

    # Keep the raises-block minimal: an error raised while building the input
    # above should fail the test rather than be mistaken for the expected one.
    with pytest.raises(BadLevelError):
        MeaningfulLocationsAggregate(
            meaningful_locations=mfl,
            level="NOT_A_LEVEL",
        )
def test_meaningful_locations_od_raises_for_bad_level(exemplar_level_param,
                                                      get_dataframe):
    """
    Test that od on meaningful locations raises a BadLevelError for a bad level.

    The OD query is constructed with level "NOT_A_LEVEL", which is expected
    to be rejected at construction time.
    """
    # NOTE(review): the exemplar_level_param and get_dataframe fixtures are
    # requested but never used in this body; they are kept so the test's
    # signature is unchanged.
    mfl = MeaningfulLocations(
        clusters=HartiganCluster(
            calldays=CallDays(subscriber_locations=subscriber_locations(
                start="2016-01-01", stop="2016-01-02",
                level="versioned-site")),
            radius=1,
        ),
        scores=EventScore(start="2016-01-01",
                          stop="2016-01-02",
                          level="versioned-site"),
        labels=labels,
        label="evening",
    )

    # The constructor is expected to raise, so its result is not bound to a name
    with pytest.raises(BadLevelError):
        MeaningfulLocationsOD(meaningful_locations_a=mfl,
                              meaningful_locations_b=mfl,
                              level="NOT_A_LEVEL")
def test_meaningful_locations_aggregate_disallowed_spatial_unit_raises():
    """
    Test that a bad spatial unit raises an InvalidSpatialUnitError.

    The MeaningfulLocations input is built outside the ``pytest.raises``
    block so that only the MeaningfulLocationsAggregate constructor, called
    with a "lon-lat" spatial unit, can satisfy the expectation.
    """
    mfl = MeaningfulLocations(
        clusters=HartiganCluster(
            calldays=CallDays(subscriber_locations=SubscriberLocations(
                start="2016-01-01",
                stop="2016-01-02",
                spatial_unit=make_spatial_unit("versioned-site"),
            )),
            radius=1,
        ),
        scores=EventScore(
            start="2016-01-01",
            stop="2016-01-02",
            spatial_unit=make_spatial_unit("versioned-site"),
        ),
        labels=labels,
        label="evening",
    )

    # Keep the raises-block minimal: an error raised while building the input
    # above should fail the test rather than be mistaken for the expected one.
    with pytest.raises(InvalidSpatialUnitError):
        MeaningfulLocationsAggregate(
            meaningful_locations=mfl,
            spatial_unit=make_spatial_unit("lon-lat"),
        )