Ejemplo n.º 1
0
def test_events_table_subset_column_names(columns):
    """Check that column_names agrees with the columns the query actually returns."""
    subset_query = EventTableSubset(
        table="events.calls",
        columns=columns,
        start="2016-01-01",
        stop="2016-01-02",
    )
    # head(0) yields an empty result that still carries the output column labels.
    returned_columns = subset_query.head(0).columns.tolist()
    assert returned_columns == subset_query.column_names
Ejemplo n.º 2
0
def test_turn_on_caching():
    """
    After caching is switched off and back on, get_dataframe() stores a
    DataFrame on the query's ._df attribute again.
    """
    query = EventTableSubset(stop="2016-01-02", start="2016-01-01")
    query.get_dataframe()

    # Toggle caching off and on before fetching a second time.
    query.turn_off_caching()
    query.turn_on_caching()
    query.get_dataframe()

    stored = query._df
    assert isinstance(stored, pd.DataFrame)
Ejemplo n.º 3
0
def test_events_table_subscriber_ident_substitutions(ident):
    """Check that the chosen identifier column is exposed under the name 'subscriber'."""
    query = EventTableSubset(
        table="events.calls",
        subscriber_identifier=ident,
        columns=[ident],
        start="2016-01-01",
        stop="2016-01-02",
    )
    # The identifier is the only requested column, so it is the first one returned.
    first_returned_column = query.head(0).columns[0]
    assert "subscriber" == first_returned_column
    assert ["subscriber"] == query.column_names
Ejemplo n.º 4
0
def test_graph():
    """Check that dependency_graph() runs and contains an expected node."""
    graph = daily_location("2016-01-01").dependency_graph()
    expected_dependency = EventTableSubset(
        "2016-01-01",
        "2016-01-02",
        columns=["msisdn", "datetime", "location_id"],
    )
    # Nodes are looked up by the query's md5 prefixed with "x".
    node_name = f"x{expected_dependency.md5}"
    assert node_name in graph.nodes()
Ejemplo n.º 5
0
def test_cache_is_returned():
    """
    The cache property is truthy after turn_on_caching() and falsy after
    turn_off_caching().
    """
    query = EventTableSubset(stop="2016-01-02", start="2016-01-01")
    query.get_dataframe()

    query.turn_on_caching()
    assert query.cache

    query.turn_off_caching()
    assert not query.cache
Ejemplo n.º 6
0
def test_subset_correct(subscriber_list, get_dataframe):
    """Test that pushed-in subsetting matches the result of calling .subset.

    Two pairs of queries are compared: an EventTableSubset and a
    ModalLocation, each built once with ``subscriber_subset`` passed in and
    once unrestricted and then narrowed with ``.subset("subscriber", ...)``.

    Parameters
    ----------
    subscriber_list
        Fixture providing the subscribers to restrict to.
    get_dataframe
        Fixture that turns a query object into a dataframe.
    """

    def assert_same_rows(pushed_in, post_hoc):
        """Assert both dataframes hold the same rows, ignoring row order."""
        column_order = pushed_in.columns.tolist()
        assert column_order == post_hoc.columns.tolist()
        # Row order is not compared: nothing in this test establishes that
        # the two queries return rows in the same order.
        pushed_in = pushed_in.sort_values(column_order).reset_index(drop=True)
        post_hoc = post_hoc.sort_values(column_order).reset_index(drop=True)
        # DataFrame.equals compares values element-wise. The previous
        # `all(a == b)` only iterated the column labels of the boolean frame
        # (non-empty strings, always truthy), so it could never fail on a
        # value mismatch.
        assert pushed_in.equals(post_hoc)

    su = EventTableSubset(
        start="2016-01-01",
        stop="2016-01-03",
        subscriber_subset=subscriber_list,
    )
    subsu = EventTableSubset(start="2016-01-01", stop="2016-01-03").subset(
        "subscriber", subscriber_list
    )
    assert_same_rows(get_dataframe(su), get_dataframe(subsu))

    # Both modal locations must cover the same dates, otherwise the two sides
    # are different queries. Previously one side used 2016-01-01..2016-01-07
    # and the other 2016-01-01..2016-01-03.
    dates = list_of_dates("2016-01-01", "2016-01-03")
    su = ModalLocation(
        *[daily_location(d, subscriber_subset=subscriber_list) for d in dates]
    )
    subsu = ModalLocation(*[daily_location(d) for d in dates]).subset(
        "subscriber", subscriber_list
    )
    assert_same_rows(get_dataframe(su), get_dataframe(subsu))
Ejemplo n.º 7
0
def test_get_df_without_caching():
    """
    With caching turned off, get_dataframe() still returns a DataFrame on
    repeated calls.
    """
    query = EventTableSubset(stop="2016-01-02", start="2016-01-01")
    query.get_dataframe()
    query.turn_off_caching()

    # Fetch twice in a row; each call must hand back a dataframe.
    for _ in range(2):
        fetched = query.get_dataframe()
        assert isinstance(fetched, pd.DataFrame)
Ejemplo n.º 8
0
def test_omitted_subscriber_column(get_dataframe, subscriber_list):
    """Check that leaving out the subscriber column warns but still returns the requested data."""
    shared_kwargs = dict(
        start="2016-01-01",
        stop="2016-01-03",
        subscriber_subset=subscriber_list,
    )

    # Building and running the query without a subscriber column must warn.
    with pytest.warns(UserWarning):
        without_subscriber = get_dataframe(
            EventTableSubset(columns=["duration"], **shared_kwargs)
        )

    with_subscriber = get_dataframe(
        EventTableSubset(columns=["msisdn", "duration"], **shared_kwargs)
    )

    omitted_durations = without_subscriber.duration.values.tolist()
    reference_durations = with_subscriber.duration.values.tolist()
    assert omitted_durations == reference_durations
    # Only the requested column comes back.
    assert without_subscriber.columns.tolist() == ["duration"]
Ejemplo n.º 9
0
def test_calculate_dependency_graph():
    """
    Check that calculate_dependency_graph() runs and that the graph it returns
    contains an expected node carrying the matching query object.
    """
    root_query = daily_location("2016-01-01")
    graph = calculate_dependency_graph(root_query, analyse=True)

    expected_dependency = EventTableSubset(
        columns=["msisdn", "datetime", "location_id"],
        start="2016-01-01",
        stop="2016-01-02",
    )
    # Nodes are looked up by the query id prefixed with "x".
    node_name = f"x{expected_dependency.query_id}"

    assert node_name in graph.nodes()
    stored_query = graph.nodes[node_name]["query_object"]
    assert stored_query.query_id == expected_dependency.query_id
Ejemplo n.º 10
0
def test_can_subset_by_sampler(get_dataframe):
    """Check that a random sample of another query can be used as the subscriber subset."""
    all_subscribers = UniqueSubscribers("2016-01-01", "2016-01-07")
    sampled_subscribers = all_subscribers.random_sample(
        size=10,
        sampling_method="system",
        seed=0.1,
    )
    events = EventTableSubset(
        subscriber_subset=sampled_subscribers,
        start="2016-01-01",
        stop="2016-01-03",
    )

    subscribers_in_events = set(get_dataframe(events).subscriber)
    subscribers_in_sample = set(get_dataframe(sampled_subscribers).subscriber)

    # The events must cover exactly the sampled subscribers, all ten of them.
    assert subscribers_in_events == subscribers_in_sample
    assert len(subscribers_in_events) == 10
Ejemplo n.º 11
0
def test_turn_off_caching():
    """
    After turn_off_caching(), the previously fetched dataframe is no longer
    available as ._df.
    """
    query = EventTableSubset(stop="2016-01-02", start="2016-01-01")
    query.get_dataframe()
    query.turn_off_caching()

    # Merely reading the attribute is expected to raise.
    with pytest.raises(AttributeError):
        query._df
Ejemplo n.º 12
0
def test_cdrs_can_be_subset_by_list(get_dataframe, subscriber_list):
    """
    Events can be restricted to a list of subscribers.
    """
    events = EventTableSubset(
        subscriber_subset=subscriber_list,
        start="2016-01-01",
        stop="2016-01-03",
    )
    result = get_dataframe(events)

    # Every requested subscriber must appear in the result, and nobody else.
    subscribers_found = set(result.subscriber)
    assert subscribers_found == set(subscriber_list)
Ejemplo n.º 13
0
def test_turn_off_caching_handles_error():
    """
    turn_off_caching() does not raise when the ._df attribute is missing.
    """
    query = EventTableSubset(stop="2016-01-02", start="2016-01-01")
    query.get_dataframe()

    # Toggle caching off and on, then fetch again before removing ._df.
    query.turn_off_caching()
    query.turn_on_caching()
    query.get_dataframe()

    # Remove the stored dataframe by hand. There is no assertion: the test
    # passes as long as the final call completes without raising.
    del query._df
    query.turn_off_caching()