コード例 #1
0
ファイル: test_random.py プロジェクト: synapticielRD/FlowKit
def test_system(get_dataframe):
    """
    Check that sampling with the "system" method executes, both for a fixed
    sample size and for a fraction of the rows.
    """

    def sample_until_non_empty(**sample_kwargs):
        # The system method sometimes returns no rows at all, so keep
        # drawing fresh samples until at least one row comes back.
        while True:
            result = get_dataframe(
                UniqueSubscribers(start="2016-01-01", stop="2016-01-04").random_sample(
                    sampling_method="system", **sample_kwargs
                )
            )
            if len(result) != 0:
                return result

    sized_df = sample_until_non_empty(size=20)
    assert list(sized_df.columns) == ["subscriber"]
    assert len(sized_df) == 20

    # For the fractional sample only the column layout is checked.
    fractional_df = sample_until_non_empty(fraction=0.25)
    assert list(fractional_df.columns) == ["subscriber"]
コード例 #2
0
ファイル: test_random.py プロジェクト: synapticielRD/FlowKit
def test_gets_parent_attributes():
    """
    Test that a random sample exposes the attributes of the query it was
    drawn from (checked here via ``hours``).
    """
    expected_hours = (4, 17)
    parent_query = UniqueSubscribers(
        start="2016-01-01", stop="2016-01-04", hours=expected_hours
    )
    sample = parent_query.random_sample(
        estimate_count=False, sampling_method="bernoulli", size=10
    )
    assert sample.hours == expected_hours
コード例 #3
0
def test_is_subclass():
    """
    Test that a random sample is an instance of the class it was sampled from.
    """
    sample = UniqueSubscribers(start="2016-01-01", stop="2016-01-04").random_sample(
        size=10, sampling_method="bernoulli", estimate_count=False
    )
    sample_is_unique_subscribers = isinstance(sample, UniqueSubscribers)
    assert sample_is_unique_subscribers
コード例 #4
0
ファイル: test_random.py プロジェクト: synapticielRD/FlowKit
def test_seeded_random_zero(sample_method):
    """
    Test that a sample seeded with 0 yields the same query every time the
    query is requested.
    """
    base_query = UniqueSubscribers(start="2016-01-01", stop="2016-01-04")
    sample = base_query.random_sample(
        size=10, sampling_method=sample_method, seed=0
    )
    first_query = sample.get_query()
    second_query = sample.get_query()
    assert first_query == second_query
コード例 #5
0
ファイル: test_random.py プロジェクト: greenape/FlowKit
def gets_parent_attributes():
    """
    Check that a random sample exposes the attributes of the query it was
    drawn from (checked here via ``level``).
    """
    # NOTE(review): this function's name has no ``test_`` prefix, so pytest's
    # default discovery rules would presumably not collect it - confirm
    # whether that is intentional.
    expected_level = "admin3"
    parent_query = UniqueSubscribers(
        start="2016-01-01", stop="2016-01-04", level=expected_level
    )
    sample = parent_query.random_sample(
        size=10, method="bernoulli", estimate_count=False
    )
    assert sample.level == expected_level
コード例 #6
0
def test_seeded_random(sample_method, get_dataframe):
    """
    Test that two samples drawn with the same seed contain the same rows.
    """

    def draw_seeded_sample():
        # Build a brand-new query object on every call so the two draws are
        # independent of each other.
        seeded = UniqueSubscribers(start="2016-01-01", stop="2016-01-04").random_sample(
            size=10, sampling_method=sample_method, seed=0.1
        )
        return get_dataframe(seeded)

    first_draw = draw_seeded_sample()
    second_draw = draw_seeded_sample()
    assert first_draw.values.tolist() == second_draw.values.tolist()
コード例 #7
0
def test_system_rows(get_dataframe):
    """
    Check that sampling with the "system_rows" method executes and returns
    the expected number of rows for both a fixed size and a fraction.
    """
    cases = (
        ({"size": 10}, 10),
        ({"fraction": 0.1}, 50),
    )
    for sample_kwargs, expected_rows in cases:
        sample = UniqueSubscribers(start="2016-01-01", stop="2016-01-04").random_sample(
            sampling_method="system_rows", **sample_kwargs
        )
        df = get_dataframe(sample)
        assert len(df) == expected_rows
コード例 #8
0
def test_bernoulli(get_dataframe):
    """
    Check that sampling with the "bernoulli" method executes, both for a
    fixed sample size and for a fraction of the rows.
    """
    expected_columns = ["subscriber"]

    sized_sample = UniqueSubscribers(start="2016-01-01", stop="2016-01-04").random_sample(
        size=10, sampling_method="bernoulli"
    )
    sized_df = get_dataframe(sized_sample)
    assert list(sized_df.columns) == expected_columns
    assert len(sized_df) == 10

    fractional_sample = UniqueSubscribers(
        start="2016-01-01", stop="2016-01-04"
    ).random_sample(fraction=0.1, sampling_method="bernoulli")
    fractional_df = get_dataframe(fractional_sample)
    # Only the column layout is checked for the fractional sample.
    assert list(fractional_df.columns) == expected_columns
コード例 #9
0
class test_unique_subscribers(TestCase):
    """Checks on the results produced by a UniqueSubscribers query."""

    def setUp(self):
        """Create the UniqueSubscribers query and store it on ``self.UU``."""
        self.UU = UniqueSubscribers("2016-01-01", "2016-01-02")

    def test_returns_set(self):
        """
        as_set() returns an object whose type is exactly ``set``.
        """
        result = self.UU.as_set()
        self.assertIs(type(result), set)

    def test_subscribers_unique(self):
        """
        The ``subscriber`` column of the returned dataframe is unique.
        """
        subscriber_column = self.UU.get_dataframe()["subscriber"]
        self.assertTrue(subscriber_column.is_unique)
コード例 #10
0
ファイル: test_random.py プロジェクト: greenape/FlowKit
def test_seeded_random_badmethod():
    """
    Test that supplying a seed together with the system_rows method raises
    a ValueError.
    """
    with pytest.raises(ValueError):
        query = UniqueSubscribers(start="2016-01-01", stop="2016-01-04")
        query.random_sample(10, method="system_rows", seed=-0.5)
コード例 #11
0
ファイル: test_random.py プロジェクト: greenape/FlowKit
def test_bad_must_provide_either_sample_size_or_fraction():
    """
    Passing both a sample size and a fraction should raise a ValueError.
    """
    with pytest.raises(ValueError):
        query = UniqueSubscribers(start="2016-01-01", stop="2016-01-04")
        query.random_sample(10, fraction=0.5)
コード例 #12
0
ファイル: test_random.py プロジェクト: greenape/FlowKit
def test_bad_must_provide_sample_size_or_fraction():
    """
    Passing neither a sample size nor a fraction should raise a ValueError.
    """
    with pytest.raises(ValueError):
        query = UniqueSubscribers(start="2016-01-01", stop="2016-01-04")
        query.random_sample(None, fraction=None)
コード例 #13
0
def test_seeded_random_oob():
    """
    Test that a seed outside the +/-1 range is rejected with a ValueError.
    """
    with pytest.raises(ValueError):
        query = UniqueSubscribers(start="2016-01-01", stop="2016-01-04")
        query.random_sample(size=10, sampling_method="random_ids", seed=-50)
コード例 #14
0
def test_bad_method_errors():
    """
    An unrecognised sampling method should raise a ValueError.
    """
    # NOTE(review): the call also passes seed=-50. If that seed is rejected
    # with a ValueError on its own, this test could pass without the method
    # name ever being validated - confirm and consider dropping the seed.
    with pytest.raises(ValueError):
        query = UniqueSubscribers(start="2016-01-01", stop="2016-01-04")
        query.random_sample(size=10, sampling_method="BAD_METHOD_TYPE", seed=-50)
コード例 #15
0
def test_not_estimate_count(get_dataframe):
    """
    Check that sampling with ``estimate_count=False`` executes and returns
    the requested number of rows.
    """
    sample = UniqueSubscribers(start="2016-01-01", stop="2016-01-04").random_sample(
        size=10, sampling_method="bernoulli", estimate_count=False
    )
    sampled_df = get_dataframe(sample)
    assert list(sampled_df.columns) == ["subscriber"]
    assert len(sampled_df) == 10
コード例 #16
0
def test_seeded_random_badmethod():
    """
    Test that supplying a seed together with the system_rows method raises
    a TypeError complaining about the unexpected ``seed`` argument.
    """
    expectation = pytest.raises(
        TypeError, match="got an unexpected keyword argument 'seed'"
    )
    with expectation:
        query = UniqueSubscribers(start="2016-01-01", stop="2016-01-04")
        query.random_sample(size=10, sampling_method="system_rows", seed=-0.5)
コード例 #17
0
def test_random_msisdn(get_dataframe):
    """
    Check that drawing a random sample with only a size given executes and
    returns the requested number of rows.
    """
    sample = UniqueSubscribers(start="2016-01-01", stop="2016-01-04").random_sample(
        size=10
    )
    sampled_df = get_dataframe(sample)
    assert list(sampled_df.columns) == ["subscriber"]
    assert len(sampled_df) == 10
コード例 #18
0
    def test_subscribers_make_atleast_one_call_in_admin0(self):
        """
        The set of subscribers who make at least one call within admin0 over
        the whole test time period should be equal to the set of unique
        subscribers in the test calls table.
        """

        start, stop = "2016-01-01", "2016-01-07"

        sls = SubscriberLocationSubset(start, stop, min_calls=1, level="admin0")
        us = UniqueSubscribers(start, stop, table="events.calls")

        # Compare as sets so that row order and duplicates are irrelevant.
        sls_subs = set(sls.get_dataframe()["subscriber"])
        us_subs = set(us.get_dataframe()["subscriber"])

        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(sls_subs, us_subs)
コード例 #19
0
def test_pickling():
    """
    Test that random samples survive a pickle round trip with their query
    and query id unchanged.
    """

    def round_trip(obj):
        # Serialise and immediately deserialise the object.
        return pickle.loads(pickle.dumps(obj))

    samples = (
        UniqueSubscribers(start="2016-01-01", stop="2016-01-04").random_sample(
            size=10, sampling_method="system_rows"
        ),
        Table("events.calls").random_sample(
            size=10, sampling_method="bernoulli", seed=0.73
        ),
    )
    for original in samples:
        assert original.get_query() == round_trip(original).get_query()
        assert original.query_id == round_trip(original).query_id
コード例 #20
0
def test_unique_subscriber_column_names():
    """Test that the column_names property matches the columns of head(0)."""
    query = UniqueSubscribers("2016-01-01", "2016-01-02")
    actual_columns = query.head(0).columns.tolist()
    assert actual_columns == query.column_names
コード例 #21
0
    def setUp(self):
        """Create the UniqueSubscribers query and store it on ``self.UU``."""
        dates = ("2016-01-01", "2016-01-02")
        self.UU = UniqueSubscribers(*dates)