Ejemplo n.º 1
0
def test_average_unique_domains_count(spark):
    """Expect the per-profile unique-domain figure to be the mean of client means.

    Two clients each submit two pings; the asserted value is the average of
    the two per-client averages.
    """
    # (client_id, unique_domains_count) for each ping.
    pings = [
        ('1', 6), ('1', 2),    # client 1 averages to 4
        ('2', 12), ('2', 4),   # client 2 averages to 8
    ]
    snippets = [
        {'client_id': client_id, 'unique_domains_count': domains}
        for client_id, domains in pings
    ]

    churned = churn.compute_churn_week(
        snippets_to_df(spark, snippets), week_start_ds)
    first_row = churned.collect()[0]

    # (4 + 8) / 2 == 6
    assert first_row.unique_domains_count_per_profile == 6
Ejemplo n.º 2
0
def test_nulled_stub_attribution_medium(nulled_attribution_df):
    """Expect 'unknown' to be the only distinct `medium` in the churn output."""
    churned = churn.compute_churn_week(nulled_attribution_df, week_start_ds)
    distinct_rows = churned.select('medium').distinct().collect()

    observed_mediums = {row.medium for row in distinct_rows}

    assert observed_mediums == {'unknown'}
Ejemplo n.º 3
0
def test_total_uri_count_per_client(spark):
    """Expect the first output row to carry the summed URI count (1 + 2)."""
    snippets = [{'total_uri_count': uri_count} for uri_count in (1, 2)]

    churned = churn.compute_churn_week(
        snippets_to_df(spark, snippets), week_start_ds)
    first_row = churned.collect()[0]

    assert first_row.total_uri_count == 3
Ejemplo n.º 4
0
def test_total_uri_count_per_client(generate_data):
    """Expect the first output row to carry the summed URI count (1 + 2).

    The input column name is the module-level ``SPBE`` prefix followed by
    ``total_uri_count``.
    """
    uri_count_key = SPBE + 'total_uri_count'
    snippets = [{uri_count_key: uri_count} for uri_count in (1, 2)]

    churned = churn.compute_churn_week(generate_data(snippets), week_start_ds)
    first_row = churned.collect()[0]

    assert first_row.total_uri_count == 3
Ejemplo n.º 5
0
def test_cohort_by_channel_aggregates(multi_profile_df):
    """Expect per-channel sums of profiles and usage hours for one channel."""
    churned = churn.compute_churn_week(multi_profile_df, week_start_ds)

    # Sum both measures within each channel, keeping the original column names.
    per_channel = churned.groupBy(churned.channel).agg(
        F.sum('n_profiles').alias('n_profiles'),
        F.sum('usage_hours').alias('usage_hours'),
    )
    selected = per_channel.where(
        churned.channel == 'release-cck-mozilla42').collect()

    channel_totals = selected[0]
    assert channel_totals.n_profiles == 2
    assert channel_totals.usage_hours == 4
Ejemplo n.º 6
0
def test_multiple_cohort_weeks_exist(multi_profile_df):
    """Expect the churn output to contain exactly the weeks 0, 1 and 2."""
    churned = churn.compute_churn_week(multi_profile_df, week_start_ds)
    week_rows = churned.select('current_week').collect()

    observed_weeks = {week_row.current_week for week_row in week_rows}

    assert observed_weeks == {0, 1, 2}
Ejemplo n.º 7
0
def test_current_cohort_week_is_zero(single_profile_df):
    """Expect the first output row for the single profile to be in week 0."""
    churned = churn.compute_churn_week(single_profile_df, week_start_ds)
    first_row = churned.collect()[0]

    assert first_row.current_week == 0
Ejemplo n.º 8
0
def test_simple_string_dimensions(spark):
    """Expect null string dimensions to come out as 'unknown'."""
    dimensions = ['distribution_id', 'default_search_engine', 'locale']
    # dict.fromkeys maps every dimension to None, i.e. a null input value.
    null_snippet = dict.fromkeys(dimensions)

    churned = churn.compute_churn_week(
        snippets_to_df(spark, [null_snippet]), week_start_ds)
    first_row = churned.collect()[0]

    for dimension in dimensions:
        assert getattr(first_row, dimension) == 'unknown'
Ejemplo n.º 9
0
def test_simple_string_dimensions(generate_data):
    """Expect null string dimensions to come out as 'unknown'."""
    null_dimensions = {
        'distribution_id': None,
        'default_search_engine': None,
        'locale': None,
    }

    churned = churn.compute_churn_week(
        generate_data([null_dimensions]), week_start_ds)
    result = churned.collect()[0]

    assert result.distribution_id == 'unknown'
    assert result.default_search_engine == 'unknown'
    assert result.locale == 'unknown'
Ejemplo n.º 10
0
def test_empty_total_uri_count(spark):
    """Expect a null `total_uri_count` input to be reported as 0."""
    null_count_snippets = [{'total_uri_count': None}]

    churned = churn.compute_churn_week(
        snippets_to_df(spark, null_count_snippets), week_start_ds)
    first_row = churned.collect()[0]

    assert first_row.total_uri_count == 0
Ejemplo n.º 11
0
def test_cohort_by_channel_count(multi_profile_df):
    """Expect two output rows for the 'release-cck-mozilla42' channel."""
    churned = churn.compute_churn_week(multi_profile_df, week_start_ds)
    on_channel = churned.channel == 'release-cck-mozilla42'

    channel_rows = churned.where(on_channel).collect()

    assert len(channel_rows) == 2
Ejemplo n.º 12
0
def test_profile_usage_length(single_profile_df):
    """Expect the profile's usage hours to total 2.

    The fixture is described as holding two pings, each with 1 hour of usage.
    """
    churned = churn.compute_churn_week(single_profile_df, week_start_ds)
    first_row = churned.collect()[0]

    assert first_row.usage_hours == 2
Ejemplo n.º 13
0
def test_latest_submission_from_client_exists(single_profile_df):
    """Expect exactly one output row for the single-profile input."""
    churned = churn.compute_churn_week(single_profile_df, week_start_ds)
    row_count = churned.count()

    assert row_count == 1
Ejemplo n.º 14
0
def test_ignored_submissions_outside_of_period(late_submissions_df):
    """Expect no output rows when given the late-submissions fixture."""
    churned = churn.compute_churn_week(late_submissions_df, week_start_ds)
    row_count = churned.count()

    assert row_count == 0