Exemple #1
0
def test_all_(spark):
    """Check that `all_` combines boolean Columns row by row.

    A four-row frame is built with one always-True column, one
    always-False column and one column alternating False/True. Several
    `all_` combinations are selected and the resulting pandas frame is
    checked column by column.
    """
    rows = [
        [True, False, False],
        [True, False, True],
        [True, False, False],
        [True, False, True],
    ]
    df = spark.createDataFrame(
        data=rows,
        schema=["all_true", "all_false", "mixed"],
    )

    combined = [
        all_([df.all_true, df.all_true, df.all_true]).alias("true_1"),
        all_([df.all_true, df.all_false]).alias("false_2"),
        all_([df.all_false, df.all_false]).alias("false_3"),
        all_([df.mixed, df.all_false]).alias("false_4"),
        all_([df.mixed, df.all_true]).alias("mixed_5"),
    ]
    result = df.select(*combined).toPandas()

    # One output row per input row, one output column per alias, no nulls.
    assert result.shape == (4, 5)
    assert not result.isnull().any().any()

    assert result["true_1"].all()
    for always_false in ("false_2", "false_3", "false_4"):
        assert not result[always_false].any()

    # `mixed` is False on even rows and True on odd rows; ANDing it with
    # the always-True column must reproduce that pattern.
    mixed = result["mixed_5"]
    assert not mixed[::2].any()
    assert mixed[1::2].all()

    # Check this workaround is still necessary: the builtin `all` is
    # expected to raise ValueError when given Columns.
    with pytest.raises(ValueError):
        all([df.all_true, df.all_true])
Exemple #2
0
def make_select_col(event, metric_key):
    """Build an int Column aliased `metric_key` from an event Column.

    Each (field, value) pair in `metric_library[metric_key]` becomes the
    comparison `event[field] == value`; the comparisons are combined
    with `all_`, cast to 'int' and aliased to `metric_key`. A row is
    therefore 1 when the event matches `metric_key` and 0 otherwise.

    Summing the returned Column counts the occurrences of the event.

    Example usage:

        from pyspark.sql import functions as F

        t = spark.table('telemetry_mobile_event_parquet')
        t2 = t.filter(
            t.submission_date_s3 == '20190101'
        ).select(
            F.explode(t.events).alias('event')
        )
        t3 = t2.select(
            make_select_col(t2.event, 'session_start'),
            make_select_col(t2.event, 'session_end')
        )
        t3.agg(F.sum(t3.session_start), F.sum(t3.session_end)).collect()
    """
    field_values = metric_library[metric_key].items()
    # Kept as a lazy generator, exactly as `all_` received it before.
    is_match = all_(
        event[field] == expected for (field, expected) in field_values
    )
    return is_match.astype('int').alias(metric_key)
Exemple #3
0
def pocket_video_clicks(fe):
    """Sum the rows of `fe` whose event is a click on the pocket video tile.

    A row counts when `fe.event` has category 'action', method 'click',
    object 'menu' and value 'pocket_video_tile'.

    Returns a one-element tuple holding the `F.sum` expression.
    NOTE(review): the tuple comes from a trailing comma after the
    original return expression; confirm callers expect a tuple.
    """
    conditions = [
        fe.event.category == 'action',
        fe.event.method == 'click',
        fe.event.object == 'menu',
        fe.event.value == 'pocket_video_tile'
    ]
    matched = all_(conditions).astype('int')
    return (F.sum(matched),)
Exemple #4
0
def unenroll(events, experiment):
    """Aggregate, via `agg_any`, the normandy unenroll events of `experiment`.

    A row matches when its category is 'normandy', its method is
    'unenroll' and its string value equals `experiment.experiment_slug`.
    The combined condition is handed to `agg_any` and its result is
    returned unchanged.
    """
    is_unenroll_event = all_([
        events.event_category == 'normandy',
        events.event_method == 'unenroll',
        events.event_string_value == experiment.experiment_slug,
    ])
    return agg_any(is_unenroll_event)
Exemple #5
0
def tracking_protection_toggle_off(fe):
    """Sum the rows of `fe` whose event switches turbo mode off.

    A row counts when `fe.event` has category 'action', method 'change',
    object 'turbo_mode' and value 'off'.

    Returns a one-element tuple holding the `F.sum` expression.
    NOTE(review): the tuple comes from a trailing comma after the
    original return expression; confirm callers expect a tuple.
    """
    conditions = [
        fe.event.category == 'action',
        fe.event.method == 'change',
        fe.event.object == 'turbo_mode',
        fe.event.value == 'off'
    ]
    matched = all_(conditions).astype('int')
    return (F.sum(matched),)
Exemple #6
0
def remote_backs(fe):
    """Sum the rows of `fe` whose event is a 'page' back on the browser.

    A row counts when `fe.event` has category 'action', method 'page',
    object 'browser' and value 'back'.

    Returns a one-element tuple holding the `F.sum` expression.
    NOTE(review): the tuple comes from a trailing comma after the
    original return expression; confirm callers expect a tuple.
    """
    conditions = [
        fe.event.category == 'action',
        fe.event.method == 'page',
        fe.event.object == 'browser',
        fe.event.value == 'back'
    ]
    matched = all_(conditions).astype('int')
    return (F.sum(matched),)
Exemple #7
0
def browser_backs(fe):
    """Sum the rows of `fe` whose event is a click on the menu 'back' item.

    A row counts when `fe.event` has category 'action', method 'click',
    object 'menu' and value 'back'.

    Returns a one-element tuple holding the `F.sum` expression.
    NOTE(review): the tuple comes from a trailing comma after the
    original return expression; confirm callers expect a tuple.
    """
    conditions = [
        fe.event.category == 'action',
        fe.event.method == 'click',
        fe.event.object == 'menu',
        fe.event.value == 'back'
    ]
    matched = all_(conditions).astype('int')
    return (F.sum(matched),)
Exemple #8
0
def bundled_non_youtube_tile_clicks(fe):
    """Sum the rows of `fe` that are clicks on a bundled, non-youtube home tile.

    A row counts when `fe.event` has category 'action', method 'click',
    object 'home_tile', value 'bundled', and an `extra['tile_id']`
    different from 'youtube'.

    Returns a one-element tuple holding the `F.sum` expression.
    NOTE(review): the tuple comes from a trailing comma after the
    original return expression; confirm callers expect a tuple.
    """
    conditions = [
        fe.event.category == 'action',
        fe.event.method == 'click',
        fe.event.object == 'home_tile',
        fe.event.value == 'bundled',
        fe.event.extra['tile_id'] != 'youtube',
    ]
    matched = all_(conditions).astype('int')
    return (F.sum(matched),)
Exemple #9
0
def home_tile_clicks(fe):
    """Sum the rows of `fe` that are home tile clicks, excluding 'youtube_tile'.

    A row counts when `fe.event` has category 'action', method 'click',
    object 'home_tile' and a value different from 'youtube_tile'.

    Returns a one-element tuple holding the `F.sum` expression.
    NOTE(review): the tuple comes from a trailing comma after the
    original return expression; confirm callers expect a tuple.
    """
    conditions = [
        fe.event.category == 'action',
        fe.event.method == 'click',
        fe.event.object == 'home_tile',

        # Excluding 'youtube_tile' avoids double counting youtube
        # (per liuche 2019/06/07) :'(
        fe.event.value != 'youtube_tile'
    ]
    matched = all_(conditions).astype('int')
    return (F.sum(matched),)
Exemple #10
0
def anything_but_youtube_tile_clicks(fe):
    """Sum the rows of `fe` that are home tile clicks on anything but youtube.

    A row counts when `fe.event` has category 'action', method 'click',
    object 'home_tile', a value different from 'youtube_tile', and an
    `extra['tile_id']` that is either null or different from 'youtube'.

    Returns a one-element tuple holding the `F.sum` expression.
    NOTE(review): the tuple comes from a trailing comma after the
    original return expression; confirm callers expect a tuple.
    """
    # A missing tile_id is accepted explicitly alongside non-youtube ids.
    tile_is_not_youtube = (
        F.isnull(fe.event.extra['tile_id']) |
        (fe.event.extra['tile_id'] != 'youtube')
    )
    conditions = [
        fe.event.category == 'action',
        fe.event.method == 'click',
        fe.event.object == 'home_tile',
        fe.event.value != 'youtube_tile',
        tile_is_not_youtube,
    ]
    matched = all_(conditions).astype('int')
    return (F.sum(matched),)
Exemple #11
0
def navigates_or_clicks_not_youtube(fe):
    """Sum the rows of `fe` that are non-youtube tile clicks or search bar input.

    A row counts when `fe.event` matches any one of:

    * a home tile click whose value is not 'youtube_tile' and whose
      `extra['tile_id']` is null or not 'youtube';
    * a 'type_url' action on the search bar;
    * a 'type_query' action on the search bar.

    Returns a one-element tuple holding the `F.sum` expression.
    NOTE(review): the tuple comes from a trailing comma after the
    original return expression; confirm callers expect a tuple.
    """
    non_youtube_tile_click = all_([
        fe.event.category == 'action',
        fe.event.method == 'click',
        fe.event.object == 'home_tile',
        fe.event.value != 'youtube_tile',
        (
            F.isnull(fe.event.extra['tile_id']) |
            (fe.event.extra['tile_id'] != 'youtube')
        )
    ])
    typed_url = all_([
        fe.event.category == 'action',
        fe.event.method == 'type_url',
        fe.event.object == 'search_bar',
    ])
    typed_query = all_([
        fe.event.category == 'action',
        fe.event.method == 'type_query',
        fe.event.object == 'search_bar',
    ])

    matched = any_([non_youtube_tile_click, typed_url, typed_query])
    return (F.sum(matched.astype('int')),)
Exemple #12
0
def make_where(event, metric_key):
    """Build a Column aliased `metric_key` from an event Column.

    Each (field, value) pair in `metric_library[metric_key]` becomes the
    comparison `event[field] == value`; the comparisons are combined
    with `all_` and the result is aliased to `metric_key`. A row is
    therefore True iff the event matches `metric_key`.

    Useful as a filter condition.

    Example usage:

        t = spark.table('telemetry_mobile_event_parquet')
        t2 = t.filter(
            t.submission_date_s3 == '20190101'
        ).select(
            F.explode(t.events).alias('event')
        )
        t3 = t2.filter(make_where(t2.event, 'session_start'))
    """
    field_values = metric_library[metric_key].items()
    # Kept as a lazy generator, exactly as `all_` received it before.
    is_match = all_(
        event[field] == expected for (field, expected) in field_values
    )
    return is_match.alias(metric_key)
Exemple #13
0
def view_about_protections(events):
    """Aggregate, via `agg_any`, the 'show' events on the protection report.

    A row matches when its object is 'protection_report' and its method
    is 'show'. The combined condition is handed to `agg_any` and its
    result is returned unchanged.
    """
    is_report_shown = all_([
        events.event_object == 'protection_report',
        events.event_method == 'show',
    ])
    return agg_any(is_report_shown)
Exemple #14
0
def view_about_logins(events):
    """Aggregate, via `agg_any`, the 'open_management' events of 'pwmgr'.

    A row matches when its method is 'open_management' and its category
    is 'pwmgr'. The combined condition is handed to `agg_any` and its
    result is returned unchanged.
    """
    is_management_opened = all_([
        events.event_method == 'open_management',
        events.event_category == 'pwmgr',
    ])
    return agg_any(is_management_opened)
Exemple #15
0
def type_queries(fe):
    """Sum the rows of `fe` whose event is a 'type_query' on the search bar.

    A row counts when `fe.event` has category 'action', method
    'type_query' and object 'search_bar'.

    Returns a one-element tuple holding the `F.sum` expression.
    NOTE(review): the tuple comes from a trailing comma after the
    original return expression; confirm callers expect a tuple.
    """
    conditions = [
        fe.event.category == 'action',
        fe.event.method == 'type_query',
        fe.event.object == 'search_bar',
    ]
    matched = all_(conditions).astype('int')
    return (F.sum(matched),)
Exemple #16
0
def user_show_menus(fe):
    """Sum the rows of `fe` whose event is a 'user_show' on the menu.

    A row counts when `fe.event` has category 'action', method
    'user_show' and object 'menu'.

    Returns a one-element tuple holding the `F.sum` expression.
    NOTE(review): the tuple comes from a trailing comma after the
    original return expression; confirm callers expect a tuple.
    """
    conditions = [
        fe.event.category == 'action',
        fe.event.method == 'user_show',
        fe.event.object == 'menu',
    ]
    matched = all_(conditions).astype('int')
    return (F.sum(matched),)