예제 #1
0
def test_transform_clients(generate_data):
    """Run the weekly transform over four client pings and check the counts.

    The pings cover three distinct client ids ("0", "1" twice, "2"). The
    transform is expected to yield exactly one row whose ``actives``,
    ``new_records`` and ``default`` fields match the values asserted below.
    """
    submission_dates = generate_dates(start_ds)

    # not new, default
    existing_default = {
        "client_id": "0",
        "profile_creation_date": 0,
    }
    # new, but duplicate: the same client id is sent twice, the second time
    # with a timestamp one unit later than the generated one
    new_first_ping = {"client_id": "1"}
    new_second_ping = {
        "client_id": "1",
        "timestamp": submission_dates["timestamp"] + 1,
    }
    # new, not default
    new_non_default = {
        "client_id": "2",
        "is_default_browser": False,
    }

    # NOTE(review): fields omitted here are presumably filled in by the
    # generate_data fixture -- confirm against its definition.
    frame = generate_data([
        existing_default,
        new_first_ping,
        new_second_ping,
        new_non_default,
    ])
    result = topline.transform(frame, start_ds, "weekly")

    assert result.count() == 1

    summary = result.first()
    assert summary.actives == 3
    assert summary.new_records == 2
    assert summary.default == 2
예제 #2
0
def test_transform_hours(generate_data):
    """Check the ``hours`` totals produced by the weekly transform.

    Two pings carry exactly one hour of subsession length (one for "US",
    one for "CA"); two more carry a 181-day and a negative one-day length.
    The result must have two rows, two hours overall and one hour for "CA".
    """
    one_hour = topline.seconds_per_hour
    one_day = topline.seconds_per_day

    us_hour = {"country": "US", "subsession_length": one_hour}
    ca_hour = {"country": "CA", "subsession_length": one_hour}
    # NOTE(review): the next two look like out-of-range lengths that the
    # transform is expected to drop -- the asserted total of 2.0 hours only
    # accounts for the two one-hour pings. Confirm against topline.transform.
    too_long = {"subsession_length": 181 * one_day}
    negative = {"subsession_length": -1 * one_day}

    frame = generate_data([us_hour, ca_hour, too_long, negative])
    result = topline.transform(frame, start_ds, "weekly")

    assert result.count() == 2

    # Sum over every row of the result.
    overall = result.groupBy().sum().first()
    assert overall["sum(hours)"] == 2.0

    # Sum per geo, then keep only the "CA" group.
    per_geo = result.groupBy("geo").sum()
    canada = per_geo.where(F.col("geo") == "CA").first()
    assert canada["sum(hours)"] == 1.0
예제 #3
0
def test_transform_searches_filters_incontent(generate_data):
    """Check that only two of three google searches reach the ``google`` sum.

    One ping is generated per search source. Per the expectations below,
    the "in-content" search is not counted while the "contextmenu" and
    "abouthome" searches are.
    """
    search_sources = (
        "in-content",   # no
        "contextmenu",  # yes
        "abouthome",    # yes
    )
    snippets = [
        {'search_counts': [search_row("google", source=source_name)]}
        for source_name in search_sources
    ]

    frame = generate_data(snippets)
    result = topline.transform(frame, start_ds, "weekly")

    overall = result.groupBy().sum().first()
    assert overall["sum(google)"] == 2
예제 #4
0
def test_transform_searches(generate_data):
    """Check search totals produced by the weekly transform.

    Three pings are generated: one with ``search_counts`` set to None, one
    with a single google search, and one for "CA" with a hooli and a google
    search. The result must have two rows, two google searches overall and
    one ``other`` search for "CA".
    """
    no_searches = {"search_counts": None}
    google_only = {"search_counts": [search_row("google")]}
    canada_mixed = {
        "country": "CA",
        "search_counts": [
            search_row("hooli"),
            search_row("google"),
        ],
    }

    frame = generate_data([no_searches, google_only, canada_mixed])
    result = topline.transform(frame, start_ds, "weekly")

    assert result.count() == 2

    # Sum over every row of the result.
    overall = result.groupBy().sum().first()
    assert overall["sum(google)"] == 2

    # Sum per geo, then keep only the "CA" group.
    # NOTE(review): the "hooli" search presumably lands in the "other"
    # column -- confirm against topline.transform.
    per_geo = result.groupBy("geo").sum()
    canada = per_geo.where(F.col("geo") == "CA").first()
    assert canada["sum(other)"] == 1