def test_transform_clients(generate_data):
    """Check the per-client counters of the weekly topline transform.

    Four pings covering three distinct client ids are generated. The
    transform is expected to yield a single row reporting 3 actives,
    2 new records and 2 default-browser clients.

    NOTE(review): the "new"/"default" labels below rely on the defaults
    supplied by the ``generate_data`` fixture, which is not visible here.
    """
    dates = generate_dates(start_ds)

    # not new, default
    returning_client = {
        "client_id": "0",
        "profile_creation_date": 0,
    }
    # new, but duplicate: two pings share the same client id
    new_client_first_ping = {"client_id": "1"}
    new_client_second_ping = {
        "client_id": "1",
        "timestamp": dates["timestamp"] + 1,
    }
    # new, not default
    non_default_client = {
        "client_id": "2",
        "is_default_browser": False,
    }

    pings = generate_data([
        returning_client,
        new_client_first_ping,
        new_client_second_ping,
        non_default_client,
    ])
    summary = topline.transform(pings, start_ds, "weekly")

    assert summary.count() == 1

    totals = summary.first()
    assert totals.actives == 3
    assert totals.new_records == 2
    assert totals.default == 2
def test_transform_hours(generate_data):
    """Check the ``hours`` aggregate of the weekly topline transform.

    Two pings (US and CA) each last exactly one hour; two more pings carry
    subsession lengths of 181 days and -1 day. The expected overall total
    of 2.0 hours means the latter two must contribute nothing.

    NOTE(review): presumably the extreme lengths are discarded as out of
    range by the transform -- confirm against ``topline``.
    """
    one_hour = topline.seconds_per_hour
    one_day = topline.seconds_per_day

    pings = generate_data([
        {"country": "US", "subsession_length": one_hour},
        {"country": "CA", "subsession_length": one_hour},
        {"subsession_length": 181 * one_day},
        {"subsession_length": -1 * one_day},
    ])
    summary = topline.transform(pings, start_ds, "weekly")

    assert summary.count() == 2

    # Grand total across every output row.
    grand_total = summary.groupBy().sum().first()
    assert grand_total["sum(hours)"] == 2.0

    # Total restricted to the CA geo.
    per_geo = summary.groupBy("geo").sum()
    canada = per_geo.where(F.col("geo") == "CA").first()
    assert canada["sum(hours)"] == 1.0
def test_transform_searches_filters_incontent(generate_data):
    """Check that "in-content" searches are left out of the google total.

    Three pings each carry one google search row, differing only in their
    ``source``. The expected google sum is 2, so exactly one of them (the
    "in-content" one) must not be counted.

    NOTE(review): assumes ``search_row`` yields a row worth one search --
    confirm against its definition.
    """
    sources = (
        "in-content",   # no
        "contextmenu",  # yes
        "abouthome",    # yes
    )
    pings = generate_data([
        {"search_counts": [search_row("google", source=origin)]}
        for origin in sources
    ])
    summary = topline.transform(pings, start_ds, "weekly")

    grand_total = summary.groupBy().sum().first()
    assert grand_total["sum(google)"] == 2
def test_transform_searches(generate_data):
    """Check the search-count aggregates of the weekly topline transform.

    Three pings are generated: one with no search counts at all, one with
    a single google row, and a CA ping with one hooli and one google row.
    The transform is expected to return 2 rows, a google total of 2 and,
    for the CA geo, an ``other`` total of 1.

    NOTE(review): presumably "hooli" is not a tracked engine and therefore
    lands in ``other`` -- confirm against ``topline``.
    """
    without_searches = {"search_counts": None}
    google_only = {"search_counts": [search_row("google")]}
    canadian_mixed = {
        "country": "CA",
        "search_counts": [search_row("hooli"), search_row("google")],
    }

    pings = generate_data([without_searches, google_only, canadian_mixed])
    summary = topline.transform(pings, start_ds, "weekly")

    assert summary.count() == 2

    # Grand total across every output row.
    grand_total = summary.groupBy().sum().first()
    assert grand_total["sum(google)"] == 2

    # Total restricted to the CA geo.
    per_geo = summary.groupBy("geo").sum()
    canada = per_geo.where(F.col("geo") == "CA").first()
    assert canada["sum(other)"] == 1