예제 #1
0
def test_get_addons(spark, addon_whitelist, multi_clusters_df):
    """Verify get_addons_per_client returns one array-typed row per sample.

    The ``multi_clusters_df`` fixture is registered as the temp view
    "longitudinal" before sampling (presumably the view that
    ``get_samples`` queries -- confirm against ``taar_similarity``).
    """
    multi_clusters_df.createOrReplaceTempView("longitudinal")

    sampled = taar_similarity.get_samples(spark)
    per_client = taar_similarity.get_addons_per_client(sampled, addon_whitelist, 2)

    # Row counts must match: each sampled client yields exactly one row.
    sampled_rows = sampled.count()
    per_client_rows = per_client.count()
    assert sampled_rows == per_client_rows

    # The second column of the result is expected to hold the addons array.
    addons_field = per_client.schema.fields[1]
    assert isinstance(addons_field.dataType, ArrayType)
예제 #2
0
def test_get_addons(spark, addon_whitelist, multi_clusters_df):
    """Verify get_addons_per_client returns one array-typed row per sample.

    Samples are requested with ``date_from="20180101"`` and cached before
    use. ``multi_clusters_df`` is not referenced in the body; it is kept
    in the signature so pytest still sets the fixture up.
    """
    sampled = taar_similarity.get_samples(spark, date_from="20180101")

    # Cache the samples explicitly so the test exercises the cached path.
    sampled.cache()

    per_client = taar_similarity.get_addons_per_client(sampled, addon_whitelist, 2)

    # Row counts must match: each sampled client yields exactly one row.
    sampled_rows = sampled.count()
    per_client_rows = per_client.count()
    assert sampled_rows == per_client_rows

    # The second column of the result is expected to hold the addons array.
    addons_field = per_client.schema.fields[1]
    assert isinstance(addons_field.dataType, ArrayType)