def test_get_addons(spark, addon_whitelist, multi_clusters_df):
    """Verify the shape of the output of ``get_addons_per_client``.

    The ``multi_clusters_df`` fixture is registered as the ``longitudinal``
    temp view before ``taar_similarity.get_samples`` is called (presumably
    that is the view ``get_samples`` reads from -- confirm against its
    implementation). The sampled clients are then passed to
    ``taar_similarity.get_addons_per_client`` together with the whitelist.

    Expectations:
      * the result has exactly as many rows as the sampled input, i.e. one
        row per client;
      * the second column of the result is array-typed (the addons).
    """
    # NOTE(review): another definition named ``test_get_addons`` appears
    # later in this source. If both live in the same module, the later one
    # shadows this one and pytest will never collect it -- confirm.
    multi_clusters_df.createOrReplaceTempView("longitudinal")

    sampled_clients = taar_similarity.get_samples(spark)
    # The meaning of the trailing positional ``2`` is not visible from this
    # test; see ``get_addons_per_client`` for its definition.
    client_addons = taar_similarity.get_addons_per_client(
        sampled_clients, addon_whitelist, 2
    )

    # One output row is expected for every sampled client.
    n_sampled = sampled_clients.count()
    n_with_addons = client_addons.count()
    assert n_sampled == n_with_addons

    # The addons are expected to be stored as an array in the second column.
    addons_field = client_addons.schema.fields[1]
    assert isinstance(addons_field.dataType, ArrayType)
def test_get_addons(spark, addon_whitelist, multi_clusters_df):
    """Verify the shape of the output of ``get_addons_per_client``.

    Clients are sampled with ``taar_similarity.get_samples`` using
    ``date_from="20180101"``, explicitly cached, and then passed to
    ``taar_similarity.get_addons_per_client`` together with the whitelist.

    ``multi_clusters_df`` is requested as a fixture but not referenced in
    the body (presumably the fixture itself prepares the data that
    ``get_samples`` reads -- confirm against the fixture definition).

    Expectations:
      * the result has exactly as many rows as the sampled input, i.e. one
        row per client;
      * the second column of the result is array-typed (the addons).
    """
    sampled_clients = taar_similarity.get_samples(spark, date_from="20180101")

    # Caching is forced explicitly for the purposes of this test case.
    sampled_clients.cache()

    # The meaning of the trailing positional ``2`` is not visible from this
    # test; see ``get_addons_per_client`` for its definition.
    client_addons = taar_similarity.get_addons_per_client(
        sampled_clients, addon_whitelist, 2
    )

    # One output row is expected for every sampled client.
    n_sampled = sampled_clients.count()
    n_with_addons = client_addons.count()
    assert n_sampled == n_with_addons

    # The addons are expected to be stored as an array in the second column.
    addons_field = client_addons.schema.fields[1]
    assert isinstance(addons_field.dataType, ArrayType)