def test_weights_categorical():
    """
    This should get : ["{test-guid-1}", "{test-guid-2}", "{test-guid-3}", "{test-guid-4}"],
    ["{test-guid-9}", "{test-guid-10}", "{test-guid-11}", "{test-guid-12}"]
    from the first two entries in the sample data where the geo_city data
    """
    # Build a categorical context, wrap it in a continuous one, and
    # construct the recommender over the combined context.
    cat_ctx = create_cat_test_ctx()
    cts_ctx = create_cts_test_ctx()
    recommender = SimilarityRecommender(cts_ctx.wrap(cat_ctx))

    # Ensemble output is a sorted list of (guid, weight) tuples.
    results = recommender.recommend(generate_a_fake_taar_client(), 2)
    assert len(results) == 2

    # Every entry must be a (string guid, float weight) pair.
    for guid, weight in results:
        assert isinstance(guid, six.string_types)
        assert isinstance(weight, float)

    # Weights must be strictly descending and positive.
    top_weight = results[0][1]
    runner_up_weight = results[1][1]
    assert top_weight > runner_up_weight > 0
def test_soft_fail(test_ctx, caplog):
    """The recommender must decline to recommend when source data is absent."""
    with mock_install_no_data(test_ctx):
        recommender = SimilarityRecommender(test_ctx)
        # No source files available, so can_recommend must be falsy.
        assert not recommender.can_recommend({})
def test_weights_continuous():
    """Recommendations are a sorted list of (guid, weight) tuples.

    Duplicate presence of test-guid-1 in the fixture data should make the
    top weight strictly greater than the runner-up, and both exceed 1.0.
    """
    # Create a new instance of a SimilarityRecommender.
    ctx = create_cts_test_ctx()
    r = SimilarityRecommender(ctx)

    # In the ensemble method recommendations should be a sorted list of tuples
    # containing [(guid, weight), (guid, weight)... (guid, weight)].
    recommendation_list = r.recommend(generate_a_fake_taar_client(), 2)

    # NOTE(review): removed a leftover debug dump of the recommendation list
    # to /tmp/similarity_recommender.json - it was never read back and
    # polluted the filesystem on every test run.

    # Make sure the structure of the recommendations is correct and
    # that we recommended the right addons.
    assert len(recommendation_list) == 2
    for recommendation, weight in recommendation_list:
        assert isinstance(recommendation, six.string_types)
        assert isinstance(weight, float)

    # Test that sorting is appropriate.
    rec0_weight = recommendation_list[0][1]
    rec1_weight = recommendation_list[1][1]

    # Duplicate presence of test-guid-1 should mean rec0_weight is double
    # rec1_weight, and both should be greater than 1.0
    assert rec0_weight > rec1_weight > 1.0
def test_weights_continuous(mock_s3_continuous_data):
    """Recommendations are a sorted list of (guid, weight) tuples with
    equal positive weights for the fixture's top two donors."""
    # Create a new instance of a SimilarityRecommender.
    r = SimilarityRecommender()

    # In the ensemble method recommendations should be a sorted list of tuples
    # containing [(guid, weight), (guid, weight)... (guid, weight)].
    recommendation_list = r.recommend(generate_a_fake_taar_client(), 2)

    # NOTE(review): removed a leftover debug dump of the recommendation list
    # to /tmp/similarity_recommender.json - it was never read back and
    # polluted the filesystem on every test run.

    # Make sure the structure of the recommendations is correct and
    # that we recommended the right addons.
    assert len(recommendation_list) == 2
    for recommendation, weight in recommendation_list:
        assert isinstance(recommendation, str)
        assert isinstance(weight, float)

    # Test that sorting is appropriate.
    rec0_weight = recommendation_list[0][1]
    rec1_weight = recommendation_list[1][1]
    assert rec0_weight == rec1_weight > 0
def test_soft_fail(test_ctx, caplog):
    """No installed data: no recommendations, and no matrix-built log line."""
    recommender = SimilarityRecommender(install_no_data(test_ctx))
    # With no source files the recommender must refuse to recommend...
    assert not recommender.can_recommend({})
    # ...and must not have logged a matrix construction.
    assert not check_matrix_built(caplog)
def test_soft_fail():
    """A context wired with no-data utils must produce no recommendations."""
    # Assemble a context whose utils mock returns no source data.
    ctx = Context()
    ctx['utils'] = MockNoDataUtils()
    ctx['clock'] = Clock()
    ctx['cache'] = JSONCache(ctx)

    recommender = SimilarityRecommender(ctx)
    # Source files cannot be found, so nothing can be recommended.
    assert not recommender.can_recommend({})
def test_compute_clients_dist(test_ctx):
    """Distances from three fixture clients to the donor pool are ordered.

    Three clients with progressively stronger feature profiles should be
    progressively closer (non-increasing distance) to a fixed donor.
    """
    # Test the distance function computation.
    with mock_install_continuous_data(test_ctx):
        r = SimilarityRecommender(test_ctx)
        # Three synthetic clients: 002 is a weak match, 003 and 004 share
        # locale/city with increasing activity counters.
        test_clients = [
            {
                "client_id": "test-client-002",
                "activeAddons": [],
                "geo_city": "sfo-us",
                "subsession_length": 1,
                "locale": "en-US",
                "os": "windows",
                "bookmark_count": 1,
                "tab_open_count": 1,
                "total_uri": 1,
                "unique_tlds": 1,
            },
            {
                "client_id": "test-client-003",
                "activeAddons": [],
                "geo_city": "brasilia-br",
                "subsession_length": 1,
                "locale": "br-PT",
                "os": "windows",
                "bookmark_count": 10,
                "tab_open_count": 1,
                "total_uri": 1,
                "unique_tlds": 1,
            },
            {
                "client_id": "test-client-004",
                "activeAddons": [],
                "geo_city": "brasilia-br",
                "subsession_length": 100,
                "locale": "br-PT",
                "os": "windows",
                "bookmark_count": 10,
                "tab_open_count": 10,
                "total_uri": 100,
                "unique_tlds": 10,
            },
        ]
        per_client_test = []
        # Compute a different set of distances for each set of clients.
        cache = r._get_cache({})
        for tc in test_clients:
            test_distances = r.compute_clients_dist(tc, cache)
            # One distance entry per donor in the fixture data.
            assert len(test_distances) == len(CONTINUOUS_FEATURE_FIXTURE_DATA)
            # Track the distance to the third donor (index 2) for each client.
            per_client_test.append(test_distances[2][0])
        # Ensure the different clients also had different distances to a specific donor.
        assert per_client_test[0] >= per_client_test[1] >= per_client_test[2]
def test_recommendations(test_ctx):
    """A single requested recommendation comes from the most similar donor."""
    # Create a new instance of a SimilarityRecommender.
    ctx = install_continuous_data(test_ctx)
    r = SimilarityRecommender(ctx)

    recommendation_list = r.recommend(generate_a_fake_taar_client(), 1)
    assert isinstance(recommendation_list, list)
    assert len(recommendation_list) == 1

    recommendation, weight = recommendation_list[0]
    # Make sure that the reported addons are the expected ones from the most similar donor.
    assert "{test-guid-1}" == recommendation
    # isinstance is the idiomatic type check (was `type(weight) == np.float64`).
    assert isinstance(weight, np.float64)
def test_recommendations(instantiate_mocked_s3_bucket):
    """All four addons of the most similar donor are recommended."""
    recommender = SimilarityRecommender()
    recommendations = recommender.recommend(generate_a_fake_taar_client(), 10)

    # Make sure the structure of the recommendations is correct and that we
    # recommended the right addons.
    assert isinstance(recommendations, list)

    # The reported addons must be exactly the most similar donor's four guids.
    for expected_guid in ("{test-guid-9}", "{test-guid-10}",
                          "{test-guid-11}", "{test-guid-12}"):
        assert expected_guid in recommendations
    assert len(recommendations) == 4
def test_recommendations(mock_s3_continuous_data):
    """A single requested recommendation comes from the most similar donor."""
    # Create a new instance of a SimilarityRecommender.
    r = SimilarityRecommender()

    # TODO: clobber the SimilarityRecommender::lr_curves
    recommendation_list = r.recommend(generate_a_fake_taar_client(), 1)
    assert isinstance(recommendation_list, list)
    assert len(recommendation_list) == 1

    recommendation, weight = recommendation_list[0]
    # Make sure that the reported addons are the expected ones from the most similar donor.
    assert "{test-guid-1}" == recommendation
    # isinstance is the idiomatic type check (was `type(weight) == np.float64`).
    assert isinstance(weight, np.float64)
def test_get_lr(test_ctx):
    """Likelihood ratios exist at extreme distances and decrease with distance."""
    with mock_install_continuous_data(test_ctx):
        recommender = SimilarityRecommender(test_ctx)
        cache = recommender._get_cache({})
        # Both very small and very large distances must yield a value.
        for distance in (0.0001, 10.0):
            assert recommender.get_lr(distance, cache) is not None
        # Closer clients must have the larger likelihood ratio.
        assert recommender.get_lr(0.001, cache) > recommender.get_lr(5.0, cache)
def test_distance_functions(test_ctx):
    """Degenerate client profiles must produce no recommendations.

    Starts from a good client (which does get recommendations), then
    progressively corrupts categorical and continuous features and checks
    that each corrupted profile yields an empty recommendation list.
    """
    # Tests the similarity functions via expected output when passing
    # modified client data.
    with mock_install_continuous_data(test_ctx):
        r = SimilarityRecommender(test_ctx)

        # Generate a fake client.
        test_client = generate_a_fake_taar_client()
        recs = r.recommend(test_client, 10)
        assert len(recs) > 0

        # Make it a generally poor match for the donors.
        test_client.update({
            "total_uri": 10,
            "bookmark_count": 2,
            "subsession_length": 10
        })

        # NOTE: this is an alias, not a copy - every update below also
        # mutates test_client, so the corruptions accumulate in order.
        all_client_values_zero = test_client
        # Make all categorical variables non-matching with any donor.
        all_client_values_zero.update({
            key: "zero" for key in test_client.keys() if key in CATEGORICAL_FEATURES
        })
        recs = r.recommend(all_client_values_zero, 10)
        assert len(recs) == 0

        # Make all continuous variables equal to zero.
        all_client_values_zero.update({
            key: 0 for key in test_client.keys() if key in CONTINUOUS_FEATURES
        })
        recs = r.recommend(all_client_values_zero, 10)
        assert len(recs) == 0

        # Make all categorical variables non-matching with any donor.
        # (Also an alias of the same underlying dict.)
        all_client_values_high = test_client
        all_client_values_high.update({
            key: "one billion" for key in test_client.keys() if key in CATEGORICAL_FEATURES
        })
        recs = r.recommend(all_client_values_high, 10)
        assert len(recs) == 0

        # Make all continuous variables equal to a very high numerical value.
        all_client_values_high.update({
            key: 1e60 for key in test_client.keys() if key in CONTINUOUS_FEATURES
        })
        recs = r.recommend(all_client_values_high, 10)
        assert len(recs) == 0

        # Test for 0.0 values if j_c is not normalized and j_d is fine.
        j_c = 0.0
        j_d = 0.42
        assert abs(j_c * j_d) == 0.0
        assert abs((j_c + 0.01) * j_d) != 0.0
def test_get_lr():
    """Likelihood ratios exist at extreme distances and decrease with distance."""
    recommender = SimilarityRecommender(create_cts_test_ctx())
    # Both very small and very large distances must yield a value.
    for distance in (0.0001, 10.0):
        assert recommender.get_lr(distance) is not None
    # Closer clients must have the larger likelihood ratio.
    assert recommender.get_lr(0.001) > recommender.get_lr(5.0)
def test_get_lr(test_ctx):
    """Likelihood ratios exist at extreme distances and decrease with distance."""
    recommender = SimilarityRecommender(install_continuous_data(test_ctx))
    # Both very small and very large distances must yield a value.
    for distance in (0.0001, 10.0):
        assert recommender.get_lr(distance) is not None
    # Closer clients must have the larger likelihood ratio.
    assert recommender.get_lr(0.001) > recommender.get_lr(5.0)
def test_can_recommend(test_ctx, caplog):
    """can_recommend requires a complete client profile and built matrices."""
    caplog.set_level(logging.INFO)

    recommender = SimilarityRecommender(install_continuous_data(test_ctx))
    assert check_matrix_built(caplog)

    # An empty profile has not enough client info.
    assert not recommender.can_recommend({})
    # A fully populated profile is recommendable.
    assert recommender.can_recommend(generate_a_fake_taar_client())

    # Nulling or deleting any single required field must block recommendation.
    for field in CATEGORICAL_FEATURES + CONTINUOUS_FEATURES:
        profile = generate_a_fake_taar_client()

        profile[field] = None
        assert not recommender.can_recommend(profile)

        del profile[field]
        assert not recommender.can_recommend(profile)
def test_recompute_matrices(test_ctx, caplog):
    """Expiring either cached artifact triggers a matrix rebuild."""
    caplog.set_level(logging.INFO)

    recommender = SimilarityRecommender(install_continuous_data(test_ctx))

    # Expire the donors pool; reading the property should rebuild matrices.
    caplog.clear()
    recommender._donors_pool.force_expiry()
    recommender.donors_pool  # property access triggers the reload
    assert check_matrix_built(caplog)

    # Same for the likelihood-ratio curves.
    caplog.clear()
    recommender._lr_curves.force_expiry()
    recommender.lr_curves  # property access triggers the reload
    assert check_matrix_built(caplog)
def test_can_recommend(instantiate_mocked_s3_bucket):
    """can_recommend requires every categorical and continuous field."""
    recommender = SimilarityRecommender()

    # An empty profile has not enough client info.
    assert not recommender.can_recommend({})
    # A fully populated profile is recommendable.
    assert recommender.can_recommend(generate_a_fake_taar_client())

    # Nulling or deleting any single required field must block recommendation.
    for field in CATEGORICAL_FEATURES + CONTINUOUS_FEATURES:
        profile = generate_a_fake_taar_client()

        profile[field] = None
        assert not recommender.can_recommend(profile)

        del profile[field]
        assert not recommender.can_recommend(profile)
def test_recommender_str():
    """The recommender stringifies to its class name."""
    recommender = SimilarityRecommender(create_cts_test_ctx())
    assert str(recommender) == "SimilarityRecommender"
def test_soft_fail():
    """With no source files available, nothing can be recommended."""
    recommender = SimilarityRecommender()
    assert not recommender.can_recommend({})
def test_get_lr(instantiate_mocked_s3_bucket):
    """Likelihood ratios exist at extreme distances and decrease with distance."""
    recommender = SimilarityRecommender()
    # Both very small and very large distances must yield a value.
    for distance in (0.0001, 10.0):
        assert recommender.get_lr(distance) is not None
    # Closer clients must have the larger likelihood ratio.
    assert recommender.get_lr(0.001) > recommender.get_lr(5.0)
def test_recommender_str(instantiate_mocked_s3_bucket):
    """The recommender stringifies to its class name."""
    recommender = SimilarityRecommender()
    assert str(recommender) == "SimilarityRecommender"
# NOTE(review): the statements below are the tail of a function whose `def`
# line falls outside this chunk (it appears to receive a `client` dict and
# call get_num_masked/random_partition - confirm against the full file).
    # Partition the client's addons into a hidden (masked) and visible
    # (unmasked) set, storing both back on the client record.
    addons = client['installed_addons']
    num_mask = get_num_masked(addons)
    masked, unmasked = random_partition(addons, num_mask)
    client['installed_addons'] = unmasked
    client['masked_addons'] = masked
    return client


# Lazily apply the masking to every training client.
training_masked = map(mask_addons, training)

# Registry of recommender instances keyed by strategy name.
recommenders = {
    "collaborative": CollaborativeRecommender(),
    "similarity": SimilarityRecommender(),
    "locale": LocaleRecommender("./top_addons_by_locale.json"),
    "legacy": LegacyRecommender()
}


def compute_features(client_data):
    """Return (client_data, matrix) where matrix rows are whitelist addons
    and columns are per-recommender weighted scores for that addon.

    Relies on module-level `recommenders`, `whitelist`, and `np` being in
    scope; `whitelist` is defined outside this chunk.
    """
    recommendations = []
    matrix = []
    # Collect one weighted-recommendation mapping per recommender.
    for _, recommender in recommenders.items():
        recommendations.append(recommender.get_weighted_recommendations(client_data))
    # One row per whitelisted addon, one column per recommender.
    for addon in whitelist:
        matrix.append([features[addon] for features in recommendations])
    return client_data, np.array(matrix)
def test_recommender_str(mock_s3_continuous_data):
    """The recommender stringifies to its class name."""
    recommender = SimilarityRecommender()
    assert str(recommender) == "SimilarityRecommender"
def test_get_lr(mock_s3_continuous_data):
    """Likelihood ratios exist at extreme distances and decrease with distance."""
    recommender = SimilarityRecommender()
    # Both very small and very large distances must yield a value.
    for distance in (0.0001, 10.0):
        assert recommender.get_lr(distance) is not None
    # Closer clients must have the larger likelihood ratio.
    assert recommender.get_lr(0.001) > recommender.get_lr(5.0)
def test_recommender_str(test_ctx):
    """The recommender stringifies to its class name."""
    recommender = SimilarityRecommender(install_continuous_data(test_ctx))
    assert str(recommender) == "SimilarityRecommender"