def test_weights_categorical():
    """
    Expected recommendations come from the first two entries in the
    sample data, i.e. the donors contributing:
        ["{test-guid-1}", "{test-guid-2}", "{test-guid-3}", "{test-guid-4}"],
        ["{test-guid-9}", "{test-guid-10}", "{test-guid-11}", "{test-guid-12}"]
    selected via the geo_city data (NOTE(review): the original docstring
    was truncated mid-sentence here — confirm the selection criterion).
    """
    # Build a recommender over the categorical test context wrapped by
    # the continuous one.
    cat_ctx = create_cat_test_ctx()
    cts_ctx = create_cts_test_ctx()
    recommender = SimilarityRecommender(cts_ctx.wrap(cat_ctx))

    # Ensemble contract: a sorted list of (guid, weight) tuples.
    recommendations = recommender.recommend(generate_a_fake_taar_client(), 2)
    assert len(recommendations) == 2

    # Structural check on every (guid, weight) pair.
    for guid, weight in recommendations:
        assert isinstance(guid, six.string_types)
        assert isinstance(weight, float)

    # Weights must be strictly decreasing and positive.
    first_weight = recommendations[0][1]
    second_weight = recommendations[1][1]
    assert first_weight > second_weight > 0
def test_soft_fail(test_ctx, caplog):
    # With no source data installed, the recommender must decline to
    # recommend rather than crash.
    with mock_install_no_data(test_ctx):
        recommender = SimilarityRecommender(test_ctx)
        assert not recommender.can_recommend({})
def test_weights_continuous():
    # Create a new instance of a SimilarityRecommender over the
    # continuous-features test context.
    ctx = create_cts_test_ctx()
    r = SimilarityRecommender(ctx)

    # In the ensemble method recommendations should be a sorted list of
    # tuples containing [(guid, weight), (guid, weight)... (guid, weight)].
    # (Removed a leftover debug dump of the recommendation list to
    # /tmp/similarity_recommender.json — a side-effecting, non-portable
    # test artifact.)
    recommendation_list = r.recommend(generate_a_fake_taar_client(), 2)

    # Make sure the structure of the recommendations is correct and
    # that we recommended the right addons.
    assert len(recommendation_list) == 2
    for recommendation, weight in recommendation_list:
        assert isinstance(recommendation, six.string_types)
        assert isinstance(weight, float)

    # Test that sorting is appropriate.
    rec0_weight = recommendation_list[0][1]
    rec1_weight = recommendation_list[1][1]

    # Duplicate presence of test-guid-1 should mean rec0_weight is double
    # rec1_weight, and both should be greater than 1.0
    assert rec0_weight > rec1_weight > 1.0
def test_weights_continuous(mock_s3_continuous_data):
    # NOTE(review): a function with this name is also defined earlier in
    # this file; at import time this definition shadows the earlier one.
    # Create a new instance of a SimilarityRecommender.
    r = SimilarityRecommender()

    # In the ensemble method recommendations should be a sorted list of
    # tuples containing [(guid, weight), (guid, weight)... (guid, weight)].
    # (Removed a leftover debug dump of the recommendation list to
    # /tmp/similarity_recommender.json — a side-effecting, non-portable
    # test artifact. Also fixed the "shoudl" typo above.)
    recommendation_list = r.recommend(generate_a_fake_taar_client(), 2)

    # Make sure the structure of the recommendations is correct and
    # that we recommended the right addons.
    assert len(recommendation_list) == 2
    for recommendation, weight in recommendation_list:
        assert isinstance(recommendation, str)
        assert isinstance(weight, float)

    # Test that sorting is appropriate: both results carry equal,
    # positive weights here.
    rec0_weight = recommendation_list[0][1]
    rec1_weight = recommendation_list[1][1]
    assert rec0_weight == rec1_weight > 0
def test_soft_fail(test_ctx, caplog):
    # Build a recommender over a context with no source data installed.
    recommender = SimilarityRecommender(install_no_data(test_ctx))

    # Without data it must neither recommend nor have built its matrices.
    assert not recommender.can_recommend({})
    assert not check_matrix_built(caplog)
def test_soft_fail():
    # Assemble a context whose utils return no data at all.
    ctx = Context()
    ctx['utils'] = MockNoDataUtils()
    ctx['clock'] = Clock()
    ctx['cache'] = JSONCache(ctx)

    # Missing source files -> the recommender must decline.
    recommender = SimilarityRecommender(ctx)
    assert not recommender.can_recommend({})
def test_compute_clients_dist(test_ctx):
    # Exercise the distance computation for a few hand-built clients.
    with mock_install_continuous_data(test_ctx):
        recommender = SimilarityRecommender(test_ctx)
        clients = [
            {
                "client_id": "test-client-002",
                "activeAddons": [],
                "geo_city": "sfo-us",
                "subsession_length": 1,
                "locale": "en-US",
                "os": "windows",
                "bookmark_count": 1,
                "tab_open_count": 1,
                "total_uri": 1,
                "unique_tlds": 1,
            },
            {
                "client_id": "test-client-003",
                "activeAddons": [],
                "geo_city": "brasilia-br",
                "subsession_length": 1,
                "locale": "br-PT",
                "os": "windows",
                "bookmark_count": 10,
                "tab_open_count": 1,
                "total_uri": 1,
                "unique_tlds": 1,
            },
            {
                "client_id": "test-client-004",
                "activeAddons": [],
                "geo_city": "brasilia-br",
                "subsession_length": 100,
                "locale": "br-PT",
                "os": "windows",
                "bookmark_count": 10,
                "tab_open_count": 10,
                "total_uri": 100,
                "unique_tlds": 10,
            },
        ]

        # Collect, per client, the distance to the donor at index 2.
        cache = recommender._get_cache({})
        third_donor_distances = []
        for client in clients:
            distances = recommender.compute_clients_dist(client, cache)
            # One distance entry per donor in the fixture data.
            assert len(distances) == len(CONTINUOUS_FEATURE_FIXTURE_DATA)
            third_donor_distances.append(distances[2][0])

        # The clients are ordered from least to most similar to that
        # donor, so the distances must be non-increasing.
        assert third_donor_distances[0] >= third_donor_distances[1] >= third_donor_distances[2]
def test_recommendations(test_ctx):
    # Recommender backed by the continuous-feature fixture data.
    recommender = SimilarityRecommender(install_continuous_data(test_ctx))

    results = recommender.recommend(generate_a_fake_taar_client(), 1)
    assert isinstance(results, list)
    assert len(results) == 1

    guid, weight = results[0]
    # The single result must come from the most similar donor.
    assert guid == "{test-guid-1}"
    assert type(weight) == np.float64
# Example #9 (scraped example separator; vote count 0)
def test_recommendations(instantiate_mocked_s3_bucket):
    # Create a new instance of a SimilarityRecommender.
    recommender = SimilarityRecommender()

    recs = recommender.recommend(generate_a_fake_taar_client(), 10)

    # Structure check: a plain list with exactly the four addons of the
    # most similar donor.
    assert isinstance(recs, list)
    assert len(recs) == 4
    for expected in ("{test-guid-9}", "{test-guid-10}",
                     "{test-guid-11}", "{test-guid-12}"):
        assert expected in recs
def test_recommendations(mock_s3_continuous_data):
    # Create a new instance of a SimilarityRecommender.
    recommender = SimilarityRecommender()

    # TODO: clobber the SimilarityRecommender::lr_curves

    results = recommender.recommend(generate_a_fake_taar_client(), 1)
    assert isinstance(results, list)
    assert len(results) == 1

    guid, weight = results[0]
    # Expect the addon of the most similar donor.
    assert guid == "{test-guid-1}"
    assert type(weight) == np.float64
def test_get_lr(test_ctx):
    # The likelihood-ratio lookup must be defined at extreme distances
    # and decrease as the distance grows.
    with mock_install_continuous_data(test_ctx):
        recommender = SimilarityRecommender(test_ctx)
        cache = recommender._get_cache({})
        for distance in (0.0001, 10.0):
            assert recommender.get_lr(distance, cache) is not None
        assert recommender.get_lr(0.001, cache) > recommender.get_lr(5.0, cache)
def test_distance_functions(test_ctx):
    # Tests the similarity functions via expected output when passing
    # modified client data.
    with mock_install_continuous_data(test_ctx):
        r = SimilarityRecommender(test_ctx)

        # Generate a fake client.
        test_client = generate_a_fake_taar_client()
        recs = r.recommend(test_client, 10)
        assert len(recs) > 0

        # Make it a generally poor match for the donors.
        test_client.update({
            "total_uri": 10,
            "bookmark_count": 2,
            "subsession_length": 10
        })

        # Work on a copy so the scenarios below are independent. (The
        # original code aliased test_client, so the "high" scenarios
        # silently inherited the zeroed continuous values.)
        all_client_values_zero = dict(test_client)
        # Make all categorical variables non-matching with any donor.
        all_client_values_zero.update({
            key: "zero"
            for key in test_client.keys() if key in CATEGORICAL_FEATURES
        })
        recs = r.recommend(all_client_values_zero, 10)
        assert len(recs) == 0

        # Make all continuous variables equal to zero.
        all_client_values_zero.update({
            key: 0
            for key in test_client.keys() if key in CONTINUOUS_FEATURES
        })
        recs = r.recommend(all_client_values_zero, 10)
        assert len(recs) == 0

        # Fresh copy: make all categorical variables a different
        # non-matching value.
        all_client_values_high = dict(test_client)
        all_client_values_high.update({
            key: "one billion"
            for key in test_client.keys() if key in CATEGORICAL_FEATURES
        })
        recs = r.recommend(all_client_values_high, 10)
        assert len(recs) == 0

        # Make all continuous variables equal to a very high numerical value.
        all_client_values_high.update({
            key: 1e60
            for key in test_client.keys() if key in CONTINUOUS_FEATURES
        })
        recs = r.recommend(all_client_values_high, 10)
        assert len(recs) == 0

        # Test for 0.0 values if j_c is not normalized and j_d is fine.
        j_c = 0.0
        j_d = 0.42
        assert abs(j_c * j_d) == 0.0
        assert abs((j_c + 0.01) * j_d) != 0.0
def test_get_lr():
    # The likelihood ratio must exist at extreme distances and shrink as
    # the distance grows.
    recommender = SimilarityRecommender(create_cts_test_ctx())
    for distance in (0.0001, 10.0):
        assert recommender.get_lr(distance) is not None
    assert recommender.get_lr(0.001) > recommender.get_lr(5.0)
def test_get_lr(test_ctx):
    # The likelihood ratio must exist at extreme distances and shrink as
    # the distance grows.
    recommender = SimilarityRecommender(install_continuous_data(test_ctx))
    for distance in (0.0001, 10.0):
        assert recommender.get_lr(distance) is not None
    assert recommender.get_lr(0.001) > recommender.get_lr(5.0)
def test_can_recommend(test_ctx, caplog):
    caplog.set_level(logging.INFO)

    # Build the recommender over the continuous-feature fixture data.
    recommender = SimilarityRecommender(install_continuous_data(test_ctx))
    assert check_matrix_built(caplog)

    # Empty client info -> no recommendation.
    assert not recommender.can_recommend({})

    # A fully-populated client is recommendable.
    assert recommender.can_recommend(generate_a_fake_taar_client())

    # Each required field, when nulled or removed, must block
    # recommendation.
    for field in CATEGORICAL_FEATURES + CONTINUOUS_FEATURES:
        partial_profile = generate_a_fake_taar_client()

        # Null the field first...
        partial_profile[field] = None
        assert not recommender.can_recommend(partial_profile)

        # ...then drop it entirely (in place).
        del partial_profile[field]
        assert not recommender.can_recommend(partial_profile)
def test_recompute_matrices(test_ctx, caplog):
    caplog.set_level(logging.INFO)

    # Build the recommender over the continuous-feature fixture data.
    recommender = SimilarityRecommender(install_continuous_data(test_ctx))

    # Expiring the donors pool and touching the property should
    # reconstruct the matrices.
    caplog.clear()
    recommender._donors_pool.force_expiry()
    recommender.donors_pool
    assert check_matrix_built(caplog)

    # The same holds for the LR curves.
    caplog.clear()
    recommender._lr_curves.force_expiry()
    recommender.lr_curves
    assert check_matrix_built(caplog)
# Example #17 (scraped example separator; vote count 0)
def test_can_recommend(instantiate_mocked_s3_bucket):
    # Create a new instance of a SimilarityRecommender.
    recommender = SimilarityRecommender()

    # Empty client info -> no recommendation.
    assert not recommender.can_recommend({})

    # A fully-populated client is recommendable.
    assert recommender.can_recommend(generate_a_fake_taar_client())

    # Nulling or deleting any required field must block recommendation.
    for field in CATEGORICAL_FEATURES + CONTINUOUS_FEATURES:
        partial_profile = generate_a_fake_taar_client()

        # Null the field first...
        partial_profile[field] = None
        assert not recommender.can_recommend(partial_profile)

        # ...then drop it entirely (in place).
        del partial_profile[field]
        assert not recommender.can_recommend(partial_profile)
def test_recommender_str():
    # The string representation of the recommender is its class name.
    recommender = SimilarityRecommender(create_cts_test_ctx())
    assert str(recommender) == "SimilarityRecommender"
# Example #19 (scraped example separator; vote count 0)
def test_soft_fail():
    # With no source files available the recommender must decline
    # gracefully rather than crash.
    recommender = SimilarityRecommender()
    assert not recommender.can_recommend({})
# Example #20 (scraped example separator; vote count 0)
def test_get_lr(instantiate_mocked_s3_bucket):
    # LR values exist at the extremes and decrease with distance.
    recommender = SimilarityRecommender()
    for distance in (0.0001, 10.0):
        assert recommender.get_lr(distance) is not None
    assert recommender.get_lr(0.001) > recommender.get_lr(5.0)
# Example #21 (scraped example separator; vote count 0)
def test_recommender_str(instantiate_mocked_s3_bucket):
    # The string representation of the recommender is its class name.
    recommender = SimilarityRecommender()
    assert str(recommender) == "SimilarityRecommender"
# Example #22 (scraped example separator; vote count 0)
    addons = client['installed_addons']
    num_mask = get_num_masked(addons)

    masked, unmasked = random_partition(addons, num_mask)

    client['installed_addons'] = unmasked
    client['masked_addons'] = masked

    return client


# Lazily mask addons for every training client; `map` returns an
# iterator, so the masking runs only when consumed.
training_masked = map(mask_addons, training)

# One instance of each recommender under evaluation, keyed by name.
recommenders = {
    "collaborative": CollaborativeRecommender(),
    "similarity": SimilarityRecommender(),
    "locale": LocaleRecommender("./top_addons_by_locale.json"),
    "legacy": LegacyRecommender()
}

def compute_features(client_data):
    """Return (client_data, matrix) where the matrix has one row per
    whitelisted addon and one column per recommender's weight for it."""
    # Collect each recommender's weighted recommendations for the client.
    weighted = [
        recommender.get_weighted_recommendations(client_data)
        for recommender in recommenders.values()
    ]

    # Row per whitelisted addon, column per recommender.
    matrix = [
        [weights[addon] for weights in weighted]
        for addon in whitelist
    ]

    return client_data, np.array(matrix)
def test_recommender_str(mock_s3_continuous_data):
    # The string representation of the recommender is its class name.
    recommender = SimilarityRecommender()
    assert str(recommender) == "SimilarityRecommender"
def test_get_lr(mock_s3_continuous_data):
    # LR values exist at the extremes and decrease with distance.
    recommender = SimilarityRecommender()
    for distance in (0.0001, 10.0):
        assert recommender.get_lr(distance) is not None
    assert recommender.get_lr(0.001) > recommender.get_lr(5.0)
def test_recommender_str(test_ctx):
    # The string representation of the recommender is its class name.
    recommender = SimilarityRecommender(install_continuous_data(test_ctx))
    assert str(recommender) == "SimilarityRecommender"