def test_distance_functions(test_ctx):
    """Check that badly matching clients end up with no recommendations.

    One fake client dict is mutated in place, step by step, and the
    recommender is queried after every step. The changes are cumulative:
    each query sees all of the modifications made before it.
    """
    with mock_install_continuous_data(test_ctx):
        recommender = SimilarityRecommender(test_ctx)

        # The untouched fake client is expected to yield recommendations.
        client = generate_a_fake_taar_client()
        assert len(recommender.recommend(client, 10)) > 0

        def overwrite(feature_names, value):
            # Set every existing client key found in `feature_names`
            # to `value`; keys are collected before the dict is mutated.
            selected = [
                name for name in client.keys() if name in feature_names
            ]
            client.update(dict.fromkeys(selected, value))

        # Make it a generally poor match for the donors.
        poor_match = {
            "total_uri": 10,
            "bookmark_count": 2,
            "subsession_length": 10,
        }
        client.update(poor_match)

        # Categorical values that are not meant to match any donor.
        overwrite(CATEGORICAL_FEATURES, "zero")
        assert len(recommender.recommend(client, 10)) == 0

        # Additionally force every continuous variable to zero.
        overwrite(CONTINUOUS_FEATURES, 0)
        assert len(recommender.recommend(client, 10)) == 0

        # Swap in a different non-matching categorical value; the
        # continuous variables are still zero at this point.
        overwrite(CATEGORICAL_FEATURES, "one billion")
        assert len(recommender.recommend(client, 10)) == 0

        # Finally push every continuous variable to a very high value.
        overwrite(CONTINUOUS_FEATURES, 1e60)
        assert len(recommender.recommend(client, 10)) == 0

        # Test for 0.0 values if j_c is not normalized and j_d is fine.
        # These two checks only exercise float arithmetic on local
        # constants; the recommender is not involved.
        j_c, j_d = 0.0, 0.42
        assert abs(j_c * j_d) == 0.0
        assert abs((j_c + 0.01) * j_d) != 0.0
def test_weights_categorical():
    """Verify the structure and ordering of weighted recommendations.

    According to the original (incomplete) note on this test, the
    recommended add-ons should come from
        ["{test-guid-1}", "{test-guid-2}", "{test-guid-3}", "{test-guid-4}"],
        ["{test-guid-9}", "{test-guid-10}", "{test-guid-11}", "{test-guid-12}"]
    i.e. the first two entries in the sample data, with reference to
    their geo_city data (the note was cut off at that point). The
    assertions below only check result structure and weight ordering.
    """
    # Build the recommender on the continuous context wrapped around the
    # categorical one.
    categorical_ctx = create_cat_test_ctx()
    continuous_ctx = create_cts_test_ctx()
    recommender = SimilarityRecommender(continuous_ctx.wrap(categorical_ctx))

    # Recommendations are expected to be a sorted list of
    # (guid, weight) tuples.
    client = generate_a_fake_taar_client()
    recommendation_list = recommender.recommend(client, 2)

    assert len(recommendation_list) == 2
    # Every entry must pair a string guid with a float weight.
    for guid, weight in recommendation_list:
        assert isinstance(guid, six.string_types)
        assert isinstance(weight, float)

    # Weights must be strictly descending and positive.
    (_, first_weight), (_, second_weight) = recommendation_list
    assert first_weight > second_weight > 0
def test_weights_continuous():
    """Verify weighted recommendations built from the continuous context.

    NOTE(review): a later function in this file reuses the name
    ``test_weights_continuous``, so this definition is shadowed at
    import time.
    """
    recommender = SimilarityRecommender(create_cts_test_ctx())

    # Recommendations are expected to be a sorted list of
    # (guid, weight) tuples.
    client = generate_a_fake_taar_client()
    recommendation_list = recommender.recommend(client, 2)

    # Dump the raw result as JSON to a fixed path under /tmp.
    with open('/tmp/similarity_recommender.json', 'w') as dump_file:
        serialized = json.dumps(recommendation_list)
        dump_file.write(serialized)

    # Every entry must pair a string guid with a float weight.
    assert len(recommendation_list) == 2
    for guid, weight in recommendation_list:
        assert isinstance(guid, six.string_types)
        assert isinstance(weight, float)

    # Original note: the duplicate presence of test-guid-1 should make
    # the first weight double the second, with both above 1.0. Only the
    # strict ordering and the 1.0 lower bound are asserted here.
    (_, first_weight), (_, second_weight) = recommendation_list
    assert first_weight > second_weight > 1.0
def test_weights_continuous(mock_s3_continuous_data):
    """Verify weighted recommendations from a default-built recommender.

    ``mock_s3_continuous_data`` is requested but never referenced in the
    body (presumably a pytest fixture that installs mocked data).
    """
    recommender = SimilarityRecommender()

    # Recommendations should be a sorted list of (guid, weight) tuples.
    client = generate_a_fake_taar_client()
    recommendation_list = recommender.recommend(client, 2)

    # Dump the raw result as JSON to a fixed path under /tmp.
    with open('/tmp/similarity_recommender.json', 'w') as dump_file:
        serialized = json.dumps(recommendation_list)
        dump_file.write(serialized)

    # Every entry must pair a string guid with a float weight.
    assert len(recommendation_list) == 2
    for guid, weight in recommendation_list:
        assert isinstance(guid, str)
        assert isinstance(weight, float)

    # Both weights are expected to be equal and positive.
    (_, first_weight), (_, second_weight) = recommendation_list
    assert first_weight == second_weight > 0
# Example #5
0
def test_recommendations(instantiate_mocked_s3_bucket):
    """Check that the add-ons of the most similar donor are recommended.

    ``instantiate_mocked_s3_bucket`` is requested but never referenced
    in the body (presumably a pytest fixture).

    NOTE(review): later functions in this file reuse the name
    ``test_recommendations``, so this definition is shadowed at import
    time.
    """
    recommender = SimilarityRecommender()
    client = generate_a_fake_taar_client()
    recommendations = recommender.recommend(client, 10)

    # The result must be a plain list.
    assert isinstance(recommendations, list)

    # Exactly these four guids are expected from the most similar donor.
    expected_guids = (
        "{test-guid-9}",
        "{test-guid-10}",
        "{test-guid-11}",
        "{test-guid-12}",
    )
    for guid in expected_guids:
        assert guid in recommendations
    assert len(recommendations) == 4
def test_recommendations(test_ctx):
    """Check the single top recommendation and the type of its weight.

    NOTE(review): a later function in this file reuses the name
    ``test_recommendations``, so this definition is shadowed at import
    time.
    """
    # Build the recommender on a context with continuous data installed.
    recommender = SimilarityRecommender(install_continuous_data(test_ctx))

    client = generate_a_fake_taar_client()
    recommendation_list = recommender.recommend(client, 1)

    assert isinstance(recommendation_list, list)
    assert len(recommendation_list) == 1

    top_guid, top_weight = recommendation_list[0]

    # The reported add-on must be the expected one from the most similar
    # donor, and its weight must be exactly a numpy float64.
    assert top_guid == "{test-guid-1}"
    assert type(top_weight) is np.float64
def test_recommendations(mock_s3_continuous_data):
    """Check the single top recommendation and the type of its weight.

    ``mock_s3_continuous_data`` is requested but never referenced in the
    body (presumably a pytest fixture that installs mocked data).
    """
    recommender = SimilarityRecommender()

    # TODO: clobber the SimilarityRecommender::lr_curves

    client = generate_a_fake_taar_client()
    recommendation_list = recommender.recommend(client, 1)

    assert isinstance(recommendation_list, list)
    assert len(recommendation_list) == 1

    top_guid, top_weight = recommendation_list[0]

    # The reported add-on must be the expected one from the most similar
    # donor, and its weight must be exactly a numpy float64.
    assert top_guid == "{test-guid-1}"
    assert type(top_weight) is np.float64