Esempio n. 1
0
 def setUp(self):
     """Build the recommender under test around a MagicMock passed as ``b``."""
     self.recommender = SommelierRecommenderBase(b=MagicMock())
Esempio n. 2
0
 def setUp(self):
     """Attach a SommelierRecommenderBase backed by a MagicMock to this test."""
     # The mock is handed over through the ``b`` keyword argument.
     broker_stub = MagicMock()
     recommender = SommelierRecommenderBase(b=broker_stub)
     self.recommender = recommender
Esempio n. 3
0
class RecommenderTest(unittest.TestCase):
    """Unit tests for the helper methods of SommelierRecommenderBase.

    Fixtures are declared as class attributes; setUp() builds a fresh
    recommender around a MagicMock broker for every test method.
    """

    # Four tastings, each by a distinct author for a distinct wine.
    dummy_tastings = [
        { "author_id": 1, "wine_id": 5, "rating": 9 },
        { "author_id": 2, "wine_id": 6, "rating": 10 },
        { "author_id": 3, "wine_id": 7, "rating": 11 },
        { "author_id": 4, "wine_id": 8, "rating": 12 }]

    # The trailing ", [...]" makes each expectation a 2-tuple: a dict of
    # row id -> ratings list, plus the list of column ids giving the position
    # of each rating (e.g. author 1 rated wine 5, which sits at index 1).
    # NOTE(review): the [8, 5, 6, 7] order looks like hash-based iteration
    # order of the wine ids - confirm against preferences().
    expected_preferences_1 = {
        1: [ 0, 9, 0, 0 ],
        2: [ 0, 0, 10, 0 ],
        3: [ 0, 0, 0, 11 ],
        4: [ 12, 0, 0, 0 ]
        }, [8, 5, 6, 7]

    # Same tastings pivoted the other way: wine rows, author columns.
    expected_preferences_2 = {
        5: [ 9, 0, 0, 0 ],
        6: [ 0, 10, 0, 0 ],
        7: [ 0, 0, 11, 0 ],
        8: [ 0, 0, 0, 12 ]
        }, [1, 2, 3, 4]

    # Rows chosen to cover perfect positive / negative correlation, a constant
    # row (zero standard deviation) and rows with fewer than three items.
    pearson_r_test_preferences = {
        "row_1": [ 1, 2, 3, 4, 5 ],
        "row_2": [ 2, 3, 4, 5, 6 ],
        "row_3": [ 5, 4, 3, 2, 1 ],
        "row_4": [ 3, 3, 3, 3, 3 ],
        "row_5": [ 1, 2 ],
        "row_6": [ 3, 1 ]}

    # The same four fields encoded with each supported separator...
    dummy_line_tab_separated = "1\t2\t3\t4"

    dummy_line_colon_separated = "1::2::3::4"

    # ...and two lines that the parser is expected to reject.
    dummy_line_invalid_separator = "1BAZ2BAZ3BAZ4"

    dummy_line_invalid_values = "Should::NOT::be:Strings"

    # Expected parse of the valid lines above: fields 1, 2, 3 become author_id,
    # wine_id and rating; field 4 becomes the tasting_date.
    # NOTE(review): value 4 rendering as 01:00:04 suggests a UTC+1 local-time
    # conversion, so this expectation may be timezone-dependent - confirm.
    expected_tasting = {'author_id': 1, 'rating': 3, 'tasting_date': '1970-01-01 01:00:04', 'wine_id': 2}

    # dummy_tastings carry no tasting_date; the fourth field is expected to be 0.
    expected_movielens_lines_colon = [ "1::5::9::0", "2::6::10::0", "3::7::11::0", "4::8::12::0" ]

    expected_movielens_lines_tab = [ "1\t5\t9\t0", "2\t6\t10\t0", "3\t7\t11\t0", "4\t8\t12\t0" ]

    dummy_matrix_a = [[1,1,1,1],[2,2,2,2],[3,3,3,3]]

    dummy_matrix_b = [[1,1,1,1],[2,4,4,2],[3,3,3,1]]

    dummy_matrix_c = [[0,2,0,2],[0,2,0,2],[2,2,2,2]]

    # (mean absolute error of each row pair, overall mean absolute error);
    # e.g. a vs b: row 2 differs by 2 in two of four cells -> 1.0.
    expected_mae_a_b = ((0.0,1.0,0.5),0.5)

    expected_mae_a_c = ((1.0,1.0,1.0),1.0)

    def setUp(self):
        """Create the recommender under test with a mocked broker."""
        mock_broker = MagicMock()
        self.recommender = SommelierRecommenderBase(b=mock_broker)

    def test_pearson_r(self):
        """pearson_r() yields 1.0 / -1.0 for perfect correlation and 0.0 for degenerate input."""
        # rows 1 and 2 have 100% positive correlation
        self.assertEqual(1.0, self.recommender.pearson_r(self.pearson_r_test_preferences, 'row_1', 'row_2'))

        # rows 1 and 3 have 100% negative correlation
        self.assertEqual(-1.0, self.recommender.pearson_r(self.pearson_r_test_preferences, 'row_1', 'row_3'))

        # row 4 has a standard deviation of 0, which we need to deal with to avoid a division by zero error
        # when the covariance is divided by the product of the standard deviations
        # in this case the method returns 0.0, on the basis that a user submitting the same rating for every
        # item is not expressing any preference at all, so a neutral similarity score is appropriate
        self.assertEqual(0.0, self.recommender.pearson_r(self.pearson_r_test_preferences, 'row_1', 'row_4'))

        # cases where there are two or fewer items for comparison are problematic for pearson_r; any two
        # lists with 2 items will always result in either 1.0 or -1.0. That is not a useful score, as
        # there may be no similarity in the ratings at all, so we return 0.0 if there are < 3 items
        self.assertEqual(0.0, self.recommender.pearson_r(self.pearson_r_test_preferences, 'row_5', 'row_6'))

    def test_preferences(self):
        """preferences() pivots tastings into a rows dict plus column id list, in either orientation."""
        # preferences formatting for author rows / wine columns
        self.assertEqual(self.expected_preferences_1, self.recommender.preferences(self.dummy_tastings, 'author_id', 'wine_id'))

        # preferences formatting for wine rows / author columns
        self.assertEqual(self.expected_preferences_2, self.recommender.preferences(self.dummy_tastings, 'wine_id', 'author_id'))

    # Movielens data can be encoded with either tabs or double colons (::), so test for both and neither...
    def test_movielens_line_to_tasting(self):
        """Valid tab / '::' lines parse to expected_tasting; malformed lines raise."""
        self.assertEqual(self.expected_tasting, self.recommender.movielens_line_to_tasting(self.dummy_line_tab_separated))
        self.assertEqual(self.expected_tasting, self.recommender.movielens_line_to_tasting(self.dummy_line_colon_separated))
        # Hand the callable and its argument straight to assertRaises. assertRaises invokes the
        # callable with exactly the extra arguments given, so wrapping the call in a one-parameter
        # lambda would raise TypeError (itself an Exception) and the assertion would pass without
        # the parser ever running.
        self.assertRaises(Exception, self.recommender.movielens_line_to_tasting, self.dummy_line_invalid_separator)
        self.assertRaises(Exception, self.recommender.movielens_line_to_tasting, self.dummy_line_invalid_values)

    # tastings_to_movielens_format() should use double colon (::) separator by default
    def test_tastings_to_movielens_format(self):
        """Default and explicit '::' output match; a tab separator is honoured."""
        self.assertEqual(self.expected_movielens_lines_colon, self.recommender.tastings_to_movielens_format(self.dummy_tastings))
        self.assertEqual(self.expected_movielens_lines_colon, self.recommender.tastings_to_movielens_format(self.dummy_tastings, separator="::"))
        self.assertEqual(self.expected_movielens_lines_tab, self.recommender.tastings_to_movielens_format(self.dummy_tastings, separator="\t"))

    def test_evaluate_matrices(self):
        """recsys_evaluate_matrices() returns the expected per-row and overall error values."""
        # assertEqual rather than the deprecated assertEquals alias (removed in Python 3.12)
        self.assertEqual(self.expected_mae_a_b, self.recommender.recsys_evaluate_matrices(self.dummy_matrix_a, self.dummy_matrix_b))
        self.assertEqual(self.expected_mae_a_c, self.recommender.recsys_evaluate_matrices(self.dummy_matrix_a, self.dummy_matrix_c))
Esempio n. 4
0
class RecommenderTest(unittest.TestCase):
    """Unit tests for the helper methods of SommelierRecommenderBase.

    Fixtures are declared as class attributes; setUp() builds a fresh
    recommender around a MagicMock broker for every test method.
    """

    # Four tastings, each by a distinct author for a distinct wine.
    dummy_tastings = [{
        "author_id": 1,
        "wine_id": 5,
        "rating": 9
    }, {
        "author_id": 2,
        "wine_id": 6,
        "rating": 10
    }, {
        "author_id": 3,
        "wine_id": 7,
        "rating": 11
    }, {
        "author_id": 4,
        "wine_id": 8,
        "rating": 12
    }]

    # The trailing ", [...]" makes each expectation a 2-tuple: a dict of
    # row id -> ratings list, plus the list of column ids giving the position
    # of each rating (e.g. author 1 rated wine 5, which sits at index 1).
    # NOTE(review): the [8, 5, 6, 7] order looks like hash-based iteration
    # order of the wine ids - confirm against preferences().
    expected_preferences_1 = {
        1: [0, 9, 0, 0],
        2: [0, 0, 10, 0],
        3: [0, 0, 0, 11],
        4: [12, 0, 0, 0]
    }, [8, 5, 6, 7]

    # Same tastings pivoted the other way: wine rows, author columns.
    expected_preferences_2 = {
        5: [9, 0, 0, 0],
        6: [0, 10, 0, 0],
        7: [0, 0, 11, 0],
        8: [0, 0, 0, 12]
    }, [1, 2, 3, 4]

    # Rows chosen to cover perfect positive / negative correlation, a constant
    # row (zero standard deviation) and rows with fewer than three items.
    pearson_r_test_preferences = {
        "row_1": [1, 2, 3, 4, 5],
        "row_2": [2, 3, 4, 5, 6],
        "row_3": [5, 4, 3, 2, 1],
        "row_4": [3, 3, 3, 3, 3],
        "row_5": [1, 2],
        "row_6": [3, 1]
    }

    # The same four fields encoded with each supported separator...
    dummy_line_tab_separated = "1\t2\t3\t4"

    dummy_line_colon_separated = "1::2::3::4"

    # ...and two lines that the parser is expected to reject.
    dummy_line_invalid_separator = "1BAZ2BAZ3BAZ4"

    dummy_line_invalid_values = "Should::NOT::be:Strings"

    # Expected parse of the valid lines above: fields 1, 2, 3 become author_id,
    # wine_id and rating; field 4 becomes the tasting_date.
    # NOTE(review): value 4 rendering as 01:00:04 suggests a UTC+1 local-time
    # conversion, so this expectation may be timezone-dependent - confirm.
    expected_tasting = {
        'author_id': 1,
        'rating': 3,
        'tasting_date': '1970-01-01 01:00:04',
        'wine_id': 2
    }

    # dummy_tastings carry no tasting_date; the fourth field is expected to be 0.
    expected_movielens_lines_colon = [
        "1::5::9::0", "2::6::10::0", "3::7::11::0", "4::8::12::0"
    ]

    expected_movielens_lines_tab = [
        "1\t5\t9\t0", "2\t6\t10\t0", "3\t7\t11\t0", "4\t8\t12\t0"
    ]

    dummy_matrix_a = [[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3]]

    dummy_matrix_b = [[1, 1, 1, 1], [2, 4, 4, 2], [3, 3, 3, 1]]

    dummy_matrix_c = [[0, 2, 0, 2], [0, 2, 0, 2], [2, 2, 2, 2]]

    # (mean absolute error of each row pair, overall mean absolute error);
    # e.g. a vs b: row 2 differs by 2 in two of four cells -> 1.0.
    expected_mae_a_b = ((0.0, 1.0, 0.5), 0.5)

    expected_mae_a_c = ((1.0, 1.0, 1.0), 1.0)

    def setUp(self):
        """Create the recommender under test with a mocked broker."""
        mock_broker = MagicMock()
        self.recommender = SommelierRecommenderBase(b=mock_broker)

    def test_pearson_r(self):
        """pearson_r() yields 1.0 / -1.0 for perfect correlation and 0.0 for degenerate input."""
        # rows 1 and 2 have 100% positive correlation
        self.assertEqual(
            1.0,
            self.recommender.pearson_r(self.pearson_r_test_preferences,
                                       'row_1', 'row_2'))

        # rows 1 and 3 have 100% negative correlation
        self.assertEqual(
            -1.0,
            self.recommender.pearson_r(self.pearson_r_test_preferences,
                                       'row_1', 'row_3'))

        # row 4 has a standard deviation of 0, which we need to deal with to avoid a division by zero error
        # when the covariance is divided by the product of the standard deviations
        # in this case the method returns 0.0, on the basis that a user submitting the same rating for every
        # item is not expressing any preference at all, so a neutral similarity score is appropriate
        self.assertEqual(
            0.0,
            self.recommender.pearson_r(self.pearson_r_test_preferences,
                                       'row_1', 'row_4'))

        # cases where there are two or fewer items for comparison are problematic for pearson_r; any two
        # lists with 2 items will always result in either 1.0 or -1.0. That is not a useful score, as
        # there may be no similarity in the ratings at all, so we return 0.0 if there are < 3 items
        self.assertEqual(
            0.0,
            self.recommender.pearson_r(self.pearson_r_test_preferences,
                                       'row_5', 'row_6'))

    def test_preferences(self):
        """preferences() pivots tastings into a rows dict plus column id list, in either orientation."""
        # preferences formatting for author rows / wine columns
        self.assertEqual(
            self.expected_preferences_1,
            self.recommender.preferences(self.dummy_tastings, 'author_id',
                                         'wine_id'))

        # preferences formatting for wine rows / author columns
        self.assertEqual(
            self.expected_preferences_2,
            self.recommender.preferences(self.dummy_tastings, 'wine_id',
                                         'author_id'))

    # Movielens data can be encoded with either tabs or double colons (::), so test for both and neither...
    def test_movielens_line_to_tasting(self):
        """Valid tab / '::' lines parse to expected_tasting; malformed lines raise."""
        self.assertEqual(
            self.expected_tasting,
            self.recommender.movielens_line_to_tasting(
                self.dummy_line_tab_separated))
        self.assertEqual(
            self.expected_tasting,
            self.recommender.movielens_line_to_tasting(
                self.dummy_line_colon_separated))
        # Hand the callable and its argument straight to assertRaises. assertRaises invokes the
        # callable with exactly the extra arguments given, so wrapping the call in a one-parameter
        # lambda would raise TypeError (itself an Exception) and the assertion would pass without
        # the parser ever running.
        self.assertRaises(
            Exception, self.recommender.movielens_line_to_tasting,
            self.dummy_line_invalid_separator)
        self.assertRaises(
            Exception, self.recommender.movielens_line_to_tasting,
            self.dummy_line_invalid_values)

    # tastings_to_movielens_format() should use double colon (::) separator by default
    def test_tastings_to_movielens_format(self):
        """Default and explicit '::' output match; a tab separator is honoured."""
        self.assertEqual(
            self.expected_movielens_lines_colon,
            self.recommender.tastings_to_movielens_format(self.dummy_tastings))
        self.assertEqual(
            self.expected_movielens_lines_colon,
            self.recommender.tastings_to_movielens_format(self.dummy_tastings,
                                                          separator="::"))
        self.assertEqual(
            self.expected_movielens_lines_tab,
            self.recommender.tastings_to_movielens_format(self.dummy_tastings,
                                                          separator="\t"))

    def test_evaluate_matrices(self):
        """recsys_evaluate_matrices() returns the expected per-row and overall error values."""
        # assertEqual rather than the deprecated assertEquals alias (removed in Python 3.12)
        self.assertEqual(
            self.expected_mae_a_b,
            self.recommender.recsys_evaluate_matrices(self.dummy_matrix_a,
                                                      self.dummy_matrix_b))
        self.assertEqual(
            self.expected_mae_a_c,
            self.recommender.recsys_evaluate_matrices(self.dummy_matrix_a,
                                                      self.dummy_matrix_c))