def test_bpmf_convergence(self):
    """Check that a smaller ``converge`` value gives a lower RMSE for BPMF.

    Two BPMF models are built with identical settings (seed 0) except for
    ``converge`` (1e-3 vs 1e-2). Each is fitted for 5 iterations on the same
    generated ratings and scored with ``RMSE`` on those same ratings.
    """
    n_user = 100
    n_item = 200
    ratings = make_ratings(
        n_user, n_item, 20, 30, self.rating_choices, seed=self.seed)

    def fitted_rmse(converge):
        # Only ``converge`` differs between the two models.
        # NOTE(review): presumably a stopping tolerance -- confirm in BPMF.
        model = BPMF(n_user, n_item, self.n_feature, seed=0,
                     max_rating=self.max_rat, min_rating=self.min_rat,
                     converge=converge)
        model.fit(ratings, n_iters=5)
        return RMSE(model.predict(ratings[:, :2]), ratings[:, 2])

    rmse_1 = fitted_rmse(1e-3)
    rmse_2 = fitted_rmse(1e-2)
    # assertLess reports both values on failure; assertTrue(a < b) would not.
    self.assertLess(rmse_1, rmse_2)
def test_als_with_random_data(self):
    """Check that the ALS RMSE is lower after 3 iterations than after 1.

    Two identically configured ALS models (seed 0, reg 1e-2) are fitted on
    the same generated ratings for 1 and 3 iterations, then scored with
    ``RMSE`` on those same ratings.
    """
    n_user = 100
    n_item = 200
    ratings = make_ratings(n_user, n_item, 20, 30, self.rating_choices,
                           seed=self.seed)

    def rmse_after(n_iters):
        # Fresh model per run so the only difference is the iteration count.
        model = ALS(n_user, n_item, self.n_feature, reg=1e-2, seed=0,
                    max_rating=self.max_rat, min_rating=self.min_rat)
        model.fit(ratings, n_iters=n_iters)
        return RMSE(model.predict(ratings[:, :2]), ratings[:, 2])

    rmse_1 = rmse_after(1)
    rmse_2 = rmse_after(3)
    # assertGreater reports both values on failure; assertTrue(a > b) hides them.
    self.assertGreater(rmse_1, rmse_2)
def test_als_seed(self):
    """Check that the ALS seed decides the fitted feature matrices.

    Two models fitted with seed 0 must end with equal user and item
    features; a model fitted with seed 1 must differ from them in both.
    """
    n_user, n_item = 100, 200
    ratings = make_ratings(
        n_user, n_item, 20, 30, self.rating_choices, seed=self.seed)

    def fit_with_seed(seed):
        # Same configuration and data every time; only the seed varies.
        model = ALS(n_user, n_item, self.n_feature, reg=1e-2, seed=seed,
                    max_rating=self.max_rat, min_rating=self.min_rat)
        model.fit(ratings, n_iters=3)
        return model

    # Same seed: features must match.
    first = fit_with_seed(0)
    second = fit_with_seed(0)
    assert_array_equal(first.user_features_, second.user_features_)
    assert_array_equal(first.item_features_, second.item_features_)

    # Different seed: the equality check itself is expected to fail.
    other = fit_with_seed(1)
    assert_raises(AssertionError, assert_array_equal,
                  first.user_features_, other.user_features_)
    assert_raises(AssertionError, assert_array_equal,
                  first.item_features_, other.item_features_)
def test_als_with_missing_data(self):
    """Check that ALS leaves features of an unrated user/item unchanged.

    Ratings are generated for ``n_user - 1`` users and ``n_item - 1`` items
    while the model is sized for ``n_user`` x ``n_item``. The last user and
    item rows are compared before and after one fit iteration.
    """
    n_user = 10
    n_item = 20
    ratings = make_ratings(n_user - 1, n_item - 1, 5, 10,
                           self.rating_choices, seed=self.seed)
    als1 = ALS(n_user, n_item, self.n_feature, reg=1e-2, seed=0,
               max_rating=self.max_rat, min_rating=self.min_rat)

    last_user = n_user - 1
    last_item = n_item - 1
    # Snapshot with copy(): a basic slice of an ndarray is a view, so if
    # fit() updates the feature matrices in place the "before" values would
    # silently track the change and the comparison could never fail.
    unuse_user_f_before = als1.user_features_[last_user, :].copy()
    unuse_item_f_before = als1.item_features_[last_item, :].copy()

    als1.fit(ratings, n_iters=1)

    unuse_user_f_after = als1.user_features_[last_user, :]
    unuse_item_f_after = als1.item_features_[last_item, :]
    # The last user/item features should be unchanged since there is no
    # rating data for them.
    assert_array_equal(unuse_user_f_before, unuse_user_f_after)
    assert_array_equal(unuse_item_f_before, unuse_item_f_after)
def test_bpmf_convergence(self):
    """Check that a smaller ``converge`` value gives a lower RMSE for BPMF.

    Two BPMF models are built with identical settings (seed 0) except for
    ``converge`` (1e-2 vs 1e-1). Each is fitted for 5 iterations on the same
    generated ratings and scored with ``RMSE`` on those same ratings.
    """
    n_user = 100
    n_item = 200
    ratings = make_ratings(n_user, n_item, 20, 30, self.rating_choices,
                           seed=self.seed)

    scores = []
    # NOTE(review): ``converge`` is presumably a stopping tolerance -- confirm.
    for converge in (1e-2, 1e-1):
        model = BPMF(n_user, n_item, self.n_feature, seed=0,
                     max_rating=self.max_rat, min_rating=self.min_rat,
                     converge=converge)
        model.fit(ratings, n_iters=5)
        scores.append(RMSE(model.predict(ratings[:, :2]), ratings[:, 2]))

    rmse_1, rmse_2 = scores
    # assertLess reports both values on failure; assertTrue(a < b) would not.
    self.assertLess(rmse_1, rmse_2)
def test_bpmf_with_random_data(self):
    """Check that the BPMF RMSE is lower after 3 iterations than after 1.

    Two identically configured BPMF models (seeded with ``self.seed``) are
    fitted on the same generated ratings for 1 and 3 iterations, then scored
    with ``RMSE`` on those same ratings.
    """
    n_user = 1000
    n_item = 2000
    ratings = make_ratings(n_user, n_item, 20, 30, self.rating_choices,
                           seed=self.seed)

    def rmse_after(n_iters):
        # Fresh model per run so the only difference is the iteration count.
        model = BPMF(n_user, n_item, self.n_feature,
                     max_rating=self.max_rat, min_rating=self.min_rat,
                     seed=self.seed)
        model.fit(ratings, n_iters=n_iters)
        return RMSE(model.predict(ratings[:, :2]), ratings[:, 2])

    rmse_1 = rmse_after(1)
    rmse_2 = rmse_after(3)
    # assertGreater reports both values on failure; assertTrue(a > b) hides them.
    self.assertGreater(rmse_1, rmse_2)
def test_als_with_random_data(self):
    """Check that the ALS RMSE is lower after 3 iterations than after 1.

    Two identically configured ALS models (seed 0, reg 1e-2) are fitted on
    the same generated ratings for 1 and 3 iterations, then scored with
    ``RMSE`` on those same ratings.
    """
    n_user = 100
    n_item = 200
    ratings = make_ratings(
        n_user, n_item, 20, 30, self.rating_choices, seed=self.seed)

    scores = []
    for n_iters in (1, 3):
        # Fresh model per run so the only difference is the iteration count.
        model = ALS(n_user, n_item, self.n_feature, reg=1e-2, seed=0,
                    max_rating=self.max_rat, min_rating=self.min_rat)
        model.fit(ratings, n_iters=n_iters)
        scores.append(RMSE(model.predict(ratings[:, :2]), ratings[:, 2]))

    rmse_1, rmse_2 = scores
    # assertGreater reports both values on failure; assertTrue(a > b) hides them.
    self.assertGreater(rmse_1, rmse_2)
def test_als_convergence(self):
    """Check that a smaller ``converge`` value gives a lower RMSE for ALS.

    Two ALS models are built with identical settings (seed 0, reg 1e-2)
    except for ``converge`` (1e-2 vs 1e-1). Each is fitted with
    ``n_iters=10`` on the same generated ratings and scored with ``RMSE``
    on those same ratings.
    """
    n_user = 100
    n_item = 200
    ratings = make_ratings(
        n_user, n_item, 20, 30, self.rating_choices, seed=self.seed)

    def fitted_rmse(converge):
        # Only ``converge`` differs between the two models.
        # NOTE(review): presumably a stopping tolerance -- confirm in ALS.
        model = ALS(n_user, n_item, self.n_feature, reg=1e-2, seed=0,
                    max_rating=self.max_rat, min_rating=self.min_rat,
                    converge=converge)
        model.fit(ratings, n_iters=10)
        return RMSE(model.predict(ratings[:, :2]), ratings[:, 2])

    rmse_1 = fitted_rmse(1e-2)
    rmse_2 = fitted_rmse(1e-1)
    # assertLess reports both values on failure; assertTrue(a < b) would not.
    self.assertLess(rmse_1, rmse_2)
def test_als_convergence(self):
    """Check that a smaller ``converge`` value gives a lower RMSE for ALS.

    Two ALS models are built with identical settings (seed 0, reg 1e-2)
    except for ``converge`` (1e-2 vs 1e-1). Each is fitted with
    ``n_iters=10`` on the same generated ratings and scored with ``RMSE``
    on those same ratings.
    """
    n_user = 100
    n_item = 200
    ratings = make_ratings(n_user, n_item, 20, 30, self.rating_choices,
                           seed=self.seed)

    scores = []
    # NOTE(review): ``converge`` is presumably a stopping tolerance -- confirm.
    for converge in (1e-2, 1e-1):
        model = ALS(n_user, n_item, self.n_feature, reg=1e-2, seed=0,
                    max_rating=self.max_rat, min_rating=self.min_rat,
                    converge=converge)
        model.fit(ratings, n_iters=10)
        scores.append(RMSE(model.predict(ratings[:, :2]), ratings[:, 2]))

    rmse_1, rmse_2 = scores
    # assertLess reports both values on failure; assertTrue(a < b) would not.
    self.assertLess(rmse_1, rmse_2)
def test_pmf_with_random_data(self):
    """Check that the PMF RMSE is lower after 3 iterations than after 1.

    Two identically configured PMF models (seed 0, batch_size 1000.,
    epsilon 10.) are fitted on the same generated ratings for 1 and 3
    iterations, then scored with ``RMSE`` on those same ratings.
    """
    n_user = 1000
    n_item = 2000
    ratings = make_ratings(n_user, n_item, 20, 30, self.rating_choices,
                           seed=self.seed)

    def rmse_after(n_iters):
        # Fresh model per run so the only difference is the iteration count.
        model = PMF(n_user, n_item, self.n_feature, batch_size=1000.,
                    epsilon=10., seed=0, max_rating=self.max_rat,
                    min_rating=self.min_rat)
        model.fit(ratings, n_iters=n_iters)
        return RMSE(model.predict(ratings[:, :2]), ratings[:, 2])

    rmse_1 = rmse_after(1)
    rmse_2 = rmse_after(3)
    # assertGreater reports both values on failure; assertTrue(a > b) hides them.
    self.assertGreater(rmse_1, rmse_2)
def test_als_not_fitted_err(self):
    """Check that ``ALS.predict`` raises ``NotFittedError`` before ``fit``."""
    ratings = make_ratings(10, 10, 1, 5, self.rating_choices, seed=self.seed)
    als = ALS(10, 10, self.n_feature)
    # Only predict() sits inside the context, so an exception raised while
    # generating ratings or constructing the model cannot satisfy the check.
    with self.assertRaises(NotFittedError):
        als.predict(ratings[:, :2])
def test_bpmf_not_fitted_err(self):
    """Check that ``BPMF.predict`` raises ``NotFittedError`` before ``fit``."""
    ratings = make_ratings(10, 10, 1, 5, self.rating_choices, seed=self.seed)
    bpmf = BPMF(10, 10, self.n_feature)
    # Only predict() sits inside the context, so an exception raised while
    # generating ratings or constructing the model cannot satisfy the check.
    with self.assertRaises(NotFittedError):
        bpmf.predict(ratings[:, :2])
def test_make_ratings(self):
    """Check the type and value bounds of arrays built by ``make_ratings``.

    For each (n_user, n_item, min_cnt, max_cnt) combination the result must
    be a NumPy array whose column 0 stays below ``n_user``, whose column 1
    stays below ``n_item`` and whose column 2 lies within the rating choices.
    """
    user_size = [10, 20, 50]
    item_size = [50, 100, 200]
    min_cnts = [1, 5, 10]
    max_cnts = [5, 10, 15]
    choices = list(range(1, 10))
    # Bounds of the rating choices do not change across iterations.
    lowest, highest = min(choices), max(choices)

    params = zip(user_size, item_size, min_cnts, max_cnts)
    for n_user, n_item, min_cnt, max_cnt in params:
        ratings = make_ratings(n_user, n_item, min_cnt, max_cnt, choices)
        # Dedicated assert methods report the offending values on failure,
        # which assertTrue(<expression>) cannot do.
        self.assertIsInstance(ratings, np.ndarray)
        self.assertLess(int(ratings[:, 0].max()), n_user)
        self.assertLess(int(ratings[:, 1].max()), n_item)
        self.assertLessEqual(ratings[:, 2].max(), highest)
        self.assertGreaterEqual(ratings[:, 2].min(), lowest)
def test_make_ratings(self):
    """Check the type and value bounds of arrays built by ``make_ratings``.

    For each (n_user, n_item, min_cnt, max_cnt) combination the result must
    be a NumPy array whose column 0 stays below ``n_user``, whose column 1
    stays below ``n_item`` and whose column 2 lies within the rating choices.
    """
    choices = list(range(1, 10))
    # Bounds of the rating choices do not change across iterations.
    lowest, highest = min(choices), max(choices)
    # One tuple per scenario: (n_user, n_item, min_cnt, max_cnt).
    cases = [
        (10, 50, 1, 5),
        (20, 100, 5, 10),
        (50, 200, 10, 15),
    ]
    for n_user, n_item, min_cnt, max_cnt in cases:
        ratings = make_ratings(n_user, n_item, min_cnt, max_cnt, choices)
        # Dedicated assert methods report the offending values on failure,
        # which assertTrue(<expression>) cannot do.
        self.assertIsInstance(ratings, np.ndarray)
        self.assertLess(int(ratings[:, 0].max()), n_user)
        self.assertLess(int(ratings[:, 1].max()), n_item)
        self.assertLessEqual(ratings[:, 2].max(), highest)
        self.assertGreaterEqual(ratings[:, 2].min(), lowest)
def test_make_ratings_input_check(self):
    """Check that ``make_ratings`` raises ``ValueError`` for invalid arguments.

    Three argument sets are tried in order; each call must raise.
    """
    # NOTE(review): the reasons below are read off the argument values only;
    # confirm them against the validation inside make_ratings.
    invalid_calls = (
        (10, 10, 5, 10, [1, 2, 3]),  # fourth argument equals the second
        (10, 10, 5, 4, [1, 2, 3]),   # third argument exceeds the fourth
        (10, 10, 5, 6, 2),           # last argument is an int, not a list
    )
    for bad_args in invalid_calls:
        with self.assertRaises(ValueError):
            make_ratings(*bad_args)
def test_build_user_item_matrix(self):
    """Check the matrix returned by ``build_user_item_matrix``.

    The result must be a SciPy sparse matrix with ``n_user`` rows and
    ``n_item`` columns, and each generated rating must be found at its
    (user, item) position in the dense form of the matrix.
    """
    n_user = 200
    n_item = 300
    choices = list(range(1, 5))
    ratings = make_ratings(n_user, n_item, 5, 10, choices)

    mtx = build_user_item_matrix(n_user, n_item, ratings)
    self.assertTrue(sparse.issparse(mtx))
    self.assertEqual(mtx.shape[0], n_user)
    self.assertEqual(mtx.shape[1], n_item)

    dense_mtx = mtx.toarray()
    # Iterate over the rows directly: no index bookkeeping and no reliance
    # on ``xrange``, which is not a builtin on Python 3.
    for row in ratings:
        user_idx, item_idx, rating = row[0], row[1], row[2]
        np_test.assert_almost_equal(dense_mtx[user_idx, item_idx], rating)
def test_als_seed(self):
    """Check that the ALS seed decides the fitted feature matrices.

    Two models fitted with seed 0 must end with equal user and item
    features; a model fitted with seed 1 must differ from them in both.
    """
    n_user, n_item = 100, 200
    ratings = make_ratings(n_user, n_item, 20, 30, self.rating_choices,
                           seed=self.seed)
    # Settings shared by every model in this test; only the seed varies.
    shared = dict(reg=1e-2, max_rating=self.max_rat,
                  min_rating=self.min_rat)

    # Same seed: features must match.
    baseline = ALS(n_user, n_item, self.n_feature, seed=0, **shared)
    baseline.fit(ratings, n_iters=3)
    repeat = ALS(n_user, n_item, self.n_feature, seed=0, **shared)
    repeat.fit(ratings, n_iters=3)
    assert_array_equal(baseline.user_features_, repeat.user_features_)
    assert_array_equal(baseline.item_features_, repeat.item_features_)

    # Different seed: the equality check itself is expected to fail.
    reseeded = ALS(n_user, n_item, self.n_feature, seed=1, **shared)
    reseeded.fit(ratings, n_iters=3)
    assert_raises(AssertionError, assert_array_equal,
                  baseline.user_features_, reseeded.user_features_)
    assert_raises(AssertionError, assert_array_equal,
                  baseline.item_features_, reseeded.item_features_)
def test_als_with_missing_data(self):
    """Check that ALS leaves features of an unrated user/item unchanged.

    Ratings are generated for ``n_user - 1`` users and ``n_item - 1`` items
    while the model is sized for ``n_user`` x ``n_item``. The last user and
    item rows are compared before and after one fit iteration.
    """
    n_user = 10
    n_item = 20
    ratings = make_ratings(
        n_user - 1, n_item - 1, 5, 10, self.rating_choices, seed=self.seed)
    als1 = ALS(n_user, n_item, self.n_feature, reg=1e-2, seed=0,
               max_rating=self.max_rat, min_rating=self.min_rat)

    # Snapshot with copy(): a basic slice of an ndarray is a view, so if
    # fit() updates the feature matrices in place the "before" values would
    # silently track the change and the comparison could never fail.
    unuse_user_f_before = als1.user_features_[n_user - 1, :].copy()
    unuse_item_f_before = als1.item_features_[n_item - 1, :].copy()

    als1.fit(ratings, n_iters=1)

    # The last user/item features should be unchanged since there is no
    # rating data for them.
    assert_array_equal(unuse_user_f_before,
                       als1.user_features_[n_user - 1, :])
    assert_array_equal(unuse_item_f_before,
                       als1.item_features_[n_item - 1, :])
def test_bpmf_with_random_data(self):
    """Check that the BPMF RMSE is lower after 3 iterations than after 1.

    Two identically configured BPMF models (seeded with ``self.seed``) are
    fitted on the same generated ratings for 1 and 3 iterations, then scored
    with ``RMSE`` on those same ratings.
    """
    n_user = 1000
    n_item = 2000
    ratings = make_ratings(
        n_user, n_item, 20, 30, self.rating_choices, seed=self.seed)

    scores = []
    for n_iters in (1, 3):
        # Fresh model per run so the only difference is the iteration count.
        model = BPMF(n_user, n_item, self.n_feature,
                     max_rating=self.max_rat, min_rating=self.min_rat,
                     seed=self.seed)
        model.fit(ratings, n_iters=n_iters)
        scores.append(RMSE(model.predict(ratings[:, :2]), ratings[:, 2]))

    rmse_1, rmse_2 = scores
    # assertGreater reports both values on failure; assertTrue(a > b) hides them.
    self.assertGreater(rmse_1, rmse_2)
def setUp(self):
    """Build the shared fixture: sizes, rating choices and generated ratings.

    Stores ``n_user`` (200), ``n_item`` (100), ``choices`` (the integers
    1 through 9) and the array returned by ``make_ratings`` for them.
    """
    n_user, n_item = 200, 100
    choices = list(range(1, 10))

    self.n_user = n_user
    self.n_item = n_item
    self.choices = choices
    # 10 and 20 are the two count arguments of make_ratings.
    self.ratings = make_ratings(n_user, n_item, 10, 20, choices)
def test_bpmf_not_fitted_err(self):
    """Check that ``BPMF.predict`` raises ``NotFittedError`` before ``fit``."""
    ratings = make_ratings(
        10, 10, 1, 5, self.rating_choices, seed=self.seed)
    bpmf = BPMF(10, 10, self.n_feature)
    user_item_pairs = ratings[:, :2]
    # Setup stays outside the context: only an exception raised by
    # predict() itself may satisfy the assertion.
    with self.assertRaises(NotFittedError):
        bpmf.predict(user_item_pairs)
def test_als_not_fitted_err(self):
    """Check that ``ALS.predict`` raises ``NotFittedError`` before ``fit``."""
    ratings = make_ratings(
        10, 10, 1, 5, self.rating_choices, seed=self.seed)
    als = ALS(10, 10, self.n_feature)
    user_item_pairs = ratings[:, :2]
    # Setup stays outside the context: only an exception raised by
    # predict() itself may satisfy the assertion.
    with self.assertRaises(NotFittedError):
        als.predict(user_item_pairs)
def setUp(self):
    """Prepare the per-test fixture attributes.

    Sets ``n_user`` to 200, ``n_item`` to 100, ``choices`` to the integers
    1 through 9, and ``ratings`` to the ``make_ratings`` output for them.
    """
    self.n_user, self.n_item = 200, 100
    self.choices = list(range(1, 10))
    # 10 and 20 are the two count arguments of make_ratings.
    generated = make_ratings(self.n_user, self.n_item, 10, 20, self.choices)
    self.ratings = generated