def test_bpmf_convergence(self):
    """Compare two BPMF fits that differ only in their ``converge`` value.

    Both models share the same synthetic ratings, seed and iteration
    budget; the test requires the ``converge=1e-2`` model to reach a lower
    training RMSE than the ``converge=1e-1`` model.
    """
    n_user = 100
    n_item = 200
    n_feature = self.n_feature
    ratings = make_ratings(n_user, n_item, 20, 30, self.rating_choices,
                           seed=self.seed)

    def fit_and_score(converge):
        # Build a fresh model per setting so neither run sees the other's state.
        model = BPMF(n_user, n_item, n_feature, seed=0,
                     max_rating=self.max_rat, min_rating=self.min_rat,
                     converge=converge)
        model.fit(ratings, n_iters=5)
        return RMSE(model.predict(ratings[:, :2]), ratings[:, 2])

    rmse_1 = fit_and_score(1e-2)
    rmse_2 = fit_and_score(1e-1)

    # assertLess reports both values on failure, unlike assertTrue(a < b).
    self.assertLess(rmse_1, rmse_2)
def test_als_with_random_data(self):
    """Check that more ALS iterations lower the training RMSE.

    Two identically configured ALS models are fitted on the same synthetic
    ratings, one for 1 iteration and one for 3; the 1-iteration RMSE must
    be strictly greater.
    """
    n_user = 100
    n_item = 200
    n_feature = self.n_feature
    ratings = make_ratings(n_user, n_item, 20, 30, self.rating_choices,
                           seed=self.seed)

    def fit_and_score(n_iters):
        # A fresh model per run keeps the two fits independent.
        model = ALS(n_user, n_item, n_feature, reg=1e-2, seed=0,
                    max_rating=self.max_rat, min_rating=self.min_rat)
        model.fit(ratings, n_iters=n_iters)
        return RMSE(model.predict(ratings[:, :2]), ratings[:, 2])

    rmse_1 = fit_and_score(1)
    rmse_2 = fit_and_score(3)

    # assertGreater reports both values on failure, unlike assertTrue(a > b).
    self.assertGreater(rmse_1, rmse_2)
def test_bpmf_with_random_data(self):
    """Check that more BPMF iterations lower the training RMSE.

    Two identically configured BPMF models are fitted on the same synthetic
    ratings, one for 1 iteration and one for 3; the 1-iteration RMSE must
    be strictly greater.
    """
    n_user = 1000
    n_item = 2000
    ratings = make_ratings(n_user, n_item, 20, 30, self.rating_choices,
                           seed=self.seed)

    def fit_and_score(n_iters):
        # A fresh model per run keeps the two fits independent.
        model = BPMF(n_user, n_item, self.n_feature,
                     max_rating=self.max_rat, min_rating=self.min_rat,
                     seed=self.seed)
        model.fit(ratings, n_iters=n_iters)
        return RMSE(model.predict(ratings[:, :2]), ratings[:, 2])

    rmse_1 = fit_and_score(1)
    rmse_2 = fit_and_score(3)

    # assertGreater reports both values on failure, unlike assertTrue(a > b).
    self.assertGreater(rmse_1, rmse_2)
def test_als_convergence(self):
    """Compare two ALS fits that differ only in their ``converge`` value.

    Both models share the same synthetic ratings, seed and iteration
    budget; the test requires the ``converge=1e-2`` model to reach a lower
    training RMSE than the ``converge=1e-1`` model.
    """
    n_user = 100
    n_item = 200
    n_feature = self.n_feature
    ratings = make_ratings(n_user, n_item, 20, 30, self.rating_choices,
                           seed=self.seed)

    def fit_and_score(converge):
        # Build a fresh model per setting so neither run sees the other's state.
        model = ALS(n_user, n_item, n_feature, reg=1e-2, seed=0,
                    max_rating=self.max_rat, min_rating=self.min_rat,
                    converge=converge)
        model.fit(ratings, n_iters=10)
        return RMSE(model.predict(ratings[:, :2]), ratings[:, 2])

    rmse_1 = fit_and_score(1e-2)
    rmse_2 = fit_and_score(1e-1)

    # assertLess reports both values on failure, unlike assertTrue(a < b).
    self.assertLess(rmse_1, rmse_2)
def test_pmf_with_random_data(self):
    """Check that more PMF iterations lower the training RMSE.

    Two identically configured PMF models are fitted on the same synthetic
    ratings, one for 1 iteration and one for 3; the 1-iteration RMSE must
    be strictly greater.
    """
    n_user = 1000
    n_item = 2000
    n_feature = self.n_feature
    ratings = make_ratings(n_user, n_item, 20, 30, self.rating_choices,
                           seed=self.seed)

    def fit_and_score(n_iters):
        # A fresh model per run keeps the two fits independent.
        model = PMF(n_user, n_item, n_feature, batch_size=1000.,
                    epsilon=10., seed=0,
                    max_rating=self.max_rat, min_rating=self.min_rat)
        model.fit(ratings, n_iters=n_iters)
        return RMSE(model.predict(ratings[:, :2]), ratings[:, 2])

    rmse_1 = fit_and_score(1)
    rmse_2 = fit_and_score(3)

    # assertGreater reports both values on failure, unlike assertTrue(a > b).
    self.assertGreater(rmse_1, rmse_2)
def test_bpmf_with_ml_100k_rating(self):
    """Fit BPMF on ``self.ratings`` and require a training RMSE below 0.85.

    The model uses 943 users, 1682 items, 10 features and ratings bounded
    to [1, 5], and is trained for 15 iterations.
    """
    n_user = 943
    n_item = 1682
    n_feature = 10
    ratings = self.ratings

    bpmf = BPMF(n_user, n_item, n_feature,
                max_rating=5., min_rating=1., seed=self.seed)
    bpmf.fit(ratings, n_iters=15)
    rmse = RMSE(bpmf.predict(ratings[:, :2]), ratings[:, 2])

    # assertLess shows the actual RMSE when the threshold is missed.
    self.assertLess(rmse, 0.85)
def test_als_with_ml_100k_rating(self):
    """Fit ALS on ``self.ratings`` and require a training RMSE below 0.8.

    The model uses 943 users, 1682 items, 10 features, ``reg=1e-2`` and
    ratings bounded to [1, 5], and is trained for 5 iterations.
    """
    n_user = 943
    n_item = 1682
    n_feature = 10
    ratings = self.ratings

    als = ALS(n_user, n_item, n_feature, reg=1e-2,
              max_rating=5., min_rating=1., seed=self.seed)
    als.fit(ratings, n_iters=5)
    rmse = RMSE(als.predict(ratings[:, :2]), ratings[:, 2])

    # assertLess shows the actual RMSE when the threshold is missed.
    self.assertLess(rmse, 0.8)
def test_pmf_with_ml_100k_rating(self):
    """Fit PMF on ``self.ratings`` and require a training RMSE below 0.85.

    The model uses 943 users, 1682 items, 10 features, ``batch_size=1e4``,
    ``epsilon=20.``, ``reg=1e-4`` and ratings bounded to [1, 5], and is
    trained for 15 iterations.
    """
    n_user = 943
    n_item = 1682
    n_feature = 10
    ratings = self.ratings

    pmf = PMF(n_user, n_item, n_feature, batch_size=1e4, epsilon=20.,
              reg=1e-4, max_rating=5., min_rating=1., seed=self.seed)
    pmf.fit(ratings, n_iters=15)
    rmse = RMSE(pmf.predict(ratings[:, :2]), ratings[:, 2])

    # assertLess shows the actual RMSE when the threshold is missed.
    self.assertLess(rmse, 0.85)
# Everything from row `train_size` onwards is held out for validation.
validation = ratings[train_size:]
print(train.shape, validation.shape)
print(train[0, :], train[1, :])

# Optional visual check of the two splits (left disabled):
# plt.imshow(train, cmap='jet', interpolation='nearest')
# plt.show()
# plt.imshow(validation, cmap='jet', interpolation='nearest')
# plt.show()

# models settings
n_feature = 10
eval_iters = 10
print(
    "n_user: %d, n_item: %d, n_feature: %d, training size: %d, validation size: %d"
    % (n_user, n_item, n_feature, train.shape[0], validation.shape[0])
)

# Fit ALS on the training split only.
als = ALS(
    n_user=n_user,
    n_item=n_item,
    n_feature=n_feature,
    reg=5e-2,
    max_rating=5.,
    min_rating=1.,
    seed=0,
)
als.fit(train, n_iters=eval_iters)

# Score the fitted model on both splits; columns 0-1 are passed to predict()
# and column 2 is compared against the predictions.
train_preds = als.predict(train[:, :2])
train_rmse = RMSE(train_preds, train[:, 2])
val_preds = als.predict(validation[:, :2])
val_rmse = RMSE(val_preds, validation[:, 2])

print(
    "after %d iterations, train RMSE: %.6f, validation RMSE: %.6f"
    % (eval_iters, train_rmse, val_rmse)
)
# -*- coding: utf-8 -*-
"""
Created on Sun May 26 17:34:22 2019

@author: dblab
"""
import numpy as np

from recommend.bpmf import BPMF
from recommend.utils.evaluation import RMSE
from recommend.utils.datasets import load_movielens_1m_ratings

# load user ratings
ratings = load_movielens_1m_ratings('ml-1m/ratings.dat')

# Ids are still 1-based here, so the largest id doubles as the count.
# ndarray.max() stays in NumPy instead of iterating element by element.
n_user = ratings[:, 0].max()
n_item = ratings[:, 1].max()

# shift ids by 1 to let user_id & movie_id start from 0
ratings[:, (0, 1)] -= 1

# fit model
bpmf = BPMF(n_user=n_user, n_item=n_item, n_feature=10,
            max_rating=5., min_rating=1., seed=0).fit(ratings, n_iters=20)

# training RMSE -- printed explicitly, because a bare expression statement
# produces no output when this file is run as a script.
train_rmse = RMSE(bpmf.predict(ratings[:, :2]), ratings[:, 2])
print("training RMSE: %.6f" % train_rmse)

# predict ratings for user 0 and item 0 to 9:
print(bpmf.predict(np.array([[0, i] for i in range(10)])))
ratings = load_movielens_1m_ratings('ml-1m/ratings.dat')

# Ids are still 1-based here, so the largest id doubles as the count.
n_user = ratings[:, 0].max()
n_item = ratings[:, 1].max()

# shift ids by 1 to let user_id & movie_id start from 0
ratings[:, (0, 1)] -= 1

# fit model
bpmf = BPMF(n_user=n_user, n_item=n_item, n_feature=10,
            max_rating=5., min_rating=1., seed=0).fit(ratings, n_iters=5)

# training RMSE
rmse = RMSE(bpmf.predict(ratings[:, :2]), ratings[:, 2])
print("RMSE= 1 ---", rmse)

# predict ratings for one user over items 0 to endmovieNum - 1

# id of the user to produce recommendations for
userId = 5
# candidate movie set to recommend from: items 0 to endmovieNum - 1
endmovieNum = n_item
# number of top movies to display
topN = 5

# range() exists on both Python 2 and 3; xrange() raises NameError on Python 3.
array = bpmf.predict(np.array([[userId, i] for i in range(endmovieNum)]))

movies = load_movielens_movies('ml-1m/movies.dat')

# Map (first field - 1) -> second field for each loaded movie row; presumably
# zero-based movie id -> title -- verify against load_movielens_movies.
dictMovies = {int(movie[0]) - 1: str(movie[1])
              for movie in movies[:endmovieNum]}
def test_rmse(self):
    """RMSE of 100 ones against 100 zeros is expected to be sqrt(100 / 99)."""
    ones = np.ones(100)
    zeros = np.zeros(100)
    expected = np.sqrt(100. / 99.)

    np_test.assert_almost_equal(RMSE(ones, zeros), expected)
def test_rmse_same_input(self):
    """RMSE of an array compared with itself is expected to be (almost) zero."""
    # Fixed seed so the 100 normal samples are identical on every run.
    samples = RandomState(0).randn(100)

    np_test.assert_almost_equal(RMSE(samples, samples), 0.)