def test_bpmf_with_random_data(self):
    """Check that more sampling iterations lower the training RMSE.

    Two BPMF models are built with identical settings and seed on the same
    ratings from ``make_ratings``; one is fitted for 1 iteration, the other
    for 3. The test asserts that the 1-iteration model has the larger RMSE
    on the training ratings.
    """
    n_user = 1000
    n_item = 2000
    # NOTE(review): the meaning of the positional 20 and 30 is defined by
    # make_ratings, which is not visible here -- confirm before changing.
    ratings = make_ratings(n_user, n_item, 20, 30, self.rating_choices,
                           seed=self.seed)

    def fit_and_score(n_iters):
        # Build a fresh model, fit it, and score it on the ratings it saw.
        model = BPMF(n_user, n_item, self.n_feature,
                     max_rating=self.max_rat, min_rating=self.min_rat,
                     seed=self.seed)
        model.fit(ratings, n_iters=n_iters)
        return RMSE(model.predict(ratings[:, :2]), ratings[:, 2])

    rmse_1 = fit_and_score(1)
    rmse_2 = fit_and_score(3)

    # assertGreater reports both values on failure, unlike assertTrue.
    self.assertGreater(rmse_1, rmse_2)
def test_bpmf_convergence(self):
    """Check that a smaller ``converge`` value gives a lower training RMSE.

    Two BPMF models that differ only in ``converge`` (1e-2 vs. 1e-1) are
    fitted on the same ratings with the same seed and iteration count. The
    test asserts that the model with the smaller value ends with the
    smaller RMSE on the training ratings.
    """
    n_user = 100
    n_item = 200
    n_feature = self.n_feature
    # NOTE(review): the meaning of the positional 20 and 30 is defined by
    # make_ratings, which is not visible here -- confirm before changing.
    ratings = make_ratings(n_user, n_item, 20, 30, self.rating_choices,
                           seed=self.seed)

    def fit_and_score(converge):
        # Build a fresh model, fit it, and score it on the ratings it saw.
        # presumably `converge` is BPMF's early-stopping tolerance; verify
        # against the BPMF implementation.
        model = BPMF(n_user, n_item, n_feature, seed=0,
                     max_rating=self.max_rat, min_rating=self.min_rat,
                     converge=converge)
        model.fit(ratings, n_iters=5)
        return RMSE(model.predict(ratings[:, :2]), ratings[:, 2])

    rmse_1 = fit_and_score(1e-2)
    rmse_2 = fit_and_score(1e-1)

    # assertLess reports both values on failure, unlike assertTrue.
    self.assertLess(rmse_1, rmse_2)
def test_bpmf_convergence(self):
    """Check that a smaller ``converge`` value gives a lower training RMSE.

    Two BPMF models that differ only in ``converge`` (1e-3 vs. 1e-2) are
    fitted on the same ratings with the same seed and iteration count. The
    test asserts that the model with the smaller value ends with the
    smaller RMSE on the training ratings.
    """
    n_user = 100
    n_item = 200
    n_feature = self.n_feature
    # NOTE(review): the meaning of the positional 20 and 30 is defined by
    # make_ratings, which is not visible here -- confirm before changing.
    ratings = make_ratings(n_user, n_item, 20, 30, self.rating_choices,
                           seed=self.seed)

    def fit_and_score(converge):
        # Build a fresh model, fit it, and score it on the ratings it saw.
        # presumably `converge` is BPMF's early-stopping tolerance; verify
        # against the BPMF implementation.
        model = BPMF(n_user, n_item, n_feature, seed=0,
                     max_rating=self.max_rat, min_rating=self.min_rat,
                     converge=converge)
        model.fit(ratings, n_iters=5)
        return RMSE(model.predict(ratings[:, :2]), ratings[:, 2])

    rmse_1 = fit_and_score(1e-3)
    rmse_2 = fit_and_score(1e-2)

    # assertLess reports both values on failure, unlike assertTrue.
    self.assertLess(rmse_1, rmse_2)
def test_bpmf_with_ml_100k_rating(self):
    """Check that 30 iterations bring training RMSE below 0.85.

    Fits a 10-feature BPMF model on ``self.ratings`` with ratings bounded
    to [1, 5], then scores the model on the same ratings it was fitted on.
    """
    # NOTE(review): presumably the user/item counts of the MovieLens 100k
    # data held in self.ratings -- confirm against the fixture.
    n_user = 943
    n_item = 1682
    n_feature = 10
    ratings = self.ratings

    bpmf = BPMF(n_user, n_item, n_feature,
                max_rating=5., min_rating=1., seed=self.seed)
    bpmf.fit(ratings, n_iters=30)
    rmse = RMSE(bpmf.predict(ratings[:, :2]), ratings[:, 2])

    # assertLess reports the actual RMSE on failure, unlike assertTrue.
    self.assertLess(rmse, 0.85)
def test_bpmf_with_ml_100k_rating(self):
    """Check that 15 iterations bring training RMSE below 0.85.

    Fits a 10-feature BPMF model on ``self.ratings`` with ratings bounded
    to [1, 5], then scores the model on the same ratings it was fitted on.
    """
    # NOTE(review): presumably the user/item counts of the MovieLens 100k
    # data held in self.ratings -- confirm against the fixture.
    n_user = 943
    n_item = 1682
    n_feature = 10
    ratings = self.ratings

    bpmf = BPMF(n_user, n_item, n_feature,
                max_rating=5., min_rating=1., seed=self.seed)
    bpmf.fit(ratings, n_iters=15)
    rmse = RMSE(bpmf.predict(ratings[:, :2]), ratings[:, 2])

    # assertLess reports the actual RMSE on failure, unlike assertTrue.
    self.assertLess(rmse, 0.85)
def test_bpmf_with_random_data(self):
    """Check that more sampling iterations lower the training RMSE.

    Two BPMF models are built with identical settings and seed on the same
    ratings from ``make_ratings``; one is fitted for 1 iteration, the other
    for 3. The test asserts that the 1-iteration model has the larger RMSE
    on the training ratings.
    """
    n_user = 1000
    n_item = 2000
    # NOTE(review): the meaning of the positional 20 and 30 is defined by
    # make_ratings, which is not visible here -- confirm before changing.
    ratings = make_ratings(
        n_user, n_item, 20, 30, self.rating_choices, seed=self.seed)

    def fit_and_score(n_iters):
        # Build a fresh model, fit it, and score it on the ratings it saw.
        model = BPMF(n_user, n_item, self.n_feature,
                     max_rating=self.max_rat, min_rating=self.min_rat,
                     seed=self.seed)
        model.fit(ratings, n_iters=n_iters)
        return RMSE(model.predict(ratings[:, :2]), ratings[:, 2])

    rmse_1 = fit_and_score(1)
    rmse_2 = fit_and_score(3)

    # assertGreater reports both values on failure, unlike assertTrue.
    self.assertGreater(rmse_1, rmse_2)
# Model settings: sweep over several latent-feature counts and record the
# train/test RMSE values returned by each fit.
result_columns = ['Number of features', 'Train RMSE', 'Test RMSE']
n_features_list = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
eval_iters = 50

# Collect one frame per feature count and concatenate once at the end.
# DataFrame.append was removed in pandas 2.0, and growing a frame inside the
# loop re-copies all previously accumulated rows on every pass.
frames = []
for n_feature in n_features_list:
    print(
        "n_user: %d, n_item: %d, n_feature: %d, training size: %d, validation size: %d"
        % (n_user, n_item, n_feature, train.shape[0], validation.shape[0]))
    bpmf = BPMF(n_user=n_user, n_item=n_item, n_feature=n_feature,
                max_rating=5., min_rating=1., seed=0)
    # This fit() call is unpacked into two sequences of RMSE values
    # (presumably one entry per iteration -- confirm against BPMF.fit).
    train_rmse_list, test_rmse_list = bpmf.fit(train, validation,
                                               n_iters=eval_iters)
    # The scalar feature count is broadcast against the RMSE sequences.
    row = pd.DataFrame({
        'Number of features': n_feature,
        'Train RMSE': train_rmse_list,
        'Test RMSE': test_rmse_list
    }, columns=result_columns)
    frames.append(row)

# Each row frame keeps its own index, matching what repeated append() produced.
results = pd.concat(frames)

# NOTE(review): the file name embeds the list repr (brackets, commas and
# spaces); kept unchanged so existing consumers of the path still find it.
results.to_csv("results/1M_movielens_features{}_iterations{}.csv".format(
    n_features_list, eval_iters))
# Load the ratings; the code below reads column 0 as user id, column 1 as
# item id and column 2 as the rating value.
ratings = load_movielens_1m_ratings(rating_file)

# Largest raw ids, taken before the shift below, serve as the user and item
# counts. ndarray.max() reduces in native code, whereas the builtin max()
# iterates over the column one element at a time.
# assumes `ratings` is a 2-D NumPy array -- TODO confirm against the loader
n_user = ratings[:, 0].max()
n_item = ratings[:, 1].max()

# shift user_id & movie_id by 1 so that both start from 0
ratings[:, :2] -= 1

# split data into training & validation sets
train_pct = 0.9
# presumably a NumPy RandomState that shuffles the rows in place -- verify
# where rand_state is created
rand_state.shuffle(ratings)
train_size = int(train_pct * ratings.shape[0])
train = ratings[:train_size]
validation = ratings[train_size:]

# model settings
n_feature = 20
eval_iters = 50
print("n_user: %d, n_item: %d, n_feature: %d, training size: %d, validation size: %d" % (
    n_user, n_item, n_feature, train.shape[0], validation.shape[0]))

bpmf = BPMF(n_user=n_user, n_item=n_item, n_feature=n_feature,
            max_rating=5., min_rating=1., seed=0)
bpmf.fit(train, n_iters=eval_iters)

# Score the fitted model on both splits.
train_preds = bpmf.predict(train[:, :2])
train_rmse = RMSE(train_preds, train[:, 2])
val_preds = bpmf.predict(validation[:, :2])
val_rmse = RMSE(val_preds, validation[:, 2])
print("after %d iteration, train RMSE: %.6f, validation RMSE: %.6f" %
      (eval_iters, train_rmse, val_rmse))