def test_als_convergence(self):
    """Check that a smaller ``converge`` value yields a lower training RMSE.

    Two ALS models that differ only in ``converge`` (1e-2 vs 1e-1) are
    fitted on the same synthetic ratings with the same seed and the same
    iteration budget, then scored on the ratings they were fitted on.
    """
    n_user = 100
    n_item = 200
    n_feature = self.n_feature
    ratings = make_ratings(n_user, n_item, 20, 30, self.rating_choices,
                           seed=self.seed)

    def fit_and_score(converge):
        # Only `converge` varies between the two runs.
        model = ALS(n_user, n_item, n_feature, reg=1e-2, seed=0,
                    max_rating=self.max_rat, min_rating=self.min_rat,
                    converge=converge)
        model.fit(ratings, n_iters=10)
        # Columns 0-1 are passed to predict(); column 2 is the target rating.
        return RMSE(model.predict(ratings[:, :2]), ratings[:, 2])

    rmse_1 = fit_and_score(1e-2)
    rmse_2 = fit_and_score(1e-1)

    # assertLess reports both values on failure, unlike assertTrue(a < b).
    self.assertLess(rmse_1, rmse_2)
def test_als_convergence(self):
    """Check that a smaller ``converge`` value yields a lower training RMSE.

    Two ALS models that differ only in ``converge`` (1e-2 vs 1e-1) are
    fitted on the same synthetic ratings with the same seed and the same
    iteration budget, then scored on the ratings they were fitted on.
    """
    n_user = 100
    n_item = 200
    n_feature = self.n_feature
    ratings = make_ratings(
        n_user, n_item, 20, 30, self.rating_choices, seed=self.seed)

    def fit_and_score(converge):
        # Only `converge` varies between the two runs.
        model = ALS(n_user, n_item, n_feature, reg=1e-2, seed=0,
                    max_rating=self.max_rat, min_rating=self.min_rat,
                    converge=converge)
        model.fit(ratings, n_iters=10)
        # Columns 0-1 are passed to predict(); column 2 is the target rating.
        return RMSE(model.predict(ratings[:, :2]), ratings[:, 2])

    rmse_1 = fit_and_score(1e-2)
    rmse_2 = fit_and_score(1e-1)

    # assertLess reports both values on failure, unlike assertTrue(a < b).
    self.assertLess(rmse_1, rmse_2)
def test_als_with_random_data(self):
    """Check that more ALS iterations give a lower training RMSE.

    Two identically configured models are fitted on the same synthetic
    ratings, one for a single iteration and one for three, and each is
    scored on the ratings it was fitted on.
    """
    n_user = 100
    n_item = 200
    n_feature = self.n_feature
    ratings = make_ratings(n_user, n_item, 20, 30, self.rating_choices,
                           seed=self.seed)

    def fit_and_score(n_iters):
        # Only the iteration count varies between the two runs.
        model = ALS(n_user, n_item, n_feature, reg=1e-2, seed=0,
                    max_rating=self.max_rat, min_rating=self.min_rat)
        model.fit(ratings, n_iters=n_iters)
        return RMSE(model.predict(ratings[:, :2]), ratings[:, 2])

    rmse_1 = fit_and_score(1)
    rmse_2 = fit_and_score(3)

    # assertGreater reports both values on failure, unlike assertTrue(a > b).
    self.assertGreater(rmse_1, rmse_2)
def test_als_with_missing_data(self):
    """Check that ``fit`` leaves features of an unrated user/item untouched.

    Ratings are generated for ``n_user - 1`` users and ``n_item - 1`` items
    while the model is sized for ``n_user`` x ``n_item``, so the last user
    row and the last item row have no rating data behind them.
    """
    n_user = 10
    n_item = 20
    n_feature = self.n_feature
    ratings = make_ratings(n_user - 1, n_item - 1, 5, 10,
                           self.rating_choices, seed=self.seed)

    als1 = ALS(n_user, n_item, n_feature, reg=1e-2, seed=0,
               max_rating=self.max_rat, min_rating=self.min_rat)

    # Snapshot with .copy(): a basic slice of a numpy array is a view, so
    # if fit() updates the feature matrices in place the "before" rows
    # would alias the "after" rows and the comparison could never fail.
    # NOTE(review): assumes the feature matrices are numpy arrays - confirm.
    unuse_user_f_before = als1.user_features_[n_user - 1, :].copy()
    unuse_item_f_before = als1.item_features_[n_item - 1, :].copy()

    als1.fit(ratings, n_iters=1)

    unuse_user_f_after = als1.user_features_[n_user - 1, :]
    unuse_item_f_after = als1.item_features_[n_item - 1, :]

    # last user/item feature should be
    # unchanged since no rating data on them
    assert_array_equal(unuse_user_f_before, unuse_user_f_after)
    assert_array_equal(unuse_item_f_before, unuse_item_f_after)
def test_als_with_random_data(self):
    """Check that more ALS iterations give a lower training RMSE.

    Two identically configured models are fitted on the same synthetic
    ratings, one for a single iteration and one for three, and each is
    scored on the ratings it was fitted on.
    """
    n_user = 100
    n_item = 200
    n_feature = self.n_feature
    ratings = make_ratings(
        n_user, n_item, 20, 30, self.rating_choices, seed=self.seed)

    def fit_and_score(n_iters):
        # Only the iteration count varies between the two runs.
        model = ALS(n_user, n_item, n_feature, reg=1e-2, seed=0,
                    max_rating=self.max_rat, min_rating=self.min_rat)
        model.fit(ratings, n_iters=n_iters)
        return RMSE(model.predict(ratings[:, :2]), ratings[:, 2])

    rmse_1 = fit_and_score(1)
    rmse_2 = fit_and_score(3)

    # assertGreater reports both values on failure, unlike assertTrue(a > b).
    self.assertGreater(rmse_1, rmse_2)
def test_als_with_ml_100k_rating(self):
    """Fit ALS on ``self.ratings`` and require a training RMSE below 0.8.

    The model is sized for 943 users and 1682 items with 10 features and
    ratings bounded to [1, 5]; it is scored on the data it was fitted on.
    """
    n_user = 943
    n_item = 1682
    n_feature = 10
    ratings = self.ratings

    als = ALS(n_user, n_item, n_feature, reg=1e-2,
              max_rating=5., min_rating=1., seed=self.seed)
    als.fit(ratings, n_iters=5)

    rmse = RMSE(als.predict(ratings[:, :2]), ratings[:, 2])
    # assertLess prints the actual RMSE on failure, unlike assertTrue(a < b).
    self.assertLess(rmse, 0.8)
def test_als_with_missing_data(self):
    """Check that ``fit`` leaves features of an unrated user/item untouched.

    Ratings are generated for ``n_user - 1`` users and ``n_item - 1`` items
    while the model is sized for ``n_user`` x ``n_item``, so the last user
    row and the last item row have no rating data behind them.
    """
    n_user = 10
    n_item = 20
    n_feature = self.n_feature
    ratings = make_ratings(
        n_user - 1, n_item - 1, 5, 10, self.rating_choices, seed=self.seed)

    als1 = ALS(n_user, n_item, n_feature, reg=1e-2, seed=0,
               max_rating=self.max_rat, min_rating=self.min_rat)

    # Snapshot with .copy(): a basic slice of a numpy array is a view, so
    # if fit() updates the feature matrices in place the "before" rows
    # would alias the "after" rows and the comparison could never fail.
    # NOTE(review): assumes the feature matrices are numpy arrays - confirm.
    unuse_user_f_before = als1.user_features_[n_user - 1, :].copy()
    unuse_item_f_before = als1.item_features_[n_item - 1, :].copy()

    als1.fit(ratings, n_iters=1)

    unuse_user_f_after = als1.user_features_[n_user - 1, :]
    unuse_item_f_after = als1.item_features_[n_item - 1, :]

    # last user/item feature should be
    # unchanged since no rating data on them
    assert_array_equal(unuse_user_f_before, unuse_user_f_after)
    assert_array_equal(unuse_item_f_before, unuse_item_f_after)
def test_als_seed(self):
    """Same seed must reproduce the features; a different seed must not.

    Three models are fitted on the same synthetic ratings for three
    iterations each: two with seed 0 and one with seed 1.
    """
    n_user, n_item = 100, 200
    n_feature = self.n_feature
    ratings = make_ratings(
        n_user, n_item, 20, 30, self.rating_choices, seed=self.seed)

    def fitted(seed):
        # Build and fit one model; only the seed differs between calls.
        model = ALS(n_user, n_item, n_feature, reg=1e-2, seed=seed,
                    max_rating=self.max_rat, min_rating=self.min_rat)
        model.fit(ratings, n_iters=3)
        return model

    # seed 0, twice: both feature matrices must match exactly
    reference = fitted(0)
    repeat = fitted(0)
    assert_array_equal(reference.user_features_, repeat.user_features_)
    assert_array_equal(reference.item_features_, repeat.item_features_)

    # seed 1: the equality check itself is expected to raise
    other = fitted(1)
    assert_raises(AssertionError, assert_array_equal,
                  reference.user_features_, other.user_features_)
    assert_raises(AssertionError, assert_array_equal,
                  reference.item_features_, other.item_features_)
def test_als_seed(self):
    """Same seed must reproduce the features; a different seed must not.

    Three models are fitted on the same synthetic ratings for three
    iterations each: two with seed 0 and one with seed 1.
    """
    n_user, n_item = 100, 200
    n_feature = self.n_feature
    ratings = make_ratings(n_user, n_item, 20, 30, self.rating_choices,
                           seed=self.seed)

    def fitted(seed):
        # Build and fit one model; only the seed differs between calls.
        model = ALS(n_user, n_item, n_feature, reg=1e-2, seed=seed,
                    max_rating=self.max_rat, min_rating=self.min_rat)
        model.fit(ratings, n_iters=3)
        return model

    # seed 0, twice: both feature matrices must match exactly
    reference = fitted(0)
    repeat = fitted(0)
    assert_array_equal(reference.user_features_, repeat.user_features_)
    assert_array_equal(reference.item_features_, repeat.item_features_)

    # seed 1: the equality check itself is expected to raise
    other = fitted(1)
    assert_raises(AssertionError, assert_array_equal,
                  reference.user_features_, other.user_features_)
    assert_raises(AssertionError, assert_array_equal,
                  reference.item_features_, other.item_features_)
# Everything after the training slice is held out for validation.
validation = ratings[train_size:]

# Quick look at the split: both shapes, then the first two training rows.
print(train.shape, validation.shape)
print(train[0, :], train[1, :])

# NOTE: heat-map plotting of the train / validation splits
# (plt.imshow(..., cmap='jet', interpolation='nearest'); plt.show())
# is intentionally disabled here.

# models settings
n_feature = 10
eval_iters = 10
print(
    "n_user: %d, n_item: %d, n_feature: %d, training size: %d, validation size: %d"
    % (n_user, n_item, n_feature, train.shape[0], validation.shape[0])
)

# Fit ALS on the training split only.
als = ALS(
    n_user=n_user,
    n_item=n_item,
    n_feature=n_feature,
    reg=5e-2,
    max_rating=5.,
    min_rating=1.,
    seed=0,
)
als.fit(train, n_iters=eval_iters)

# Score both splits: columns 0-1 go to predict(), column 2 is the target.
train_preds = als.predict(train[:, :2])
train_rmse = RMSE(train_preds, train[:, 2])
val_preds = als.predict(validation[:, :2])
val_rmse = RMSE(val_preds, validation[:, 2])
print(
    "after %d iterations, train RMSE: %.6f, validation RMSE: %.6f"
    % (eval_iters, train_rmse, val_rmse)
)
# Fetch the rating file and load it into a 2-D ratings array.
rating_file = ml_1m_download(ML_1M_FOLDER, file_size=ML_1M_ZIP_SIZE)
ratings = load_movielens_1m_ratings(rating_file)

# Ids are still 1-based at this point, so the largest id is used as the
# user / item count. The array's own max() runs in native code, whereas the
# builtin max() walks the column one element at a time in Python.
# NOTE(review): assumes `ratings` is a numpy ndarray - confirm.
n_user = ratings[:, 0].max()
n_item = ratings[:, 1].max()

# shift user_id & movie_id by 1. let user_id & movie_id start from 0
ratings[:, (0, 1)] -= 1

# split data to training & testing
train_pct = 0.9
rand_state.shuffle(ratings)
train_size = int(train_pct * ratings.shape[0])
train = ratings[:train_size]
validation = ratings[train_size:]

# models settings
n_feature = 10
eval_iters = 10
print("n_user: %d, n_item: %d, n_feature: %d, training size: %d, validation size: %d" % (
    n_user, n_item, n_feature, train.shape[0], validation.shape[0]))

# Fit ALS on the training split only.
als = ALS(n_user=n_user, n_item=n_item, n_feature=n_feature,
          reg=5e-2, max_rating=5., min_rating=1., seed=0)
als.fit(train, n_iters=eval_iters)

# Score both splits: columns 0-1 go to predict(), column 2 is the target.
train_preds = als.predict(train[:, :2])
train_rmse = RMSE(train_preds, train[:, 2])
val_preds = als.predict(validation[:, :2])
val_rmse = RMSE(val_preds, validation[:, 2])
# No backslash needed: the open parenthesis already continues the line.
print("after %d iterations, train RMSE: %.6f, validation RMSE: %.6f" %
      (eval_iters, train_rmse, val_rmse))