def test_load_movielens_1m_ratings(self):
    """Load the MovieLens 1M test fixture and verify its shape and edge rows."""
    fixture_path = os.path.join(TEST_DATA_DIR, TEST_ML_1M_RATING_FILE)
    loaded = load_movielens_1m_ratings(fixture_path)

    # The fixture is expected to load as a 1000 x 3 table.
    row_count, col_count = loaded.shape
    self.assertEqual(row_count, 1000)
    self.assertEqual(col_count, 3)

    # Spot-check the first and the last row of the loaded data.
    expected_rows = (
        (0, [1, 1193, 5]),
        (-1, [10, 1022, 5]),
    )
    for position, expected in expected_rows:
        np_test.assert_array_equal(loaded[position], expected)
zip_name, _ = urllib.request.urlretrieve(ML_1M_URL, "ml-1m.zip") with zipfile.ZipFile(zip_name, 'r') as zf: file_path = zf.extract('ml-1m/ratings.dat') # check file statinfo = os.stat(file_path) if statinfo.st_size == file_size: print('verify success: %s' % file_path) else: raise Exception('verify failed: %s' % file_path) return file_path # load or download MovieLens 1M dataset rating_file = ml_1m_download(ML_1M_FOLDER, file_size=ML_1M_ZIP_SIZE) ratings = load_movielens_1m_ratings(rating_file) n_user = max(ratings[:, 0]) n_item = max(ratings[:, 1]) # shift user_id & movie_id by 1. let user_id & movie_id start from 0 ratings[:, (0, 1)] -= 1 plt.imshow(ratings, cmap='jet', interpolation='nearest') # split data to training & testing train_pct = 0.9 rand_state.shuffle(ratings) train_size = int(train_pct * ratings.shape[0]) train = ratings[:train_size] validation = ratings[train_size:]
# -*- coding: utf-8 -*-
"""
Fit a BPMF model on the MovieLens 1M ratings file and print its predictions.

The script loads ``ml-1m/ratings.dat``, fits the model on all loaded ratings,
reports the RMSE on that same (training) data and prints the predicted
ratings of user 0 for items 0 to 9.

Created on Sun May 26 17:34:22 2019

@author: dblab
"""
import numpy as np

from recommend.bpmf import BPMF
from recommend.utils.evaluation import RMSE
from recommend.utils.datasets import load_movielens_1m_ratings

# load user ratings
ratings = load_movielens_1m_ratings('ml-1m/ratings.dat')

# Take the largest ids *before* shifting: with ids starting at 1 the maximum
# id equals the number of distinct slots the model has to allocate.
# ndarray.max() does the reduction in native code instead of iterating the
# column element by element through the builtin max().
n_user = ratings[:, 0].max()
n_item = ratings[:, 1].max()

# shift ids by 1 to let user_id & movie_id start from 0
ratings[:, (0, 1)] -= 1

# fit model
bpmf = BPMF(n_user=n_user, n_item=n_item, n_feature=10,
            max_rating=5., min_rating=1., seed=0).fit(ratings, n_iters=20)

# training RMSE: evaluated on the same ratings the model was fitted on.
# The value is printed because a bare expression is silently discarded when
# this file is run as a script (it only echoes in an interactive session).
train_rmse = RMSE(bpmf.predict(ratings[:, :2]), ratings[:, 2])
print("training RMSE:", train_rmse)

# predict ratings for user 0 and item 0 to 9:
user_item_pairs = np.array([[0, i] for i in range(10)])
print(bpmf.predict(user_item_pairs))
print("file %s not exists. downloading..." % file_path) zip_name, _ = urllib.request.urlretrieve(ML_1M_URL, "ml-1m.zip") with zipfile.ZipFile(zip_name, 'r') as zf: file_path = zf.extract('ml-1m/ratings.dat') # check file statinfo = os.stat(file_path) if statinfo.st_size == file_size: print('verify success: %s' % file_path) else: raise Exception('verify failed: %s' % file_path) return file_path # load or download MovieLens 1M dataset rating_file = ml_1m_download(ML_1M_FOLDER, file_size=ML_1M_ZIP_SIZE) ratings = load_movielens_1m_ratings(rating_file) n_user = max(ratings[:, 0]) n_item = max(ratings[:, 1]) # shift user_id & movie_id by 1. let user_id & movie_id start from 0 ratings[:, (0, 1)] -= 1 # split data to training & testing train_pct = 0.9 rand_state.shuffle(ratings) train_size = int(train_pct * ratings.shape[0]) train = ratings[:train_size] validation = ratings[train_size:] # models settings n_feature = 10