Example #1
0
    def test_load_movielens_1m_ratings(self):
        # Load the test ratings file and verify the overall shape
        # (1000 rows, 3 columns) plus the first and last records.
        sample_path = os.path.join(TEST_DATA_DIR, TEST_ML_1M_RATING_FILE)
        loaded = load_movielens_1m_ratings(sample_path)

        row_count, col_count = loaded.shape
        self.assertEqual(row_count, 1000)
        self.assertEqual(col_count, 3)

        # (row index, expected record) pairs, checked in file order
        boundary_rows = ((0, [1, 1193, 5]), (-1, [10, 1022, 5]))
        for row_idx, expected in boundary_rows:
            np_test.assert_array_equal(loaded[row_idx], expected)
Example #2
0
    def test_load_movielens_1m_ratings(self):
        # The loader is expected to return a 1000 x 3 array for the test
        # ratings file; the first and last rows are pinned as a spot check.
        ratings = load_movielens_1m_ratings(
            os.path.join(TEST_DATA_DIR, TEST_ML_1M_RATING_FILE))

        dims = ratings.shape
        n_row, n_col = dims
        self.assertEqual(n_row, 1000)
        self.assertEqual(n_col, 3)

        first_row = ratings[0]
        np_test.assert_array_equal(first_row, [1, 1193, 5])
        last_row = ratings[-1]
        np_test.assert_array_equal(last_row, [10, 1022, 5])
Example #3
0
        zip_name, _ = urllib.request.urlretrieve(ML_1M_URL, "ml-1m.zip")
        with zipfile.ZipFile(zip_name, 'r') as zf:
            file_path = zf.extract('ml-1m/ratings.dat')

    # check file
    statinfo = os.stat(file_path)
    if statinfo.st_size == file_size:
        print('verify success: %s' % file_path)
    else:
        raise Exception('verify failed: %s' % file_path)
    return file_path


# load or download MovieLens 1M dataset
rating_file = ml_1m_download(ML_1M_FOLDER, file_size=ML_1M_ZIP_SIZE)
ratings = load_movielens_1m_ratings(rating_file)

# Largest user / movie id, taken BEFORE the ids are shifted below.
# ndarray.max() reduces the column in native code; the builtin max() would
# iterate over every element at Python level and return the same value.
# NOTE(review): using the max id as a count assumes ids are 1-based - confirm.
n_user = ratings[:, 0].max()
n_item = ratings[:, 1].max()

# shift user_id & movie_id by 1. let user_id & movie_id start from 0
ratings[:, (0, 1)] -= 1

plt.imshow(ratings, cmap='jet', interpolation='nearest')

# split data to training & testing
train_pct = 0.9
# shuffle rows in place so the split below is a random 90/10 partition
rand_state.shuffle(ratings)
train_size = int(train_pct * ratings.shape[0])
train = ratings[:train_size]
validation = ratings[train_size:]
Example #4
0
# -*- coding: utf-8 -*-
"""
Created on Sun May 26 17:34:22 2019

@author: dblab
"""

import numpy as np
from recommend.bpmf import BPMF
from recommend.utils.evaluation import RMSE
from recommend.utils.datasets import load_movielens_1m_ratings

# load user ratings
ratings = load_movielens_1m_ratings('ml-1m/ratings.dat')

# Largest user / movie id, taken BEFORE the ids are shifted below.
# ndarray.max() reduces the column in native code; the builtin max() would
# iterate over every element at Python level and return the same value.
n_user = ratings[:, 0].max()
n_item = ratings[:, 1].max()
ratings[:,
        (0, 1)] -= 1  # shift ids by 1 to let user_id & movie_id start from 0

# fit model
bpmf = BPMF(n_user=n_user,
            n_item=n_item,
            n_feature=10,
            max_rating=5.,
            min_rating=1.,
            seed=0).fit(ratings, n_iters=20)

# training RMSE: predictions for the (user, item) columns vs. the rating
# column. Printed explicitly, because a bare expression statement is
# silently discarded when this file runs as a script (outside a REPL).
train_rmse = RMSE(bpmf.predict(ratings[:, :2]), ratings[:, 2])
print("training RMSE: %s" % train_rmse)

# predict ratings for user 0 and item 0 to 9:
print(bpmf.predict(np.array([[0, i] for i in range(10)])))
        print("file %s not exists. downloading..." % file_path)
        zip_name, _ = urllib.request.urlretrieve(ML_1M_URL, "ml-1m.zip")
        with zipfile.ZipFile(zip_name, 'r') as zf:
            file_path = zf.extract('ml-1m/ratings.dat')

    # check file
    statinfo = os.stat(file_path)
    if statinfo.st_size == file_size:
        print('verify success: %s' % file_path)
    else:
        raise Exception('verify failed: %s' % file_path)
    return file_path

# load or download MovieLens 1M dataset
rating_file = ml_1m_download(ML_1M_FOLDER, file_size=ML_1M_ZIP_SIZE)
ratings = load_movielens_1m_ratings(rating_file)

# Largest user / movie id, taken BEFORE the ids are shifted below.
# ndarray.max() reduces the column in native code; the builtin max() would
# iterate over every element at Python level and return the same value.
# NOTE(review): using the max id as a count assumes ids are 1-based - confirm.
n_user = ratings[:, 0].max()
n_item = ratings[:, 1].max()

# shift user_id & movie_id by 1. let user_id & movie_id start from 0
ratings[:, (0, 1)] -= 1

# split data to training & testing
train_pct = 0.9
# shuffle rows in place so the split below is a random 90/10 partition
rand_state.shuffle(ratings)
train_size = int(train_pct * ratings.shape[0])
train = ratings[:train_size]
validation = ratings[train_size:]

# models settings
n_feature = 10