예제 #1
0
    def test_bpmf_with_random_data(self):
        """Check that fitting for more iterations lowers the training RMSE.

        Two identically configured and identically seeded BPMF models are fit
        on the same synthetic ratings for 1 and 3 iterations. Each model is
        scored on the very ratings it was fit on, and the 3-iteration model is
        expected to have the strictly smaller RMSE.
        """
        n_user = 1000
        n_item = 2000
        # NOTE(review): 20 and 30 are positional arguments whose meaning is
        # not visible here -- confirm against make_ratings.
        ratings = make_ratings(n_user, n_item, 20, 30, self.rating_choices,
                               seed=self.seed)

        def training_rmse(n_iters):
            # A fresh model per run: the two fits share only the data and seed.
            model = BPMF(n_user,
                         n_item,
                         self.n_feature,
                         max_rating=self.max_rat,
                         min_rating=self.min_rat,
                         seed=self.seed)
            model.fit(ratings, n_iters=n_iters)
            # The first two columns are the prediction inputs, the third
            # column is the reference rating.
            return RMSE(model.predict(ratings[:, :2]), ratings[:, 2])

        rmse_1 = training_rmse(1)
        rmse_2 = training_rmse(3)
        # assertGreater prints both operands on failure; assertTrue would
        # only report "False is not true".
        self.assertGreater(rmse_1, rmse_2)
예제 #2
0
    def test_bpmf_convergence(self):
        """Check that a smaller ``converge`` value gives a lower training RMSE.

        Two BPMF models that differ only in ``converge`` (1e-2 vs. 1e-1) are
        fit on the same synthetic ratings with the same seed and the same
        iteration budget, then scored on those ratings. The model built with
        the smaller value is expected to have the strictly smaller RMSE.
        """
        n_user = 100
        n_item = 200
        n_feature = self.n_feature
        # NOTE(review): 20 and 30 are positional arguments whose meaning is
        # not visible here -- confirm against make_ratings.
        ratings = make_ratings(n_user, n_item, 20, 30, self.rating_choices,
                               seed=self.seed)

        def training_rmse(converge):
            # presumably ``converge`` is an early-stopping tolerance; verify
            # against the BPMF implementation.
            model = BPMF(n_user,
                         n_item,
                         n_feature,
                         seed=0,
                         max_rating=self.max_rat,
                         min_rating=self.min_rat,
                         converge=converge)
            model.fit(ratings, n_iters=5)
            return RMSE(model.predict(ratings[:, :2]), ratings[:, 2])

        rmse_1 = training_rmse(1e-2)
        rmse_2 = training_rmse(1e-1)
        # assertLess prints both operands on failure, unlike assertTrue.
        self.assertLess(rmse_1, rmse_2)
예제 #3
0
    def test_bpmf_convergence(self):
        """Check that a smaller ``converge`` value gives a lower training RMSE.

        Fits one BPMF model with ``converge=1e-3`` and one with
        ``converge=1e-2`` on the same synthetic ratings (same seed, five
        iterations each) and expects the first to score a strictly smaller
        RMSE on those ratings.
        """
        n_user = 100
        n_item = 200
        n_feature = self.n_feature
        # NOTE(review): the meaning of the positional 20 and 30 is not
        # visible here -- confirm against make_ratings.
        ratings = make_ratings(
            n_user, n_item, 20, 30, self.rating_choices, seed=self.seed)

        # Everything except ``converge`` is shared by both models.
        shared_kwargs = dict(seed=0,
                             max_rating=self.max_rat,
                             min_rating=self.min_rat)

        tight_model = BPMF(n_user, n_item, n_feature,
                           converge=1e-3, **shared_kwargs)
        tight_model.fit(ratings, n_iters=5)
        rmse_1 = RMSE(tight_model.predict(ratings[:, :2]), ratings[:, 2])

        loose_model = BPMF(n_user, n_item, n_feature,
                           converge=1e-2, **shared_kwargs)
        loose_model.fit(ratings, n_iters=5)
        rmse_2 = RMSE(loose_model.predict(ratings[:, :2]), ratings[:, 2])

        # assertLess reports both RMSE values when the expectation fails.
        self.assertLess(rmse_1, rmse_2)
예제 #4
0
    def test_bpmf_with_ml_100k_rating(self):
        """Check that 30 fit iterations reach a training RMSE below 0.85.

        The model is fit on ``self.ratings`` and scored on the same ratings.
        presumably ``self.ratings`` holds the MovieLens 100k data loaded by
        the test fixture -- verify against the class setup.
        """
        n_user = 943
        n_item = 1682
        n_feature = 10
        ratings = self.ratings

        model = BPMF(n_user, n_item, n_feature,
                     max_rating=5.,
                     min_rating=1.,
                     seed=self.seed)
        model.fit(ratings, n_iters=30)

        # The first two columns are the prediction inputs, the third column
        # is the reference rating.
        predictions = model.predict(ratings[:, :2])
        rmse = RMSE(predictions, ratings[:, 2])
        # assertLess shows the actual RMSE in the failure message.
        self.assertLess(rmse, 0.85)
예제 #5
0
    def test_bpmf_with_ml_100k_rating(self):
        """Check that 15 fit iterations reach a training RMSE below 0.85.

        Fits a 10-feature BPMF model on ``self.ratings`` and evaluates it on
        those same ratings.
        NOTE(review): ``self.ratings`` looks like the MovieLens 100k data set
        up by the fixture -- confirm against the class setup.
        """
        n_user = 943
        n_item = 1682
        n_feature = 10
        ratings = self.ratings

        bpmf = BPMF(n_user,
                    n_item,
                    n_feature,
                    max_rating=5.,
                    min_rating=1.,
                    seed=self.seed)
        bpmf.fit(ratings, n_iters=15)

        inputs = ratings[:, :2]
        targets = ratings[:, 2]
        rmse = RMSE(bpmf.predict(inputs), targets)
        # assertLess includes the measured RMSE in the failure output, which
        # assertTrue(rmse < 0.85) would not.
        self.assertLess(rmse, 0.85)
예제 #6
0
    def test_bpmf_with_random_data(self):
        """Check that 3 fit iterations beat 1 iteration on training RMSE.

        Both models use the same constructor arguments, the same seed and the
        same synthetic ratings; only ``n_iters`` differs. Each model is scored
        on the ratings it was fit on.
        """
        n_user = 1000
        n_item = 2000
        # NOTE(review): the meaning of the positional 20 and 30 is not
        # visible here -- confirm against make_ratings.
        ratings = make_ratings(
            n_user, n_item, 20, 30, self.rating_choices, seed=self.seed)

        # Constructor keywords shared by both models.
        model_kwargs = dict(max_rating=self.max_rat,
                            min_rating=self.min_rat,
                            seed=self.seed)

        short_run = BPMF(n_user, n_item, self.n_feature, **model_kwargs)
        short_run.fit(ratings, n_iters=1)
        rmse_1 = RMSE(short_run.predict(ratings[:, :2]), ratings[:, 2])

        long_run = BPMF(n_user, n_item, self.n_feature, **model_kwargs)
        long_run.fit(ratings, n_iters=3)
        rmse_2 = RMSE(long_run.predict(ratings[:, :2]), ratings[:, 2])

        # assertGreater reports both RMSE values when the expectation fails.
        self.assertGreater(rmse_1, rmse_2)
# Model settings: sweep over several feature counts and collect the train/test
# RMSE values returned by each fit into one results table.
results = pd.DataFrame(
    columns=['Number of features', 'Train RMSE', 'Test RMSE'])
n_features_list = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
eval_iters = 50

# The output path does not depend on the loop variable (it embeds the whole
# feature list), so it is built once instead of on every iteration.
results_path = "results/1M_movielens_features{}_iterations{}.csv".format(
    n_features_list, eval_iters)
# Per-run frames are collected here and joined with pd.concat, because
# DataFrame.append was removed in pandas 2.0.
run_frames = []

for n_feature in n_features_list:
    print(
        "n_user: %d, n_item: %d, n_feature: %d, training size: %d, validation size: %d"
        % (n_user, n_item, n_feature, train.shape[0], validation.shape[0]))
    bpmf = BPMF(n_user=n_user,
                n_item=n_item,
                n_feature=n_feature,
                max_rating=5.,
                min_rating=1.,
                seed=0)

    # NOTE(review): this relies on fit() returning a (train, test) pair of
    # RMSE sequences when a validation set is passed -- confirm against BPMF.
    train_rmse_list, test_rmse_list = bpmf.fit(train,
                                               validation,
                                               n_iters=eval_iters)

    # The scalar feature count is broadcast across every row of this run.
    run_frames.append(pd.DataFrame({
        'Number of features': n_feature,
        'Train RMSE': train_rmse_list,
        'Test RMSE': test_rmse_list
    }))

    # Like the former append(), concat keeps each run's own row index.
    results = pd.concat(run_frames)
    # Rewritten after every run, so the file always holds all runs so far.
    results.to_csv(results_path)
ratings = load_movielens_1m_ratings(rating_file)
# The largest raw ID in each column is used as the user/item count; this must
# happen before the IDs are shifted below. ndarray.max() does the reduction in
# native code, whereas the builtin max() iterates the column in Python.
n_user = ratings[:, 0].max()
n_item = ratings[:, 1].max()

# Shift user_id and movie_id by 1 so that both start from 0.
ratings[:, (0, 1)] -= 1

# Split the data into training and validation sets.
train_pct = 0.9

# Shuffle rows in place so the split below is random rather than file-ordered.
rand_state.shuffle(ratings)
train_size = int(train_pct * ratings.shape[0])
train = ratings[:train_size]
validation = ratings[train_size:]

# Model settings.
n_feature = 20
eval_iters = 50
print("n_user: %d, n_item: %d, n_feature: %d, training size: %d, validation size: %d" % (
    n_user, n_item, n_feature, train.shape[0], validation.shape[0]))
bpmf = BPMF(n_user=n_user, n_item=n_item, n_feature=n_feature,
            max_rating=5., min_rating=1., seed=0)

bpmf.fit(train, n_iters=eval_iters)
# The first two columns are the prediction inputs, the third column is the
# reference rating.
train_preds = bpmf.predict(train[:, :2])
train_rmse = RMSE(train_preds, train[:, 2])
val_preds = bpmf.predict(validation[:, :2])
val_rmse = RMSE(val_preds, validation[:, 2])
print("after %d iteration, train RMSE: %.6f, validation RMSE: %.6f" %
      (eval_iters, train_rmse, val_rmse))