Exemple #1
0
    def load(self, ratings, train, validation):
        """Store the data splits, initialise the model state and run estimation.

        Args:
            ratings: Full rating data. It is only handed to
                ``movielens_extractor`` to obtain the user and item counts.
            train: Training ratings. Column 2 is read as the rating value
                (presumably columns 0 and 1 hold user and item ids -- TODO
                confirm against ``build_rating_matrix``).
            validation: Validation ratings. Column 2 is read as the rating
                value and stored as float64 in ``self.ratings_test``.
        """
        self.train = train
        self.validation = validation
        self.num_users = movielens_extractor.get_num_users(ratings)
        self.num_items = movielens_extractor.get_num_items(ratings)

        self.mean_rating = np.mean(self.train[:, 2])
        self.ratings_test = np.float64(validation[:, 2])

        # Item features are drawn before user features; keep this order so
        # the sequence of random draws stays the same.
        self.item_features = 0.1 * NormalRandom.generate_matrix(
            self.num_items, self.num_features)
        self.user_features = 0.1 * NormalRandom.generate_matrix(
            self.num_users, self.num_features)

        # Posterior degrees of freedom: prior value plus the entity count.
        self.df_post_item = self.df_item + self.num_items
        self.df_post_user = self.df_user + self.num_users

        # The rating matrix is stored transposed relative to what
        # build_rating_matrix returns.
        self.matrix = build_rating_matrix(
            self.num_users, self.num_items, train).T
        self.counter_prob = 1
        self.probe_rat_all = self.pred(self.item_features, self.user_features,
                                       self.validation, self.mean_rating)

        self.estimate()
Exemple #2
0
    def update_user_features(self):
        """Resample every row of ``self.user_features`` (Gibbs step for users).

        ``self.matrix`` is replaced by its transpose first, so every call
        flips the stored orientation. Column ``user_id`` of the flipped
        matrix is then read as that user's ratings, and entries ``> 0.0``
        are treated as observed.
        NOTE(review): this only lines up if the matrix arrives in the
        opposite orientation -- confirm the call order in the caller.

        For each user the new feature vector is ``mean + L z``, where ``L``
        is ``cholesky(covar)`` and ``z`` comes from
        ``NormalRandom.generate_matrix``. A progress line per user and one
        summary line are written to ``self.results_file``.
        """
        self.matrix = self.matrix.T

        for user_id in range(self.num_users):
            self.results_file.write('User %d\n' % (user_id + 1))

            # Items with a positive rating from this user, and their factors.
            rated = self.matrix[:, user_id] > 0.0
            item_factors = self.item_features[rated, :]

            # Ratings with the global mean removed, as a column vector.
            centred = self.matrix[rated, user_id] - self.mean_rating
            centred = np.reshape(centred, (len(centred), 1))

            precision = self.alpha_user + \
                self.beta * np.dot(item_factors.T, item_factors)
            covar = inv(precision)
            chol_factor = cholesky(covar)

            linear_term = self.beta * np.dot(item_factors.T, centred) + \
                np.dot(self.alpha_user, self.mu_user)
            mean = np.dot(covar, linear_term)

            noise = NormalRandom.generate_matrix(self.num_features, 1)
            draw = mean + np.dot(chol_factor, noise)
            self.user_features[user_id, :] = np.reshape(
                draw, (self.num_features, ))

        n_rows, n_cols = self.user_features.shape[0], self.user_features.shape[1]
        self.results_file.write(
            'user_features \t (%d,%d) \t %16.16f\n' %
            (n_rows, n_cols, self.user_features[0, 0]))
Exemple #3
0
def sample_wishart(sigma, dof):
    """Draw one sample from a Wishart distribution.

    The Wishart distribution is the conjugate prior for precision matrices.

    Args:
        sigma: Square scale matrix. It is Cholesky-factorised, so it has to
            be symmetric positive-definite.
        dof: Degrees of freedom; used as the number of random rows drawn.

    Returns:
        ``X.T`` times ``X``, where ``X`` is a ``dof``-row random matrix
        multiplied by the transposed Cholesky factor of ``sigma``.
    """
    dim = sigma.shape[0]
    factor_t = np.linalg.cholesky(sigma).T

    # Each noise row is mapped through the transposed factor; the outer
    # products of the mapped rows are then summed by the final product.
    mapped_rows = np.dot(NormalRandom.generate_matrix(dof, dim), factor_t)
    return np.dot(mapped_rows.T, mapped_rows)
Exemple #4
0
    def udpate_item_features(self):
        """Resample every row of ``self.item_features`` (Gibbs step for items).

        The method name is misspelled ("udpate"); it is kept as is so that
        existing callers continue to work.

        ``self.matrix`` is replaced by its transpose first, so every call
        flips the stored orientation. Column ``item_id`` of the flipped
        matrix is then read as that item's ratings, and entries ``> 0.0``
        are treated as observed.
        NOTE(review): this only lines up if the matrix arrives in the
        opposite orientation -- confirm the call order in the caller.

        For each item the new feature vector is ``mean + L z``, where ``L``
        is ``cholesky(covar)`` and ``z`` comes from
        ``NormalRandom.generate_matrix``. One progress line per item is
        written to ``self.results_file``.
        """
        self.matrix = self.matrix.T

        for item_id in range(self.num_items):
            self.results_file.write('Item %d\n' % (item_id + 1))

            # Users with a positive rating for this item, and their factors.
            raters = self.matrix[:, item_id] > 0.0
            user_factors = self.user_features[raters, :]

            # Ratings with the global mean removed, as a column vector.
            centred = self.matrix[raters, item_id] - self.mean_rating
            centred = np.reshape(centred, (len(centred), 1))

            precision = self.alpha_item + \
                self.beta * np.dot(user_factors.T, user_factors)
            covar = inv(precision)
            chol_factor = cholesky(covar)

            linear_term = self.beta * np.dot(user_factors.T, centred) + \
                np.dot(self.alpha_item, self.mu_item)
            mean = np.dot(covar, linear_term)

            noise = NormalRandom.generate_matrix(self.num_features, 1)
            draw = mean + np.dot(chol_factor, noise)
            self.item_features[item_id, :] = np.reshape(
                draw, (self.num_features, ))
Exemple #5
0
    def update_user_params(self):
        """Resample ``self.alpha_user`` and ``self.mu_user``.

        Both are drawn from quantities computed from the current
        ``self.user_features``: its column-wise mean and ``np.cov`` of its
        transpose, combined with ``self.mu0_user``, ``self.beta_user``,
        ``self.WI_user`` and ``self.df_post_user``.

        ``self.alpha_user`` is drawn first via ``sample_wishart``;
        ``self.mu_user`` is then drawn using the new ``self.alpha_user``.
        """
        n_users = self.num_users

        # Column mean of the user features as a (num_features, 1) vector.
        feature_mean = np.reshape(
            np.mean(self.user_features, 0).T, (self.num_features, 1))
        feature_cov = np.cov(self.user_features.T)
        mean_gap = self.mu0_user - feature_mean

        gap_term = np.dot(mean_gap, mean_gap.T) * \
            (n_users * self.beta_user) / (self.beta_user + n_users)
        scale_post = inv(
            inv(self.WI_user) + n_users * feature_cov + gap_term)

        # Average with the transpose so the matrix is exactly symmetric
        # before it is passed on (presumably to remove the small asymmetry
        # left by the numerical inverse -- TODO confirm).
        scale_post = (scale_post + scale_post.T) / 2.0

        # Update alpha_user.
        self.alpha_user = sample_wishart(scale_post, self.df_post_user)

        # Update mu_user.
        mu_centre = (self.beta_user * self.mu0_user +
                     n_users * feature_mean) / (self.beta_user + n_users)
        chol_factor = cholesky(
            inv((self.beta_user + n_users) * self.alpha_user))
        noise = NormalRandom.generate_matrix(self.num_features, 1)
        self.mu_user = mu_centre + np.dot(chol_factor, noise)