Example #1
    def fit(self, data, test_split=True, test_portion=0.1, search_parameter_space=False):

        if not search_parameter_space:

            self.test_split = test_split
            self.__set_data(data, test_portion)

        print('Initializing features for Users and Items...')
        initial = initializer(self.user_ids, self.item_ids, self.initialization_method,
                              self.n_latent, 0, 0)

        user_features, item_features = initial.initialize_latent_vectors()
        bu = {key: 0 for key in self.train_data_user_ids}
        bi = {key: 0 for key in self.train_data_item_ids}

        if self.biased:
            global_mean = self.all_ratings_in_train / len(self.train_data)
        else:
            global_mean = 0

        for current_epoch in range(self.n_epochs):

            if self.verbose:
                print("Processing epoch {}".format(current_epoch))

            # (re)initialize nums and denoms to zero
            self.user_num = {key: np.zeros(self.n_latent) for key in self.train_data_user_ids}
            self.user_denom = {key: np.zeros(self.n_latent) for key in self.train_data_user_ids}
            self.item_num = {key: np.zeros(self.n_latent) for key in self.train_data_item_ids}
            self.item_denom = {key: np.zeros(self.n_latent) for key in self.train_data_item_ids}

            for u, i, r in self.train_data:

                # compute current estimation and error
                dot = 0  # <q_i, p_u>
                for f in range(self.n_latent):
                    dot += user_features[u][f] * item_features[i][f]
                est = global_mean + bu[u] + bi[i] + dot
                err = r - est

                # Update biases
                if self.biased:
                    bu[u] += self.lr_bu * (err - self.reg_bu * bu[u])
                    bi[i] += self.lr_bi * (err - self.reg_bi * bi[i])

                # Compute numerators and denominators
                for f in range(self.n_latent):
                    self.user_num[u][f] += item_features[i][f] * r
                    self.user_denom[u][f] += item_features[i][f] * est
                    self.item_num[i][f] += user_features[u][f] * r
                    self.item_denom[i][f] += user_features[u][f] * est

            # Update user factors
            for u in self.train_data_user_ids:
                n_ratings = self.user_n_ratings[u]
                for f in range(self.n_latent):
                    self.user_denom[u][f] += n_ratings * self.reg_user_features * user_features[u][f]
                    user_features[u][f] *= self.user_num[u][f] / self.user_denom[u][f]

            # Update item factors
            for i in self.train_data_item_ids:
                n_ratings = self.item_n_ratings[i]
                for f in range(self.n_latent):
                    self.item_denom[i][f] += n_ratings * self.reg_item_features * item_features[i][f]
                    item_features[i][f] *= self.item_num[i][f] / self.item_denom[i][f]

            # Calculate errors
            train_error = Test.rmse_error(
                self.train_data, user_features, item_features)

            # Show error to Client
            if self.test_split:
                test_error = Test.rmse_error(
                    self.test_data, user_features, item_features)
                print('Epoch Number: {}/{} Training RMSE: {:.2f} Test RMSE: {:.2f}'.format(
                    current_epoch + 1, self.n_epochs, train_error, test_error))

            else:
                print('Epoch Number: {}/{} Training RMSE: {:.2f}'.format(
                    current_epoch + 1, self.n_epochs, train_error))

        self.bu = bu
        self.bi = bi
        self.user_features = user_features
        self.item_features = item_features
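
This first variant follows a multiplicative-update scheme: per-feature numerators and denominators are accumulated over the training triples, and each factor is rescaled by their ratio, the update rule used in (biased) non-negative matrix factorization. A minimal usage sketch follows; the class name NMFRecommender and its constructor arguments are assumptions made purely for illustration, while the fit signature and the attributes read afterwards come from the example above.

# Hypothetical usage sketch -- class name and constructor arguments are assumed.
ratings = [('u1', 'i1', 4.0), ('u1', 'i2', 3.0), ('u2', 'i1', 5.0)]
model = NMFRecommender(n_latent=20, n_epochs=30, biased=True)  # assumed constructor
model.fit(ratings, test_split=True, test_portion=0.1)
# After training, the learned parameters are available as model.user_features,
# model.item_features, model.bu and model.bi.
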
Example #2
    def fit(self, data, test_split=True, test_portion=0.1, search_parameter_space=False):

        # Set train_data, test_data, user_ids etc. if search parameter is False
        # If True, this lets us search parameter space with the same train-test split
        if not search_parameter_space:

            self.test_split = test_split
            self.__set_data(data, test_portion)

        # Initialization
        print('Initializing features for Users and Items...')
        initial = initializer(self.user_ids, self.item_ids, self.initialization_method,
                              self.n_latent, self.init_mean, self.init_std)

        self.user_features, self.item_features = initial.initialize_latent_vectors()

        # Training
        print('Starting training...')
        error_counter = 0
        for epoch in range(self.max_epoch):

            # updating user and item features
            for user, item, rating in self.train_data:

                error = rating - \
                    np.dot(self.user_features[user], self.item_features[item])
                # Copy the user vector before updating it so the item update below
                # uses the pre-update values (+= modifies the NumPy array in place).
                temp = self.user_features[user].copy()

                # Update user and item feature for each user, item and rating pair
                self.user_features[user] += self.learning_rate * \
                    (error * self.item_features[item] -
                     self.regularization * self.user_features[user])
                self.item_features[item] += self.learning_rate * \
                    (error * temp - self.regularization *
                     self.item_features[item])

            # Calculate errors
            error_counter += 1
            train_error = Test.rmse_error(
                self.train_data, self.user_features, self.item_features)

            # Show error to Client
            if self.test_split:
                test_error = Test.rmse_error(
                    self.test_data, self.user_features, self.item_features)
                print('Epoch Number: {}/{} Training RMSE: {:.2f} Test RMSE: {:.2f}'.format(
                    epoch + 1, self.max_epoch, train_error, test_error))

            else:
                print('Epoch Number: {}/{} Training RMSE: {:.2f}'.format(
                    epoch + 1, self.max_epoch, train_error))

            # Save best features depending on test_error
            if self.test_split and test_error < self.min_test_error:
                self.min_test_error = test_error
                self.best_user_features = copy.deepcopy(self.user_features)
                self.best_item_features = copy.deepcopy(self.item_features)

                error_counter = 0
            # Save best features if there is no test split
            elif not self.test_split and train_error < self.min_train_error:
                self.min_train_error = train_error
                self.best_user_features = copy.deepcopy(self.user_features)
                self.best_item_features = copy.deepcopy(self.item_features)

                error_counter = 0

            # Break if the monitored error didn't improve for the last n rounds and early stopping is set
            if self.early_stopping and error_counter >= self.early_stopping:

                print("Error didn't get lower for the last {} epochs. Training is stopped.".format(
                    error_counter))
                if self.test_split:
                    print('Best test error is: {:.2f}. Best features are saved.'.format(
                        self.min_test_error))
                break

        print('Training has ended...')
        self.user_features = copy.deepcopy(self.best_user_features)
        self.item_features = copy.deepcopy(self.best_item_features)
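
All three examples delegate error reporting to Test.rmse_error(data, user_features, item_features), whose implementation is not shown here. A minimal sketch consistent with how it is called might look as follows; the plain dot-product prediction matches Example #2, and the biased variants would additionally need the global mean and bias terms.

import numpy as np

class Test:

    @staticmethod
    def rmse_error(data, user_features, item_features):
        # Root-mean-square error over (user, item, rating) triples,
        # predicting each rating as the dot product <p_u, q_i>.
        squared_errors = [
            (rating - np.dot(user_features[user], item_features[item])) ** 2
            for user, item, rating in data
        ]
        return np.sqrt(np.mean(squared_errors))
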
Example #3
    def fit(self, data, test_split=True, test_portion=0.1, search_parameter_space=False):
        lr_bu = self.lr_bu
        lr_bi = self.lr_bi
        lr_pu = self.lr_pu
        lr_qi = self.lr_qi
        lr_yj = self.lr_yj

        reg_bu = self.reg_bu
        reg_bi = self.reg_bi
        reg_pu = self.reg_pu
        reg_qi = self.reg_qi
        reg_yj = self.reg_yj

        if not search_parameter_space:

            self.test_split = test_split
            self.__set_data(data, test_portion)

        print('Initializing features for Users and Items...')
        initial = initializer(self.user_ids, self.item_ids, self.initialization_method,
                              self.n_latent, self.init_mean, self.init_std)

        self.pu, self.qi = initial.initialize_latent_vectors(initalization_method='normal')
        _, self.yj = initial.initialize_latent_vectors(initalization_method='normal')

        self.bu = {key: 0 for key in self.train_data_user_ids}
        self.bi = {key: 0 for key in self.train_data_item_ids}

        if self.biased:
            global_mean = self.all_ratings_in_train / len(self.train_data)
        else:
            global_mean = 0

        for current_epoch in range(self.n_epochs):
            if self.verbose:
                print(" processing epoch {}".format(current_epoch))

            for u, i, r in self.train_data:

                # items rated by u
                self.Iu = list(self.user_existing_ratings[u])
                self.sqrt_Iu = np.sqrt(len(self.Iu))

                # implicit feedback
                self.u_impl_fdb = np.zeros(self.n_latent, np.double)
                for j in self.Iu:
                    for f in range(self.n_latent):
                        self.u_impl_fdb[f] += self.yj[j][f] / self.sqrt_Iu

                # compute current error
                dot = 0
                for f in range(self.n_latent):
                    dot += self.qi[i][f] * (self.pu[u][f] + self.u_impl_fdb[f])

                err = r - (global_mean + self.bu[u] + self.bi[i] + dot)

                # update biases
                self.bu[u] += lr_bu * (err - reg_bu * self.bu[u])
                self.bi[i] += lr_bi * (err - reg_bi * self.bi[i])

                # update factors
                for f in range(self.n_latent):
                    
                    puf = self.pu[u][f]
                    qif = self.qi[i][f]
                    self.pu[u][f] += lr_pu * (err * qif - reg_pu * puf)
                    self.qi[i][f] += lr_qi * (err * puf - reg_qi * qif)
                    for j in self.Iu:
                        self.yj[j][f] += lr_yj * (err * qif / self.sqrt_Iu -
                                                  reg_yj * self.yj[j][f])
            # Calculate errors
            train_error = Test.rmse_error(
                self.train_data, self.pu, self.qi)

            # Show error to Client
            if self.test_split:
                test_error = Test.rmse_error(
                    self.test_data, self.pu, self.qi)
                print('Epoch Number: {}/{} Training RMSE: {:.2f} Test RMSE: {:.2f}'.format(
                    current_epoch + 1, self.n_epochs, train_error, test_error))

            else:
                print('Epoch Number: {}/{} Training RMSE: {:.2f}'.format(
                    current_epoch + 1, self.n_epochs, train_error))

        # bu, bi, pu, qi and yj are already attributes of self, so there is
        # nothing further to copy back after training.
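
The initializer helper is shared by all three fit variants and is not shown either. Below is a minimal sketch, assuming it returns one latent vector per user id and per item id, drawn either uniformly in [0, 1) (a plausible default for the non-negative updates of Example #1) or from a normal distribution with the given mean and standard deviation. The misspelled keyword initalization_method is kept exactly as it appears in Example #3's calls.

import numpy as np

class initializer:

    def __init__(self, user_ids, item_ids, initialization_method, n_latent, init_mean, init_std):
        self.user_ids = user_ids
        self.item_ids = item_ids
        self.initialization_method = initialization_method
        self.n_latent = n_latent
        self.init_mean = init_mean
        self.init_std = init_std

    def initialize_latent_vectors(self, initalization_method=None):
        # Build one latent vector per user and per item, keyed by id.
        method = initalization_method or self.initialization_method

        def draw():
            if method == 'normal':
                return np.random.normal(self.init_mean, self.init_std, self.n_latent)
            # Assumed fallback: non-negative uniform draws in [0, 1).
            return np.random.uniform(0, 1, self.n_latent)

        user_features = {user: draw() for user in self.user_ids}
        item_features = {item: draw() for item in self.item_ids}
        return user_features, item_features
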