# Shared imports for the listings below.
import logging

import numpy as np

import pyrecoutils


def main(input_filename, output_filename):
    reviews = pyrecoutils.load_ratings(input_filename)
    training_set, validation_set, test_set = pyrecoutils.split_sets(reviews)
    model = UserBias()
    model.fit(training_set)
    model.select(validation_set)
    with pyrecoutils.PredictionWriter(output_filename) as out:
        training_prediction = [model.predict(u) for u, i, r in training_set]
        out.dump(training_set, training_prediction)
        training_rmse = pyrecoutils.rmse(training_set, training_prediction)
        out.space_line()
        validation_prediction = [
            model.predict(u) for u, i, r in validation_set
        ]
        out.dump(validation_set, validation_prediction)
        validation_rmse = pyrecoutils.rmse(validation_set,
                                           validation_prediction)
        out.space_line()
        test_prediction = [model.predict(u) for u, i, r in test_set]
        out.dump(test_set, test_prediction)
        test_rmse = pyrecoutils.rmse(test_set, test_prediction)
    logging.info(u"RMSE: %.4f %.4f %.4f",
                 training_rmse, validation_rmse, test_rmse)
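

# The pyrecoutils helpers (load_ratings, split_sets, rmse, PredictionWriter)
# are not shown in these listings. Below is a minimal sketch of what rmse and
# split_sets could look like, assuming reviews are (user, item, rating)
# triples and an 80/10/10 random split; the proportions and signatures are
# assumptions, not the actual pyrecoutils code.
def rmse(reviews, predictions):
    """Root mean squared error between observed ratings and predictions."""
    errors = [r - p for (u, i, r), p in zip(reviews, predictions)]
    return float(np.sqrt(np.mean(np.square(errors))))


def split_sets(reviews, seed=0):
    """Shuffle the reviews and split them into training/validation/test."""
    rng = np.random.RandomState(seed)
    reviews = list(reviews)
    rng.shuffle(reviews)
    n_train = int(0.8 * len(reviews))
    n_valid = int(0.1 * len(reviews))
    return (reviews[:n_train],
            reviews[n_train:n_train + n_valid],
            reviews[n_train + n_valid:])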


def main(input_filename, output_filename, components, epochs, learning_rate,
         decay_rate, user_l2, item_l2, seed):
    reviews = pyrecoutils.load_ratings(input_filename)
    training_set, validation_set, test_set = pyrecoutils.split_sets(reviews)
    model = MatrixFactorization(components, epochs, learning_rate, decay_rate,
                                user_l2, item_l2, seed)
    model.fit(training_set, validation_set, test_set)
    model.select(validation_set)
    with pyrecoutils.PredictionWriter(output_filename) as out:
        training_prediction = [model.predict(u, i)
                               for u, i, r in training_set]
        out.dump(training_set, training_prediction)
        training_rmse = pyrecoutils.rmse(training_set, training_prediction)
        out.space_line()
        validation_prediction = [
            model.predict(u, i) for u, i, r in validation_set
        ]
        out.dump(validation_set, validation_prediction)
        validation_rmse = pyrecoutils.rmse(validation_set,
                                           validation_prediction)
        out.space_line()
        test_prediction = [model.predict(u, i) for u, i, r in test_set]
        out.dump(test_set, test_prediction)
        test_rmse = pyrecoutils.rmse(test_set, test_prediction)
    logging.info(u"RMSE: %.4f %.4f %.4f",
                 training_rmse, validation_rmse, test_rmse)
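

# One way a command-line entry point for the matrix factorization script could
# look; the flag names and default values below are illustrative assumptions,
# not taken from the original code.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="Matrix factorization recommender baseline")
    parser.add_argument("input_filename")
    parser.add_argument("output_filename")
    parser.add_argument("--components", type=int, default=10)
    parser.add_argument("--epochs", type=int, default=20)
    parser.add_argument("--learning-rate", type=float, default=0.005)
    parser.add_argument("--decay-rate", type=float, default=0.95)
    parser.add_argument("--user-l2", type=float, default=0.02)
    parser.add_argument("--item-l2", type=float, default=0.02)
    parser.add_argument("--seed", type=int, default=0)
    args = parser.parse_args()
    logging.basicConfig(level=logging.INFO)
    main(args.input_filename, args.output_filename, args.components,
         args.epochs, args.learning_rate, args.decay_rate,
         args.user_l2, args.item_l2, args.seed)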


def fit(self, training_reviews, validation_reviews, test_reviews):
    self._initialize(training_reviews)
    for epoch in range(self.epochs):
        for u, i, r in training_reviews:
            b_u, b_i = self.user_biases[u], self.item_biases[i]
            p_u, q_i = self.user_latent[u], self.item_latent[i]
            # Prediction error for this observed rating.
            delta = self.overall + b_u + b_i + np.dot(p_u, q_i) - r
            # Gradient steps with L2 regularization on biases and factors.
            self.user_biases[u] -= self.learning_rate * (
                delta + self.user_l2 * b_u)
            self.user_latent[u] -= self.learning_rate * (
                delta * q_i + self.user_l2 * p_u)
            self.item_biases[i] -= self.learning_rate * (
                delta + self.item_l2 * b_i)
            self.item_latent[i] -= self.learning_rate * (
                delta * p_u + self.item_l2 * q_i)
        # Decay the learning rate and log the RMSE after each epoch.
        self.learning_rate *= self.decay_rate
        training_rmse = pyrecoutils.rmse(
            training_reviews,
            [self.predict(u, i) for u, i, r in training_reviews])
        validation_rmse = pyrecoutils.rmse(
            validation_reviews,
            [self.predict(u, i) for u, i, r in validation_reviews])
        test_rmse = pyrecoutils.rmse(
            test_reviews,
            [self.predict(u, i) for u, i, r in test_reviews])
        logging.info(u"Epoch % 4d - RMSE %f %f %f",
                     epoch, training_rmse, validation_rmse, test_rmse)
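

# fit() above relies on _initialize() and predict(), which are not shown. The
# sketch below is one way _initialize() could look, assuming per-user/per-item
# dictionaries, small random latent factors seeded by self.seed, and a global
# mean baseline; the initialization scale and the count bookkeeping are
# assumptions. A possible predict() using the thresholds tuned by select()
# further below is sketched separately after that listing.
def _initialize(self, training_reviews):
    rng = np.random.RandomState(self.seed)
    self.overall = np.mean([r for u, i, r in training_reviews])
    self.user_biases, self.item_biases = {}, {}
    self.user_latent, self.item_latent = {}, {}
    self.user_counts, self.item_counts = {}, {}
    self.user_threshold, self.item_threshold = 0, 0
    for u, i, r in training_reviews:
        if u not in self.user_biases:
            self.user_biases[u] = 0.0
            self.user_latent[u] = 0.01 * rng.randn(self.components)
        if i not in self.item_biases:
            self.item_biases[i] = 0.0
            self.item_latent[i] = 0.01 * rng.randn(self.components)
        self.user_counts[u] = self.user_counts.get(u, 0) + 1
        self.item_counts[i] = self.item_counts.get(i, 0) + 1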
def select(self, reviews): logging.info(u"Selecting threshold on %d reviews", len(reviews)) rmses = [] for t in range(10): self.threshold = t predictions = [self.predict(u) for u, i, r in reviews] rmses.append(pyrecoutils.rmse(reviews, predictions)) threshold = np.argmin(rmses) logging.info(u"Selected threshold: %f", threshold) self.threshold = threshold


def main(input_filename, output_filename):
    reviews = pyrecoutils.load_ratings(input_filename)
    training_set, validation_set, test_set = pyrecoutils.split_sets(reviews)
    model = OverallBias()
    model.fit(training_set)
    with pyrecoutils.PredictionWriter(output_filename) as out:
        training_prediction = [model.bias] * len(training_set)
        out.dump(training_set, training_prediction)
        training_rmse = pyrecoutils.rmse(training_set, training_prediction)
        out.space_line()
        validation_prediction = [model.bias] * len(validation_set)
        out.dump(validation_set, validation_prediction)
        validation_rmse = pyrecoutils.rmse(validation_set,
                                           validation_prediction)
        out.space_line()
        test_prediction = [model.bias] * len(test_set)
        out.dump(test_set, test_prediction)
        test_rmse = pyrecoutils.rmse(test_set, test_prediction)
    logging.info(u"RMSE: %.4f %.4f %.4f",
                 training_rmse, validation_rmse, test_rmse)
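

# OverallBias is not shown; it is the simplest baseline, predicting the same
# value for every review. A minimal sketch of its fit(), assuming that value
# is the mean training rating (the attribute name bias matches its use in the
# main() above):
def fit(self, training_reviews):
    # Predict every rating as the mean rating observed during training.
    self.bias = np.mean([r for u, i, r in training_reviews])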
def select(self, reviews): logging.info(u"Selecting threshold on %d reviews", len(reviews)) rmses = [] for t_u in range(10): self.user_threshold = t_u for t_i in range(10): self.item_threshold = t_i predictions = [self.predict(u, i) for u, i, r in reviews] rmses.append(pyrecoutils.rmse(reviews, predictions)) threshold = np.argmin(rmses) user_threshold = threshold / 10 item_threshold = threshold % 10 logging.info(u"Selected user threshold: %f", user_threshold) self.user_threshold = user_threshold logging.info(u"Selected item threshold: %f", item_threshold) self.item_threshold = item_threshold