Ejemplo n.º 1
0
def main(input_filename, output_filename):
    """Fit a UserBias model and report its predictions and RMSE per split.

    Ratings are loaded from *input_filename* and split into training,
    validation and test sets. The model is fitted on the training set and
    its threshold selected on the validation set. Predictions for the three
    sets are written to *output_filename* (separated by a spacer line) and
    the three RMSE values are logged.
    """
    ratings = pyrecoutils.load_ratings(input_filename)
    train, valid, test = pyrecoutils.split_sets(ratings)

    model = UserBias()
    model.fit(train)
    model.select(valid)

    scores = []
    with pyrecoutils.PredictionWriter(output_filename) as out:
        for position, dataset in enumerate((train, valid, test)):
            # A spacer line goes between consecutive sections only.
            if position:
                out.space_line()
            guesses = [model.predict(user) for user, _item, _rating in dataset]
            out.dump(dataset, guesses)
            scores.append(pyrecoutils.rmse(dataset, guesses))

    train_rmse, valid_rmse, test_rmse = scores
    logging.info(u"RMSE: %.4f %.4f %.4f", train_rmse, valid_rmse, test_rmse)
Ejemplo n.º 2
0
def main(input_filename, output_filename, components, epochs, learning_rate,
         decay_rate, user_l2, item_l2, seed):
    """Fit a MatrixFactorization model and report predictions and RMSE.

    Ratings are loaded from *input_filename* and split into training,
    validation and test sets. The model is built from the given
    hyper-parameters, fitted (all three sets are handed to ``fit``), and
    ``select`` is run on the validation set. Predictions for each set are
    written to *output_filename* (separated by a spacer line) and the three
    RMSE values are logged.
    """
    ratings = pyrecoutils.load_ratings(input_filename)
    train, valid, test = pyrecoutils.split_sets(ratings)

    model = MatrixFactorization(
        components, epochs, learning_rate, decay_rate, user_l2, item_l2, seed
    )
    model.fit(train, valid, test)
    model.select(valid)

    scores = []
    with pyrecoutils.PredictionWriter(output_filename) as out:
        for position, dataset in enumerate((train, valid, test)):
            # A spacer line goes between consecutive sections only.
            if position:
                out.space_line()
            guesses = [
                model.predict(user, item) for user, item, _rating in dataset
            ]
            out.dump(dataset, guesses)
            scores.append(pyrecoutils.rmse(dataset, guesses))

    train_rmse, valid_rmse, test_rmse = scores
    logging.info(u"RMSE: %.4f %.4f %.4f", train_rmse, valid_rmse, test_rmse)
Ejemplo n.º 3
0
 def fit(self, training_reviews, validation_reviews, test_reviews):
     """Train the biases and latent factors with per-review updates.

     After ``self._initialize(training_reviews)``, the method makes
     ``self.epochs`` passes over *training_reviews*. Each review is
     unpacked as ``(u, i, r)``. For every review the prediction error is
     computed and the user/item biases and latent vectors are adjusted by
     ``self.learning_rate`` times the error term plus an L2 penalty
     (``self.user_l2`` / ``self.item_l2``).

     At the end of every epoch ``self.learning_rate`` is multiplied by
     ``self.decay_rate`` (the attribute itself is modified), and the RMSE
     on the training, validation and test reviews is computed and logged.

     Args:
         training_reviews: reviews used for the parameter updates.
         validation_reviews: reviews used only for the per-epoch RMSE log.
         test_reviews: reviews used only for the per-epoch RMSE log.
     """
     self._initialize(training_reviews)
     for epoch in range(self.epochs):
         for u, i, r in training_reviews:
             # Snapshot of the current parameters for this (user, item).
             b_u, b_i = self.user_biases[u], self.item_biases[i]
             p_u, q_i = self.user_latent[u], self.item_latent[i]
             # Signed error: current prediction minus the observed rating.
             delta = self.overall + b_u + b_i + np.dot(p_u, q_i) - r
             self.user_biases[u] -= self.learning_rate * (
                 delta + self.user_l2 * b_u)
             self.user_latent[u] -= self.learning_rate * (
                 delta * q_i + self.user_l2 * p_u)
             self.item_biases[i] -= self.learning_rate * (
                 delta + self.item_l2 * b_i)
             # NOTE(review): if the latent vectors are NumPy arrays, p_u
             # presumably aliases self.user_latent[u], so the in-place
             # update above would already have changed p_u before it is
             # used here -- confirm whether that is intended.
             self.item_latent[i] -= self.learning_rate * (
                 delta * p_u + self.item_l2 * q_i)
         # Decay the step size once per epoch; this persists on the instance.
         self.learning_rate *= self.decay_rate
         training_rmse = pyrecoutils.rmse(
             training_reviews,
             [self.predict(u, i) for u, i, r in training_reviews])
         validation_rmse = pyrecoutils.rmse(
             validation_reviews,
             [self.predict(u, i) for u, i, r in validation_reviews])
         test_rmse = pyrecoutils.rmse(
             test_reviews, [self.predict(u, i) for u, i, r in test_reviews])
         logging.info(u"Epoch % 4d - RMSE %f %f %f", epoch, training_rmse,
                      validation_rmse, test_rmse)
Ejemplo n.º 4
0
 def select(self, reviews):
     """Pick the threshold in ``range(10)`` with the lowest RMSE on *reviews*.

     Each candidate is assigned to ``self.threshold`` in turn, predictions
     are made for every review, and the RMSE is recorded. The index of the
     smallest RMSE (as returned by ``np.argmin``) is logged and stored in
     ``self.threshold``.
     """
     logging.info(u"Selecting threshold on %d reviews", len(reviews))

     def score(candidate):
         # predict() is called after the candidate is stored on the instance.
         self.threshold = candidate
         guesses = [self.predict(user) for user, _item, _rating in reviews]
         return pyrecoutils.rmse(reviews, guesses)

     errors = [score(candidate) for candidate in range(10)]
     best = np.argmin(errors)
     logging.info(u"Selected threshold: %f", best)
     self.threshold = best
Ejemplo n.º 5
0
def main(input_filename, output_filename):
    """Fit an OverallBias model and report constant predictions and RMSE.

    Ratings are loaded from *input_filename* and split into training,
    validation and test sets. The model is fitted on the training set, and
    ``model.bias`` is used as the prediction for every review. Predictions
    for the three sets are written to *output_filename* (separated by a
    spacer line) and the three RMSE values are logged.
    """
    ratings = pyrecoutils.load_ratings(input_filename)
    train, valid, test = pyrecoutils.split_sets(ratings)

    model = OverallBias()
    model.fit(train)

    def constant_guesses(dataset):
        # A fresh list on every call: one copy of the bias per review.
        return [model.bias] * len(dataset)

    scores = []
    with pyrecoutils.PredictionWriter(output_filename) as out:
        for position, dataset in enumerate((train, valid, test)):
            # A spacer line goes between consecutive sections only.
            if position:
                out.space_line()
            out.dump(dataset, constant_guesses(dataset))
            scores.append(pyrecoutils.rmse(dataset, constant_guesses(dataset)))

    train_rmse, valid_rmse, test_rmse = scores
    logging.info(u"RMSE: %.4f %.4f %.4f", train_rmse, valid_rmse, test_rmse)
Ejemplo n.º 6
0
 def select(self, reviews):
     """Grid-search the user and item thresholds with the lowest RMSE.

     Every ``(user_threshold, item_threshold)`` pair in a 10 x 10 grid is
     assigned to the instance in turn, predictions are made for every
     review, and the RMSE is recorded. The pair with the smallest RMSE is
     logged and stored in ``self.user_threshold`` and
     ``self.item_threshold``.

     Args:
         reviews: sized iterable of ``(u, i, r)`` triples used to score
             each threshold pair.
     """
     grid_size = 10
     logging.info(u"Selecting threshold on %d reviews", len(reviews))
     rmses = []
     for t_u in range(grid_size):
         self.user_threshold = t_u
         for t_i in range(grid_size):
             self.item_threshold = t_i
             predictions = [self.predict(u, i) for u, i, r in reviews]
             rmses.append(pyrecoutils.rmse(reviews, predictions))
     # rmses is flattened with the user threshold as the outer loop, so the
     # flat index decodes as (row, column) = (index // size, index % size).
     # Floor division is required: "/" is true division on Python 3 and
     # would produce a fractional user threshold.
     threshold = np.argmin(rmses)
     user_threshold = threshold // grid_size
     item_threshold = threshold % grid_size
     logging.info(u"Selected user threshold: %f", user_threshold)
     self.user_threshold = user_threshold
     logging.info(u"Selected item threshold: %f", item_threshold)
     self.item_threshold = item_threshold