Example #1
    def runTest(self):
        m1, m2 = numpy.random.random((4, 8)), numpy.random.random((4, 8))
        self.assertTrue(abs(self.cf.evaluator.get_rmse(m1, m2) - numpy.sqrt(mean_squared_error(m1, m2))) < 1e-6)
        train, test = self.cf.evaluator.naive_split()
        self.assertEqual(numpy.count_nonzero(train) + numpy.count_nonzero(test),
                         numpy.count_nonzero(self.ratings_matrix))

        test_indices = self.cf.evaluator.get_kfold_indices()
        # k = 3
        first_fold_indices = test_indices[0::self.k_folds]
        second_fold_indices = test_indices[1::self.k_folds]
        third_fold_indices = test_indices[2::self.k_folds]
        train1, test1 = self.cf.evaluator.generate_kfold_matrix(first_fold_indices)
        train2, test2 = self.cf.evaluator.generate_kfold_matrix(second_fold_indices)
        train3, test3 = self.cf.evaluator.generate_kfold_matrix(third_fold_indices)

        total_ratings = numpy.count_nonzero(self.ratings_matrix)

        # ensure that each fold has 1/k of the total ratings
        k_inverse = 1 / self.k_folds
        self.assertTrue(abs(k_inverse - (numpy.count_nonzero(test1) / total_ratings)) < 1e-6)
        self.assertTrue(abs(k_inverse - (numpy.count_nonzero(test2) / total_ratings)) < 1e-6)
        self.assertTrue(abs(k_inverse - (numpy.count_nonzero(test3) / total_ratings)) < 1e-6)

        # assert that the folds don't intertwine
        self.assertTrue(numpy.all((train1 * test1) == 0))
        self.assertTrue(numpy.all((train2 * test2) == 0))
        self.assertTrue(numpy.all((train3 * test3) == 0))
        # assert that the test sets don't contain the same elements
        self.assertTrue(numpy.all((test1 * test2) == 0))
        self.assertTrue(numpy.all((test2 * test3) == 0))
        self.assertTrue(numpy.all((test1 * test3) == 0))

        evaluator = Evaluator(self.ratings_matrix)
        self.assertEqual(self.predictions.shape, self.ratings_matrix.shape)
        recall = evaluator.calculate_recall(self.ratings_matrix, self.predictions)
        # if predictions are perfect
        if recall == 1:
            for row in range(self.users):
                for col in range(self.documents):
                    self.assertEqual(self.rounded_predictions[row, col], self.ratings_matrix[row, col])

        # restore the unmodified rating matrix
        self.setUp()
        evaluator.ratings = self.ratings_matrix.copy()

        # MRR can never increase as we zero out, user by user, the rating at
        # each user's highest-prediction index (its top-n recommendation).
        mrr = []
        for i in range(self.users):
            evaluator.ratings[i, (numpy.argmax(self.predictions[i], axis=0))] = 0
            mrr.append(evaluator.calculate_mrr(self.n_recommendations, self.predictions,
                                               self.rounded_predictions, evaluator.ratings))
            if i > 1:
                self.assertLessEqual(mrr[i], mrr[i-1])
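
The first assertion compares Evaluator.get_rmse against scikit-learn's mean_squared_error. Below is a minimal standalone sketch of the identity it relies on, RMSE = sqrt(MSE), written with plain numpy and scikit-learn and independent of the Evaluator class (same shapes and 1e-6 tolerance as the test above).

import numpy
from sklearn.metrics import mean_squared_error

m1 = numpy.random.random((4, 8))
m2 = numpy.random.random((4, 8))

# RMSE computed directly: the square root of the mean of the squared differences.
rmse = numpy.sqrt(numpy.mean((m1 - m2) ** 2))

# sklearn's mean_squared_error averages over all samples and outputs by default,
# so its square root matches the direct computation up to floating-point error.
assert abs(rmse - numpy.sqrt(mean_squared_error(m1, m2))) < 1e-6
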
Example #2
    def runTest(self):
        train, test = self.cf.evaluator.naive_split()
        self.assertEqual(
            numpy.count_nonzero(train) + numpy.count_nonzero(test),
            numpy.count_nonzero(self.ratings_matrix))

        train_indices, test_indices = self.cf.evaluator.get_kfold_indices()
        # k = 3
        first_fold_indices = train_indices[0::self.k_folds], test_indices[
            0::self.k_folds]
        second_fold_indices = train_indices[1::self.k_folds], test_indices[
            1::self.k_folds]
        third_fold_indices = train_indices[2::self.k_folds], test_indices[
            2::self.k_folds]

        train1, test1 = self.cf.evaluator.generate_kfold_matrix(
            first_fold_indices[0], first_fold_indices[1])
        train2, test2 = self.cf.evaluator.generate_kfold_matrix(
            second_fold_indices[0], second_fold_indices[1])
        train3, test3 = self.cf.evaluator.generate_kfold_matrix(
            third_fold_indices[0], third_fold_indices[1])

        total_ratings = numpy.count_nonzero(self.ratings_matrix)

        # ensure that each fold has 1/k of the total ratings
        k_inverse = (1 / self.k_folds)
        self.assertEqual(k_inverse, numpy.count_nonzero(test1) / total_ratings)
        self.assertEqual(k_inverse, numpy.count_nonzero(test2) / total_ratings)
        self.assertEqual(k_inverse, numpy.count_nonzero(test3) / total_ratings)

        # assert that the folds don't intertwine
        self.assertTrue(numpy.all((train1 * test1) == 0))
        self.assertTrue(numpy.all((train2 * test2) == 0))
        self.assertTrue(numpy.all((train3 * test3) == 0))
        # assert that the test sets don't contain the same elements
        self.assertTrue(numpy.all((test1 * test2) == 0))
        self.assertTrue(numpy.all((test2 * test3) == 0))
        self.assertTrue(numpy.all((test1 * test3) == 0))

        evaluator = Evaluator(self.ratings_matrix)
        self.assertEqual(self.predictions.shape, self.ratings_matrix.shape)
        recall = evaluator.calculate_recall(self.ratings_matrix,
                                            self.predictions)
        # if predictions are perfect
        if recall == 1:
            for row in range(self.users):
                for col in range(self.documents):
                    self.assertEqual(self.rounded_predictions[row, col],
                                     self.ratings_matrix[row, col])

        # If we modify all the top predictions for half the users,
        # recall should be 0.5 by definition
        for i in range(0, self.users, 2):
            evaluator.ratings[i, self.predictions[i].nonzero()[0]] = 0
        recall_at_x = evaluator.recall_at_x(self.n_recommendations,
                                            self.predictions,
                                            self.ratings_matrix,
                                            self.rounded_predictions)
        self.assertEqual(0.5, recall_at_x)

        self.setUp()
        evaluator.ratings[:] = self.ratings_matrix

        # removing all top hits should yield an NDCG of 0, since the number of recommendations is 1
        for i in range(0, self.users):
            evaluator.ratings[i, self.predictions[i].nonzero()[0]] = 0
        ndcg = evaluator.calculate_ndcg(self.n_recommendations,
                                        self.predictions, self.ratings_matrix,
                                        self.test_data)

        self.assertEqual(0.0, ndcg)

        # restore the unmodified rating matrix
        self.setUp()
        evaluator.ratings[:] = self.ratings_matrix

        # MRR can never increase as we zero out, user by user, the rating at
        # each user's highest-prediction index (its top-n recommendation).
        mrr = []
        for i in range(self.users):
            mrr.append(
                evaluator.calculate_mrr(self.n_recommendations,
                                        self.predictions,
                                        self.rounded_predictions,
                                        self.test_data))
            evaluator.ratings[i,
                              (numpy.argmax(self.predictions[i], axis=0))] = 0
            if i > 1:
                self.assertLessEqual(mrr[i], mrr[i - 1])
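
Both examples carve their folds out of the k-fold indices with strided slices ([0::k_folds], [1::k_folds], [2::k_folds]). The sketch below shows, under the assumption that those indices form a flat integer array (the array here is a hypothetical stand-in, not the evaluator's real output), why such slices give pairwise-disjoint folds that each hold 1/k of the entries.

import numpy

k_folds = 3
indices = numpy.arange(30)   # hypothetical flat index array standing in for get_kfold_indices()
numpy.random.shuffle(indices)

# every k-th entry, starting at offsets 0, 1 and 2, exactly as in the tests above
folds = [indices[i::k_folds] for i in range(k_folds)]

# each fold holds 1/k of the entries ...
assert all(len(fold) * k_folds == len(indices) for fold in folds)

# ... and no entry appears in two folds, which is what the
# "test sets don't contain the same elements" assertions verify on the matrices
for a in range(k_folds):
    for b in range(a + 1, k_folds):
        assert not set(folds[a].tolist()) & set(folds[b].tolist())
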