Esempio n. 1
0
 def test_compute_list(self):
     """Check MeanAbsoluteError.compute_list on several error lists.

     Covers an all-numeric list, an all-zero list, a list containing a
     None entry, and an empty list.
     """
     # The results are computed floating-point means, so compare them with
     # a tolerance instead of exact equality.
     errors = [0.7, 0.1, 0.1, 1.1, 1.5]
     self.assertAlmostEqual(MeanAbsoluteError.compute_list(errors), 0.7)

     errors = [0, 0]
     self.assertAlmostEqual(MeanAbsoluteError.compute_list(errors), 0)

     # The expected 0.6 is the mean of the four numeric entries only, i.e.
     # the None entry contributes neither to the sum nor to the count.
     errors = [0.7, 0.1, 0.1, None, 1.5]
     self.assertAlmostEqual(MeanAbsoluteError.compute_list(errors), 0.6)

     # With nothing to average the result is None rather than a number.
     errors = []
     self.assertIsNone(MeanAbsoluteError.compute_list(errors))
Esempio n. 2
0
# Build the SVD model with k = 100.
k = 100
svd.compute(k=k, min_values=10, pre_normalize=None, mean_center=True, post_normalize=True)

# Each record is expected to expose at least the 'user' and 'item' fields.
records = ETLUtils.load_csv_file(file_name_header, '|')
errors = []

for record in records:
    try:
        user = record['user']
        item = int(record['item'])
        # NOTE(review): 1 and 5 are presumably the rating bounds used to
        # clamp the prediction -- confirm against svd.predict.
        predicted_rating = svd.predict(item, user, 1, 5)
        print(record['user'], record['item'], predicted_rating)
        actual_rating = svd.get_matrix().value(item, user)
        error = abs(predicted_rating - actual_rating)
        errors.append(error)
    except KeyError:
        # Best effort: a record for which any lookup above raises KeyError
        # is skipped and contributes no error.
        continue

mean_absolute_error = MeanAbsoluteError.compute_list(errors)
root_mean_square_error = RootMeanSquareError.compute_list(errors)

# compute_list yields None when there is nothing to average (e.g. every
# record was skipped); '%f' % None would raise TypeError, so report that
# case explicitly instead of crashing.
if mean_absolute_error is None or root_mean_square_error is None:
    print('No rating could be evaluated: error metrics are undefined')
else:
    print('Mean Absolute error: %f' % mean_absolute_error)
    print('Root mean square error: %f' % root_mean_square_error)
Esempio n. 3
0
            pre_normalize=None,
            mean_center=True,
            post_normalize=True)
# Each record is expected to expose at least the 'user' and 'item' fields.
records = ETLUtils.load_csv_file(file_name_header, '|')
errors = []

for record in records:
    try:
        user = record['user']
        item = int(record['item'])
        # NOTE(review): 1 and 5 are presumably the rating bounds used to
        # clamp the prediction -- confirm against svd.predict.
        predicted_rating = svd.predict(item, user, 1, 5)
        print(record['user'], record['item'], predicted_rating)
        actual_rating = svd.get_matrix().value(item, user)
        error = abs(predicted_rating - actual_rating)
        errors.append(error)
    except KeyError:
        # Best effort: a record for which any lookup above raises KeyError
        # is skipped and contributes no error.
        continue

mean_absolute_error = MeanAbsoluteError.compute_list(errors)
root_mean_square_error = RootMeanSquareError.compute_list(errors)

# compute_list yields None when there is nothing to average (e.g. every
# record was skipped); '%f' % None would raise TypeError, so report that
# case explicitly instead of crashing.
if mean_absolute_error is None or root_mean_square_error is None:
    print('No rating could be evaluated: error metrics are undefined')
else:
    print('Mean Absolute error: %f' % mean_absolute_error)
    print('Root mean square error: %f' % root_mean_square_error)
Esempio n. 4
0
def perform_cross_validation(
        records, recommender, num_folds, cache_reviews=None, reviews_type=None):
    """Evaluate ``recommender`` with ``num_folds``-fold cross validation.

    For every fold the records are split with ``ETLUtils.split_train_test``
    (train fraction ``1 - 1/num_folds``, starting at ``fold / num_folds``),
    the recommender is loaded with the training part, the ratings of the
    test part are predicted with ``predict_rating_list`` and the
    recommender is cleared again. Folds for which no mean absolute error
    can be computed are reported on stdout and left out of the averages.

    Args:
        records: the rating records to split into train and test parts.
        recommender: object providing ``load(records)``, ``clear()`` and a
            writable ``reviews`` attribute.
        num_folds: number of folds to run.
        cache_reviews: optional reviews, split the same way as ``records``;
            when truthy the training part is assigned to
            ``recommender.reviews``.
        reviews_type: only used together with ``cache_reviews``. When not
            None the test reviews are clustered and the test records are
            restricted to the first group (``'specific'``) or to the second
            group (``'generic'``); any other value leaves them unchanged.

    Returns:
        A dict with the keys ``'MAE'``, ``'RMSE'`` and ``'Coverage'``
        (averages over the evaluated folds) and ``'Execution time'``
        (elapsed seconds).

    Raises:
        ZeroDivisionError: if ``num_folds`` is 0, or if no fold produced a
            mean absolute error so there is nothing to average.
    """
    start_time = time.time()
    split = 1 - (1/float(num_folds))
    total_mean_absolute_error = 0.
    total_root_mean_square_error = 0.
    total_coverage = 0.
    num_cycles = 0

    for i in range(num_folds):
        print('Num cycles: %d' % i)
        start = float(i) / num_folds
        cluster_labels = None
        train_records, test_records = ETLUtils.split_train_test(
            records, split=split, start=start)
        if cache_reviews:
            train_reviews, test_reviews = ETLUtils.split_train_test(
                cache_reviews, split=split, start=start)
            if reviews_type is not None:
                cluster_labels = reviews_clusterer.cluster_reviews(test_reviews)
            recommender.reviews = train_reviews
        recommender.load(train_records)

        if cluster_labels is not None:
            separated_records = reviews_clusterer.split_list_by_labels(
                test_records, cluster_labels)
            if reviews_type == 'specific':
                test_records = separated_records[0]
            elif reviews_type == 'generic':
                test_records = separated_records[1]

        _, errors, num_unknown_ratings = predict_rating_list(recommender, test_records)
        recommender.clear()
        mean_absolute_error = MeanAbsoluteError.compute_list(errors)
        root_mean_square_error = RootMeanSquareError.compute_list(errors)

        num_samples = len(test_records)
        if num_samples:
            # Convert before dividing: under Python 2 an int / int division
            # would truncate the coverage to 0 or 1.
            coverage = float(num_samples - num_unknown_ratings) / num_samples
        else:
            # An empty test set has no rating to cover; avoid dividing by
            # zero so the fold can be handled by the check below.
            coverage = 0.

        if mean_absolute_error is not None:
            total_mean_absolute_error += mean_absolute_error
            total_root_mean_square_error += root_mean_square_error
            total_coverage += coverage
            num_cycles += 1
        else:
            print('Mean absolute error is None!!!')

    # Average only over the folds that actually produced an error value.
    final_mean_absolute_error = total_mean_absolute_error / num_cycles
    final_root_squared_error = total_root_mean_square_error / num_cycles
    final_coverage = total_coverage / num_cycles
    execution_time = time.time() - start_time

    print('Final mean absolute error: %f' % final_mean_absolute_error)
    print('Final root mean square error: %f' % final_root_squared_error)
    print('Final coverage: %f' % final_coverage)
    print("--- %s seconds ---" % execution_time)

    result = {
        'MAE': final_mean_absolute_error,
        'RMSE': final_root_squared_error,
        'Coverage': final_coverage,
        'Execution time': execution_time
    }

    return result