def test_compute_list(self):
    """Check MeanAbsoluteError.compute_list on regular and edge-case inputs.

    Covers a plain list of errors, an all-zero list, a list containing a
    ``None`` entry and an empty list.
    """
    # Non-trivial float means are compared approximately: the exact bits of
    # the result depend on the summation order used by the implementation.
    errors = [0.7, 0.1, 0.1, 1.1, 1.5]
    self.assertAlmostEqual(MeanAbsoluteError.compute_list(errors), 0.7)

    errors = [0, 0]
    self.assertEqual(MeanAbsoluteError.compute_list(errors), 0)

    # The expected value corresponds to averaging only the four numeric
    # entries: (0.7 + 0.1 + 0.1 + 1.5) / 4.
    errors = [0.7, 0.1, 0.1, None, 1.5]
    self.assertAlmostEqual(MeanAbsoluteError.compute_list(errors), 0.6)

    # An empty list has no mean; the contract is to return None.
    errors = []
    self.assertIsNone(MeanAbsoluteError.compute_list(errors))
# Factorize the rating matrix held by ``svd`` keeping k dimensions.
k = 100
svd.compute(k=k, min_values=10, pre_normalize=None, mean_center=True,
            post_normalize=True)

# Compare the predicted rating against the stored rating for every record
# in the '|'-separated file.
records = ETLUtils.load_csv_file(file_name_header, '|')
errors = []

for record in records:
    try:
        user = record['user']
        item = int(record['item'])
        predicted_rating = svd.predict(item, user, 1, 5)
        print(record['user'], record['item'], predicted_rating)
        actual_rating = svd.get_matrix().value(item, user)
    except KeyError:
        # Records whose lookup raises KeyError are skipped and do not
        # contribute to the error metrics.
        continue
    errors.append(abs(predicted_rating - actual_rating))

mean_absolute_error = MeanAbsoluteError.compute_list(errors)
root_mean_square_error = RootMeanSquareError.compute_list(errors)

# compute_list returns None for an empty error list, and '%f' % None raises
# TypeError, so only format the metrics when they exist.
if mean_absolute_error is None or root_mean_square_error is None:
    print('No rating could be predicted; error metrics are undefined')
else:
    print('Mean Absolute error: %f' % mean_absolute_error)
    print('Root mean square error: %f' % root_mean_square_error)
pre_normalize=None, mean_center=True, post_normalize=True) # predicted_rating = svd.predict(int(5), 'A1', 1, 10) # predicted_rating2 = svd.predict(int(1), 'A1', 1, 10) # print('Predicted rating', predicted_rating) # print('Predicted rating', predicted_rating2) records = ETLUtils.load_csv_file(file_name_header, '|') errors = [] for record in records: try: # print(record['user'], record['item'], record['rating']) user = record['user'] item = int(record['item']) predicted_rating = svd.predict(item, user, 1, 5) print(record['user'], record['item'], predicted_rating) # predicted_rating = round(predicted_rating) actual_rating = svd.get_matrix().value(item, user) error = abs(predicted_rating - actual_rating) errors.append(error) except KeyError: continue mean_absolute_error = MeanAbsoluteError.compute_list(errors) root_mean_square_error = RootMeanSquareError.compute_list(errors) print('Mean Absolute error: %f' % mean_absolute_error) print('Root mean square error: %f' % root_mean_square_error)
def perform_cross_validation(
        records, recommender, num_folds, cache_reviews=None, reviews_type=None):
    """Evaluate a recommender with k-fold cross validation.

    For every fold the records are split with ``ETLUtils.split_train_test``,
    the recommender is loaded with the training part and
    ``predict_rating_list`` is run on the test part. Folds whose mean
    absolute error is ``None`` are reported on stdout and left out of the
    final averages.

    :param records: the rating records that are split into folds
    :param recommender: object that is loaded with ``load(train_records)``,
        reset with ``clear()`` after each fold and handed to
        ``predict_rating_list``
    :param num_folds: number of folds to run
    :param cache_reviews: optional reviews that are split with the same
        parameters as ``records``; the training part is assigned to
        ``recommender.reviews``
    :param reviews_type: when not ``None`` (and ``cache_reviews`` is given)
        the test reviews are clustered and the test records are restricted
        to the first group for ``'specific'`` or the second group for
        ``'generic'``
    :return: a dictionary with the keys ``'MAE'``, ``'RMSE'``,
        ``'Coverage'`` and ``'Execution time'``
    :raises ZeroDivisionError: if ``num_folds`` is 0 or if no fold produced
        a mean absolute error
    """
    start_time = time.time()
    split = 1 - (1 / float(num_folds))
    total_mean_absolute_error = 0.
    total_mean_square_error = 0.
    total_coverage = 0.
    num_cycles = 0

    for i in range(num_folds):
        print('Num cycles: %d' % i)
        start = float(i) / num_folds
        cluster_labels = None
        train_records, test_records = ETLUtils.split_train_test(
            records, split=split, start=start)

        if cache_reviews:
            train_reviews, test_reviews = ETLUtils.split_train_test(
                cache_reviews, split=split, start=start)
            if reviews_type is not None:
                cluster_labels = reviews_clusterer.cluster_reviews(test_reviews)
            recommender.reviews = train_reviews

        recommender.load(train_records)

        if cluster_labels is not None:
            separated_records = reviews_clusterer.split_list_by_labels(
                test_records, cluster_labels)
            if reviews_type == 'specific':
                test_records = separated_records[0]
            if reviews_type == 'generic':
                test_records = separated_records[1]

        _, errors, num_unknown_ratings = predict_rating_list(
            recommender, test_records)
        recommender.clear()

        mean_absolute_error = MeanAbsoluteError.compute_list(errors)
        root_mean_square_error = RootMeanSquareError.compute_list(errors)

        num_samples = len(test_records)
        if num_samples:
            # Convert to float *before* dividing: wrapping the finished
            # division in float() truncates the ratio under Python 2.
            coverage = float(num_samples - num_unknown_ratings) / num_samples
        else:
            # An empty test fold has nothing to cover; avoid dividing by
            # zero so the fold can be skipped by the check below.
            coverage = 0.

        if mean_absolute_error is not None:
            total_mean_absolute_error += mean_absolute_error
            total_mean_square_error += root_mean_square_error
            total_coverage += coverage
            num_cycles += 1
        else:
            print('Mean absolute error is None!!!')

    # Average only over the folds that actually produced metrics.
    final_mean_absolute_error = total_mean_absolute_error / num_cycles
    final_root_squared_error = total_mean_square_error / num_cycles
    final_coverage = total_coverage / num_cycles
    execution_time = time.time() - start_time

    print('Final mean absolute error: %f' % final_mean_absolute_error)
    print('Final root mean square error: %f' % final_root_squared_error)
    print('Final coverage: %f' % final_coverage)
    print("--- %s seconds ---" % execution_time)

    result = {
        'MAE': final_mean_absolute_error,
        'RMSE': final_root_squared_error,
        'Coverage': final_coverage,
        'Execution time': execution_time
    }

    return result