def test_novelty(self):
    """Check ``Evaluator.novelty`` for the most-popular baseline.

    The evaluator is built twice, with 3 and then 2 as its last argument
    (presumably the recommended sequence length -- confirm against
    ``sequeval.Evaluator``). Each expected value is the negated mean of the
    base-2 logarithms of fixed constants.
    """
    most_popular = baseline.MostPopularRecommender(training_set, items)

    # Last evaluator argument = 3: three log terms averaged.
    expected_for_3 = (math.log2(0.375) + math.log2(0.5) + math.log2(0.125)) / -3
    evaluator_3 = sequeval.Evaluator(training_set, test_set, items, 3)
    self.assertAlmostEqual(expected_for_3, evaluator_3.novelty(most_popular))

    # Last evaluator argument = 2: only the first two log terms remain.
    expected_for_2 = (math.log2(0.375) + math.log2(0.5)) / -2
    evaluator_2 = sequeval.Evaluator(training_set, test_set, items, 2)
    self.assertAlmostEqual(expected_for_2, evaluator_2.novelty(most_popular))
def test_serendipity(self):
    """Check ``Evaluator.serendipity`` for the most-popular baseline.

    With the default arguments the expected serendipity is exactly 0.0 for
    both evaluators; passing an explicit ``primitive_k`` yields a non-zero
    expected value.
    """
    most_popular = baseline.MostPopularRecommender(training_set, items)

    # Evaluator built with 3 as its last argument.
    evaluator_3 = sequeval.Evaluator(training_set, test_set, items, 3)
    default_score = evaluator_3.serendipity(most_popular)
    self.assertEqual(0.0, default_score)
    self.assertAlmostEqual(
        1 / 6,
        evaluator_3.serendipity(most_popular, primitive_k=2),
    )

    # Evaluator built with 2 as its last argument.
    evaluator_2 = sequeval.Evaluator(training_set, test_set, items, 2)
    default_score = evaluator_2.serendipity(most_popular)
    self.assertEqual(0.0, default_score)
    self.assertAlmostEqual(
        3 / 6,
        evaluator_2.serendipity(most_popular, primitive_k=1),
    )
def test_diversity(self):
    """Check ``Evaluator.diversity`` for the most-popular baseline.

    Diversity between two items is taken as one minus their cosine
    similarity. The expected value is the mean of three pairwise
    diversities when the evaluator's last argument is 3, and the single
    (2, 1) diversity when it is 2.
    """
    most_popular = baseline.MostPopularRecommender(training_set, items)
    cosine = sequeval.CosineSimilarity(training_set, items)

    def pair_diversity(first, second):
        # Complement of the similarity between the two item identifiers.
        return 1 - cosine.similarity(first, second)

    # Compute the possible diversities
    diversity_2_1 = pair_diversity(2, 1)
    diversity_2_3 = pair_diversity(2, 3)
    diversity_1_3 = pair_diversity(1, 3)

    evaluator_3 = sequeval.Evaluator(training_set, test_set, items, 3)
    expected_mean = (diversity_2_1 + diversity_2_3 + diversity_1_3) / 3
    self.assertAlmostEqual(
        expected_mean,
        evaluator_3.diversity(most_popular, cosine),
    )

    evaluator_2 = sequeval.Evaluator(training_set, test_set, items, 2)
    self.assertAlmostEqual(
        diversity_2_1,
        evaluator_2.diversity(most_popular, cosine),
    )
def run():
    """Run the full evaluation pipeline for one request and return JSON.

    Reads ``user-ratings``, ``item-ratings``, ``splitter``, ``ratio`` and
    ``length`` from ``request.args``, loads ``datasets/yes_reduced.csv``,
    builds the sequences, splits them into training and test sets and
    evaluates the four baseline recommenders.

    Returns:
        A JSON string with three keys: ``profiler`` (dataset statistics),
        ``splitter`` (sizes of the two sets) and ``evaluator`` (one entry
        per baseline, as produced by ``evaluation``).
    """
    # Parse the query parameters in a fixed order; ``ratio`` arrives as a
    # percentage and is converted to a fraction.
    user_ratings_arg = int(request.args.get('user-ratings'))
    item_ratings_arg = int(request.args.get('item-ratings'))
    splitter_name = request.args.get('splitter')
    split_ratio = float(request.args.get('ratio')) / 100
    sequence_length = int(request.args.get('length'))

    # Load the ratings and turn them into sequences.
    ratings = sequeval.UIRTLoader(
        user_ratings=user_ratings_arg,
        item_ratings=item_ratings_arg,
    ).load('datasets/yes_reduced.csv')
    sequences, items = sequeval.Builder('1000 s').build(ratings)

    # Dataset statistics are gathered before the split.
    profiler = sequeval.Profiler(sequences)
    profiler_stats = {
        'users': profiler.users(),
        'items': profiler.items(),
        'ratings': profiler.ratings(),
        'sequences': profiler.sequences(),
        'sparsity': parse(profiler.sparsity()),
        'length': parse(profiler.sequence_length()),
    }

    # Any value other than 'random' selects the timestamp splitter.
    splitter_class = (
        sequeval.RandomSplitter
        if splitter_name == 'random'
        else sequeval.TimestampSplitter
    )
    training_set, test_set = splitter_class(split_ratio).split(sequences)
    split_sizes = {
        'training': len(training_set),
        'test': len(test_set),
    }

    evaluator = sequeval.Evaluator(training_set, test_set, items, sequence_length)
    cosine = sequeval.CosineSimilarity(training_set, items)

    # Each baseline is constructed and evaluated in turn, in this order.
    recommender_classes = (
        baseline.MostPopularRecommender,
        baseline.RandomRecommender,
        baseline.UnigramRecommender,
        baseline.BigramRecommender,
    )
    evaluations = []
    for recommender_class in recommender_classes:
        recommender = recommender_class(training_set, items)
        evaluations.append(evaluation(evaluator, recommender, cosine))

    response = {
        'profiler': profiler_stats,
        'splitter': split_sizes,
        'evaluator': evaluations,
    }
    return json.dumps(response)
def test_perplexity(self):
    """Check ``Evaluator.perplexity`` for two baseline recommenders.

    The most-popular recommender is expected to give infinite perplexity
    and the random recommender a perplexity of 3.0.
    """
    evaluator = sequeval.Evaluator(training_set, test_set, items, 3)

    most_popular = baseline.MostPopularRecommender(training_set, items)
    popular_perplexity = evaluator.perplexity(most_popular)
    self.assertAlmostEqual(math.inf, popular_perplexity)

    random_recommender = baseline.RandomRecommender(training_set, items)
    random_perplexity = evaluator.perplexity(random_recommender)
    self.assertAlmostEqual(3.0, random_perplexity)
def test_confidence(self):
    """Check ``Evaluator.confidence`` for two baseline recommenders.

    The most-popular recommender is expected to give exactly 1.0 and the
    random recommender approximately one third.
    """
    evaluator = sequeval.Evaluator(training_set, test_set, items, 3)

    most_popular = baseline.MostPopularRecommender(training_set, items)
    popular_confidence = evaluator.confidence(most_popular)
    self.assertEqual(1.0, popular_confidence)

    random_recommender = baseline.RandomRecommender(training_set, items)
    random_confidence = evaluator.confidence(random_recommender)
    self.assertAlmostEqual(1 / 3, random_confidence)
def test_ndpm(self):
    """Check ``Evaluator.ndpm`` for the most-popular baseline.

    Each case gives the evaluator's last argument, the assertion used and
    the expected nDPM value.
    """
    most_popular = baseline.MostPopularRecommender(training_set, items)

    cases = (
        (3, self.assertAlmostEqual, 5 / 12),
        (2, self.assertEqual, 3 / 4),
    )
    for last_argument, check, expected in cases:
        evaluator = sequeval.Evaluator(training_set, test_set, items, last_argument)
        check(expected, evaluator.ndpm(most_popular))
def test_precision(self):
    """Check ``Evaluator.precision`` for the most-popular baseline.

    Each case gives the evaluator's last argument, the assertion used and
    the expected precision.
    """
    most_popular = baseline.MostPopularRecommender(training_set, items)

    cases = (
        (3, self.assertAlmostEqual, 5 / 6),
        (2, self.assertEqual, 1.0),
    )
    for last_argument, check, expected in cases:
        evaluator = sequeval.Evaluator(training_set, test_set, items, last_argument)
        check(expected, evaluator.precision(most_popular))
def test_coverage(self):
    """Check ``Evaluator.coverage`` for the most-popular baseline.

    The expected coverage is 3/3 when the evaluator's last argument is 3
    and 2/3 when it is 2; both are compared with exact equality.
    """
    most_popular = baseline.MostPopularRecommender(training_set, items)

    expected_by_argument = ((3, 3 / 3), (2, 2 / 3))
    for last_argument, expected in expected_by_argument:
        evaluator = sequeval.Evaluator(training_set, test_set, items, last_argument)
        self.assertEqual(expected, evaluator.coverage(most_popular))
# Choose the splitter named on the command line; each entry pairs the heading
# to print with the splitter class to instantiate.
splitter_choices = {
    'random': ("\n# Random splitter", sequeval.RandomSplitter),
    'timestamp': ("\n# Timestamp splitter", sequeval.TimestampSplitter),
}
if args.splitter not in splitter_choices:
    raise RuntimeError('Unknown splitter ' + args.splitter)
splitter_heading, splitter_class = splitter_choices[args.splitter]
print(splitter_heading)
splitter = splitter_class(args.ratio)

# Split the sequences and report the size of each part.
training_set, test_set = splitter.split(sequences)
print("Training set:", len(training_set))
print("Test set:", len(test_set))

# Print the header row of the results table.
print("\n# Evaluator")
column_titles = (
    "Algorithm",
    "Coverage",
    "Precision",
    "nDPM",
    "Diversity",
    "Novelty",
    "Serendipity",
    "Confidence",
    "Perplexity",
)
print("%10s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s\t%10s" % column_titles)

evaluator = sequeval.Evaluator(training_set, test_set, items, args.length)
cosine = sequeval.CosineSimilarity(training_set, items)

# Build and evaluate each baseline recommender in turn.
most_popular = baseline.MostPopularRecommender(training_set, items)
evaluation(evaluator, most_popular, cosine)

random = baseline.RandomRecommender(training_set, items)
evaluation(evaluator, random, cosine)

unigram = baseline.UnigramRecommender(training_set, items)
evaluation(evaluator, unigram, cosine)

bigram = baseline.BigramRecommender(training_set, items)
evaluation(evaluator, bigram, cosine)