def evaluate_fit(trained_model, test_sessions, test_queries):
    """
    Evaluate the model's fit to the observed data, i.e. whether C == 0 or
    C == 1 for every item/session. There are two measures:

    - Log-likelihood ranges from negative infinity (bad) to 0 (good).
      It is the log-probability of observing the clicks in all the test
      sessions if the model is correct.
    - Perplexity ranges from 1 (perfect) to 2 (random guessing); values
      above 2 are worse than random.
      It measures how surprised we are by the clicks and non-clicks in
      all the test sessions if the model is correct.
      When comparing models A and B, you can use the perplexity gain
      (pB - pA) / (pB - 1).
      It can be computed at individual ranks or averaged across all ranks;
      per-rank perplexity normally differs from rank to rank.
    """
    print("-------------------------------")
    print("Testing on %d search sessions (%d unique queries)."
          % (len(test_sessions), len(test_queries)))
    print("-------------------------------")

    loglikelihood = LogLikelihood()
    perplexity = Perplexity()

    start = time.time()
    ll_value = loglikelihood.evaluate(trained_model, test_sessions)
    end = time.time()
    print("\tlog-likelihood: %f; time: %i secs" % (ll_value, end - start))

    start = time.time()
    perp_value = perplexity.evaluate(trained_model, test_sessions)[0]
    end = time.time()
    print("\tperplexity: %f; time: %i secs" % (perp_value, end - start))
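# The perplexity gain mentioned in the docstring is straightforward to compute
# once both models have been evaluated. A minimal sketch, assuming perp_a and
# perp_b are the averaged perplexities of two hypothetical models A and B
# (this helper is illustrative, not part of the PyClick API):
def perplexity_gain(perp_a, perp_b):
    """Relative improvement of model A over model B: (pB - pA) / (pB - 1).

    Equals 1.0 when model A is perfect (pA == 1) and 0.0 when both models
    are equally perplexed; undefined when model B is already perfect.
    """
    return (perp_b - perp_a) / (perp_b - 1)

# Example: perplexities 1.28 (model A) vs. 1.35 (model B) give a gain of
# (1.35 - 1.28) / (1.35 - 1) ~= 0.2, i.e. A removes about 20% of B's
# excess perplexity over a perfect model.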
test_queries = Utils.get_unique_queries(test_sessions)

print("===============================")
print("Training on %d search sessions (%d unique queries)."
      % (len(train_sessions), len(train_queries)))
print("===============================")

start = time.time()
click_model.train(train_sessions)
end = time.time()
print("\tTrained %s click model in %i secs:\n%r"
      % (click_model.__class__.__name__, end - start, click_model))

print("-------------------------------")
print("Testing on %d search sessions (%d unique queries)."
      % (len(test_sessions), len(test_queries)))
print("-------------------------------")

loglikelihood = LogLikelihood()
perplexity = Perplexity()

start = time.time()
ll_value = loglikelihood.evaluate(click_model, test_sessions)
end = time.time()
print("\tlog-likelihood: %f; time: %i secs" % (ll_value, end - start))

start = time.time()
perp_value = perplexity.evaluate(click_model, test_sessions)[0]
end = time.time()
print("\tperplexity: %f; time: %i secs" % (perp_value, end - start))
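# For intuition about the log-likelihood value printed above: it is the
# log-probability the model assigns to each session's observed click pattern
# (typically averaged over sessions). A minimal self-contained sketch;
# predicted_probs and observed_clicks are hypothetical inputs, since
# PyClick's LogLikelihood derives the probabilities from the trained model
# internally:
import math

def session_log_likelihood(predicted_probs, observed_clicks):
    """log P(observed clicks) for one SERP, given per-rank click probabilities."""
    ll = 0.0
    for p, c in zip(predicted_probs, observed_clicks):
        ll += math.log(p if c else 1.0 - p)  # P(C=1) if clicked, else P(C=0)
    return ll

# A model that fits the clicks well scores close to 0 ...
print(session_log_likelihood([0.9, 0.1, 0.1], [1, 0, 0]))  # ~= -0.32
# ... while a badly fitting model scores strongly negative.
print(session_log_likelihood([0.1, 0.9, 0.9], [1, 0, 0]))  # ~= -6.91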
end = time.time()
logger.info('Finished training. Time consumed: {} seconds'.format(end - start))

# Log-likelihood
logger.info('Computing log likelihood')
loglikelihood = LogLikelihood()
start = time.time()
ll_value = loglikelihood.evaluate(click_model, test_dataset)
end = time.time()
logger.info('Log likelihood: {}. Time consumed: {} seconds'.format(
    ll_value, end - start))

# Perplexity
logger.info('Computing perplexity')
perplexity = Perplexity()
start = time.time()
perp_value = perplexity.evaluate(click_model, test_dataset)[0]
end = time.time()
logger.info('Perplexity: {}. Time consumed: {} seconds'.format(
    perp_value, end - start))

# NDCG@k at several cutoffs
logger.info('Computing NDCG@k')
RelevanceEstimation = RankingPerformance(args)
start = time.time()
ndcg_version1 = {}
ndcg_version2 = {}
ks = [1, 3, 5, 10]
for k in ks:
    ndcg_version1[k], ndcg_version2[k] = RelevanceEstimation.evaluate(
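# The NDCG@k call above is cut off mid-statement in the excerpt, so the
# arguments to RelevanceEstimation.evaluate are left as-is. The measure
# itself is standard; the two "versions" plausibly correspond to the two
# common gain functions (2^rel - 1 vs. plain rel), though that is an
# assumption. A minimal sketch of the exponential-gain variant; function
# names and labels here are illustrative, not the RankingPerformance API:
import math

def dcg_at_k(relevances, k):
    """DCG@k with exponential gain: sum of (2^rel - 1) / log2(rank + 1)."""
    return sum((2 ** rel - 1) / math.log2(rank + 2)  # enumerate is 0-based
               for rank, rel in enumerate(relevances[:k]))

def ndcg_at_k(relevances, k):
    """DCG of the given ranking normalized by the DCG of the ideal ranking."""
    ideal = dcg_at_k(sorted(relevances, reverse=True), k)
    return dcg_at_k(relevances, k) / ideal if ideal > 0 else 0.0

# Graded labels in model-ranked order (hypothetical values):
print(ndcg_at_k([3, 2, 3, 0, 1, 2], k=5))  # ~= 0.88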
    sys.exit(1)

click_model = globals()[sys.argv[1]]()
search_sessions_path = sys.argv[2]
search_sessions_num = int(sys.argv[3])

search_sessions = YandexRelPredChallengeParser().parse(search_sessions_path, search_sessions_num)

train_test_split = int(len(search_sessions) * 0.75)
train_sessions = search_sessions[:train_test_split]
train_queries = Utils.get_unique_queries(train_sessions)
test_sessions = Utils.filter_sessions(search_sessions[train_test_split:], train_queries)
test_queries = Utils.get_unique_queries(test_sessions)

print("-------------------------------")
print("Training on %d search sessions (%d unique queries)." % (len(train_sessions), len(train_queries)))
print("-------------------------------")

click_model.train(train_sessions)
print("\tTrained %s click model:\n%r" % (click_model.__class__.__name__, click_model))

print("-------------------------------")
print("Testing on %d search sessions (%d unique queries)." % (len(test_sessions), len(test_queries)))
print("-------------------------------")

loglikelihood = LogLikelihood()
print("\tlog-likelihood: %f" % loglikelihood.evaluate(click_model, test_sessions))

perplexity = Perplexity()
print("\tperplexity: %f" % perplexity.evaluate(click_model, test_sessions)[0])
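# Utils.filter_sessions above keeps only the test sessions whose query also
# occurs in the training set: a query-conditioned click model has no trained
# parameters for unseen queries, so evaluating on them would be meaningless.
# A minimal sketch of the presumed behavior (the actual PyClick
# implementation may differ, and session.query is assumed here):
def filter_sessions(sessions, allowed_queries):
    """Keep only the sessions whose query appears in allowed_queries."""
    allowed = set(allowed_queries)
    return [session for session in sessions if session.query in allowed]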
    sys.exit(1)

click_model = globals()[sys.argv[1]]()
search_sessions_path = sys.argv[2]
search_sessions_num = int(sys.argv[3])

search_sessions = YandexRelPredChallengeParser().parse(search_sessions_path, search_sessions_num)

train_test_split = int(len(search_sessions) * 0.75)
train_sessions = search_sessions[:train_test_split]
train_queries = Utils.get_unique_queries(train_sessions)
test_sessions = Utils.filter_sessions(search_sessions[train_test_split:], train_queries)
test_queries = Utils.get_unique_queries(test_sessions)

print("-------------------------------")
print("Training on %d search sessions (%d unique queries)." % (len(train_sessions), len(train_queries)))
print("-------------------------------")

# Train on the training split only; training on all sessions would leak
# the test data into the model.
click_model.train(train_sessions)
# print("\tTrained %s click model:\n%r" % (click_model.__class__.__name__, click_model))

print("-------------------------------")
print("Testing on %d search sessions (%d unique queries)." % (len(test_sessions), len(test_queries)))
print("-------------------------------")

# Both evaluators expect test sessions, not the list of unique queries.
loglikelihood = LogLikelihood()
print("\tlog-likelihood: %f" % loglikelihood.evaluate(click_model, test_sessions))

perplexity = Perplexity()
print("\tperplexity: %f" % perplexity.evaluate(click_model, test_sessions)[0])