def infer_params(self, click_model, search_sessions, holdout_search_sessions=None):
    """Fit click_model's parameters to search_sessions via EM.

    Runs self.iter_num EM iterations.  Each iteration builds a fresh model of
    the same class, accumulates parameter updates from every session, then
    copies the accumulated parameters back into click_model (in place).

    :param click_model: the click model whose params are re-estimated in place.
    :param search_sessions: training sessions; nothing happens if empty/None.
    :param holdout_search_sessions: optional held-out sessions; when given,
        the log-likelihood on them is printed after every iteration.
    """
    # Nothing to infer from a missing or empty training set.
    if search_sessions is None or len(search_sessions) == 0:
        return
    # The likelihood evaluator is only needed when a holdout set is given.
    if holdout_search_sessions is not None:
        loglikelihood = LogLikelihood()
    prompt = 'Starting EM for %s click model:' % click_model.__class__.__name__
    print
    print prompt
    print '=' * len(prompt)
    for iteration in xrange(self.iter_num):
        # Fresh model of the same class: its params accumulate this
        # iteration's updates while the old params drive the E-step.
        new_click_model = click_model.__class__()
        for search_session in search_sessions:
            current_session_params = click_model.get_session_params(search_session)
            new_session_params = new_click_model.get_session_params(search_session)
            for rank, result in enumerate(search_session.web_results):
                # Update every parameter attached to this rank using the
                # current (previous-iteration) parameter values.
                for param_name, param in new_session_params[rank].items():
                    param.update(search_session, rank, current_session_params)
        # M-step result replaces the old parameters before the next iteration.
        click_model.params = new_click_model.params
        if holdout_search_sessions is not None:
            print 'Iteration %d/%d finished: %.12f log-likelihood.' % (iteration + 1, self.iter_num, loglikelihood.evaluate(click_model, holdout_search_sessions))
        else:
            print 'Iteration %d/%d finished.' % (iteration + 1, self.iter_num)
# Report the size of each dataset split and of the human-labelled query set.
logger.info('train query num: {}'.format(len(train_dataset)))
logger.info('dev query num: {}'.format(len(dev_dataset)))
logger.info('test query num: {}'.format(len(test_dataset)))
logger.info('human label has {} queries'.format(len(relevance_queries)))

# --- Training (timed) ---
logger.info('Start training')
t0 = time.time()
click_model.train(train_dataset)
train_secs = time.time() - t0
logger.info('Finish training. Time consumed: {} seconds'.format(train_secs))

# --- Log-likelihood on the test split (timed) ---
logger.info('Computing log likelihood')
loglikelihood = LogLikelihood()
t0 = time.time()
ll_value = loglikelihood.evaluate(click_model, test_dataset)
ll_secs = time.time() - t0
logger.info('Log likelihood: {}. Time consumed: {} seconds'.format(
    ll_value, ll_secs))

# --- Perplexity on the test split (timed); evaluate() returns a tuple,
# --- the overall perplexity is its first element.
logger.info('Computing perplexity')
perplexity = Perplexity()
t0 = time.time()
perp_value = perplexity.evaluate(click_model, test_dataset)[0]
perp_secs = time.time() - t0
logger.info('Perplexity: {}. Time consumed: {} seconds'.format(
    perp_value, perp_secs))
# Build the train/test query universes; test sessions are restricted to
# queries that also occur in the training portion.
train_queries = Utils.get_unique_queries(train_sessions)
test_sessions = Utils.filter_sessions(search_sessions[train_test_split:], train_queries)
test_queries = Utils.get_unique_queries(test_sessions)

double_rule = "==============================="
single_rule = "-------------------------------"

print(double_rule)
print("Training on %d search sessions (%d unique queries)." % (len(train_sessions), len(train_queries)))
print(double_rule)

# Train the model and report wall-clock time.
t0 = time.time()
click_model.train(train_sessions)
train_secs = time.time() - t0
print("\tTrained %s click model in %i secs:\n%r" % (click_model.__class__.__name__, train_secs, click_model))

print(single_rule)
print("Testing on %d search sessions (%d unique queries)." % (len(test_sessions), len(test_queries)))
print(single_rule)

loglikelihood = LogLikelihood()
perplexity = Perplexity()

# Held-out log-likelihood, timed.
t0 = time.time()
ll_value = loglikelihood.evaluate(click_model, test_sessions)
ll_secs = time.time() - t0
print("\tlog-likelihood: %f; time: %i secs" % (ll_value, ll_secs))

# Held-out perplexity (first element of the evaluate() tuple), timed.
t0 = time.time()
perp_value = perplexity.evaluate(click_model, test_sessions)[0]
perp_secs = time.time() - t0
print("\tperplexity: %f; time: %i secs" % (perp_value, perp_secs))
sys.exit(1) click_model = globals()[sys.argv[1]]() search_sessions_path = sys.argv[2] search_sessions_num = int(sys.argv[3]) search_sessions = YandexRelPredChallengeParser().parse(search_sessions_path, search_sessions_num) train_test_split = int(len(search_sessions) * 0.75) train_sessions = search_sessions[:train_test_split] train_queries = Utils.get_unique_queries(train_sessions) test_sessions = Utils.filter_sessions(search_sessions[train_test_split:], train_queries) test_queries = Utils.get_unique_queries(test_sessions) print "-------------------------------" print "Training on %d search sessions (%d unique queries)." % (len(train_sessions), len(train_queries)) print "-------------------------------" click_model.train(train_sessions) print "\tTrained %s click model:\n%r" % (click_model.__class__.__name__, click_model) print "-------------------------------" print "Testing on %d search sessions (%d unique queries)." % (len(test_sessions), len(test_queries)) print "-------------------------------" loglikelihood = LogLikelihood() print "\tlog-likelihood: %f" % loglikelihood.evaluate(click_model, test_sessions) perplexity = Perplexity() print "\tperplexity: %f" % perplexity.evaluate(click_model, test_sessions)[0]
search_sessions_path, search_sessions_num)
# NOTE(review): the line above is the tail of a parser call that starts before
# this chunk — presumably YandexRelPredChallengeParser().parse(...); confirm
# against the preceding lines of the file.

# 75/25 train/test split; the test portion keeps only sessions whose query
# also occurs in the training portion.
train_test_split = int(len(search_sessions) * 0.75)
train_sessions = search_sessions[:train_test_split]
train_queries = Utils.get_unique_queries(train_sessions)
test_sessions = Utils.filter_sessions(search_sessions[train_test_split:],
                                      train_queries)
test_queries = Utils.get_unique_queries(test_sessions)

print "-------------------------------"
print "Training on %d search sessions (%d unique queries)." % (
    len(train_sessions), len(train_queries))
print "-------------------------------"
# Train on the training split only.
click_model.train(train_sessions)
print "\tTrained %s click model:\n%r" % (click_model.__class__.__name__,
                                         click_model)
print "-------------------------------"
print "Testing on %d search sessions (%d unique queries)." % (
    len(test_sessions), len(test_queries))
print "-------------------------------"
# Held-out log-likelihood of the trained model.
loglikelihood = LogLikelihood()
print "\tlog-likelihood: %f" % loglikelihood.evaluate(
    click_model, test_sessions)
# evaluate() returns a tuple; the overall perplexity is its first element.
perplexity = Perplexity()
print "\tperplexity: %f" % perplexity.evaluate(click_model, test_sessions)[0]
sys.exit(1) click_model = globals()[sys.argv[1]]() search_sessions_path = sys.argv[2] search_sessions_num = int(sys.argv[3]) search_sessions = YandexRelPredChallengeParser().parse(search_sessions_path, search_sessions_num) train_test_split = int(len(search_sessions) * 0.75) train_sessions = search_sessions[:train_test_split] train_queries = Utils.get_unique_queries(train_sessions) test_sessions = Utils.filter_sessions(search_sessions[train_test_split:], train_queries) test_queries = Utils.get_unique_queries(test_sessions) print "-------------------------------" print "Training on %d search sessions (%d unique queries)." % (len(train_sessions), len(train_queries)) print "-------------------------------" click_model.train(search_sessions) #print "\tTrained %s click model:\n%r" % (click_model.__class__.__name__, click_model) print "-------------------------------" print "Testing on %d search sessions (%d unique queries)." % (len(test_sessions), len(test_queries)) print "-------------------------------" loglikelihood = LogLikelihood() print "\tlog-likelihood: %f" % loglikelihood.evaluate(click_model, test_queries) perplexity = Perplexity() print "\tperplexity: %f" % perplexity.evaluate(click_model, test_queries)[0]