Ejemplo n.º 1
0
    def infer_params(self, click_model, search_sessions, holdout_search_sessions=None):
        """Fit ``click_model`` in place by running ``self.iter_num`` update passes.

        Every pass instantiates a fresh model of the same class as
        ``click_model``, lets each parameter of that fresh model update itself
        from every training session (given the session parameters of the
        current model), and finally copies the fresh model's ``params`` onto
        ``click_model``. Progress is printed to standard output.

        :param click_model: the model to train; its ``params`` attribute is
            replaced after every pass. Its class must be constructible
            without arguments.
        :param search_sessions: the training sessions. When ``None`` or empty
            the method returns immediately without printing anything.
        :param holdout_search_sessions: optional sessions; when given, the
            log-likelihood of the model on them is printed after each pass.
        :return: ``None``.
        """
        if search_sessions is None or len(search_sessions) == 0:
            return

        # The evaluator is only needed when held-out sessions were supplied.
        if holdout_search_sessions is not None:
            loglikelihood = LogLikelihood()

        prompt = 'Starting EM for %s click model:' % click_model.__class__.__name__

        # Single-argument parenthesised prints emit the same text whether
        # ``print`` is a statement (Python 2) or a function (Python 3).
        print('')
        print(prompt)
        print('=' * len(prompt))

        # ``range`` instead of ``xrange``: available on both Python 2 and 3.
        for iteration in range(self.iter_num):
            # Updates are accumulated in a fresh model so that every session
            # in this pass is processed against the same current parameters.
            new_click_model = click_model.__class__()

            for search_session in search_sessions:
                current_session_params = click_model.get_session_params(search_session)
                new_session_params = new_click_model.get_session_params(search_session)

                for rank, _result in enumerate(search_session.web_results):
                    for _param_name, param in new_session_params[rank].items():
                        param.update(search_session, rank, current_session_params)

            click_model.params = new_click_model.params

            if holdout_search_sessions is not None:
                holdout_value = loglikelihood.evaluate(click_model, holdout_search_sessions)
                print('Iteration %d/%d finished: %.12f log-likelihood.'
                      % (iteration + 1, self.iter_num, holdout_value))
            else:
                print('Iteration %d/%d finished.' % (iteration + 1, self.iter_num))
Ejemplo n.º 2
0
    # Log the length of each dataset split and of relevance_queries.
    logger.info('train query num: {}'.format(len(train_dataset)))
    logger.info('dev query num: {}'.format(len(dev_dataset)))
    logger.info('test query num: {}'.format(len(test_dataset)))
    logger.info('human label has {} queries'.format(len(relevance_queries)))

    # Train the click model on the training split and log the elapsed
    # wall-clock time (difference of two time.time() readings).
    logger.info('Start training')
    start = time.time()
    click_model.train(train_dataset)
    end = time.time()
    logger.info('Finish training. Time consumed: {} seconds'.format(end -
                                                                    start))

    # Evaluate the trained model's log-likelihood on the test split, timed
    # the same way as training.
    logger.info('Computing log likelihood')
    loglikelihood = LogLikelihood()
    start = time.time()
    ll_value = loglikelihood.evaluate(click_model, test_dataset)
    end = time.time()
    logger.info('Log likelihood: {}. Time consumed: {} seconds'.format(
        ll_value, end - start))

    # Evaluate perplexity on the test split. Only element 0 of the evaluator's
    # result is kept -- presumably the overall perplexity; confirm against
    # Perplexity.evaluate.
    logger.info('Computing perplexity')
    perplexity = Perplexity()
    start = time.time()
    perp_value = perplexity.evaluate(click_model, test_dataset)[0]
    end = time.time()
    logger.info('Perplexity: {}. Time consumed: {} seconds'.format(
        perp_value, end - start))
Ejemplo n.º 3
0
    # Unique queries of the training sessions; also passed to the test-set
    # filter below.
    train_queries = Utils.get_unique_queries(train_sessions)

    # Test sessions come from the part of search_sessions after the split
    # index, filtered against train_queries -- presumably keeping only
    # sessions whose query was seen in training; confirm against
    # Utils.filter_sessions.
    test_sessions = Utils.filter_sessions(search_sessions[train_test_split:], train_queries)
    test_queries = Utils.get_unique_queries(test_sessions)

    print("===============================")
    print("Training on %d search sessions (%d unique queries)." % (len(train_sessions), len(train_queries)))
    print("===============================")

    # Train and report the elapsed wall-clock time; "%i" truncates the float
    # difference to whole seconds. "%r" prints the model's repr.
    start = time.time()
    click_model.train(train_sessions)
    end = time.time()
    print("\tTrained %s click model in %i secs:\n%r" % (click_model.__class__.__name__, end - start, click_model))

    print("-------------------------------")
    print("Testing on %d search sessions (%d unique queries)." % (len(test_sessions), len(test_queries)))
    print("-------------------------------")

    loglikelihood = LogLikelihood()
    perplexity = Perplexity()

    # Log-likelihood on the filtered test sessions, with its own timing.
    start = time.time()
    ll_value = loglikelihood.evaluate(click_model, test_sessions)
    end = time.time()
    print("\tlog-likelihood: %f; time: %i secs" % (ll_value, end - start))

    # Perplexity on the same sessions. Only element 0 of the result is
    # printed -- presumably the overall perplexity; confirm against
    # Perplexity.evaluate.
    start = time.time()
    perp_value = perplexity.evaluate(click_model, test_sessions)[0]
    end = time.time()
    print("\tperplexity: %f; time: %i secs" % (perp_value, end - start))
Ejemplo n.º 4
0
        sys.exit(1)

    # Command-line arguments: argv[1] is the name of a callable in this
    # module's globals (called with no arguments to build the click model; an
    # unknown name raises KeyError), argv[2] is the sessions file path and
    # argv[3] an integer passed to the parser together with the path.
    click_model = globals()[sys.argv[1]]()
    search_sessions_path = sys.argv[2]
    search_sessions_num = int(sys.argv[3])

    search_sessions = YandexRelPredChallengeParser().parse(search_sessions_path, search_sessions_num)

    # The first 75% of the parsed sessions (rounded down) are used for training.
    train_test_split = int(len(search_sessions) * 0.75)
    train_sessions = search_sessions[:train_test_split]
    train_queries = Utils.get_unique_queries(train_sessions)

    # The remaining sessions are filtered against train_queries -- presumably
    # keeping only sessions whose query was seen in training; confirm against
    # Utils.filter_sessions.
    test_sessions = Utils.filter_sessions(search_sessions[train_test_split:], train_queries)
    test_queries = Utils.get_unique_queries(test_sessions)

    print "-------------------------------"
    print "Training on %d search sessions (%d unique queries)." % (len(train_sessions), len(train_queries))
    print "-------------------------------"

    # Train on the training split and print the model's repr.
    click_model.train(train_sessions)
    print "\tTrained %s click model:\n%r" % (click_model.__class__.__name__, click_model)

    print "-------------------------------"
    print "Testing on %d search sessions (%d unique queries)." % (len(test_sessions), len(test_queries))
    print "-------------------------------"

    # Evaluate on the filtered test sessions. For perplexity only element 0 of
    # the result is printed -- presumably the overall perplexity; confirm
    # against Perplexity.evaluate.
    loglikelihood = LogLikelihood()
    print "\tlog-likelihood: %f" % loglikelihood.evaluate(click_model, test_sessions)
    perplexity = Perplexity()
    print "\tperplexity: %f" % perplexity.evaluate(click_model, test_sessions)[0]
Ejemplo n.º 5
0
        search_sessions_path, search_sessions_num)

    # The first 75% of the parsed sessions (rounded down) are used for training.
    train_test_split = int(len(search_sessions) * 0.75)
    train_sessions = search_sessions[:train_test_split]
    train_queries = Utils.get_unique_queries(train_sessions)

    # The remaining sessions are filtered against train_queries -- presumably
    # keeping only sessions whose query was seen in training; confirm against
    # Utils.filter_sessions.
    test_sessions = Utils.filter_sessions(search_sessions[train_test_split:],
                                          train_queries)
    test_queries = Utils.get_unique_queries(test_sessions)

    print "-------------------------------"
    print "Training on %d search sessions (%d unique queries)." % (
        len(train_sessions), len(train_queries))
    print "-------------------------------"

    # Train on the training split and print the model's repr.
    click_model.train(train_sessions)
    print "\tTrained %s click model:\n%r" % (click_model.__class__.__name__,
                                             click_model)

    print "-------------------------------"
    print "Testing on %d search sessions (%d unique queries)." % (
        len(test_sessions), len(test_queries))
    print "-------------------------------"

    # Evaluate on the filtered test sessions. For perplexity only element 0 of
    # the result is printed -- presumably the overall perplexity; confirm
    # against Perplexity.evaluate.
    loglikelihood = LogLikelihood()
    print "\tlog-likelihood: %f" % loglikelihood.evaluate(
        click_model, test_sessions)
    perplexity = Perplexity()
    print "\tperplexity: %f" % perplexity.evaluate(click_model,
                                                   test_sessions)[0]
Ejemplo n.º 6
0
        sys.exit(1)

    click_model = globals()[sys.argv[1]]()
    search_sessions_path = sys.argv[2]
    search_sessions_num = int(sys.argv[3])

    search_sessions = YandexRelPredChallengeParser().parse(search_sessions_path, search_sessions_num)

    train_test_split = int(len(search_sessions) * 0.75)
    train_sessions = search_sessions[:train_test_split]
    train_queries = Utils.get_unique_queries(train_sessions)

    test_sessions = Utils.filter_sessions(search_sessions[train_test_split:], train_queries)
    test_queries = Utils.get_unique_queries(test_sessions)

    print "-------------------------------"
    print "Training on %d search sessions (%d unique queries)." % (len(train_sessions), len(train_queries))
    print "-------------------------------"

    click_model.train(search_sessions)
    #print "\tTrained %s click model:\n%r" % (click_model.__class__.__name__, click_model)

    print "-------------------------------"
    print "Testing on %d search sessions (%d unique queries)." % (len(test_sessions), len(test_queries))
    print "-------------------------------"

    loglikelihood = LogLikelihood()
    print "\tlog-likelihood: %f" % loglikelihood.evaluate(click_model, test_queries)
    perplexity = Perplexity()
    print "\tperplexity: %f" % perplexity.evaluate(click_model, test_queries)[0]