Example #1
def reciprocal_rank_at_recall(run, label_store):
    '''
    Reciprocal of the first rank at which all of a topic's subtopics have
    been seen, macro-averaged over topics. Writes the result into
    run['scores']['reciprocal_rank_at_recall'].
    '''
    scores_by_topic = dict()

    ## score for each topic
    for topic_id, results in run['results'].items():
        ## get all subtopics for the topic
        subtopic_ids = set(get_all_subtopics(label_store, topic_id))

        seen_subtopics = set()

        for idx, result in enumerate(results):
            assert idx == result['rank'] - 1

            # check off seen subtopics

            for subtopic, conf in get_best_subtopics(result['subtopics']):
                seen_subtopics.add(subtopic)

            if len(seen_subtopics) == len(subtopic_ids):
                break

        scores_by_topic[topic_id] = 1 / (idx + 1)

    ## macro average over all the topics
    macro_avg = mean(scores_by_topic.values())
    run['scores']['reciprocal_rank_at_recall'] = \
        {'scores_by_topic': scores_by_topic, 'macro_average': macro_avg}
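
This scorer assumes that run['results'] maps each topic id to a ranked result list (each result carrying 'rank' and 'subtopics' fields) and that get_all_subtopics, get_best_subtopics, and mean are provided by the surrounding module. As a minimal, self-contained sketch of the same idea, with a hypothetical function name and simplified inputs, the per-topic reciprocal rank at recall can be written as:

def rr_at_recall_single(ranked_subtopic_sets, all_subtopics):
    '''Reciprocal of the first rank at which every subtopic in
    all_subtopics has been seen; 0.0 if full recall is never reached.
    ranked_subtopic_sets is a list of sets of subtopic ids, rank 1 first.'''
    seen = set()
    for idx, subtopics in enumerate(ranked_subtopic_sets):
        seen.update(subtopics)
        if seen >= set(all_subtopics):
            return 1 / (idx + 1)
    return 0.0

# e.g. rr_at_recall_single([{'a'}, {'b'}, {'a', 'c'}], {'a', 'b', 'c'}) == 1/3

Note that the scorer above falls back to 1 / (number of results scanned) when the full subtopic set is never covered, whereas this sketch returns 0.0 in that case.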
Example #2
def precision_at_recall(run, label_store):
    '''
    Fraction of on-topic documents among those scanned up to the first rank
    at which all of a topic's subtopics have been seen, macro-averaged over
    topics. Writes the result into run['scores']['precision_at_recall'].
    '''
    scores_by_topic = dict()

    ## score for each topic
    for topic_id, results in run['results'].items():
        ## get all subtopics for the topic
        subtopic_ids = set(get_all_subtopics(label_store, topic_id))

        seen_subtopics = set()
        relevant_docs = 0

        for idx, result in enumerate(results):
            assert idx == result['rank'] - 1

            result_subtopics = \
                {subtopic for subtopic, conf in get_best_subtopics(result['subtopics'])}

            if result['on_topic']:
                relevant_docs += 1

            seen_subtopics.update(result_subtopics)
            if len(seen_subtopics) == len(subtopic_ids):
                break

        ## precision = on-topic documents / documents scanned up to the stopping point
        p = relevant_docs / (idx + 1)
        scores_by_topic[topic_id] = p

    ## macro average over all the topics
    macro_avg = mean(scores_by_topic.values())
    run['scores']['precision_at_recall'] = \
        {'scores_by_topic': scores_by_topic, 'macro_average': macro_avg}
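
A corresponding self-contained sketch for a single topic, again with hypothetical names and a simplified result format (each result carrying an 'on_topic' flag and a plain set of subtopic ids instead of the 'subtopics' field consumed by get_best_subtopics above):

def precision_at_full_recall_single(results, all_subtopics):
    '''Fraction of scanned documents that are on-topic, counted up to the
    first rank at which every subtopic has been seen, or over the whole
    ranking if full recall is never reached.'''
    seen = set()
    relevant = 0
    stop = 0
    for idx, result in enumerate(results):
        if result['on_topic']:
            relevant += 1
        seen.update(result['subtopic_ids'])
        stop = idx + 1
        if seen >= set(all_subtopics):
            break
    return relevant / stop if stop else 0.0

# e.g. results = [{'on_topic': True,  'subtopic_ids': {'a'}},
#                 {'on_topic': False, 'subtopic_ids': set()},
#                 {'on_topic': True,  'subtopic_ids': {'b'}}]
# precision_at_full_recall_single(results, {'a', 'b'}) == 2 / 3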
Example #5
def average_err(run,
                label_store,
                mean_type='arithmetic',
                relevance_metric='graded'):
    '''
    Expected Reciprocal Rank (ERR) computed per subtopic, averaged within
    each topic, and macro-averaged over topics. relevance_metric selects
    the function used to turn a subtopic confidence into a relevance value.

    mean_type can be 'arithmetic' or 'harmonic'.
    '''
    scores_by_topic = dict()
    relevance_func = relevance_metrics[relevance_metric]

    ## score for each topic
    for topic_id, results in run['results'].items():
        ## get all subtopics for the topic
        subtopic_ids = list(set(get_all_subtopics(label_store, topic_id)))

        # for each subtopic, track the probability that the user keeps
        # scanning past each rank (p_continue) and the accumulated ERR score
        p_continue = defaultdict(lambda: 1)
        score = defaultdict(float)

        for idx, result in enumerate(results):
            assert idx == result['rank'] - 1

            for subtopic, conf in get_best_subtopics(result['subtopics']):
                rel = relevance_func(conf)

                p_stop_here = p_continue[subtopic] * rel
                score[subtopic] += p_stop_here / (idx + 1)

                ## update stopping probabilities
                p_continue[subtopic] *= (1 - rel)

        ## average the per-subtopic ERR scores for this topic
        if mean_type == 'arithmetic':
            scores_by_topic[topic_id] = mean(score.values())
        elif mean_type == 'harmonic':
            scores_by_topic[topic_id] = harmonic_mean(score.values())
        else:
            sys.exit('Error: invalid mean type specified.')

    ## macro average over all the topics
    macro_avg = mean(scores_by_topic.values())

    scorer_name = 'average_err_%s' % mean_type

    run['scores'][scorer_name] = \
        {'scores_by_topic': scores_by_topic, 'macro_average': macro_avg}
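
The per-subtopic quantity accumulated above is the standard Expected Reciprocal Rank (ERR): at each rank the user stops with probability equal to that document's relevance for the subtopic, given that they have not stopped earlier, and stopping at rank r contributes a payoff of 1/r. A minimal sketch for a single subtopic, assuming relevance_func already yields values in [0, 1]:

def err_single_subtopic(relevances):
    '''Expected Reciprocal Rank for one subtopic, given per-rank relevance
    probabilities in [0, 1], rank 1 first.'''
    p_continue = 1.0   # probability the user is still scanning
    score = 0.0
    for idx, rel in enumerate(relevances):
        score += p_continue * rel / (idx + 1)  # stop here: payoff 1 / (idx + 1)
        p_continue *= (1 - rel)                # otherwise keep scanning
    return score

# e.g. err_single_subtopic([0.5, 1.0]) == 0.5/1 + (1 - 0.5) * 1.0/2 == 0.75

average_err then takes the arithmetic or harmonic mean of these per-subtopic scores within each topic before macro-averaging across topics.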