예제 #1
0
def eval_hits_at_k_file(filename, k=2, oracle=False):
    """Evaluate hits-at-k on the labels and scores stored in ``filename``.

    The file is read with ``utils.load_labels``, which is expected to return
    a pair ``(labels, scores)``; both are forwarded unchanged to
    ``eval_hits_at_k`` together with ``k`` and ``oracle``.
    """
    loaded = utils.load_labels(filename)
    label_lists, score_lists = loaded
    return eval_hits_at_k(label_lists, score_lists, k=k, oracle=oracle)
예제 #2
0
파일: tpms.py 프로젝트: mspector/expertise
def test(config):
    """Join TPMS scores with the test labels and save ranking metrics.

    Steps, in order:

    1. Read ``config.tpms_scores_file`` into a nested ``source -> target ->
       score`` table, keeping only the first score seen for each pair.
    2. For every record in ``test_labels.jsonl`` (under ``config.setup_dir``)
       whose source id appears in that table, write a labeled score record to
       ``test_scores.jsonl`` (under ``config.test_dir``). A missing target
       falls back to a score of ``0.0``.
    3. Reload the written file with ``utils.load_labels`` and compute MAP and
       Hits@{1,3,5,10}, saving them through ``config.test_save``.
    """
    scores_out_path = os.path.join(config.test_dir, 'test_scores.jsonl')
    labels_in_path = os.path.join(config.setup_dir, 'test_labels.jsonl')
    tpms_path = config.tpms_scores_file

    # source_id -> {target_id: score}; setdefault keeps the first score seen.
    score_table = {}
    for record in utils.jsonl_reader(tpms_path):
        src = record['source_id']
        tgt = record['target_id']
        value = record['score']
        score_table.setdefault(src, {}).setdefault(tgt, value)

    with open(scores_out_path, 'w') as out:
        for record in utils.jsonl_reader(labels_in_path):
            paper_id = record['source_id']
            user_id = record['target_id']
            raw_label = record['label']

            # Papers that have no TPMS scores at all are left out entirely.
            if paper_id not in score_table:
                continue

            numeric_score = float(score_table[paper_id].get(user_id, 0.0))
            # Drops -inf (and NaN, for which the comparison is False).
            if not numeric_score > -float('inf'):
                continue

            row = {
                'source_id': paper_id,
                'target_id': user_id,
                'score': numeric_score,
                'label': int(raw_label),
            }
            out.write(json.dumps(row) + '\n')

    label_lists, score_lists = utils.load_labels(scores_out_path)

    metrics = [('MAP', float(eval_map(label_lists, score_lists)))]
    for metric_name, cutoff in (('Hits@1', 1), ('Hits@3', 3), ('Hits@5', 5),
                                ('Hits@10', 10)):
        hits = float(eval_hits_at_k(label_lists, score_lists, k=cutoff))
        metrics.append((metric_name, hits))

    score_lines = [[config.name, metric_name, metric_value]
                   for metric_name, metric_value in metrics]
    config.test_save(score_lines, 'test.scores.tsv')
예제 #3
0
def test(config):
    """Label inferred scores with bid data and save ranking metrics.

    Steps, in order:

    1. Build a ``forum -> reviewer -> label`` table from ``dataset.bids()``,
       where the label is 1 when the bid's tag is in
       ``dataset.positive_bid_values`` and 0 otherwise. The reviewer is taken
       from the first entry of the bid's signatures.
    2. Read ``<config.name>-scores.jsonl`` from ``config.infer_dir``; every
       record whose (forum, reviewer) pair has a label is copied, given that
       label, and has its score replaced by a value clamped to be
       non-negative (anything not ``>= 0.0`` becomes ``0.0``).
    3. Save the labeled records as ``score_labels.jsonl`` via
       ``config.test_save``, reload them with ``utils.load_labels`` and save
       MAP and Hits@{1,3,5,10} as ``test.scores.tsv``.
    """
    dataset = Dataset(**config.dataset)

    labels_by_reviewer_by_forum = defaultdict(dict)
    for bid in dataset.bids():
        is_positive = int(bid.tag in dataset.positive_bid_values)
        labels_by_reviewer_by_forum[bid.forum][bid.signatures[0]] = is_positive

    scores_filename = config.name + '-scores.jsonl'
    inferred_scores_path = os.path.join(config.infer_dir, scores_filename)

    labeled_records = []
    for record in utils.jsonl_reader(inferred_scores_path):
        forum_id = record['source_id']
        reviewer_id = record['target_id']
        raw_score = float(record['score'])
        # Negative values and NaN both fail ``>= 0.0`` and collapse to zero.
        clamped_score = raw_score if raw_score >= 0.0 else 0.0

        forum_labels = labels_by_reviewer_by_forum[forum_id]
        if reviewer_id not in forum_labels:
            continue

        labeled = dict(record)
        labeled['label'] = forum_labels[reviewer_id]
        labeled['score'] = clamped_score
        labeled_records.append(labeled)

    config.test_save(labeled_records, 'score_labels.jsonl')

    saved_labels_path = config.test_path('score_labels.jsonl')
    label_lists, score_lists = utils.load_labels(saved_labels_path)

    metrics = [('MAP', float(eval_map(label_lists, score_lists)))]
    for metric_name, cutoff in (('Hits@1', 1), ('Hits@3', 3), ('Hits@5', 5),
                                ('Hits@10', 10)):
        hits = float(eval_hits_at_k(label_lists, score_lists, k=cutoff))
        metrics.append((metric_name, hits))

    score_lines = [[config.name, metric_name, metric_value]
                   for metric_name, metric_value in metrics]
    config.test_save(score_lines, 'test.scores.tsv')
예제 #4
0
def eval_map_file(filename):
    """Evaluate ``eval_map`` on the labels and scores stored in ``filename``.

    The file is read with ``utils.load_labels``, which is expected to return
    a pair ``(labels, scores)``; both are passed straight to ``eval_map``.
    """
    loaded = utils.load_labels(filename)
    label_lists, score_lists = loaded
    return eval_map(label_lists, score_lists)