Example #1
0
def test(config):
    """Score the labelled test pairs with TPMS scores and save ranking metrics.

    Loads ``config.tpms_scores_file`` into a lookup keyed by source id and
    then target id, writes one scored record per labelled pair to
    ``test_scores.jsonl`` under ``config.test_dir``, and finally computes MAP
    and Hits@{1, 3, 5, 10} from that file, handing the resulting rows to
    ``config.test_save`` as ``test.scores.tsv``.
    """
    score_file_path = os.path.join(config.test_dir, 'test_scores.jsonl')
    labels_file_path = os.path.join(config.setup_dir, 'test_labels.jsonl')
    tpms_scores_file = config.tpms_scores_file

    # The first score seen for a (source, target) pair wins; repeats are
    # ignored because setdefault never overwrites an existing entry.
    tpms_scores = {}
    for entry in utils.jsonl_reader(tpms_scores_file):
        source_id = entry['source_id']
        target_id = entry['target_id']
        score = entry['score']
        tpms_scores.setdefault(source_id, {}).setdefault(target_id, score)

    with open(score_file_path, 'w') as out:
        for entry in utils.jsonl_reader(labels_file_path):
            paperid = entry['source_id']
            userid = entry['target_id']
            label = entry['label']

            # Sources that have no scores at all are left out of the output.
            if paperid not in tpms_scores:
                continue

            # A target without a score for this source defaults to 0.0.
            value = float(tpms_scores[paperid].get(userid, 0.0))

            # Only values strictly greater than -inf are written; this also
            # rejects NaN, for which the comparison is False.
            if not value > -float('inf'):
                continue

            record = {
                'source_id': paperid,
                'target_id': userid,
                'score': value,
                'label': int(label)
            }
            out.write(json.dumps(record) + '\n')

    # Re-read the file just written, in the form the eval helpers consume.
    loaded = utils.load_labels(score_file_path)
    (list_of_list_of_labels, list_of_list_of_scores) = loaded

    metrics = [
        ('MAP',
         float(eval_map(list_of_list_of_labels, list_of_list_of_scores))),
    ]
    for metric_name, cutoff in (('Hits@1', 1), ('Hits@3', 3),
                                ('Hits@5', 5), ('Hits@10', 10)):
        hits = eval_hits_at_k(
            list_of_list_of_labels, list_of_list_of_scores, k=cutoff)
        metrics.append((metric_name, float(hits)))

    score_lines = [[config.name, metric_name, metric_value]
                   for metric_name, metric_value in metrics]
    config.test_save(score_lines, 'test.scores.tsv')
def load_jsonl(filename):
    """Group the labels and scores of a jsonl file by forum.

    Each record read through ``utils.jsonl_reader`` must provide the keys
    ``source_id`` (forum), ``target_id`` (reviewer), ``label`` and ``score``.
    When a (forum, reviewer) pair occurs more than once, the last record wins.

    Returns a tuple ``(labels, scores)`` of two parallel lists. Each has one
    inner list per forum, in order of the forum's first appearance, and the
    values inside each inner list are ordered by sorted reviewer id.
    """
    # One mapping per forum: reviewer -> (label, score).
    pairs_by_forum = defaultdict(dict)

    for record in utils.jsonl_reader(filename):
        forum = record['source_id']
        reviewer = record['target_id']
        pairs_by_forum[forum][reviewer] = (record['label'], record['score'])

    result_labels, result_scores = [], []

    for pairs_by_reviewer in pairs_by_forum.values():
        # Sorting by reviewer keeps labels and scores aligned with each other.
        ordered_pairs = [
            pairs_by_reviewer[reviewer]
            for reviewer in sorted(pairs_by_reviewer)
        ]
        result_labels.append([label for label, _ in ordered_pairs])
        result_scores.append([score for _, score in ordered_pairs])

    return result_labels, result_scores
Example #3
0
def test(config):
    """Label the inferred scores with bid data and save ranking metrics.

    Builds a forum -> reviewer -> 0/1 label table from the bids of
    ``Dataset(**config.dataset)``, attaches those labels to the records of
    ``<config.name>-scores.jsonl`` in ``config.infer_dir``, saves the labelled
    records as ``score_labels.jsonl`` and then saves MAP and
    Hits@{1, 3, 5, 10} as ``test.scores.tsv`` through ``config.test_save``.
    """
    dataset = Dataset(**config.dataset)

    # A bid is labelled 1 when its tag is one of the dataset's positive bid
    # values and 0 otherwise; the first signature identifies the reviewer.
    labels_by_reviewer_by_forum = defaultdict(dict)
    for bid in dataset.bids():
        is_positive = bid.tag in dataset.positive_bid_values
        labels_by_reviewer_by_forum[bid.forum][bid.signatures[0]] = (
            1 if is_positive else 0)

    inferred_scores_path = os.path.join(config.infer_dir,
                                        config.name + '-scores.jsonl')

    labeled_data_list = []
    for record in utils.jsonl_reader(inferred_scores_path):
        forum = record['source_id']
        reviewer = record['target_id']

        # Anything that is not >= 0.0 (negative values, NaN) becomes 0.0.
        raw_score = float(record['score'])
        score = raw_score if raw_score >= 0.0 else 0.0

        # Records without a matching bid have no label and are dropped.
        forum_labels = labels_by_reviewer_by_forum[forum]
        if reviewer not in forum_labels:
            continue

        labeled_data_list.append(
            {**record, 'label': forum_labels[reviewer], 'score': score})

    config.test_save(labeled_data_list, 'score_labels.jsonl')

    labels_file = config.test_path('score_labels.jsonl')
    loaded = utils.load_labels(labels_file)
    list_of_list_of_labels, list_of_list_of_scores = loaded

    metrics = [
        ('MAP',
         float(eval_map(list_of_list_of_labels, list_of_list_of_scores))),
    ]
    for metric_name, cutoff in (('Hits@1', 1), ('Hits@3', 3),
                                ('Hits@5', 5), ('Hits@10', 10)):
        hits = eval_hits_at_k(
            list_of_list_of_labels, list_of_list_of_scores, k=cutoff)
        metrics.append((metric_name, float(hits)))

    score_lines = [[config.name, metric_name, metric_value]
                   for metric_name, metric_value in metrics]
    config.test_save(score_lines, 'test.scores.tsv')
Example #4
0
def read_bid_records(data_dir, return_batches):
    """Iterate over the jsonl records of every file in ``data_dir``.

    The file id is the file name with every occurrence of ``.jsonl`` removed.

    With a falsy ``return_batches`` this yields ``(file_id, record)`` once per
    record. With a truthy ``return_batches`` it yields ``(file_id, records)``
    once per file, where ``records`` is the list of all records in that file
    (an empty list for a file without records).
    """
    for entry_name in os.listdir(data_dir):
        entry_path = os.path.join(data_dir, entry_name)
        file_id = entry_name.replace('.jsonl', '')
        records = utils.jsonl_reader(entry_path)

        if return_batches:
            # Materialise the whole file so it can be handed over in one go.
            yield file_id, list(records)
        else:
            for record in records:
                yield file_id, record
Example #5
0
 def _read_bids(self):
     """Yield a ``Tag`` for every jsonl record found in ``self.bids_dir``.

     Every entry returned by ``os.listdir(self.bids_dir)`` is read with
     ``utils.jsonl_reader`` and each record is converted through
     ``Tag.from_json``. Files are visited in the order ``os.listdir``
     returns them.
     """
     for filename in os.listdir(self.bids_dir):
         filepath = os.path.join(self.bids_dir, filename)
         # The file name is only needed to build the path; no per-file id
         # is attached to the yielded tags.
         for json_line in utils.jsonl_reader(filepath):
             yield Tag.from_json(json_line)
 def fold_reader(id):
     """Return a jsonl reader over the fold file ``<id>.jsonl``.

     The file is looked up in the ``folds`` directory under
     ``config.kp_setup_dir``.
     """
     fold_name = f'{id}.jsonl'
     return utils.jsonl_reader(
         os.path.join(config.kp_setup_dir, 'folds', fold_name))
Example #7
0
 def fold_reader(id):
     """Return a jsonl reader over the fold file ``<id>.jsonl``.

     The file is looked up directly under ``config.bpr_samples``.
     """
     fold_name = f'{id}.jsonl'
     return utils.jsonl_reader(os.path.join(config.bpr_samples, fold_name))