Example #1
def main():
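    # DATA_FILE_TRAIN, QRELS, DATA_FILE_PREDICT, DEVICE, LEARNING_RATE, NUM_EPOCHS,
    # EPOCH_SIZE and DATA_FILE_PREDICTOUT are module-level constants defined elsewhere.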
    READER_TRAIN = DataReader(DATA_FILE_TRAIN, 0, True, 256)
    qrels, scores = load_file_to_rerank(QRELS, 1)
    _, to_rerank = load_file_to_rerank(DATA_FILE_PREDICT, 0)
    torch.manual_seed(1)
    print_message('Starting')
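    # Resume from an existing checkpoint if present, otherwise build a fresh Duet model.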
    try:
        net = torch.load('tensors.duet').to(DEVICE)
        print_message("Previous Model Found and Loaded")
    except FileNotFoundError:
        print_message("No Previous Model Found. Creating new")
        net = Duet().to(DEVICE)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE)
    print_message('Number of learnable parameters: {}'.format(net.parameter_count()))
    print_message('Training for {} epochs'.format(NUM_EPOCHS))
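    # One pass over the training reader per epoch; checkpoint and report MRR after each epoch.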
    for ep_idx in range(NUM_EPOCHS):
        net, train_loss = train(READER_TRAIN, net, optimizer, criterion)
        print_message('epoch:{}, loss: {}'.format(ep_idx + 1, train_loss / EPOCH_SIZE))
        mrr = evaluate(net, qrels, scores, to_rerank)
        print_message('MRR @1000:{}'.format(mrr))
        torch.cuda.empty_cache()
        torch.save(net, 'tensors.duet')
    print_message('Done Training')
    print_message('Evaluating')
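    # Score the final predicted run file against the qrels with the MS MARCO evaluation script.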
    metrics = msmarco_eval.compute_metrics_from_files(QRELS, DATA_FILE_PREDICTOUT)
    print_message('#####################')
    for metric in sorted(metrics):
        print_message('{}: {}'.format(metric, metrics[metric]))
    print_message('#####################')
Example #2
def main():
    args = get_args()

    if args.sp_folder is not None:
        eval_specific_folder(args)
        exit(0)

    if args.full_partition:
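        # Cache the loaded collection on disk so later runs can skip the expensive load.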
        cache_fname = 'full_collection.npy'
        if os.path.exists(cache_fname):
            collection = np.load(cache_fname, allow_pickle=True).item()
        else:
            collection = load_collection(args)
            np.save(cache_fname, collection)
    else:
        collection = load_collection(args)

    if args.each_layer:
        eval_each_layer(args, collection)
        exit(0)
    if args.detail:
        print_relevant_pair_details(args, collection)
    final_scores, avg_layer = generate_final_scores_confidence(
        args, collection)
    run_fname = generate_run_file(args, final_scores)
    metrics = compute_metrics_from_files(args.reference, run_fname)
    print('Avg layer: {}'.format(avg_layer))
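    # sorted() puts 'MRR @10' ahead of the other keys, so the break below prints only that metric.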
    for metric in sorted(metrics):
        print('{}: {}'.format(metric, metrics[metric]))
        break

    if not args.no_save_np:
        save_to_np(args, metrics, avg_layer)
Example #3
def eval_each_layer(args, collection):
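    # Re-rank every query using only the score produced at layer i, then report MRR @10 per layer.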
    with open('each_layer.txt', 'w') as fout:
        for i in range(args.n_layers):
            # key: qid; value: list of [pid, score_at_layer_i, layer, full_score_list]
            final_scores = defaultdict(list)
            for qid, qid_value in collection.items():
                for pid, score_list in qid_value.items():
                    final_scores[qid].append(
                        [pid, score_list[i], i + 1, score_list])

            run_fname = generate_run_file(args, final_scores)
            metrics = compute_metrics_from_files(args.reference, run_fname)
            print(f'Layer {i}:\t{metrics["MRR @10"]}', file=fout)
Example #4
def eval_specific_folder(args):
    final_scores = defaultdict(list)  # key: qid; value: list of [pid, score, exit_layer, []]
    used_layers = 0
    pairs = 0

    for par in args.partition:
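        # Each partition score file holds tab-separated lines: qid, pid, score, exit layer.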
        score_path = os.path.join(args.sp_folder, f'{args.split}.partition{par}.score')
        with open(score_path) as fin:
            for line in fin:
                qid, pid, score, layer = line.strip().split('\t')
                pairs += 1
                final_scores[qid].append([pid, float(score), int(layer), []])
                used_layers += int(layer)

    run_fname = generate_run_file(args, final_scores)
    print('Avg-layer:', used_layers / pairs)
    metrics = compute_metrics_from_files(args.reference, run_fname)
    for metric in sorted(metrics):
        print('{}: {}'.format(metric, metrics[metric]))
Example #5
class FakeScorer:
    """Stub scorer that assigns every passage the same score."""

    def score_query_passages(self, query, passages, batch_size):
        return [1] * len(passages)

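# EvaluationQueries and compute_metrics_from_files are assumed to be imported elsewhere.
# With a constant score for every passage the candidate (BM25) order is presumably preserved,
# so the measured MRR @10 should match the BM25 baseline mrr_ref below.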
bm25_path = "../data/evaluation/bm25/run.dev.small.tsv"
queries_path = "../data/queries/queries.dev.small.tsv"
passages_path = "../data/passages/passages.bm25.small.json"
candidate_path = "../data/evaluation/test/run.tsv"
reference_path = "../data/evaluation/gold/qrels.dev.small.tsv"

n_top = 50
n_queries_to_evaluate = None

mrr_ref = 0.18741227770955546  # expected MRR @10 (BM25 baseline on dev.small)

model = FakeScorer()

evaluator = EvaluationQueries(bm25_path, queries_path, passages_path, n_top)

evaluator.score(model, candidate_path, n_queries_to_evaluate)
mrr_metrics = compute_metrics_from_files(reference_path, candidate_path)

assert int(1000 * mrr_ref) == int(1000 * mrr_metrics['MRR @10']), "Test failed"

print(mrr_metrics)
print('Test ok')
Example #6
def evaluate(path_to_reference, path_to_candidate):
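    # Compute and print all MS MARCO metrics for a candidate run against the reference qrels.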
    metrics = msmarco_eval.compute_metrics_from_files(path_to_reference, path_to_candidate)
    print('#####################')
    for metric in sorted(metrics):
        print('{}: {}'.format(metric, metrics[metric]))
    print('#####################')