Example #1
    def run(self):
        output_dir = self.get_results_path()

        # read the title queries from the chosen benchmark's topic file
        results1 = self.searcher1.query_from_file(
            self.benchmark.get_topics_file(), output_dir / "searcher1")
        results2 = self.searcher2.query_from_file(
            self.benchmark.get_topics_file(), output_dir / "searcher2")
        searcher_results = [results1, results2]

        # using the benchmark's folds, which each contain train/validation/test queries,
        # choose the best run in `output_dir` for the fold based on the validation queries
        # and return metrics calculated on the test queries
        best_results = evaluator.search_best_run(
            searcher_results,
            self.benchmark,
            primary_metric=self.config["optimize"],
            metrics=evaluator.DEFAULT_METRICS)

        for fold, path in best_results["path"].items():
            shortpath = "..." + path[-40:]
            logger.info("fold=%s best run: %s", fold, shortpath)

        logger.info("cross-validated results when optimizing for '%s':",
                    self.config["optimize"])
        for metric, score in sorted(best_results["score"].items()):
            logger.info("%15s: %0.4f", metric, score)

        return best_results
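The loops above assume that search_best_run returns a dict with a "path" entry (fold name to best run file) and a "score" entry (metric name to cross-validated score). The snippet below is a minimal sketch of that layout; the fold names, paths, and scores are made-up placeholders, not real output.

# Hypothetical return value mirroring the layout used above; fold names,
# paths, and scores are placeholders.
best_results = {
    "path": {"s1": "/runs/searcher1/run_3", "s2": "/runs/searcher2/run_7"},
    "score": {"map": 0.2513, "P_20": 0.3340, "ndcg_cut_20": 0.4102},
}

for fold, path in best_results["path"].items():
    print("fold=%s best run: %s" % (fold, "..." + path[-40:]))

for metric, score in sorted(best_results["score"].items()):
    print("%15s: %0.4f" % (metric, score))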
Example #2
def evaluate(config, modules):
    metric = "map"
    fold = config["fold"]
    train_output_path = _pipeline_path(config, modules)
    test_output_path = train_output_path / "pred" / "test" / "best"

    searcher = modules["searcher"]
    benchmark = modules["benchmark"]
    reranker = modules["reranker"]

    # reuse cached test predictions when they exist; otherwise rerank the best first-stage run
    if os.path.exists(test_output_path):
        test_preds = Searcher.load_trec_run(test_output_path)
    else:
        topics_fn = benchmark.topic_file
        searcher_cache_dir = os.path.join(searcher.get_cache_path(), benchmark.name)
        searcher_run_dir = searcher.query_from_file(topics_fn, searcher_cache_dir)

        # select the searcher run that scores best on this fold's validation queries
        best_search_run_path = evaluator.search_best_run(searcher_run_dir, benchmark, metric)["path"][fold]
        best_search_run = searcher.load_trec_run(best_search_run_path)

        docids = set(docid for querydocs in best_search_run.values() for docid in querydocs)
        reranker["extractor"].create(qids=best_search_run.keys(), docids=docids, topics=benchmark.topics[benchmark.query_type])
        reranker.build()

        # restore the reranker weights that performed best during training
        reranker["trainer"].load_best_model(reranker, train_output_path)

        test_run = {qid: docs for qid, docs in best_search_run.items() if qid in benchmark.folds[fold]["predict"]["test"]}
        test_dataset = PredDataset(qid_docid_to_rank=test_run, extractor=reranker["extractor"], mode="test")

        test_preds = reranker["trainer"].predict(reranker, test_dataset, test_output_path)

    # score the test predictions against the benchmark's relevance judgments
    metrics = evaluator.eval_runs(test_preds, benchmark.qrels, ["ndcg_cut_20", "ndcg_cut_10", "map", "P_20", "P_10"])
    print("test metrics for fold=%s:" % fold, metrics)

    print("\ncomputing metrics across all folds")
    avg = {}
    found = 0
    for fold in benchmark.folds:
        pred_path = _pipeline_path(config, modules, fold=fold) / "pred" / "test" / "best"
        if not os.path.exists(pred_path):
            print("\tfold=%s results are missing and will not be included" % fold)
            continue

        found += 1
        preds = Searcher.load_trec_run(pred_path)
        metrics = evaluator.eval_runs(preds, benchmark.qrels, ["ndcg_cut_20", "ndcg_cut_10", "map", "P_20", "P_10"])
        for metric, val in metrics.items():
            avg.setdefault(metric, []).append(val)

    avg = {k: np.mean(v) for k, v in avg.items()}
    print(f"average metrics across {found}/{len(benchmark.folds)} folds:", avg)
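The runs loaded with load_trec_run are treated above as nested dicts mapping each query id to its retrieved doc ids and scores. Below is a minimal sketch of that shape and of the docid/test-split filtering; all ids and scores are made-up placeholders.

# Hypothetical run in the {qid: {docid: score}} layout assumed above.
best_search_run = {
    "q1": {"d1": 12.3, "d2": 11.9},
    "q2": {"d3": 10.4, "d1": 9.7},
}

# Same comprehensions as in the example: collect every retrieved docid,
# then keep only the queries assigned to this fold's test split.
docids = set(docid for querydocs in best_search_run.values() for docid in querydocs)
test_qids = {"q2"}  # stands in for benchmark.folds[fold]["predict"]["test"]
test_run = {qid: docs for qid, docs in best_search_run.items() if qid in test_qids}

print(sorted(docids))  # ['d1', 'd2', 'd3']
print(test_run)        # {'q2': {'d3': 10.4, 'd1': 9.7}}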
Example #3
def evaluate(config, modules):
    # output_path = _pipeline_path(config, modules)
    searcher = modules["searcher"]
    benchmark = modules["benchmark"]

    metric = config["optimize"]
    all_metric = ["ndcg_cut_20", "ndcg_cut_10", "map", "P_20", "P_10", "set_recall"]
    output_dir = searcher.get_cache_path() / benchmark.name
    best_results = evaluator.search_best_run(output_dir, benchmark, primary_metric=metric, metrics=all_metric)

    paths = [f"\t{s}: {path}" for s, path in best_results["path"].items()]
    print("path for each split: \n", "\n".join(paths))

    scores = [f"\t{s}: {score}" for s, score in best_results["score"].items()]
    print(f"cross-validated results when optimizing for {metric}: \n", "\n".join(scores))
Example #4
def train(config, modules):
    random.seed(config["seed"])
    np.random.seed(config["seed"])
    torch.manual_seed(config["seed"])
    torch.cuda.manual_seed_all(config["seed"])

    metric = "map"
    fold = config["fold"]

    searcher = modules["searcher"]
    benchmark = modules["benchmark"]
    reranker = modules["reranker"]

    # create the searcher's index first if it depends on one
    if "index" in searcher.modules:
        searcher["index"].create_index()

    topics_fn = benchmark.topic_file
    searcher_cache_dir = os.path.join(searcher.get_cache_path(), benchmark.name)
    searcher_run_dir = searcher.query_from_file(topics_fn, searcher_cache_dir)

    results = evaluator.search_best_run(searcher_run_dir, benchmark, metric)
    best_search_run_path = results["path"][fold]
    best_search_run = searcher.load_trec_run(best_search_run_path)

    if config["rundocsonly"]:
        # rerank only the documents retrieved in the best first-stage run
        docids = set(docid for querydocs in best_search_run.values() for docid in querydocs)
        reranker["extractor"].create(qids=best_search_run.keys(), docids=docids,
                                     topics=benchmark.topics[benchmark.query_type])
        train_run = {qid: docs for qid, docs in best_search_run.items() if qid in benchmark.folds[fold]["train_qids"]}
        dev_run = {qid: docs for qid, docs in best_search_run.items() if qid in benchmark.folds[fold]["predict"]["dev"]}
    else:
        # train and validate on all judged documents from the qrels
        docids = set(docid for querydocs in benchmark.qrels.values() for docid in querydocs)
        reranker["extractor"].create(qids=benchmark.qrels.keys(), docids=docids,
                                     topics=benchmark.topics[benchmark.query_type])
        train_run = {qid: docs for qid, docs in benchmark.qrels.items() if qid in benchmark.folds[fold]["train_qids"]}
        dev_run = {qid: docs for qid, docs in benchmark.qrels.items() if qid in benchmark.folds[fold]["predict"]["dev"]}

    reranker.build()
    train_dataset = TrainDataset(qid_docid_to_rank=train_run, qrels=benchmark.qrels, extractor=reranker["extractor"])
    dev_dataset = PredDataset(qid_docid_to_rank=dev_run, qrels=benchmark.qrels, extractor=reranker["extractor"], mode="val")

    train_output_path = _pipeline_path(config, modules)
    dev_output_path = train_output_path / "pred" / "dev"
    reranker["trainer"].train(reranker, train_dataset, train_output_path, dev_dataset, dev_output_path, benchmark.qrels, metric)
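The train/dev splits above come from benchmark.folds, indexed with the fold's "train_qids" and "predict"/"dev" keys. Below is a minimal sketch of that structure and the filtering it supports; the fold name, query ids, and qrels labels are placeholders.

# Hypothetical fold and qrels layout matching the keys used above.
folds = {
    "s1": {
        "train_qids": {"q1", "q2"},
        "predict": {"dev": {"q3"}, "test": {"q4"}},
    }
}
qrels = {"q1": {"d1": 1}, "q2": {"d2": 0}, "q3": {"d3": 1}, "q4": {"d4": 1}}

fold = "s1"
train_run = {qid: docs for qid, docs in qrels.items() if qid in folds[fold]["train_qids"]}
dev_run = {qid: docs for qid, docs in qrels.items() if qid in folds[fold]["predict"]["dev"]}

print(train_run)  # {'q1': {'d1': 1}, 'q2': {'d2': 0}}
print(dev_run)    # {'q3': {'d3': 1}}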
Example #5
    def evaluate(self):
        metrics = (self.config["metrics"]
                   if list(self.config["metrics"]) != ["default"]
                   else evaluator.DEFAULT_METRICS)

        best_results = evaluator.search_best_run(
            self.get_results_path(),
            self.benchmark,
            primary_metric=self.config["optimize"],
            metrics=metrics)

        for fold, path in best_results["path"].items():
            logger.info("rank: fold=%s best run: %s", fold, path)

        logger.info("rank: cross-validated results when optimizing for '%s':",
                    self.config["optimize"])
        for metric, score in sorted(best_results["score"].items()):
            logger.info("%25s: %0.4f", metric, score)

        return best_results
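The metrics selection above falls back to the evaluator's defaults when the config holds the literal ["default"] marker. A minimal standalone sketch of that fallback follows; DEFAULT_METRICS here is a stand-in list and resolve_metrics is a hypothetical helper, not part of the library.

# Stand-in default list; the real code uses evaluator.DEFAULT_METRICS.
DEFAULT_METRICS = ["map", "P_20", "ndcg_cut_20"]

def resolve_metrics(configured):
    # use the configured metric names unless they are the literal ["default"] marker
    return list(configured) if list(configured) != ["default"] else DEFAULT_METRICS

print(resolve_metrics(["default"]))             # ['map', 'P_20', 'ndcg_cut_20']
print(resolve_metrics(["map", "recall_100"]))   # ['map', 'recall_100']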