Example #1
0
def main():
    """Launch QCK generation jobs for the dev, train and val runs.

    Every launch receives a newly constructed ``QCKInstanceGenerator`` and
    the same ``"qcknc3"`` name argument. The train and val launches both use
    the "train" evaluation candidates and the train qk-candidate pickle name.
    """
    name_tag = "qcknc3"

    def build_generator(candidate_split):
        # A fresh generator per launch, exactly one per job family.
        return QCKInstanceGenerator(
            get_eval_candidates_as_qck(candidate_split),
            is_correct_factory(),
        )

    dev_qk_name = "qk_candidate_msmarco_filtered_dev"
    start_generate_jobs_for_sub_split(
        build_generator("dev"), dev_qk_name, name_tag, "dev")

    # Selected from doc_scorer_summarizer.py
    train_qk_name = "qk_candidate_msmarco_filtered_train"
    start_generate_jobs_for_train(
        build_generator("train"), train_qk_name, name_tag)
    start_generate_jobs_for_val(
        build_generator("train"), train_qk_name, name_tag)
Example #2
0
def qck_gen(job_name, qk_candidate_name, candidate_ranked_list_path,
            kdp_ranked_list_path, split):
    """Start QCK instance generation jobs (with KDP scores) for one split.

    Args:
        job_name: Prefix of the runner's job name; the runner is named
            ``job_name + "_" + split``.
        qk_candidate_name: Pickle name handed to ``load_from_pickle`` to get
            the list of QK units.
        candidate_ranked_list_path: Path handed to
            ``get_qck_candidate_from_ranked_list_path``.
        kdp_ranked_list_path: Path handed to ``load_ranked_list_grouped``.
        split: Split name used to look up the claim ids and the number of
            jobs (``d_n_claims_per_split2[split]``).
    """
    claim_ids = load_claim_ids_for_split(split)
    cids: List[str] = lmap(str, claim_ids)
    qk_candidate: List[QKUnit] = load_from_pickle(qk_candidate_name)
    kdp_ranked_list: Dict[str, List[TrecRankedListEntry]] = \
        load_ranked_list_grouped(kdp_ranked_list_path)

    print("cids", len(cids))
    print("len(qk_candidate)", len(qk_candidate))
    print("Generate instances : ", split)
    generator = QCKInstGenWScore(
        get_qck_candidate_from_ranked_list_path(candidate_ranked_list_path),
        is_correct_factory(), kdp_ranked_list)

    # Membership is tested once per QK unit; a set avoids scanning the whole
    # id list each time. Assumes query_id values are hashable (str ids).
    cid_set = set(cids)
    qk_candidate_train: List[QKUnit] = [
        qk for qk in qk_candidate if qk[0].query_id in cid_set
    ]

    def worker_factory(out_dir):
        return QCKWorker(qk_candidate_train, generator, out_dir)

    num_jobs = d_n_claims_per_split2[split]
    runner = JobRunnerS(job_man_dir, num_jobs, job_name + "_" + split,
                        worker_factory)
    runner.start()
Example #3
0
def make_test():
    """Launch QCK generation jobs for the "test" sub-split."""
    target_split = "test"
    name_template = "qk_candidate_msmarco_filtered_{}"
    qk_name = name_template.format(target_split)
    candidates = get_eval_candidates_as_qck(target_split)
    instance_generator = QCKInstanceGenerator(candidates,
                                              is_correct_factory())
    start_generate_jobs_for_sub_split(
        instance_generator, qk_name, "qcknc3", target_split)
Example #4
0
def main():
    """Start "qck11" generation jobs for the dev and test splits."""
    # Selected from doc_scorer_summarizer.py
    name_template = "pc_qk2_{}_cpnc12_filtered"
    for split in ("dev", "test"):
        candidates = get_eval_candidates_as_qck(split)
        instance_generator = QCKInstanceGenerator(candidates,
                                                  is_correct_factory())
        start_generate_jobs(
            instance_generator, split, name_template.format(split), "qck11")
Example #5
0
def main():
    """Run ``make_pc_qc`` for each split, writing under ``<output_path>/pc_qc``."""
    out_root = os.path.join(output_path, "pc_qc")
    exist_or_mkdir(out_root)
    for split in splits:
        split_queries = get_qck_queries(split)
        split_candidates = get_eval_candidates_as_qck(split)
        # One output path per split, named after the split itself.
        target = os.path.join(out_root, split)
        make_pc_qc(split_queries, split_candidates,
                   is_correct_factory(), target)
Example #6
0
def main():
    """Run "qcknc2" jobs on the dev sub-split with the stage-2 QK candidates."""
    target_split = "dev"
    candidates = get_eval_candidates_as_qck(target_split)
    instance_generator = QCKInstanceGenerator(candidates,
                                              is_correct_factory())
    run_jobs_with_qk_candidate(
        instance_generator,
        target_split,
        "qk_stage2_dev_2",
        "qcknc2",
    )
Example #7
0
def main():
    """Run ``make_pc_qc`` for each split of ``split_name2`` under ``pc_qc4``."""
    out_root = os.path.join(output_path, "pc_qc4")
    exist_or_mkdir(out_root)
    split_file = split_name2
    for split in splits:
        split_qids: Iterable[str] = get_qids_for_split(split_file, split)
        # Query ids are converted to ints before the query lookup.
        cid_list = lmap(int, split_qids)
        split_queries = get_qck_queries_from_cids(cid_list)
        split_candidates = get_qck_candidate_for_split(split_file, split)
        target = os.path.join(out_root, split)
        make_pc_qc(split_queries, split_candidates,
                   is_correct_factory(), target)
Example #8
0
def main():
    """Run ``make_pc_qc`` for each split using candidates read from ranked lists.

    Candidates for a split come from
    ``output/perspective_experiments/q_res_<split>.txt``; results are written
    under ``<output_path>/pc_qc2``.
    """
    out_root = os.path.join(output_path, "pc_qc2")
    exist_or_mkdir(out_root)
    ranked_list_template = "q_res_{}.txt"
    for split in splits:
        split_queries = get_qck_queries(split)
        ranked_list_file = ranked_list_template.format(split)
        ranked_list_path = os.path.join(
            "output", "perspective_experiments", ranked_list_file)
        split_candidates = get_qck_candidate_from_ranked_list_path(
            ranked_list_path)
        target = os.path.join(out_root, split)
        make_pc_qc(split_queries, split_candidates,
                   is_correct_factory(), target)
Example #9
0
def qck_gen_w_ranked_list(job_name,
                          qk_candidates: List[QKUnit],
                          qck_candidates_dict: Dict[str, List[QCKCandidate]],
                          split):
    """Start QCK instance generation jobs for one split of ``split_name2``.

    Args:
        job_name: Prefix of the runner's job name; the runner is named
            ``job_name + "_" + split``.
        qk_candidates: All QK units; only those whose query id belongs to
            the split are passed to the workers.
        qck_candidates_dict: Candidates handed to ``QCKInstanceGenerator``.
        split: Split name used to look up the query ids.
    """
    qids = list(get_qids_for_split(split_name2, split))
    print("Generate instances : ", split)
    generator = QCKInstanceGenerator(qck_candidates_dict, is_correct_factory())

    # Membership is tested once per QK unit; a set avoids scanning the whole
    # id list each time. Assumes query ids are hashable (str ids).
    qid_set = set(qids)
    qk_candidates_for_split: List[QKUnit] = [
        qk for qk in qk_candidates if qk[0].query_id in qid_set
    ]
    print("{} of {} qk are used".format(len(qk_candidates_for_split), len(qk_candidates)))

    def worker_factory(out_dir):
        return QCKWorker(qk_candidates_for_split,
                         generator,
                         out_dir)

    # The job count stays the length of the id list, as before.
    num_jobs = len(qids)
    runner = JobRunnerS(job_man_dir, num_jobs, job_name + "_" + split, worker_factory)
    runner.start()
Example #10
0
def main(config):
    """Launch "qck_multi" multi-document generation jobs for dev, train and val.

    Args:
        config: Passed unchanged to every ``MultiDocInstanceGenerator``.
    """
    job_name = "qck_multi"
    is_correct_fn = is_correct_factory()

    dev_qk_name = "qk_candidate_msmarco_filtered_dev"
    train_qk_name = "qk_candidate_msmarco_filtered_train"
    # (split for eval candidates, qk candidate name, sub-split to generate)
    launch_plan = (
        ("dev", dev_qk_name, "dev"),
        ("train", train_qk_name, "train"),
        ("train", train_qk_name, "val"),
    )
    for candidate_split, qk_name, sub_split in launch_plan:
        # A new generator is built for every launch.
        instance_generator = MultiDocInstanceGenerator(
            get_eval_candidates_as_qck(candidate_split),
            is_correct_fn,
            config,
        )
        start_generate_jobs_for_sub_split(
            instance_generator, qk_name, job_name, sub_split)
Example #11
0
def multi_qck_gen(job_name, qk_candidate_name, ranked_list_path, split,
                  k_group_size):
    """Start grouped QCK instance generation jobs for one split.

    Args:
        job_name: Prefix of the runner's job name; the runner is named
            ``job_name + "_" + split``.
        qk_candidate_name: Pickle name handed to ``load_from_pickle`` to get
            the list of QK units.
        ranked_list_path: Path handed to
            ``get_qck_candidate_from_ranked_list_path``.
        split: Split name used to look up the claim ids and the number of
            jobs (``d_n_claims_per_split2[split]``).
        k_group_size: Forwarded as the last argument of
            ``QCKGeneratorGrouped``.
    """
    claim_ids = load_claim_ids_for_split(split)
    cids: List[str] = lmap(str, claim_ids)
    qk_candidate: List[QKUnit] = load_from_pickle(qk_candidate_name)
    print("cids", len(cids))
    print("len(qk_candidate)", len(qk_candidate))
    print("Generate instances : ", split)
    generator = QCKGeneratorGrouped(
        get_qck_candidate_from_ranked_list_path(ranked_list_path),
        is_correct_factory(), False, k_group_size)

    # Membership is tested once per QK unit; a set avoids scanning the whole
    # id list each time. Assumes query_id values are hashable (str ids).
    cid_set = set(cids)
    qk_candidate_train: List[QKUnit] = [
        qk for qk in qk_candidate if qk[0].query_id in cid_set
    ]

    def worker_factory(out_dir):
        return QCKWorker(qk_candidate_train, generator, out_dir)

    num_jobs = d_n_claims_per_split2[split]
    runner = JobRunnerS(job_man_dir, num_jobs, job_name + "_" + split,
                        worker_factory)
    runner.start()
Example #12
0
def qck_gen_w_ranked_list_multiple(job_name, qk_candidate_name,
                                   ranked_list_path, split, n_qk_per_job):
    """Start QCK generation jobs where each job is given ``n_qk_per_job``.

    Args:
        job_name: Prefix of the runner's job name; the runner is named
            ``job_name + "_" + split``.
        qk_candidate_name: Pickle name handed to ``load_from_pickle`` to get
            the list of QK units.
        ranked_list_path: Path handed to
            ``get_qck_candidate_from_ranked_list_path``.
        split: Split name used to look up the claim ids and the QK count
            (``d_n_claims_per_split2[split]``).
        n_qk_per_job: Passed to ``QCKWorkerMultiple`` and used as the divisor
            when computing the number of jobs.
    """
    claim_ids = load_claim_ids_for_split(split)
    cids: List[str] = lmap(str, claim_ids)
    qk_candidate: List[QKUnit] = load_from_pickle(qk_candidate_name)
    print("cids", len(cids))
    print("len(qk_candidate)", len(qk_candidate))
    print("Generate instances : ", split)
    generator = QCKInstanceGenerator(
        get_qck_candidate_from_ranked_list_path(ranked_list_path),
        is_correct_factory())

    # Membership is tested once per QK unit; a set avoids scanning the whole
    # id list each time. Assumes query_id values are hashable (str ids).
    cid_set = set(cids)
    qk_candidate_train: List[QKUnit] = [
        qk for qk in qk_candidate if qk[0].query_id in cid_set
    ]

    def worker_factory(out_dir):
        return QCKWorkerMultiple(qk_candidate_train, generator, n_qk_per_job,
                                 out_dir)

    # Job count comes from ceil_divide(per-split QK count, n_qk_per_job).
    num_qks = d_n_claims_per_split2[split]
    num_jobs = ceil_divide(num_qks, n_qk_per_job)
    runner = JobRunnerS(job_man_dir, num_jobs, job_name + "_" + split,
                        worker_factory)
    runner.start()
Example #13
0
def main():
    """Launch "qcknc" train/val generation jobs from the train candidates."""
    train_candidates = get_eval_candidates_as_qck("train")
    instance_generator = QCKInstanceGenerator(train_candidates,
                                              is_correct_factory())
    start_generate_jobs_for_train_val(instance_generator, "qcknc")
Example #14
0
def main():
    """Run "qcknc" jobs on the dev sub-split with the stage-2 dev QK candidates."""
    dev_candidates = get_eval_candidates_as_qck("dev")
    instance_generator = QCKInstanceGenerator(dev_candidates,
                                              is_correct_factory())
    run_jobs_with_qk_candidate(
        instance_generator,
        "dev",
        "perspective_qk_stage2_dev",
        "qcknc",
    )
def main():
    """Launch "qcknc_dense" val generation jobs from the dense train QK candidates."""
    train_candidates = get_eval_candidates_as_qck("train")
    instance_generator = QCKInstanceGenerator(train_candidates,
                                              is_correct_factory())
    # Selected from doc_scorer_summarizer.py
    dense_qk_name = "perspective_qk_candidate_train_dense"
    start_generate_jobs_for_val(instance_generator, dense_qk_name,
                                "qcknc_dense")