Esempio n. 1
0
def main():
    """Start the "qcknc3" generation jobs for the MSMARCO-filtered QK candidates.

    The dev candidates are handed to ``start_generate_jobs_for_sub_split``.
    The train candidates are handed first to ``start_generate_jobs_for_train``
    and then to ``start_generate_jobs_for_val``; each call receives its own
    freshly constructed generator.
    """
    job_prefix = "qcknc3"

    def build_generator(split):
        # Fetch the candidates before creating the correctness function,
        # i.e. the same order in which a direct call evaluates its arguments.
        eval_candidates = get_eval_candidates_as_qck(split)
        return QCKInstanceGenerator(eval_candidates, is_correct_factory())

    dev_qk_name = "qk_candidate_msmarco_filtered_dev"
    start_generate_jobs_for_sub_split(build_generator("dev"), dev_qk_name,
                                      job_prefix, "dev")

    # Selected from doc_scorer_summarizer.py
    train_qk_name = "qk_candidate_msmarco_filtered_train"
    start_generate_jobs_for_train(build_generator("train"), train_qk_name,
                                  job_prefix)
    # A new generator is built for the val jobs rather than reusing the one
    # that was passed to the train jobs.
    start_generate_jobs_for_val(build_generator("train"), train_qk_name,
                                job_prefix)
Esempio n. 2
0
def main():
    """Run the "robust_qck_6" job over 250 job slots.

    Labels come from the structured qrels file: a candidate's label is the
    judgement stored for (query id, document id), or 0 when either the query
    or the document has no entry.
    """
    judgement = load_qrels_structured(
        "/home/youngwookim/Downloads/rob04-desc/qrels.rob04.txt")

    def is_correct(query: QCKQuery, candidate: QCKCandidate):
        qid = query.query_id
        # Drop the last "_"-separated piece of the candidate id; what remains
        # is used as the document id (empty string if the id has no "_").
        doc_id = candidate.id.rpartition("_")[0]
        if qid in judgement:
            per_query = judgement[qid]
            if doc_id in per_query:
                return per_query[doc_id]
        return 0

    qk_candidate: List[QKUnit] = load_from_pickle("robust_on_clueweb_qk_candidate")
    candidate_dict: Dict[str, List[QCKCandidateI]] = \
        load_candidate_all_passage_from_qrel(256)
    generator = QCKInstanceGenerator(candidate_dict, is_correct)

    def worker_factory(out_dir):
        # Every worker shares the same QK units and the same generator.
        return QCKWorker(qk_candidate, generator, out_dir)

    JobRunner(job_man_dir, 250, "robust_qck_6", worker_factory).start()
Esempio n. 3
0
def main():
    """Run the "robust_qck_10" job over 250 job slots.

    Labels are binary: 1 when the qrels hold a judgement greater than 0 for
    the (query id, candidate id) pair, otherwise 0. The candidate dictionary
    is read from the "candidate_for_robust_qck_7" cache; when the cache yields
    ``None`` it is rebuilt and pickled under that same name.
    """
    judgement = load_qrels_structured(
        "/home/youngwookim/Downloads/rob04-desc/qrels.rob04.txt")

    def is_correct(query: QCKQuery, candidate: QCKCandidate):
        qid = query.query_id
        doc_id = candidate.id
        if qid not in judgement:
            return 0
        per_query = judgement[qid]
        if doc_id not in per_query:
            return 0
        return 1 if per_query[doc_id] > 0 else 0

    qk_candidate: List[QKUnit] = load_from_pickle(
        "robust_on_clueweb_qk_candidate_filtered")

    cache_name = "candidate_for_robust_qck_7"
    candidate_dict = load_cache(cache_name)
    if candidate_dict is None:
        # Cache miss: build the candidates and store them for the next run.
        candidate_dict = get_candidate_all_passage_w_samping()
        save_to_pickle(candidate_dict, cache_name)

    generator = QCKInstanceGenerator(candidate_dict, is_correct)

    def worker_factory(out_dir):
        return QCKWorker(qk_candidate, generator, out_dir)

    JobRunner(job_man_dir, 250, "robust_qck_10", worker_factory).start()
Esempio n. 4
0
def make_test():
    """Start the "qcknc3" sub-split generation jobs for the "test" split."""
    split = "test"
    name_template = "qk_candidate_msmarco_filtered_{}"
    eval_candidates = get_eval_candidates_as_qck(split)
    generator = QCKInstanceGenerator(eval_candidates, is_correct_factory())
    start_generate_jobs_for_sub_split(
        generator, name_template.format(split), "qcknc3", split)
Esempio n. 5
0
def main():
    """Start "qck11" generation jobs for the "dev" split and then the "test" split."""
    name_template = "pc_qk2_{}_cpnc12_filtered"
    for split in ("dev", "test"):
        eval_candidates = get_eval_candidates_as_qck(split)
        generator = QCKInstanceGenerator(eval_candidates, is_correct_factory())
        # Selected from doc_scorer_summarizer.py
        start_generate_jobs(generator, split, name_template.format(split),
                            "qck11")
Esempio n. 6
0
def main():
    """Run the "qcknc2" jobs on the "dev" sub-split using the "qk_stage2_dev_2" QK candidates."""
    sub_split = "dev"
    eval_candidates = get_eval_candidates_as_qck(sub_split)
    generator = QCKInstanceGenerator(eval_candidates, is_correct_factory())
    run_jobs_with_qk_candidate(generator, sub_split, "qk_stage2_dev_2", "qcknc2")
Esempio n. 7
0
def main():
    """Run the "robust_qck_2" job over 250 job slots.

    QK units come from ``load_qk_robust_heldout("651")``, candidates from
    ``load_candidate_head_as_doc()``, and labels from ``QRel.is_correct``.
    """
    qrel = QRel()
    qk_candidate: List[QKUnit] = load_qk_robust_heldout("651")
    generator = QCKInstanceGenerator(load_candidate_head_as_doc(),
                                     qrel.is_correct)

    def worker_factory(out_dir):
        return QCKWorker(qk_candidate, generator, out_dir)

    JobRunner(job_man_dir, 250, "robust_qck_2", worker_factory).start()
Esempio n. 8
0
def qck_gen_w_ranked_list(job_name,
                          qk_candidates: List[QKUnit],
                          qck_candidates_dict: Dict[str, List[QCKCandidate]],
                          split):
    """Generate QCK instances for the queries that belong to ``split``.

    Args:
        job_name: Base job name; the runner is registered as
            ``job_name + "_" + split``.
        qk_candidates: QK units; only those whose query id (``qk[0].query_id``)
            is among the split's query ids are passed to the workers.
        qck_candidates_dict: Candidates handed to ``QCKInstanceGenerator``.
        split: Split name forwarded to ``get_qids_for_split``.
    """
    qids = list(get_qids_for_split(split_name2, split))
    # Build the lookup set once so the filter below is O(1) per QK unit
    # instead of scanning the whole list each time. The list itself is kept
    # because the job count below is its length.
    # Assumes query ids are hashable (they are annotated as str dict keys).
    qid_set = set(qids)
    print("Generate instances : ", split)
    generator = QCKInstanceGenerator(qck_candidates_dict, is_correct_factory())
    qk_candidates_for_split: List[QKUnit] = [
        qk for qk in qk_candidates if qk[0].query_id in qid_set
    ]
    print("{} of {} qk are used".format(len(qk_candidates_for_split), len(qk_candidates)))

    def worker_factory(out_dir):
        return QCKWorker(qk_candidates_for_split,
                         generator,
                         out_dir)

    # One job per query id returned for the split.
    num_jobs = len(qids)
    runner = JobRunnerS(job_man_dir, num_jobs, job_name + "_" + split, worker_factory)
    runner.start()
Esempio n. 9
0
def main():
    """Run the "pc_evi_qck2_s_train" job for the "train" split.

    QK units are unpickled from "pc_evi_filtered_qk_train", candidates come
    from ``load_bal_candidate``, and the job count is read from
    ``d_n_pc_per_split``.
    """
    split = "train"
    is_correct_fn = get_is_correct_fn()
    qk_units = load_from_pickle("pc_evi_filtered_qk_{}".format(split))

    tprint("Loading candidates..")
    candidates = load_bal_candidate(split)
    tprint("{} dict keys".format(len(candidates)))

    tprint("Initializing generator..")
    generator = QCKInstanceGenerator(candidates, is_correct_fn)

    def worker_factory(out_dir):
        return QCKWorker(qk_units, generator, out_dir)

    JobRunnerS(job_man_dir,
               d_n_pc_per_split[split],
               "pc_evi_qck2_s_{}".format(split),
               worker_factory).start()
Esempio n. 10
0
def main():
    """Run a "pc_evi_qck_predict_<split>" job for every split after the first.

    For each split in ``splits[1:]`` the QK units are taken from the
    "pc_evidence_qk" per-split mapping and the candidates from
    ``load_top_rank_candidate``; the job count is read from
    ``d_n_pc_per_split``.
    """
    is_correct_fn = get_is_correct_fn()
    qk_per_split = get_qk_per_split("pc_evidence_qk")
    job_name_template = "pc_evi_qck_predict_{}"

    for split in splits[1:]:
        qk_units: List[QKUnit] = qk_per_split[split]

        tprint("Loading candidates..")
        candidates = load_top_rank_candidate(split)
        tprint("{} dict keys".format(len(candidates)))

        tprint("Initializing generator..")
        generator = QCKInstanceGenerator(candidates, is_correct_fn)
        num_jobs = d_n_pc_per_split[split]

        def worker_factory(out_dir):
            # Reads this iteration's qk_units and generator; the runner is
            # started before the loop moves on to the next split.
            return QCKWorker(qk_units, generator, out_dir)

        JobRunner2(job_man_dir, num_jobs, job_name_template.format(split),
                   worker_factory).start()
Esempio n. 11
0
def qck_gen_w_ranked_list(job_name, qk_candidate_name, ranked_list_path,
                          split):
    """Generate QCK instances for the claims of ``split`` from a ranked list.

    Args:
        job_name: Base job name; the runner is registered as
            ``job_name + "_" + split``.
        qk_candidate_name: Pickle name from which the QK units are loaded.
        ranked_list_path: Path given to
            ``get_qck_candidate_from_ranked_list_path`` to build the candidates.
        split: Split whose claim ids select the QK units and whose entry in
            ``d_n_claims_per_split2`` sets the number of jobs.
    """
    claim_ids = load_claim_ids_for_split(split)
    cids: List[str] = lmap(str, claim_ids)
    # Build the lookup set once so the filter below is O(1) per QK unit
    # instead of scanning the whole id list each time.
    cid_set = set(cids)
    qk_candidate: List[QKUnit] = load_from_pickle(qk_candidate_name)
    print("cids", len(cids))
    print("len(qk_candidate)", len(qk_candidate))
    print("Generate instances : ", split)
    generator = QCKInstanceGenerator(
        get_qck_candidate_from_ranked_list_path(ranked_list_path),
        is_correct_factory())
    # Keep only the QK units whose query id is one of this split's claim ids.
    qk_candidate_for_split: List[QKUnit] = [
        qk for qk in qk_candidate if qk[0].query_id in cid_set
    ]

    def worker_factory(out_dir):
        return QCKWorker(qk_candidate_for_split, generator, out_dir)

    num_jobs = d_n_claims_per_split2[split]
    runner = JobRunnerS(job_man_dir, num_jobs, job_name + "_" + split,
                        worker_factory)
    runner.start()
Esempio n. 12
0
def main():
    """Run a "pc_evi_qck3_<split>" job for each of the first two splits.

    QK units are unpickled from "pc_evi_filtered_qk_<split>" and passed
    through ``sample_kdps``. Each job handles 10 QK units, so the job count is
    ``d_n_pc_per_split[split]`` divided by 10, rounded up by ``ceil_divide``.
    """
    is_correct_fn = get_is_correct_fn()
    n_qk_per_job = 10

    for split in splits[:2]:
        loaded_qk = load_from_pickle("pc_evi_filtered_qk_{}".format(split))
        sampled_qk = sample_kdps(loaded_qk)

        tprint("Loading candidates..")
        candidates = load_bal_candidate(split)
        tprint("{} dict keys".format(len(candidates)))

        tprint("Initializing generator..")
        generator = QCKInstanceGenerator(candidates, is_correct_fn)
        num_jobs = ceil_divide(d_n_pc_per_split[split], n_qk_per_job)

        def worker_factory(out_dir):
            # Reads this iteration's sampled_qk and generator; the runner is
            # started before the loop moves on to the next split.
            return QCKWorkerMultiple(sampled_qk, generator, n_qk_per_job, out_dir)

        JobRunnerS(job_man_dir, num_jobs, "pc_evi_qck3_{}".format(split),
                   worker_factory).start()
Esempio n. 13
0
def main():
    """Run the "robust_qck_10_predict" job over 250 job slots.

    Every instance is labelled 0, because ``is_correct`` ignores its
    arguments. The candidate dictionary is read from the
    "candidate_for_robust_qck_10_predict" cache; when the cache yields
    ``None`` it is rebuilt and pickled under that same name.
    """
    def is_correct(query: QCKQuery, candidate: QCKCandidate):
        # Constant label, independent of the query/candidate pair.
        return 0

    qk_candidate: List[QKUnit] = load_from_pickle(
        "robust_on_clueweb_qk_candidate_filtered")

    cache_name = "candidate_for_robust_qck_10_predict"
    candidate_dict = load_cache(cache_name)
    if candidate_dict is None:
        # Cache miss: build the candidates and store them for the next run.
        candidate_dict = load_candidate_all_passage(256)
        save_to_pickle(candidate_dict, cache_name)

    generator = QCKInstanceGenerator(candidate_dict, is_correct)

    def worker_factory(out_dir):
        return QCKWorker(qk_candidate, generator, out_dir)

    JobRunner(job_man_dir, 250, "robust_qck_10_predict", worker_factory).start()
Esempio n. 14
0
def start_job(job_name, split, candidate_dict, correct_d, qk_candidate):
    """Start a QCK generation job whose labels are looked up in ``correct_d``.

    Args:
        job_name: Name under which the runner is registered.
        split: Key into ``num_problems``; also printed in the progress message.
        candidate_dict: Candidates handed to ``QCKInstanceGenerator``.
        correct_d: Mapping from ``(query_id, candidate_id)`` to the label.
        qk_candidate: QK units handed to every worker.
    """
    print("Loading data ....")

    def is_correct_fn(q: QCKQuery, c: QCKCandidate) -> bool:
        key = (q.query_id, c.id)
        if key not in correct_d:
            # Unknown pairs are reported and treated as incorrect.
            print("WARNING : key pair not found", key)
            return False
        return correct_d[key]

    # transform payload to common QCK format
    generator = QCKInstanceGenerator(candidate_dict, is_correct_fn)

    print("Generate instances : ", split)

    def worker_factory(out_dir):
        return QCKWorker(qk_candidate, generator, out_dir)

    # NOTE(review): the runner is given one less than num_problems[split];
    # confirm against JobRunner whether that argument is a count or a max index.
    job_count_arg = num_problems[split] - 1
    JobRunner(job_man_dir, job_count_arg, job_name, worker_factory).start()
Esempio n. 15
0
def main():
    """Start the "qcknc" train/val generation jobs from the "train" eval candidates."""
    train_candidates = get_eval_candidates_as_qck("train")
    is_correct = is_correct_factory()
    start_generate_jobs_for_train_val(
        QCKInstanceGenerator(train_candidates, is_correct), "qcknc")
Esempio n. 16
0
def main():
    """Start the "qcknc_ex" val generation jobs from the extended "train" eval candidates."""
    candidates: Dict[str, List[QCKCandidate]] = \
        get_extended_eval_candidate_as_qck("train")
    generator = QCKInstanceGenerator(candidates, is_correct_factory())
    start_generate_jobs_for_val(generator, "qcknc_ex")
Esempio n. 17
0
def main():
    """Run the "qcknc" jobs on the "dev" sub-split using the "perspective_qk_stage2_dev" QK candidates."""
    sub_split = "dev"
    dev_candidates = get_eval_candidates_as_qck("dev")
    generator = QCKInstanceGenerator(dev_candidates, is_correct_factory())
    run_jobs_with_qk_candidate(generator, sub_split,
                               "perspective_qk_stage2_dev", "qcknc")
def main():
    """Start the "qcknc_dense" val generation jobs using the dense train QK candidates."""
    train_candidates = get_eval_candidates_as_qck("train")
    generator = QCKInstanceGenerator(train_candidates, is_correct_factory())
    # Selected from doc_scorer_summarizer.py
    start_generate_jobs_for_val(
        generator, "perspective_qk_candidate_train_dense", "qcknc_dense")