Example #1
0
def get_claim_lms() -> Dict[str, Counter]:
    """Return the `LM` of every "train"-split claim, keyed by stringified claim id.

    The query ids of the "train" split (looked up under `split_name2`) are
    converted to ints, resolved to claims, and passed to `build_gold_lms`;
    each resulting entry contributes `str(entry.cid) -> entry.LM`.
    """
    train_qids = list(get_qids_for_split(split_name2, "train"))
    gold_lms = build_gold_lms(get_claims_from_ids(lmap(int, train_qids)))

    lm_by_cid: Dict[str, Counter] = {}
    for entry in gold_lms:
        # If two entries share a cid, the later one wins.
        lm_by_cid[str(entry.cid)] = entry.LM
    return lm_by_cid
Example #2
0
def main():
    """Filter the "train"-split QK units with `filter_qk_rel` and pickle the result.

    Loads every QK unit, keeps those whose query id (`qk[0].query_id`)
    belongs to the "train" split of `split_name2`, filters them against the
    claim LMs from `get_claim_lms()`, and saves the outcome under the pickle
    name "pc_qk3_filtered_rel_train".
    """
    split = "train"
    all_qk = load_all_qk()
    # Build the id collection as a set once: the membership test below runs
    # for every QK unit, and a list would make that scan quadratic.
    qids = set(get_qids_for_split(split_name2, split))
    qks_for_split = [qk for qk in all_qk if qk[0].query_id in qids]
    query_lms: Dict[str, Counter] = get_claim_lms()
    print(len(qks_for_split), len(query_lms))
    # NOTE(review): the meaning of the literal 50 is defined by filter_qk_rel,
    # which is not visible here -- confirm before changing it.
    filtered_qk_candidate = filter_qk_rel(qks_for_split, query_lms, 50)
    save_to_pickle(filtered_qk_candidate, "pc_qk3_filtered_rel_{}".format(split))
Example #3
0
def main():
    """Write one `make_pc_qc` output per split under `<output_path>/pc_qc4`.

    For each entry of `splits`, the split's query ids are converted to ints
    and turned into QCK queries, the split's candidates are fetched, and both
    are handed to `make_pc_qc` together with a fresh `is_correct_factory()`
    result and the destination path `<output_path>/pc_qc4/<split>`.
    """
    out_root = os.path.join(output_path, "pc_qc4")
    exist_or_mkdir(out_root)
    split_source = split_name2
    for split in splits:
        cids = lmap(int, get_qids_for_split(split_source, split))
        queries = get_qck_queries_from_cids(cids)
        candidates = get_qck_candidate_for_split(split_source, split)
        make_pc_qc(
            queries,
            candidates,
            is_correct_factory(),
            os.path.join(out_root, split),
        )
Example #4
0
def qck_gen_w_ranked_list(job_name,
                          qk_candidates: List[QKUnit],
                          qck_candidates_dict: Dict[str, List[QCKCandidate]],
                          split):
    """Run a `JobRunnerS` job that generates QCK instances for one split.

    Args:
        job_name: Prefix of the job name; the runner is started under
            `job_name + "_" + split`.
        qk_candidates: QK units; only those whose `qk[0].query_id` belongs to
            `split` are handed to the workers.
        qck_candidates_dict: Candidate lists passed to `QCKInstanceGenerator`.
        split: Split name, resolved to query ids via
            `get_qids_for_split(split_name2, split)`.
    """
    # Keep the list for its length (num_jobs below) and use a separate set
    # for membership, so filtering does not rescan the list for every unit.
    qids = list(get_qids_for_split(split_name2, split))
    qid_set = set(qids)
    print("Generate instances : ", split)
    generator = QCKInstanceGenerator(qck_candidates_dict, is_correct_factory())
    qk_candidates_for_split: List[QKUnit] = [
        qk for qk in qk_candidates if qk[0].query_id in qid_set
    ]
    print("{} of {} qk are used".format(len(qk_candidates_for_split), len(qk_candidates)))

    def worker_factory(out_dir):
        # Every worker shares the same filtered units and generator.
        return QCKWorker(qk_candidates_for_split,
                         generator,
                         out_dir)

    # NOTE(review): the job count is the number of split qids, not
    # len(qk_candidates_for_split) -- confirm QCKWorker copes with job ids
    # that have no matching QK unit.
    num_jobs = len(qids)
    runner = JobRunnerS(job_man_dir, num_jobs, job_name + "_" + split, worker_factory)
    runner.start()