def main():
    """Start the "qcknc3" instance-generation jobs for dev, train and val.

    A generator is built from the eval candidates of the relevant split and
    handed to the matching ``start_generate_jobs_for_*`` helper. The train and
    val runs share the same QK candidate name but each gets its own freshly
    constructed generator.
    """
    name_prefix = "qcknc3"

    def build_generator(split):
        # Candidates are fetched first, then the labeling function is created.
        return QCKInstanceGenerator(get_eval_candidates_as_qck(split),
                                    is_correct_factory())

    start_generate_jobs_for_sub_split(
        build_generator("dev"),
        "qk_candidate_msmarco_filtered_dev",
        name_prefix,
        "dev",
    )

    # Selected from doc_scorer_summarizer.py
    train_qk_name = "qk_candidate_msmarco_filtered_train"
    start_generate_jobs_for_train(build_generator("train"),
                                  train_qk_name,
                                  name_prefix)
    start_generate_jobs_for_val(build_generator("train"),
                                train_qk_name,
                                name_prefix)
def main():
    """Run the "robust_qck_6" QCK generation job over 250 job slots.

    Relevance judgements are read from a hard-coded qrels path. A candidate is
    labelled with the judgement stored for its document id, where the document
    id is the candidate id with its last "_"-separated field removed. Missing
    queries or documents are labelled 0.
    """
    judgement = load_qrels_structured(
        "/home/youngwookim/Downloads/rob04-desc/qrels.rob04.txt")

    def is_correct(query: QCKQuery, candidate: QCKCandidate):
        qid = query.query_id
        # Everything before the final "_" (empty string when there is none).
        doc_id = candidate.id.rpartition("_")[0]
        if qid not in judgement:
            return 0
        doc_labels = judgement[qid]
        return doc_labels[doc_id] if doc_id in doc_labels else 0

    qk_candidate: List[QKUnit] = load_from_pickle(
        "robust_on_clueweb_qk_candidate")
    candidate_dict: Dict[str, List[QCKCandidateI]] = \
        load_candidate_all_passage_from_qrel(256)
    generator = QCKInstanceGenerator(candidate_dict, is_correct)

    def worker_factory(out_dir):
        return QCKWorker(qk_candidate, generator, out_dir)

    ##
    runner = JobRunner(job_man_dir, 250, "robust_qck_6", worker_factory)
    runner.start()
def main():
    """Run the "robust_qck_10" QCK generation job over 250 job slots.

    Labels are binary: 1 when the qrels file holds a positive judgement for
    the (query id, candidate id) pair, otherwise 0. The candidate dictionary
    is taken from the "candidate_for_robust_qck_7" cache when available;
    otherwise it is rebuilt and pickled under that name.
    """
    judgement = load_qrels_structured(
        "/home/youngwookim/Downloads/rob04-desc/qrels.rob04.txt")

    def is_correct(query: QCKQuery, candidate: QCKCandidate):
        qid = query.query_id
        doc_id = candidate.id
        if qid not in judgement:
            return 0
        doc_labels = judgement[qid]
        if doc_id in doc_labels and doc_labels[doc_id] > 0:
            return 1
        return 0

    qk_candidate: List[QKUnit] = load_from_pickle(
        "robust_on_clueweb_qk_candidate_filtered")

    cache_name = "candidate_for_robust_qck_7"
    candidate_dict = load_cache(cache_name)
    if candidate_dict is None:
        # Cache miss: rebuild the candidates and store them for the next run.
        candidate_dict = get_candidate_all_passage_w_samping()
        save_to_pickle(candidate_dict, cache_name)

    generator = QCKInstanceGenerator(candidate_dict, is_correct)

    def worker_factory(out_dir):
        return QCKWorker(qk_candidate, generator, out_dir)

    ##
    runner = JobRunner(job_man_dir, 250, "robust_qck_10", worker_factory)
    runner.start()
def make_test():
    """Start the "qcknc3" sub-split generation jobs for the "test" split."""
    split = "test"
    candidates = get_eval_candidates_as_qck(split)
    generator = QCKInstanceGenerator(candidates, is_correct_factory())
    start_generate_jobs_for_sub_split(
        generator,
        "qk_candidate_msmarco_filtered_{}".format(split),
        "qcknc3",
        split,
    )
def main():
    """Start "qck11" generation jobs for the "dev" and "test" splits in turn."""
    for split in ("dev", "test"):
        candidates = get_eval_candidates_as_qck(split)
        generator = QCKInstanceGenerator(candidates, is_correct_factory())
        # Selected from doc_scorer_summarizer.py
        qk_candidate_name = "pc_qk2_{}_cpnc12_filtered".format(split)
        start_generate_jobs(generator, split, qk_candidate_name, "qck11")
def main():
    """Run "qcknc2" jobs on the "dev" sub-split with the "qk_stage2_dev_2" QK candidates."""
    sub_split = "dev"
    candidates = get_eval_candidates_as_qck(sub_split)
    generator = QCKInstanceGenerator(candidates, is_correct_factory())
    run_jobs_with_qk_candidate(
        generator,
        sub_split,
        "qk_stage2_dev_2",
        "qcknc2",
    )
def main():
    """Run the "robust_qck_2" QCK generation job over 250 job slots.

    QK units come from ``load_qk_robust_heldout("651")``, candidates from
    ``load_candidate_head_as_doc()``, and labels from ``QRel.is_correct``.
    """
    qrel = QRel()
    qk_candidate: List[QKUnit] = load_qk_robust_heldout("651")
    generator = QCKInstanceGenerator(load_candidate_head_as_doc(),
                                     qrel.is_correct)

    def worker_factory(out_dir):
        return QCKWorker(qk_candidate, generator, out_dir)

    ##
    runner = JobRunner(job_man_dir, 250, "robust_qck_2", worker_factory)
    runner.start()
def qck_gen_w_ranked_list(job_name,
                          qk_candidates: List[QKUnit],
                          qck_candidates_dict: Dict[str, List[QCKCandidate]],
                          split):
    """Generate QCK instances for the queries belonging to ``split``.

    Args:
        job_name: Base job name; the runner is started as ``job_name + "_" + split``.
        qk_candidates: QK units; only those whose first element's ``query_id``
            is among the split's query ids are passed to the workers.
        qck_candidates_dict: Candidate dictionary given to the instance generator.
        split: Split name used to look up query ids via ``get_qids_for_split``.

    One job is scheduled per query id returned for the split.
    """
    qids = list(get_qids_for_split(split_name2, split))
    # Set lookup avoids rescanning the whole id list for every QK unit; the
    # list itself is kept because the job count is its length.
    qid_set = set(qids)
    print("Generate instances : ", split)
    generator = QCKInstanceGenerator(qck_candidates_dict, is_correct_factory())
    qk_candidates_for_split: List[QKUnit] = [
        qk for qk in qk_candidates if qk[0].query_id in qid_set
    ]
    print("{} of {} qk are used".format(len(qk_candidates_for_split),
                                        len(qk_candidates)))

    def worker_factory(out_dir):
        return QCKWorker(qk_candidates_for_split, generator, out_dir)

    num_jobs = len(qids)
    runner = JobRunnerS(job_man_dir, num_jobs, job_name + "_" + split,
                        worker_factory)
    runner.start()
def main():
    """Run the "pc_evi_qck2_s_train" QCK generation job.

    QK units are unpickled from "pc_evi_filtered_qk_train", candidates come
    from ``load_bal_candidate``, and the job count is looked up in
    ``d_n_pc_per_split`` for the split.
    """
    is_correct_fn = get_is_correct_fn()
    split = "train"
    qk_candidate = load_from_pickle("pc_evi_filtered_qk_{}".format(split))

    tprint("Loading candidates..")
    candidate_dict = load_bal_candidate(split)
    tprint("{} dict keys".format(len(candidate_dict)))

    tprint("Initializing generator..")
    generator = QCKInstanceGenerator(candidate_dict, is_correct_fn)

    def worker_factory(out_dir):
        return QCKWorker(qk_candidate, generator, out_dir)

    runner = JobRunnerS(
        job_man_dir,
        d_n_pc_per_split[split],
        "pc_evi_qck2_s_{}".format(split),
        worker_factory,
    )
    runner.start()
def main():
    """Run a "pc_evi_qck_predict_<split>" job for every split after the first.

    For each split in ``splits[1:]`` the QK units are taken from the
    "pc_evidence_qk" per-split mapping, candidates are loaded with
    ``load_top_rank_candidate``, and a runner is started before moving on to
    the next split.
    """
    is_correct_fn = get_is_correct_fn()
    qk_per_split = get_qk_per_split("pc_evidence_qk")

    for split in splits[1:]:
        qk_candidate: List[QKUnit] = qk_per_split[split]

        tprint("Loading candidates..")
        candidate_dict = load_top_rank_candidate(split)
        tprint("{} dict keys".format(len(candidate_dict)))

        tprint("Initializing generator..")
        generator = QCKInstanceGenerator(candidate_dict, is_correct_fn)

        def worker_factory(out_dir):
            return QCKWorker(qk_candidate, generator, out_dir)

        runner = JobRunner2(
            job_man_dir,
            d_n_pc_per_split[split],
            "pc_evi_qck_predict_{}".format(split),
            worker_factory,
        )
        runner.start()
def qck_gen_w_ranked_list(job_name, qk_candidate_name, ranked_list_path, split):
    """Generate QCK instances for the claims of ``split`` from a ranked list.

    Args:
        job_name: Base job name; the runner is started as ``job_name + "_" + split``.
        qk_candidate_name: Pickle name from which the QK units are loaded.
        ranked_list_path: Path passed to ``get_qck_candidate_from_ranked_list_path``
            to build the candidate dictionary.
        split: Split name used to load claim ids and to look up the job count
            in ``d_n_claims_per_split2``.

    Only QK units whose first element's ``query_id`` matches one of the
    split's claim ids (as strings) are passed to the workers.
    """
    claim_ids = load_claim_ids_for_split(split)
    cids: List[str] = lmap(str, claim_ids)
    qk_candidate: List[QKUnit] = load_from_pickle(qk_candidate_name)
    print("cids", len(cids))
    print("len(qk_candidate)", len(qk_candidate))

    print("Generate instances : ", split)
    generator = QCKInstanceGenerator(
        get_qck_candidate_from_ranked_list_path(ranked_list_path),
        is_correct_factory())

    # Set lookup avoids rescanning the whole claim-id list for every QK unit.
    cid_set = set(cids)
    qk_candidate_train: List[QKUnit] = [
        qk for qk in qk_candidate if qk[0].query_id in cid_set
    ]

    def worker_factory(out_dir):
        return QCKWorker(qk_candidate_train, generator, out_dir)

    num_jobs = d_n_claims_per_split2[split]
    runner = JobRunnerS(job_man_dir, num_jobs, job_name + "_" + split,
                        worker_factory)
    runner.start()
def main():
    """Run a "pc_evi_qck3_<split>" job for each of the first two splits.

    QK units are unpickled per split and passed through ``sample_kdps``. Each
    job handles 10 QK units, so the job count is the per-split total from
    ``d_n_pc_per_split`` divided by 10, rounded up via ``ceil_divide``.
    """
    is_correct_fn = get_is_correct_fn()
    n_qk_per_job = 10

    for split in splits[:2]:
        qk_candidate = sample_kdps(
            load_from_pickle("pc_evi_filtered_qk_{}".format(split)))

        tprint("Loading candidates..")
        candidate_dict = load_bal_candidate(split)
        tprint("{} dict keys".format(len(candidate_dict)))

        tprint("Initializing generator..")
        generator = QCKInstanceGenerator(candidate_dict, is_correct_fn)

        def worker_factory(out_dir):
            return QCKWorkerMultiple(qk_candidate, generator, n_qk_per_job,
                                     out_dir)

        runner = JobRunnerS(
            job_man_dir,
            ceil_divide(d_n_pc_per_split[split], n_qk_per_job),
            "pc_evi_qck3_{}".format(split),
            worker_factory,
        )
        runner.start()
def main():
    """Run the "robust_qck_10_predict" QCK generation job over 250 job slots.

    Every (query, candidate) pair is labelled 0. The candidate dictionary is
    read from the "candidate_for_robust_qck_10_predict" cache when available;
    otherwise it is built with ``load_candidate_all_passage(256)`` and pickled
    under that name.
    """
    def is_correct(query: QCKQuery, candidate: QCKCandidate):
        # Constant label: no judgements are consulted in this job.
        return 0

    qk_candidate: List[QKUnit] = load_from_pickle(
        "robust_on_clueweb_qk_candidate_filtered")

    cache_name = "candidate_for_robust_qck_10_predict"
    candidate_dict = load_cache(cache_name)
    if candidate_dict is None:
        candidate_dict = load_candidate_all_passage(256)
        save_to_pickle(candidate_dict, cache_name)

    generator = QCKInstanceGenerator(candidate_dict, is_correct)

    def worker_factory(out_dir):
        return QCKWorker(qk_candidate, generator, out_dir)

    ##
    runner = JobRunner(job_man_dir, 250, "robust_qck_10_predict",
                       worker_factory)
    runner.start()
def start_job(job_name, split, candidate_dict, correct_d, qk_candidate):
    """Start a QCK generation job whose labels come from a lookup table.

    Args:
        job_name: Name handed to the job runner.
        split: Key into ``num_problems`` for the job count; also printed.
        candidate_dict: Candidate dictionary given to the instance generator.
        correct_d: Mapping from ``(query_id, candidate_id)`` to the label.
        qk_candidate: QK units handed to every worker.

    Pairs missing from ``correct_d`` are reported on stdout and labelled False.
    """
    print("Loading data ....")

    def is_correct_fn(q: QCKQuery, c: QCKCandidate) -> bool:
        pair_id = (q.query_id, c.id)
        if pair_id not in correct_d:
            print("WARNING : key pair not found", pair_id)
            return False
        return correct_d[pair_id]

    # transform payload to common QCK format
    generator = QCKInstanceGenerator(candidate_dict, is_correct_fn)
    print("Generate instances : ", split)

    def worker_factory(out_dir):
        return QCKWorker(qk_candidate, generator, out_dir)

    num_jobs = num_problems[split]
    # NOTE(review): the runner receives num_jobs - 1; presumably it expects
    # the last job index rather than a count -- confirm against JobRunner.
    runner = JobRunner(job_man_dir, num_jobs - 1, job_name, worker_factory)
    runner.start()
def main():
    """Start the "qcknc" train/val generation jobs using the "train" eval candidates."""
    train_candidates = get_eval_candidates_as_qck("train")
    generator = QCKInstanceGenerator(train_candidates, is_correct_factory())
    start_generate_jobs_for_train_val(generator, "qcknc")
def main():
    """Start the "qcknc_ex" val generation jobs using the extended "train" eval candidates."""
    extended_candidates: Dict[str, List[QCKCandidate]] = \
        get_extended_eval_candidate_as_qck("train")
    generator = QCKInstanceGenerator(extended_candidates, is_correct_factory())
    start_generate_jobs_for_val(generator, "qcknc_ex")
def main():
    """Run "qcknc" jobs on the "dev" sub-split with the "perspective_qk_stage2_dev" QK candidates."""
    dev_candidates = get_eval_candidates_as_qck("dev")
    generator = QCKInstanceGenerator(dev_candidates, is_correct_factory())
    run_jobs_with_qk_candidate(
        generator,
        "dev",
        "perspective_qk_stage2_dev",
        "qcknc",
    )
def main():
    """Start the "qcknc_dense" val generation jobs using the "train" eval candidates."""
    train_candidates = get_eval_candidates_as_qck("train")
    generator = QCKInstanceGenerator(train_candidates, is_correct_factory())
    # Selected from doc_scorer_summarizer.py
    start_generate_jobs_for_val(
        generator,
        "perspective_qk_candidate_train_dense",
        "qcknc_dense",
    )