Beispiel #1
0
def main():
    """Start the "qcknc3" generation jobs for the dev, train and val runs.

    Every run receives its own newly constructed ``QCKInstanceGenerator``.
    The train and val runs both read the "train" evaluation candidates and
    share the same QK-candidate name.
    """
    job_prefix = "qcknc3"

    def build_generator(split):
        # Candidates are fetched first, then the correctness function is made.
        candidates = get_eval_candidates_as_qck(split)
        return QCKInstanceGenerator(candidates, is_correct_factory())

    start_generate_jobs_for_sub_split(
        build_generator("dev"),
        "qk_candidate_msmarco_filtered_dev",
        job_prefix,
        "dev",
    )

    # Selected from doc_scorer_summarizer.py
    train_qk_name = "qk_candidate_msmarco_filtered_train"
    start_generate_jobs_for_train(
        build_generator("train"), train_qk_name, job_prefix)
    # The val run gets a separate generator instance rather than reusing
    # the one handed to the train run.
    start_generate_jobs_for_val(
        build_generator("train"), train_qk_name, job_prefix)
Beispiel #2
0
def main(config):
    """Score each query's evaluation candidates and write one TREC ranked list.

    Values read from ``config``:
        'split': passed to the candidate loader.
        'top_k': 50 or 1000; selects which candidate loader is called.
        'word_prob_path': path handed to ``load_pickle_from``.
        'run_name': passed to ``scores_to_ranked_list_entries``.
        'save_path': destination given to ``write_trec_ranked_list_entry``.

    Raises:
        ValueError: if ``config['top_k']`` is neither 50 nor 1000.
    """
    split = config['split']
    top_k = config['top_k']
    word_prob_path = config['word_prob_path']
    run_name = config['run_name']
    save_path = config['save_path']

    # Validate explicitly instead of `assert False`: asserts are stripped
    # under `python -O`, which would leave `candidate_d` unbound and fail
    # much later with a confusing error.
    if top_k not in (50, 1000):
        raise ValueError(
            "Unsupported top_k {!r}: expected 50 or 1000".format(top_k))

    candidate_d: Dict[str, List[QCKCandidate]]
    if top_k == 50:
        candidate_d = get_eval_candidates_as_qck(split)
    else:
        candidate_d = get_eval_candidates_1k_as_qck(split)

    per_query_infos: Dict[str, Dict[WordAsID, np.array]] = load_pickle_from(word_prob_path)

    all_ranked_list_entries = []

    for query_id, d in per_query_infos.items():
        scorer = Scorer(d, True)
        candidates: List[QCKCandidate] = candidate_d[query_id]

        # (candidate id, score) pairs, ordered from highest to lowest score.
        entries = [(c.id, scorer.score(c.text)) for c in candidates]
        entries.sort(key=get_second, reverse=True)

        all_ranked_list_entries.extend(
            scores_to_ranked_list_entries(entries, run_name, query_id))

    write_trec_ranked_list_entry(all_ranked_list_entries, save_path)
Beispiel #3
0
def make_test():
    """Start the "qcknc3" sub-split generation jobs for the "test" split."""
    split = "test"
    candidates = get_eval_candidates_as_qck(split)
    generator = QCKInstanceGenerator(candidates, is_correct_factory())
    qk_candidate_name = "qk_candidate_msmarco_filtered_{}".format(split)
    start_generate_jobs_for_sub_split(
        generator, qk_candidate_name, "qcknc3", split)
Beispiel #4
0
def main():
    """Start "qck11" generation jobs for the "dev" and then the "test" split."""
    job_prefix = "qck11"
    for split in ("dev", "test"):
        candidates = get_eval_candidates_as_qck(split)
        generator = QCKInstanceGenerator(candidates, is_correct_factory())
        # Selected from doc_scorer_summarizer.py
        qk_candidate_name = "pc_qk2_{}_cpnc12_filtered".format(split)
        start_generate_jobs(generator, split, qk_candidate_name, job_prefix)
Beispiel #5
0
def main():
    """Call ``make_pc_qc`` once per split, saving under ``<output_path>/pc_qc``.

    The output directory is passed to ``exist_or_mkdir`` first; each split
    is then saved to a path named after the split inside that directory.
    """
    out_dir = os.path.join(output_path, "pc_qc")
    exist_or_mkdir(out_dir)
    for split_name in splits:
        split_queries = get_qck_queries(split_name)
        split_candidates = get_eval_candidates_as_qck(split_name)
        target_path = os.path.join(out_dir, split_name)
        is_correct_fn = is_correct_factory()
        make_pc_qc(split_queries, split_candidates, is_correct_fn, target_path)
Beispiel #6
0
def main(config):
    """Start the "qck_multi" sub-split jobs for the dev, train and val runs.

    One correctness function is created up front and shared by all runs,
    while each run gets its own ``MultiDocInstanceGenerator`` built with
    ``config``. The train and val runs both read the "train" evaluation
    candidates and the train QK-candidate name.
    """
    job_name = "qck_multi"
    is_correct_fn = is_correct_factory()

    # (candidate split, QK-candidate name, sub-split), in launch order.
    runs = (
        ("dev", "qk_candidate_msmarco_filtered_dev", "dev"),
        ("train", "qk_candidate_msmarco_filtered_train", "train"),
        ("train", "qk_candidate_msmarco_filtered_train", "val"),
    )
    for candidate_split, qk_candidate_name, sub_split in runs:
        generator = MultiDocInstanceGenerator(
            get_eval_candidates_as_qck(candidate_split), is_correct_fn, config)
        start_generate_jobs_for_sub_split(
            generator, qk_candidate_name, job_name, sub_split)
Beispiel #7
0
def main():
    """Run the "qcknc2" jobs on the "dev" sub-split with "qk_stage2_dev_2"."""
    sub_split = "dev"
    candidates = get_eval_candidates_as_qck(sub_split)
    generator = QCKInstanceGenerator(candidates, is_correct_factory())
    run_jobs_with_qk_candidate(
        generator, sub_split, "qk_stage2_dev_2", "qcknc2")
Beispiel #8
0
def main():
    """Rank train candidates with query language models and print the score.

    Loads the filtered QK candidates for the split, converts them to
    per-query language models with ``kdp_to_lm``, and keeps only the
    evaluation candidates whose query id has a model. Those are ranked with
    ``rank_with_query_lm`` and the result of ``get_map`` against the
    perspective qrels is printed.
    """
    print("get query lms")
    split = "train"
    # The unfiltered alternative is "perspective_qk_candidate_{}".format(split).
    qk_candidate: List[QKUnit] = load_from_pickle(
        "perspective_qk_candidate_filtered_{}".format(split))
    # Use `split` rather than repeating the literal so both loads stay in sync.
    candidate_dict: Dict[str, List[QCKCandidate]] = get_eval_candidates_as_qck(
        split)
    query_lms: Dict[str, Counter] = kdp_to_lm(qk_candidate)

    # Membership is tested against the dict itself; the previous list of
    # keys made every lookup a linear scan.
    target_candidate_dict = {
        qid: candidates
        for qid, candidates in candidate_dict.items()
        if qid in query_lms
    }

    alpha = 0.1
    # NOTE(review): the meaning of 999 is not visible here; presumably a
    # result cut-off -- confirm against rank_with_query_lm.
    q_ranked_list = rank_with_query_lm(query_lms, target_candidate_dict, 999,
                                       alpha)
    qrel_path = os.path.join(data_path, "perspective", "qrel.txt")
    qrels: QRelsFlat = load_qrels_flat(qrel_path)
    score = get_map(q_ranked_list, qrels)
    print(score)
Beispiel #9
0
def main():
    """Start the "qcknc" train/val generation jobs from the "train" candidates."""
    candidates = get_eval_candidates_as_qck("train")
    is_correct_fn = is_correct_factory()
    start_generate_jobs_for_train_val(
        QCKInstanceGenerator(candidates, is_correct_fn), "qcknc")
Beispiel #10
0
def main():
    """Run the "qcknc" jobs on the "dev" sub-split with the stage-2 dev QK candidates."""
    sub_split = "dev"
    candidates = get_eval_candidates_as_qck("dev")
    generator = QCKInstanceGenerator(candidates, is_correct_factory())
    run_jobs_with_qk_candidate(
        generator, sub_split, "perspective_qk_stage2_dev", "qcknc")
def main():
    """Start the "qcknc_dense" val generation jobs from the "train" candidates."""
    train_candidates = get_eval_candidates_as_qck("train")
    generator = QCKInstanceGenerator(train_candidates, is_correct_factory())
    # Selected from doc_scorer_summarizer.py
    start_generate_jobs_for_val(
        generator, "perspective_qk_candidate_train_dense", "qcknc_dense")