Example #1
def sanity_check():
    dvp: List[DocValueParts2] = load()
    candidate_d_raw: List[Tuple[int, List[int]]] = get_eval_candidate_as_pids(
        "train")
    candidate_d = {str(k): lmap(str, v) for k, v in candidate_d_raw}

    # Group by query id
    dvp_qid_grouped: Dict[str, List[DocValueParts2]] = group_by(dvp, get_qid)

    ap_baseline = []
    ap_new_score = []
    for qid, entries in dvp_qid_grouped.items():
        ranked_list_new = []
        ranked_list_baseline = []

        candidate_id_grouped = group_by(entries, get_candidate)
        for candidate_id, entries2 in candidate_id_grouped.items():
            is_initial_candidate = candidate_id in candidate_d[qid]
            gold = entries2[0].label
            # Skip gold candidates that were not in the initial candidate pool
            skip = gold and not is_initial_candidate
            if skip:
                continue

            def get_new_score(dvp: DocValueParts2):
                return dvp.score

            def get_baseline_score(dvp: DocValueParts2):
                return dvp.init_score

            new_score = top_k_avg(lmap(get_new_score, entries2))
            baseline_score = average(lmap(get_baseline_score, entries2))
            ranked_list_new.append((candidate_id, new_score, gold))
            ranked_list_baseline.append((candidate_id, baseline_score, gold))

        def get_ap(ranked_list):
            # Average precision: mean of the precision values at each gold item's rank
            ranked_list.sort(key=lambda x: x[1], reverse=True)

            p_list = []
            n_gold = 0
            for rank, (cid, score, gold) in enumerate(ranked_list):
                if gold:
                    n_gold += 1
                    p_list.append(n_gold / (rank + 1))
            return average(p_list)

        ap_baseline.append(get_ap(ranked_list_baseline))
        ap_new_score.append(get_ap(ranked_list_new))

    print("MAP baseline", average(ap_baseline))
    print("MAP new score", average(ap_new_score))
Example #2
def group_by_docs():
    dvp: List[DocValueParts2] = load()
    candidate_d_raw: List[Tuple[int, List[int]]] = get_eval_candidate_as_pids(
        "train")
    candidate_d = {str(k): lmap(str, v) for k, v in candidate_d_raw}

    # Group by query id
    dvp_qid_grouped: Dict[str, List[DocValueParts2]] = group_by(dvp, get_qid)

    def simple(doc_id):
        return doc_id.split("-")[-1]

    c_all = Counter()
    rows = []
    for qid, entries in dvp_qid_grouped.items():
        # Q : How many kdp are useful?
        # Q : Does relevance matter?
        candidate_id_grouped = group_by(entries, get_doc_id)
        rows.append(["qid", qid])

        for doc_id_idx, entries2 in candidate_id_grouped.items():
            #c = Counter([good_or_bad(e.score-e.init_score, e.label) for e in entries2])
            c = Counter([
                get_decision_change(e.label, e.init_score, e.score)
                for e in entries2
            ])
            rows.append([doc_id_idx])
            #row = [doc_id_idx, c["good"], c["bad"], c["no change"]]
            row = [
                doc_id_idx, c["decision_change_good"],
                c["decision_change_bad"], c["no_change"]
            ]
            rows.append(row)
            for k, v in c.items():
                c_all[k] += v

    row = [
        "summary", c_all["decision_change_good"], c_all["decision_change_bad"],
        c_all["no_change"]
    ]
    rows = [row] + rows

    print_table(rows)
Example #3
def main():
    trec_path = sys.argv[1]
    ranked_list = load_ranked_list(trec_path)
    candidate_d_raw: Dict[int, List[int]] = dict(
        get_eval_candidate_as_pids("dev"))
    label_d: Dict[int, List[int]] = get_claim_perspective_id_dict2()

    ex_candidate_entry = defaultdict(list)
    for entry in ranked_list:
        cid = int(entry.query_id)
        pid = int(entry.doc_id)
        label = pid in label_d[cid]

        # Show entries that are labeled true but not in the original candidates
        if label and pid not in candidate_d_raw[cid]:
            ex_candidate_entry[cid].append(entry.rank)

    for cid, ranks in ex_candidate_entry.items():
        print(cid, ranks)
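
Usage note: main() expects the path to a TREC-format ranked list as its only command-line argument (e.g. python <this script> dev_ranked_list.trec, file name illustrative) and prints, per claim id, the ranks of gold perspectives that are missing from the original candidate set.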
Example #4
def group_by_cids():
    dvp: List[DocValueParts2] = load()
    candidate_d_raw: List[Tuple[int, List[int]]] = get_eval_candidate_as_pids(
        "train")
    candidate_d = {str(k): lmap(str, v) for k, v in candidate_d_raw}

    # Group by query id
    dvp_qid_grouped: Dict[str, List[DocValueParts2]] = group_by(dvp, get_qid)

    def simple(doc_id):
        return doc_id.split("-")[-1]

    rows = []
    for qid, entries in dvp_qid_grouped.items():
        # Q : How many kdp are useful?
        # Q : Does relevance matter?
        candidate_id_grouped = group_by(entries, get_candidate)
        rows.append([qid])
        for candidate_id, entries2 in candidate_id_grouped.items():
            is_initial_candidate = candidate_id in candidate_d[qid]
            avg_score = average(lmap(lambda x: x.score, entries2))

            rows.append(['candidate id:', candidate_id])
            rows.append(['is_initial_candidate', is_initial_candidate])
            rows.append([
                "doc_id", "score", "gold", "init_pred", "direction", "decision"
            ])
            for e in entries2:
                s = "{}_{}".format(simple(e.kdp.doc_id), e.kdp.passage_idx)
                row = [
                    s, "{0:.2f}".format(e.score), e.label,
                    to_pred(e.init_score),
                    direction(e.score, e.init_score),
                    to_pred(e.score)
                ]

                rows.append(row)

    print_table(rows)
Example #5
def avg_scores():
    dvp: List[DocValueParts2] = load()
    candidate_d_raw: List[Tuple[int, List[int]]] = get_eval_candidate_as_pids(
        "train")
    candidate_d = {str(k): lmap(str, v) for k, v in candidate_d_raw}

    # Group by query id
    dvp_qid_grouped: Dict[str, List[DocValueParts2]] = group_by(dvp, get_qid)

    rows = []
    for qid, entries in dvp_qid_grouped.items():
        # Q : How many kdp are useful?
        # Q : Does relevance matter?
        candidate_id_grouped = group_by(entries, get_candidate)
        c = Counter()
        new_rows = []
        new_rows.append(["candidate id", "init_score", "avg_score"])

        for candidate_id, entries2 in candidate_id_grouped.items():
            label = entries2[0].label
            avg_score = average(lmap(lambda x: x.score, entries2))
            initial_score = entries2[0].init_score
            change = avg_score - initial_score
            value_type = good_or_bad(change, label, 0.01)
            c[value_type] += 1
            row = [
                candidate_id, label, value_type,
                four_digit_float(initial_score),
                four_digit_float(avg_score)
            ]
            new_rows.append(row)

        row = [qid, c['good'], c['bad'], c['no change']]
        rows.append(row)
        rows.extend(new_rows)
    print_table(rows)
Example #6
def functor(cid_to_passage) -> CPPNCGeneratorInterface:
    candidate_pers = dict(get_eval_candidate_as_pids("train"))
    return ppnc_datagen_50_perspective.Generator(
        cid_to_passage, candidate_pers)
Example #7
def functor(cid_to_passage) -> CPPNCGeneratorInterface:
    candidate_pers = dict(get_eval_candidate_as_pids("dev"))
    return multi_evidence.Generator(cid_to_passage, candidate_pers, False)
Example #8
def functor(cid_to_passage) -> CPPNCGeneratorInterface:
    candidate_pers = dict(get_eval_candidate_as_pids("train"))
    return cppnc_datagen.Generator(cid_to_passage, candidate_pers, False)
Example #9
def get_eval_candidates_as_qck(split) -> Dict[str, List[QCKCandidate]]:
    candidate_pers: List[Tuple[int,
                               List[int]]] = get_eval_candidate_as_pids(split)
    return cid_pid_format_to_qck(candidate_pers)
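
Across all of these examples, get_eval_candidate_as_pids(split) returns a list of (claim_id, perspective_ids) pairs. A minimal usage sketch, assuming the same import of get_eval_candidate_as_pids used above; the variable names and printing are illustrative only:

candidates = get_eval_candidate_as_pids("train")  # List[Tuple[int, List[int]]]
for cid, pids in candidates[:3]:
    # Each claim id maps to its initial candidate perspective ids
    print(cid, len(pids), pids[:5])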