Example no. 1
def evaluate2(predictions):
    gold = get_claim_perspective_id_dict()
    tot_p = tot_r = 0
    for c_Id, p_Id_list in predictions:
        gold_pids = gold[c_Id]

        covered = [False for _c in gold_pids]
        for pid in p_Id_list:
            for idx, cluster in enumerate(gold_pids):
                if pid in cluster:
                    covered[idx] = True
        tot_gold = len(covered)
        tot_pred = len(p_Id_list)
        hit = [h for h in covered if h]

        if tot_pred == 0:
            tot_p += 1
        else:
            tot_p += len(hit) / tot_pred

        if tot_gold == 0:
            tot_r += 1
        else:
            tot_r += len(hit) / tot_gold

    mean_p = tot_p / len(predictions)
    mean_r = tot_r / len(predictions)
    mean_f1 = 2 * mean_p * mean_r / (mean_p + mean_r)

    return {'precision': mean_p, 'recall': mean_r, 'f1': mean_f1}
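A minimal, self-contained sketch of the metric evaluate2 computes, using toy data in place of get_claim_perspective_id_dict(); it assumes predictions are (claim_id, [perspective_id, ...]) pairs and the gold value is a list of perspective-id clusters per claim. A prediction list scores one hit for every gold cluster it covers.

# Toy stand-ins for the repository data; values are illustrative only.
toy_gold = {
    1: [[10, 11], [12]],   # claim 1: two gold perspective clusters
    2: [[20]],             # claim 2: one cluster
}
toy_predictions = [
    (1, [10, 99]),         # covers one of two clusters; one predicted pid is wrong
    (2, [20]),             # perfect
]

tot_p = tot_r = 0
for cid, pred_pids in toy_predictions:
    clusters = toy_gold[cid]
    covered = [any(pid in c for pid in pred_pids) for c in clusters]
    hits = sum(covered)
    tot_p += hits / len(pred_pids) if pred_pids else 1
    tot_r += hits / len(clusters) if clusters else 1

mean_p = tot_p / len(toy_predictions)   # 0.75
mean_r = tot_r / len(toy_predictions)   # 0.75
mean_f1 = 2 * mean_p * mean_r / (mean_p + mean_r)
print(mean_p, mean_r, mean_f1)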
Example no. 2
def save_to_csv():
    gold = get_claim_perspective_id_dict()

    def routine(claims, out_path):
        payloads = predict_by_elastic_search(claims, 50)
        head = ['sentence1', 'sentence2', 'gold_label', 'cid', 'pid']
        rows = []
        for cid, data_list in payloads:
            gold_pids = gold[cid]
            all_pid_set = set(flatten(gold_pids))
            for p_entry in data_list:
                c_text = p_entry['claim_text']
                p_text = p_entry['perspective_text']
                y = 1 if p_entry['pid'] in all_pid_set else 0
                row = [c_text, p_text, y, cid, p_entry['pid']]
                rows.append(row)
        # Use a context manager so the output file is flushed and closed.
        with open(out_path, "w", encoding="utf-8", newline="") as out_f:
            f_out = csv.writer(out_f, dialect='excel-tab')
            f_out.writerows([head] + rows)

    claims, val = train_split()
    routine(claims, get_file_path('train'))
    d_ids: List[int] = list(load_dev_claim_ids())
    claims = get_claims_from_ids(d_ids)
    routine(claims, get_file_path('dev'))
    d_ids: List[int] = list(load_test_claim_ids())
    claims = get_claims_from_ids(d_ids)
    routine(claims, get_file_path('test'))
Example no. 3
def inspect(predictions):
    gold = get_claim_perspective_id_dict()

    suc_counter = SuccessCounter()
    for c_Id, prediction_list in predictions:
        gold_pids = gold[c_Id]

        def is_valid(pid):
            for cluster in gold_pids:
                if pid in cluster:
                    return True
            return False

        top_pred = prediction_list[0]

        if is_valid(top_pred['pid']):
            suc_counter.suc()
        else:
            suc_counter.fail()
            claim_text = top_pred['claim_text']
            print("Claim {}: ".format(c_Id), claim_text)
            print("{0:.2f} {1} {2}".format(top_pred['score'],
                                           top_pred['rationale'],
                                           top_pred['perspective_text']))
            print()

    print("P@1", suc_counter.get_suc_prob())
Example no. 4
def debug_failture(predictions):
    gold = get_claim_perspective_id_dict()
    ap_list = []
    for c_Id, prediction_list in predictions:
        gold_pids = gold[c_Id]
        gold_pids_set: Set[int] = set(flatten(gold_pids))
        claim_text = prediction_list[0]['claim_text']
        print("Claim {}: ".format(c_Id), claim_text)
        correctness_list = lmap(lambda p: p['pid'] in gold_pids_set,
                                prediction_list)
        ap = get_ap(prediction_list, gold_pids, False)

        if not any(correctness_list):  # all wrong
            continue

        if ap > 0.9:
            continue

        def print_line(prediction):
            pid = prediction['pid']
            correct = pid in gold_pids_set
            if correct:
                correct_str = "Y"
            else:
                correct_str = "N"

            score = prediction['score']
            print(correct_str, score, score.name,
                  prediction['perspective_text'])

        foreach(print_line, prediction_list)
        ap_list.append(ap)

    map_score = average(ap_list)
    return {'map': map_score}
Example no. 5
    def __init__(self,
                 cid_to_passages: Dict[int, List[Tuple[List[str], float]]],
                 candidate_perspective: Dict[int, List[int]],
                 filter_good):
        self.gold = get_claim_perspective_id_dict()
        self.candidate_perspective = candidate_perspective
        self.cid_to_passages = cid_to_passages
        self.filter_good = filter_good
Example no. 6
    def __init__(
        self,
        cid_to_passages: Dict[int, List[Tuple[List[str], float]]],
    ):
        self.cid_to_passages = cid_to_passages

        self.all_cids = list(cid_to_passages.keys())
        self.gold = get_claim_perspective_id_dict()
Example no. 7
def get_trec_relevance_judgement() -> Iterable[TrecRelevanceJudgementEntry]:
    gold: Dict[int, List[List[int]]] = get_claim_perspective_id_dict()
    for cid, clusters in gold.items():
        query_id = str(cid)
        pids = set(flatten(clusters))
        for pid in pids:
            e = TrecRelevanceJudgementEntry(query_id, str(pid), 1)
            yield e
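If these judgement entries are later serialized as standard TREC qrels, the output would look like the short sketch below. The usual qrels line layout ("<query_id> 0 <doc_id> <relevance>") is assumed here, and a toy gold dict stands in for get_claim_perspective_id_dict().

# Hedged sketch: flatten gold clusters into TREC-style qrels lines.
toy_gold = {100: [[1, 2], [3]], 101: [[7]]}

with open("pc_qrels.txt", "w") as out_f:
    for cid, clusters in toy_gold.items():
        pids = sorted({pid for cluster in clusters for pid in cluster})
        for pid in pids:
            out_f.write("{} 0 {} 1\n".format(cid, pid))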
Example no. 8
def get_correctness_list(predictions, debug) -> List[List[int]]:
    gold = get_claim_perspective_id_dict()
    all_correctness_list = []
    for c_Id, prediction_list in predictions:
        gold_pids = gold[c_Id]
        correctness_list: List[int] = get_correctness(prediction_list,
                                                      gold_pids)
        all_correctness_list.append(correctness_list)
    return all_correctness_list
Example no. 9
def is_correct_factory():
    gold = get_claim_perspective_id_dict()

    def is_correct(query: QCKQuery, candidate: QCKCandidate) -> int:
        pid_cluster = gold[int(query.query_id)]
        return int(
            any([int(candidate.id) in cluster for cluster in pid_cluster]))

    return is_correct
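A usage sketch with lightweight stand-ins: QCKQuery and QCKCandidate are repository classes, so the namedtuples below only mimic the two attributes the closure reads (query_id and id), and a toy gold dict replaces the real lookup.

from collections import namedtuple

QueryStub = namedtuple("QueryStub", ["query_id"])
CandidateStub = namedtuple("CandidateStub", ["id"])

toy_gold = {100: [[1, 2], [3]]}  # stand-in for get_claim_perspective_id_dict()

def is_correct_factory_toy():
    def is_correct(query, candidate) -> int:
        clusters = toy_gold[int(query.query_id)]
        return int(any(int(candidate.id) in cluster for cluster in clusters))
    return is_correct

is_correct = is_correct_factory_toy()
print(is_correct(QueryStub("100"), CandidateStub("2")))  # 1
print(is_correct(QueryStub("100"), CandidateStub("9")))  # 0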
Example no. 10
    def __init__(
        self,
        cid_to_passages: Dict[int, List[Tuple[List[str], float]]],
        candidate_perspective: Dict[int, List[int]],
    ):
        self.gold = get_claim_perspective_id_dict()
        self.candidate_perspective = candidate_perspective
        self.cid_to_passages = cid_to_passages
        self.tokenizer = get_tokenizer()
Example no. 11
def load_passage_score_d(
        cppnc_save_name,
        baseline_save_name) -> Dict[Tuple[str, str, int], float]:
    cid_grouped: Dict[str,
                      Dict[str,
                           List[Dict]]] = load_cppnc_score(cppnc_save_name)
    gold = get_claim_perspective_id_dict()
    baseline_cid_grouped = load_baseline(baseline_save_name)

    score_d: Dict[Tuple[str, str, int], float] = {}

    def get_score_from_entry(entry):
        logit = entry['logits']
        return scipy.special.softmax(logit)[1]

    for cid, pid_entries_d in cid_grouped.items():
        pid_entries_d: Dict[str, List[Dict]] = pid_entries_d
        baseline_pid_entries = baseline_cid_grouped[int(cid)]
        baseline_score_d = fetch_score(baseline_pid_entries)

        gold_pids = gold[int(cid)]

        value_arr_pid_row = []
        for pid, entries_for_pid in pid_entries_d.items():
            label = any([int(pid) in pids for pids in gold_pids])
            base_score = baseline_score_d[int(pid)]

            def get_value_from_entry(entry) -> float:
                score = get_score_from_entry(entry)
                value = doc_value(score, base_score, int(label))
                return value

            cur_value_row: List[float] = lmap(get_value_from_entry,
                                              entries_for_pid)
            value_arr_pid_row.append(cur_value_row)

        value_arr_doc_row: List[List[float]] = list(
            map(list, zip(*value_arr_pid_row)))
        avg_value = lmap(average, value_arr_doc_row)

        doc_info = []
        for pid, entries_for_pid in pid_entries_d.items():
            for entry in entries_for_pid:
                e = entry['kdp'].doc_id, entry['kdp'].passage_idx
                doc_info.append(e)
            break

        assert len(avg_value) == len(doc_info)

        for value, (doc_id, passage_idx) in zip(avg_value, doc_info):
            key = cid, doc_id, passage_idx
            score_d[key] = value

    return score_d
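The zip(*...) transpose above is the core of the aggregation: per-perspective rows of per-passage values become per-passage rows, which are then averaged over perspectives. A toy illustration of just that step:

# Rows are per-pid, columns are per-passage; after the transpose each row
# holds one passage's values across all pids, which is what gets averaged.
value_arr_pid_row = [
    [0.5, 1.0],   # pid A: value from passage 0, passage 1
    [1.5, 2.0],   # pid B
]
value_arr_doc_row = list(map(list, zip(*value_arr_pid_row)))
avg_value = [sum(row) / len(row) for row in value_arr_doc_row]
print(value_arr_doc_row)  # [[0.5, 1.5], [1.0, 2.0]]
print(avg_value)          # [1.0, 1.5]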
Example no. 12
def get_average_precision_list(predictions, debug):
    gold = get_claim_perspective_id_dict()
    ap_list = []
    for c_Id, prediction_list in predictions:
        gold_pids = gold[c_Id]
        claim_text = prediction_list[0]['claim_text']
        if debug:
            print("Claim {}: ".format(c_Id), claim_text)
        ap = get_ap(prediction_list, gold_pids, debug)
        ap_list.append(ap)
    return ap_list
Example no. 13
def generate_pair_insts(split) -> Iterable[Instance]:
    pos_rate = 1
    neg1_rate = 3
    neg2_rate = 6
    ids: List[int] = list(load_claim_ids_for_split(split))
    id_dict: Dict[int, List[List[int]]] = get_claim_perspective_id_dict()

    def same_cluster_example() -> Iterator[Tuple[int, int]]:
        for claim_id in ids:
            clusters = id_dict[claim_id]
            for cluster in clusters:
                for p1, p2 in combinations(cluster, 2):
                    yield p1, p2

    def same_claim_different_cluster() -> Iterator[Tuple[int, int]]:
        for claim_id in ids:
            clusters = id_dict[claim_id]
            for cluster1, cluster2 in combinations(clusters, 2):
                for p1 in cluster1:
                    for p2 in cluster2:
                        yield p1, p2

    def different_claim() -> Iterator[Tuple[int, int]]:
        for cid1, cid2 in combinations(ids, 2):
            clusters1 = id_dict[cid1]
            clusters2 = id_dict[cid2]
            for p1 in flatten(clusters1):
                for p2 in flatten(clusters2):
                    yield p1, p2

    pos: List[Tuple[int, int]] = list(same_cluster_example())
    neg1: List[Tuple[int, int]] = list(same_claim_different_cluster())
    neg2: List[Tuple[int, int]] = list(different_claim())

    pos_len = len(pos)
    neg1_len = pos_len * neg1_rate
    neg2_len = pos_len * neg2_rate

    print("pos/neg1/neg2 = {}/{}/{}".format(pos_len, neg1_len, neg2_len))

    random.shuffle(neg1)
    random.shuffle(neg2)

    neg1 = neg1[:neg1_len]
    neg2 = neg2[:neg2_len]

    pos_data = list([Instance(pid1, pid2, 1) for pid1, pid2 in pos])
    neg_data = list([Instance(pid1, pid2, 0) for pid1, pid2 in neg1 + neg2])

    all_data = pos_data + neg_data
    random.shuffle(all_data)
    return all_data
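A self-contained sketch of the three pair sources and the 1:3:6 down-sampling on toy clusters; Instance and the claim-loading helpers are repository code, so plain tuples and a toy id_dict stand in here.

import random
from itertools import combinations

# Toy gold clusters per claim; stand-in for get_claim_perspective_id_dict().
toy_id_dict = {
    1: [[10, 11, 12], [13, 14]],
    2: [[20, 21]],
}
ids = list(toy_id_dict)

pos = [(p1, p2) for cid in ids
       for cluster in toy_id_dict[cid]
       for p1, p2 in combinations(cluster, 2)]
neg1 = [(p1, p2) for cid in ids
        for c1, c2 in combinations(toy_id_dict[cid], 2)
        for p1 in c1 for p2 in c2]
neg2 = [(p1, p2) for cid1, cid2 in combinations(ids, 2)
        for p1 in [p for c in toy_id_dict[cid1] for p in c]
        for p2 in [p for c in toy_id_dict[cid2] for p in c]]

# Keep negatives at 3x and 6x the number of positives, as in the code above.
random.shuffle(neg1)
random.shuffle(neg2)
neg1 = neg1[:len(pos) * 3]
neg2 = neg2[:len(pos) * 6]
print("pos/neg1/neg2 = {}/{}/{}".format(len(pos), len(neg1), len(neg2)))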
Example no. 14
def main():
    baseline_cid_grouped, cid_grouped, claim_d = load_cppnc_related_data()
    gold = get_claim_perspective_id_dict()

    bin_keys = ["< 0.05", "< 0.50", "< 0.95", "< 1"]

    def bin_fn(item: float):
        if item > 0.95:
            return "< 1"
        elif item > 0.5:
            return "< 0.95"
        elif item > 0.05:
            return "< 0.50"
        else:
            return "< 0.05"

    for cid, pid_entries in cid_grouped.items():
        baseline_pid_entries = baseline_cid_grouped[cid]

        baseline_score_d = {}
        for cpid, a_thing_array in baseline_pid_entries:
            _, pid = cpid
            assert len(a_thing_array) == 1
            score = a_thing_array[0]['score']
            baseline_score_d[pid] = score

        gold_pids = gold[cid]

        def get_score_per_pid_entry(p_entries: Tuple[CPIDPair, List[Dict]]):
            cpid, entries = p_entries
            return average(lmap(lambda e: e['score'], entries))

        pid_entries.sort(key=get_score_per_pid_entry, reverse=True)

        s = "{} : {}".format(cid, claim_d[cid])
        print(s)
        head_row = [""] + bin_keys
        rows = [head_row]
        for cpid, things in pid_entries:
            histogram = BinHistogram(bin_fn)
            _, pid = cpid
            label = any([pid in pids for pids in gold_pids])
            label_str = bool_to_yn(label)
            base_score = baseline_score_d[pid]
            base_score_str = "{0:.2f}".format(base_score)
            scores: List[float] = lmap(lambda x: (x['score']), things)
            foreach(histogram.add, scores)
            row = [label_str, base_score_str] + [
                str(histogram.counter[bin_key]) for bin_key in bin_keys
            ]
            rows.append(row)
        print_table(rows)
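A toy illustration of the binning logic above, assuming BinHistogram simply counts the bin_fn output for each score (the repository class may carry more state):

from collections import Counter

bin_keys = ["< 0.05", "< 0.50", "< 0.95", "< 1"]

def bin_fn(item):
    if item > 0.95:
        return "< 1"
    elif item > 0.5:
        return "< 0.95"
    elif item > 0.05:
        return "< 0.50"
    else:
        return "< 0.05"

scores = [0.01, 0.2, 0.6, 0.97, 0.99]
histogram = Counter(bin_fn(s) for s in scores)
print([histogram[k] for k in bin_keys])  # [1, 1, 1, 2]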
Example no. 15
def main():
    baseline_cid_grouped, cid_grouped, claim_d = load_cppnc_related_data()
    gold = get_claim_perspective_id_dict()
    doc_scores = dict(doc_score_predictions())

    for cid, pid_entries in cid_grouped.items():
        baseline_pid_entries = baseline_cid_grouped[cid]

        baseline_score_d = {}
        for cpid, a_thing_array in baseline_pid_entries:
            _, pid = cpid
            assert len(a_thing_array) == 1
            score = a_thing_array[0]['score']
            baseline_score_d[pid] = score

        gold_pids = gold[cid]

        def get_score_per_pid_entry(p_entries: Tuple[CPIDPair, List[Dict]]):
            cpid, entries = p_entries
            return average(lmap(lambda e: e['score'], entries))

        pid_entries.sort(key=get_score_per_pid_entry, reverse=True)

        s = "{} : {}".format(cid, claim_d[cid])
        print(s)
        num_docs = len(pid_entries[0][1])
        doc_value_arr = list([list() for _ in range(num_docs)])
        labels = []
        for cpid, things in pid_entries:
            _, pid = cpid
            label = any([pid in pids for pids in gold_pids])
            labels.append(label)
            base_score = baseline_score_d[pid]
            for doc_idx, per_doc in enumerate(things):
                score = per_doc['score']
                value = doc_value(score, base_score, int(label))
                doc_value_arr[doc_idx].append(value)

        head = ["avg", "pred"] + lmap(bool_to_yn, labels)
        rows = [head]
        doc_score = doc_scores[cid]
        assert len(doc_value_arr) == len(doc_score)

        for pred_score, doc_values in zip(doc_score, doc_value_arr):
            avg = average(doc_values)
            row_float = [avg, pred_score] + doc_values
            row = lmap(lambda x: "{0}".format(x), row_float)
            rows.append(row)
        print_table(rows)
Example no. 16
def eval_classification(classifier, split):
    payloads = load_payload(split)
    gold = get_claim_perspective_id_dict()

    r = []
    for cid, data_list in payloads:
        gold_pids = gold[cid]
        all_pid_set = set(flatten(gold_pids))
        for p_entry in data_list:
            c_text = p_entry['claim_text']
            p_text = p_entry['perspective_text']
            z = classifier(c_text, p_text)
            y = 1 if p_entry['pid'] in all_pid_set else 0
            r.append((z, y))
    return get_scores(r)
Example no. 17
def tune_kernel_a():
    split = "train"
    payloads = load_payload(split)
    gold = get_claim_perspective_id_dict()

    r = []
    for cid, data_list in payloads:
        gold_pids = gold[cid]
        all_pid_set = set(flatten(gold_pids))
        for p_entry in data_list:
            c_text = p_entry['claim_text']
            p_text = p_entry['perspective_text']
            y = 1 if p_entry['pid'] in all_pid_set else 0
            r.append((c_text, p_text, y))
    tune_kernel_save(r)
Example no. 18
def predict_by_oracle_on_candidate(claims,
                                   top_k) -> List[Tuple[str, List[Dict]]]:
    gold: Dict[int, List[List[int]]] = get_claim_perspective_id_dict()

    def scorer(lucene_score, query_id) -> NamedNumber:
        claim_id, p_id = query_id.split("_")
        gold_pids = gold[int(claim_id)]
        score = 0
        for p_ids in gold_pids:
            if int(p_id) in p_ids:
                score = 1

        return NamedNumber(score, "")

    r = predict_interface(claims, top_k, scorer)
    return r
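A minimal sketch of just the oracle scoring rule, with a toy gold dict in place of the repository lookup; NamedNumber is a repository wrapper, so a plain int is returned here.

# The query_id encodes "<claim_id>_<perspective_id>"; the oracle score is 1
# iff that perspective appears in any gold cluster of the claim.
toy_gold = {100: [[1, 2], [3]]}

def oracle_score(query_id: str) -> int:
    claim_id, p_id = query_id.split("_")
    gold_pids = toy_gold[int(claim_id)]
    return int(any(int(p_id) in cluster for cluster in gold_pids))

print(oracle_score("100_2"))  # 1
print(oracle_score("100_9"))  # 0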
Example no. 19
def main():
    relevance_scores: Dict[CPIDPair, List[Tuple[Logits, Logits]]] = load_from_pickle("pc_relevance_score")
    gold = get_claim_perspective_id_dict()

    true_feature = []
    false_feature = []

    ticker = TimeEstimator(len(relevance_scores))
    for key in relevance_scores:
        ticker.tick()
        cid, pid = key

        gold_pids = flatten(gold[cid])
        gold_pids = list([int(pid) for pid in gold_pids])
        correct = pid in gold_pids
        scores: List[Tuple[List[float], List[float]]] = relevance_scores[key]

        c_count = 0
        p_count = 0
        pc_count = 0
        for c_logits, p_logits in scores:
            c_rel = softmax(c_logits)[1] > 0.5
            p_rel = softmax(p_logits)[1] > 0.5

            c_count += int(c_rel)
            p_count += int(p_rel)
            pc_count += int(c_rel and p_rel)

        if correct:
            true_feature.append(pc_count)
        else:
            false_feature.append(pc_count)

    all_feature = true_feature + false_feature
    all_feature.sort()
    mid = int(len(all_feature)/2)
    cut_off = all_feature[mid]

    tp = sum([int(t > cut_off) for t in true_feature])
    fp = sum([int(t > cut_off) for t in false_feature])
    tn = sum([int(t <= cut_off) for t in false_feature])
    fn = sum([int(t <= cut_off) for t in true_feature])

    print(tp, fp, tn, fn)
    print("true feature", average(true_feature))
    print("false feature", average(false_feature))
Example no. 20
def main():
    save_name = "qcknc_val"
    cid_grouped: Dict[str, Dict[str, List[Dict]]] = load_cppnc_score(save_name)
    baseline_cid_grouped: Dict[int, List] = load_baseline("train_baseline")

    # baseline_cid_grouped, cid_grouped, claim_d = load_cppnc_related_data()
    gold = get_claim_perspective_id_dict()

    columns = ["cid", "doc_id", "num_good-num_bad"]
    rows = [columns]
    for cid_s, pid_entries in cid_grouped.items():
        cid = int(cid_s)
        baseline_pid_entries = baseline_cid_grouped[cid]
        baseline_score_d: Dict[int, float] = fetch_score_per_pid(
            baseline_pid_entries)

        gold_pids = gold[cid]

        labels = []
        per_doc_counter = Counter()
        for pid, entries in pid_entries.items():
            label = any([int(pid) in pids for pids in gold_pids])
            labels.append(label)
            base_score = baseline_score_d[int(pid)]

            try:
                for doc_idx, entry in enumerate(entries):
                    doc_id = entry['kdp'].doc_id
                    score = get_score_from_entry(entry)
                    value = doc_value(score, base_score, int(label))
                    value_type = doc_value_group(value)
                    per_doc_counter[doc_id, value_type] += 1

            except KeyError:
                print(cid, doc_idx, "not found")
                pass
        doc_ids = set(left(per_doc_counter.keys()))
        for doc_id in doc_ids:
            n_good = per_doc_counter[doc_id, "good"]
            n_bad = per_doc_counter[doc_id, "bad"]
            doc_score = n_good - n_bad
            row = [cid, doc_id, doc_score]
            if doc_score > 2 or doc_score < -2:
                rows.append(row)

    print_table(rows)
Example no. 21
def main():
    print("Loading doc score")
    doc_scores = dict(doc_score_predictions())
    print("Loading cppnc scores")
    save_name = "qcknc_val"
    cid_grouped: Dict[str, Dict[str, List[Dict]]] = load_cppnc_score(save_name)
    print(".")

    gold = get_claim_perspective_id_dict()
    baseline_cid_grouped: Dict[int, List] = load_baseline("train_baseline")
    claim_d = load_train_claim_d()

    for cid, pid_entries_d in cid_grouped.items():
        pid_entries_d: Dict[str, List[Dict]] = pid_entries_d
        baseline_pid_entries = baseline_cid_grouped[int(cid)]

        baseline_score_d = fetch_score_per_pid(baseline_pid_entries)

        gold_pids = gold[int(cid)]

        def get_score_per_pid_entry(p_entries: Tuple[str, List[Dict]]):
            _, entries = p_entries
            return average(lmap(get_score_from_entry, entries))

        pid_entries: List[Tuple[str, List[Dict]]] = list(pid_entries_d.items())
        pid_entries.sort(key=get_score_per_pid_entry, reverse=True)

        s = "{} : {}".format(cid, claim_d[int(cid)])
        print(s)
        doc_info_d, doc_value_arr, labels = collect_score_per_doc(baseline_score_d, get_score_from_entry, gold_pids,
                                                                  pid_entries)

        pids = left(pid_entries)
        head1 = [""] * 4 + pids
        head2 = ["avg", "doc_id", "passage_idx", "pknc_pred"] + lmap(bool_to_yn, labels)
        rows = [head1, head2]
        doc_score = doc_scores[cid]
        assert len(doc_value_arr) == len(doc_score)

        for doc_idx, (pred_score, doc_values) in enumerate(zip(doc_score, doc_value_arr)):
            doc_id, passage_idx = doc_info_d[doc_idx]
            avg = average(doc_values)
            row_float = [avg, doc_id, passage_idx, pred_score] + doc_values
            row = lmap(lambda x: "{0}".format(x), row_float)
            rows.append(row)
        print_table(rows)
Example no. 22
def get_relevance_judgement_only_from_candidate():
    split = "dev"
    candidates: List[Tuple[int, List[Dict]]] = get_eval_candidates_from_pickle(split)
    valid_set = set()
    for cid, items in candidates:
        for e in items:
            pid = e['pid']
            valid_set.add((cid, pid))
    gold: Dict[int, List[List[int]]] = get_claim_perspective_id_dict()
    judgements = []
    for cid, clusters in gold.items():
        query_id = str(cid)
        pids = set(flatten(clusters))
        for pid in pids:
            if (cid, pid) in valid_set:
                e = TrecRelevanceJudgementEntry(query_id, str(pid), 1)
                judgements.append(e)
    return judgements
Example no. 23
def build_gold_lms(claims) -> List[ClaimLM]:
    gold = get_claim_perspective_id_dict()
    tokenizer = PCTokenizer()

    def get_cluster_lm(cluster: List[int]) -> Counter:
        p_text_list: List[str] = lmap(perspective_getter, cluster)
        tokens_list: List[List[str]] = lmap(tokenizer.tokenize_stem,
                                            p_text_list)
        counter_list = lmap(tokens_to_freq, tokens_list)
        counter = average_counters(counter_list)
        return counter

    def get_claim_lm(claim) -> ClaimLM:
        cid = claim["cId"]
        counter_list: List[Counter] = lmap(get_cluster_lm, gold[cid])
        counter: Counter = average_counters(counter_list)
        return ClaimLM(cid, claim['text'], counter)

    claim_lms = lmap(get_claim_lm, claims)
    return claim_lms
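A toy sketch of the counter averaging that builds each claim LM. tokens_to_freq and average_counters are repository helpers; the versions below assume they mean "relative token frequency within one perspective" and "element-wise mean over counters", which may differ from the real implementations.

from collections import Counter

def tokens_to_freq_toy(tokens):
    counter = Counter(tokens)
    total = sum(counter.values())
    return Counter({t: c / total for t, c in counter.items()})

def average_counters_toy(counters):
    merged = Counter()
    for c in counters:
        for t, v in c.items():
            merged[t] += v / len(counters)
    return merged

cluster_tokens = [["tax", "cut", "help"], ["tax", "hurt"]]
cluster_lm = average_counters_toy([tokens_to_freq_toy(t) for t in cluster_tokens])
print(cluster_lm.most_common())  # "tax" gets the highest weight (~0.42)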
Example no. 24
def perspective_lm_correlation():
    d_ids = list(load_dev_claim_ids())
    claims = get_claims_from_ids(d_ids)
    top_k = 20
    gold = get_claim_perspective_id_dict()
    predictions = predict_with_lm(claims, top_k)

    avg_pos_list = []
    avg_neg_list = []
    for c_Id, prediction_list in predictions:
        gold_pids = gold[c_Id]
        claim_text = prediction_list[0]['claim_text']

        pos_list = []
        neg_list = []
        print("Claim {}: ".format(c_Id), claim_text)
        for prediction in prediction_list:
            pid = prediction['pid']
            valid = False
            for cluster in gold_pids:
                if pid in cluster:
                    valid = True
                    break
            print("{0} {1:.2f} {2}".format(valid, prediction['lm_score'],
                                           prediction['perspective_text']))
            if not valid:
                neg_list.append(prediction['lm_score'])
            else:
                pos_list.append(prediction['lm_score'])

        if pos_list and neg_list:
            pos_score = average(pos_list)
            neg_score = average(neg_list)
            avg_pos_list.append(pos_score)
            avg_neg_list.append(neg_score)

    t_stat, p_value = ttest_ind(avg_pos_list, avg_neg_list)
    print("pos", average(avg_pos_list), "neg", average(avg_neg_list))
    print("pos", avg_pos_list)
    print("neg", avg_neg_list)
    print(t_stat, p_value)
Example no. 25
def evaluate(predictions, debug=True):
    gold = get_claim_perspective_id_dict()
    prec_list = []
    recall_list = []
    for c_Id, prediction_list in predictions:
        gold_pids = gold[c_Id]
        claim_text = prediction_list[0]['claim_text']
        if debug:
            print("Claim {}: ".format(c_Id), claim_text)
        prec, recall = get_prec_recll(prediction_list, gold_pids, debug)
        prec_list.append(prec)
        recall_list.append(recall)

    avg_prec = average(prec_list)
    avg_recall = average(recall_list)

    return {
        'precision': avg_prec,
        'recall': avg_recall,
        'f1': get_f1(avg_prec, avg_recall)
    }
Example no. 26
def get_candidates(claims, balance) -> List[PerspectiveCandidate]:
    related_p_map = get_claim_perspective_id_dict()
    related_p_map = {
        key: flatten(value)
        for key, value in related_p_map.items()
    }
    p_map = get_perspective_dict()

    all_data_points = []
    for c in claims:
        cid = c["cId"]
        claim_text = c["text"]
        lucene_results = es_helper.get_perspective_from_pool(claim_text, 50)

        rp = related_p_map[cid]

        pid_set = list([_pid for _text, _pid, _score in lucene_results])
        data_point_list = []
        for pid in pid_set:
            p_text = p_map[pid]
            label = 1 if pid in rp else 0
            data_point = PerspectiveCandidate(label=str(label),
                                              cid=cid,
                                              pid=pid,
                                              claim_text=claim_text,
                                              p_text=p_text)
            #data_point = [str(label), str(cid), str(pid), claim_text, p_text]
            data_point_list.append(data_point)

        # If training, we balance positive and negative examples.
        if balance:
            pos_insts = list([e for e in data_point_list if e.label == "1"])
            neg_insts = list([e for e in data_point_list if e.label == "0"])
            neg_insts = neg_insts[:len(pos_insts)]
            data_point_list = pos_insts + neg_insts
        all_data_points.extend(data_point_list)

    return all_data_points
Example no. 27
def build_df():
    claims, val = train_split()
    gold = get_claim_perspective_id_dict()

    tokenizer = PCTokenizer()
    df = Counter()

    dl_list = []
    for claim in claims:
        cid = claim["cId"]
        gold_pids = flatten(gold[cid])
        p_text_list: List[str] = lmap(perspective_getter, gold_pids)
        tokens_list = lmap(tokenizer.tokenize_stem, p_text_list)
        dl_list.extend(lmap(len, tokens_list))

        for t in set(flatten(tokens_list)):
            df[t] += 1

    print(dl_list)
    print("Avdl", average(dl_list))
    print(len(claims))
    print(df.most_common(30))
    save_to_pickle(df, "pc_df")
Example no. 28
def main(input_path):
    claims = get_all_claims()
    claim_d = claims_to_dict(claims)
    gold: Dict[int, List[List[int]]] = get_claim_perspective_id_dict()
    grouped_ranked_list = load_ranked_list_grouped(input_path)

    def is_correct(qid: str, doc_id: str):
        return any([int(doc_id) in cluster for cluster in gold[int(qid)]])

    top_k = 5
    for qid, entries in grouped_ranked_list.items():
        n_gold = sum(map(len, gold[int(qid)]))
        cut_n = min(n_gold, top_k)
        correctness = list([is_correct(qid, e.doc_id) for e in entries[:cut_n]])
        num_correct = sum(lmap(int, correctness))
        p_at_k = num_correct / cut_n

        pid_to_rank: Dict[str, int] = {e.doc_id: e.rank for e in entries}

        def get_rank(pid: int):
            if str(pid) in pid_to_rank:
                return pid_to_rank[str(pid)]
            else:
                return "X"

        if p_at_k < 0.3:
            print(n_gold)
            print(p_at_k)
            print("Claim {} {}".format(qid, claim_d[int(qid)]))##
            for cluster in gold[int(qid)]:
                print("-")
                for pid in cluster:
                    print("[{}]".format(get_rank(pid)), perspective_getter(int(pid)))
            for e in entries[:50]:
                correct_str = "Y" if is_correct(qid, e.doc_id) else "N"
                print("{} {} {}".format(correct_str, e.score, perspective_getter(int(e.doc_id))))
Example no. 29
def main():
    run_config = json.load(open(sys.argv[1], "r"))
    passage_score_path = run_config['passage_score_path']
    payload_name = run_config['payload_name']

    doc_scores: Dict[int, List[float]] = dict(
        load_doc_score_prediction(passage_score_path))
    baseline_cid_grouped, cid_grouped, claim_d = load_cppnc_score_and_baseline_and_group(
        payload_name)
    gold = get_claim_perspective_id_dict()

    g_counter = Counter()
    columns = ["pid doc pair", "good", "bad", "no effect", "no effect pid"]
    rows = [columns]
    record = []
    for cid, pid_entries in cid_grouped.items():
        baseline_pid_entries = baseline_cid_grouped[cid]

        baseline_score_d = {}
        for cpid, a_thing_array in baseline_pid_entries:
            _, pid = cpid
            assert len(a_thing_array) == 1
            score = a_thing_array[0]['score']
            baseline_score_d[pid] = score

        gold_pids = gold[cid]

        labels = []
        counter = Counter()
        for cpid, things in pid_entries:
            _, pid = cpid
            label = any([pid in pids for pids in gold_pids])
            labels.append(label)
            base_score = baseline_score_d[pid]

            any_effect = False
            try:
                for doc_idx, per_doc in enumerate(things):
                    score = per_doc['score']
                    value = doc_value(score, base_score, int(label))
                    qknc_score = doc_scores[cid][doc_idx]
                    if qknc_score < 0:
                        continue
                    value_type = doc_value_group(value)
                    counter[value_type] += 1
                    if value_type in ["good", "bad"]:
                        record.append((cid, pid, doc_idx, value_type))
                    if value_type != "no effect":
                        any_effect = True
                    counter["pid doc pair"] += 1
                if not any_effect:
                    counter["no effect pid"] += 1
            except KeyError:
                print(cid, doc_idx, "not found")
                pass
        row = [cid] + list([counter[c] for c in columns])
        rows.append(row)

        for key, count in counter.items():
            g_counter[key] += count

    row = ["all"] + list([g_counter[c] for c in columns])
    rows.append(row)
    row = ["rate"] + list(
        [g_counter[c] / g_counter["pid doc pair"] for c in columns])
    rows.append(row)
    print_table(rows)

    print_table(record)
Example no. 30
def main():
    print("Loading scores...")
    cid_grouped: Dict[str, Dict[str, List[Dict]]] = load_cppnc_score_wrap()
    baseline_cid_grouped = load_baseline("train_baseline")
    gold = get_claim_perspective_id_dict()
    tokenizer = get_tokenizer()
    claim_d = load_train_claim_d()

    print("Start analyzing")
    html = HtmlVisualizer("cppnc_value_per_token_score.html")
    claim_cnt = 0
    for cid, pid_entries_d in cid_grouped.items():
        pid_entries_d: Dict[str, List[Dict]] = pid_entries_d
        pid_entries: List[Tuple[str, List[Dict]]] = list(pid_entries_d.items())
        baseline_pid_entries = baseline_cid_grouped[int(cid)]
        baseline_score_d = fetch_score_per_pid(baseline_pid_entries)
        gold_pids = gold[int(cid)]

        ret = collect_score_per_doc(baseline_score_d, get_score_from_entry, gold_pids,
                                                                  pid_entries)
        passage_tokens_d = collect_passage_tokens(pid_entries)
        doc_info_d: Dict[int, Tuple[str, int]] = ret[0]
        doc_value_arr: List[List[float]] = ret[1]

        kdp_result_grouped = defaultdict(list)
        for doc_idx, doc_values in enumerate(doc_value_arr):
            doc_id, passage_idx = doc_info_d[doc_idx]
            avg_score = average(doc_values)
            kdp_result = doc_id, passage_idx, avg_score
            kdp_result_grouped[doc_id].append(kdp_result)

        s = "{} : {}".format(cid, claim_d[int(cid)])
        html.write_headline(s)
        claim_cnt += 1
        if claim_cnt > 10:
            break

        # average value of each passage across perspectives
        scores: List[float] = lmap(average, doc_value_arr)

        foreach(html.write_paragraph, lmap(str, scores))

        for doc_id, kdp_result_list in kdp_result_grouped.items():
            html.write_headline(doc_id)
            tokens, per_token_score = combine_collect_score(tokenizer, doc_id, passage_tokens_d, kdp_result_list)
            str_tokens = tokenizer.convert_ids_to_tokens(tokens)
            row = cells_from_tokens(str_tokens)
            for idx in range(len(str_tokens)):
                score = per_token_score[idx][0]
                norm_score = min(abs(score) * 10000, 100)
                color = "B" if score > 0 else "R"
                row[idx].highlight_score = norm_score
                row[idx].target_color = color

            rows = [row]
            nth = 0
            any_score_found = True
            while any_score_found:
                any_score_found = False
                score_list = []
                for idx in range(len(str_tokens)):
                    if nth < len(per_token_score[idx]):
                        score = per_token_score[idx][nth]
                        any_score_found = True
                    else:
                        score = "-"
                    score_list.append(score)

                def get_cell(score):
                    if score == "-":
                        return Cell("-")
                    else:
                        # 0.01 -> 100
                        norm_score = min(abs(score) * 10000, 100)
                        color = "B" if score > 0 else "R"
                        return Cell("", highlight_score=norm_score, target_color=color)

                nth += 1
                if any_score_found:
                    row = lmap(get_cell, score_list)
                    rows.append(row)
            html.multirow_print_from_cells_list(rows)