Beispiel #1
0
def save_dev_scores(model, val, val_x):
    """Pickle one score per validation item, keyed by its CPID.

    The rows returned by ``model.predict_proba(val_x)`` are paired
    positionally with the items of ``val``.  For each pair, the value at
    index 1 of the prediction row is stored under the key ``"<cid>_<pid>"``
    built from the item's ``claim_pers`` attribute.  The finished dict is
    passed to ``save_to_pickle`` under the name ``"pc_ngram_logits"``.
    """
    def to_cpid(feature):
        # Key format is "<cid>_<pid>".
        claim_pers = feature.claim_pers
        return CPID("{}_{}".format(claim_pers.cid, claim_pers.pid))

    probabilities = model.predict_proba(val_x)
    scores = {
        to_cpid(feature): row[1]
        for feature, row in zip(val, probabilities)
    }
    save_to_pickle(scores, "pc_ngram_logits")
def prediction_to_dict(
        prediction: List[Tuple[str, List[Dict]]]) -> Dict[CPID, float]:
    """Flatten per-claim prediction lists into one CPID -> score mapping.

    Each element of ``prediction`` is a ``(claim_id, preds)`` pair, and every
    dict in ``preds`` is read for its ``'pid'`` and ``'score'`` entries.
    Keys have the form ``"<claim_id>_<pid>"`` and scores are converted with
    ``float``.  When the same key appears more than once, the last
    occurrence wins.
    """
    flattened: Dict[CPID, float] = {}
    for claim_id, entries in prediction:
        flattened.update(
            (CPID("{}_{}".format(claim_id, entry['pid'])),
             float(entry['score']))
            for entry in entries
        )
    return flattened
Beispiel #3
0
def get_cpids_and_token_keys(
        tokenizer: FullTokenizer,
        claim_entry: ParagraphClaimPersFeature) -> Tuple[str, CPID]:
    """Return the token-based key and the CPID for one entry.

    The key is the space-joined tokens of ``claim_text``, an underscore, and
    the space-joined tokens of ``p_text``, both taken from
    ``claim_entry.claim_pers`` and tokenized with ``tokenizer.tokenize``.
    The CPID is ``"<cid>_<pid>"`` from the same ``claim_pers`` object.

    Returns:
        A ``(key, cpid)`` tuple.
    """
    pair = claim_entry.claim_pers

    claim_part = " ".join(tokenizer.tokenize(pair.claim_text))
    p_part = " ".join(tokenizer.tokenize(pair.p_text))
    key = claim_part + "_" + p_part

    cpid: CPID = CPID("{}_{}".format(pair.cid, pair.pid))
    return key, cpid
Beispiel #4
0
    def get_cpid(data_id, info_d) -> CPID:
        """Build the CPID for ``data_id`` from two adjacent ``info_d`` entries.

        The first attempt reads ``'cid'`` from entry ``data_id - 1`` and
        ``'pid'`` from entry ``data_id``.  If any of those lookups raises
        ``KeyError``, the pair ``data_id`` / ``data_id + 1`` is used instead;
        a ``KeyError`` from that second attempt propagates to the caller.
        """
        def read_pair(cid_idx, pid_idx):
            # Fetch both entries before reading their fields.
            cid_entry = info_d[cid_idx]
            pid_entry = info_d[pid_idx]
            return cid_entry['cid'], pid_entry['pid']

        try:
            cid, pid = read_pair(data_id - 1, data_id)
        except KeyError:
            cid, pid = read_pair(data_id, data_id + 1)

        return CPID("{}_{}".format(cid, pid))
Beispiel #5
0
def collect_by_order(input_file, feature_data: List[PerspectiveCandidate]):
    """Pair predictions with candidates by position and return their scores.

    Predictions are read through ``EstimatorPredictionViewer(input_file)``
    and zipped with ``feature_data``, so iteration stops at the shorter of
    the two.  Both lengths are printed first.  For each pair, the
    ``"logits"`` vector is passed through ``softmax`` and the value at
    index 1 is stored under the candidate's ``"<cid>_<pid>"`` key.

    Returns:
        A dict mapping CPID to score.
    """
    viewer = EstimatorPredictionViewer(input_file)

    print("prediction : {}".format(viewer.data_len))
    print("feature_data : {}".format(len(feature_data)))

    def score_of(entry):
        return softmax(entry.get_vector("logits"))[1]

    result: Dict[CPID, float] = {}
    for entry, candidate in zip(viewer, feature_data):
        # The right-hand side is evaluated first, so the score is computed
        # before the key is built.
        result[CPID("{}_{}".format(candidate.cid, candidate.pid))] = \
            score_of(entry)

    return result
Beispiel #6
0
    def get_predictions(
        claim_and_candidate: Tuple[Dict,
                                   List[Dict]]) -> Tuple[str, List[Dict]]:
        claim_info, candidates = claim_and_candidate
        nonlocal dp_not_found
        for candi in candidates:
            cid = candi['cid']
            pid = candi['pid']
            cpid = CPID("{}_{}".format(cid, pid))

            if cpid in score_d:
                candi['new_score'] = score_d[cpid]
            else:
                dp_not_found += 1
                candi['new_score'] = 0.01

            candi['final_score'] = candi['new_score'] + candi['score'] / 100
            candi[
                'rationale'] = "final_score={}  cls_score={}  lucene_score={}".format(
                    candi['final_score'], candi['new_score'], candi['score'])

        candidates.sort(key=lambda c: c['final_score'], reverse=True)
        return claim_info['cId'], candidates[:top_k]
Beispiel #7
0
def CPID_to_CPIDPair(cpid: CPID) -> CPIDPair:
    """Convert a ``"<cid>_<pid>"`` string into a ``CPIDPair`` of two ints.

    Raises:
        ValueError: if splitting on ``"_"`` does not yield exactly two parts,
            or if either part cannot be converted with ``int``.
    """
    # Tuple unpacking enforces that there are exactly two parts.
    cid_text, pid_text = cpid.split("_")
    as_ints = (int(cid_text), int(pid_text))
    return CPIDPair(as_ints)