Beispiel #1
0
def combine_pc_rel_with_cpid(prediction_file, info: Dict) \
        -> Dict[DataID, Tuple[CPIDPair, Logits, Logits]]:
    """Attach claim/perspective ids and claim logits to perspective predictions.

    Entries are read from ``prediction_file`` in order. An entry whose ``info``
    record has a ``'cid'`` key is remembered as the current claim; an entry
    whose record has a ``'pid'`` key is paired with the most recently seen
    claim.

    Args:
        prediction_file: Passed to ``EstimatorPredictionViewer``.
        info: Maps a data id to a record containing either ``'cid'`` or
            ``'pid'``.

    Returns:
        A dict mapping each perspective entry's data id to
        ``(cpid, claim_logits, perspective_logits)``. The data id of the
        preceding claim entry is mapped to the same tuple, so it ends up
        holding the tuple of the last perspective that followed that claim.

    Raises:
        ValueError: If a perspective entry appears before any claim entry.
        AssertionError: If an ``info`` record has neither ``'cid'`` nor
            ``'pid'``.
    """
    data = EstimatorPredictionViewer(prediction_file)
    print("Num data ", data.data_len)
    out_d: Dict[DataID, Tuple[CPIDPair, Logits, Logits]] = {}
    last_claim = None
    prev_data_id = None
    ticker = TimeEstimator(data.data_len)
    for entry in data:
        ticker.tick()
        logits = entry.get_vector("logits")
        data_id = entry.get_vector("data_id")[0]
        # Only the info lookup is best-effort: entries without a record are
        # reported and skipped.
        try:
            cur_info = info[data_id]
        except KeyError as e:
            print(e)
            continue

        if 'cid' in cur_info:
            last_claim = cur_info['cid'], logits
            prev_data_id = data_id
        elif 'pid' in cur_info:
            if last_claim is None:
                raise ValueError(
                    "perspective entry (data_id={}) appears before any claim entry".format(data_id))
            cid, c_logits = last_claim
            cpid = CPIDPair((cid, cur_info['pid']))
            combined = (cpid, c_logits, logits)
            out_d[data_id] = combined
            out_d[prev_data_id] = combined
        else:
            # Raised explicitly so the check survives ``python -O``.
            raise AssertionError(
                "info record for data_id={} has neither 'cid' nor 'pid'".format(data_id))
    return out_d
Beispiel #2
0
def collect_scores(prediction_file, info: Dict, logit_to_score) \
        -> Dict[DataID, Tuple[CPIDPair, float]]:
    """Map every predicted data id to its (claim, perspective) pair and score.

    Three record layouts are recognised in ``info``:
      * records with a ``'kdp'`` key are parsed with ``qck_convert_map``;
      * records with a ``'query'`` key are parsed with ``qc_convert_map``;
      * anything else must carry ``'cid'`` and ``'pid'`` directly.

    Entries that hit a ``KeyError`` are reported on stdout and skipped.

    Args:
        prediction_file: Passed to ``EstimatorPredictionViewer``.
        info: Record lookup keyed by ``str(data_id)``.
        logit_to_score: Callable applied to each entry's ``"logits"`` vector.

    Returns:
        Dict from data id to ``(cpid, score)``.
    """
    viewer = EstimatorPredictionViewer(prediction_file)
    print("Num data ", viewer.data_len)
    out_d: Dict[DataID, Tuple[CPIDPair, float]] = {}
    for entry in viewer:
        score = logit_to_score(entry.get_vector("logits"))
        data_id = entry.get_vector("data_id")[0]
        try:
            record = info[str(data_id)]
            if 'kdp' in record or 'query' in record:
                # 'kdp' takes precedence over 'query' when both are present.
                convert_map = qck_convert_map if 'kdp' in record else qc_convert_map
                parse_info_inner(record, convert_map, True)
                cid = int(record['query'].query_id)
                pid = int(record['candidate'].id)
            else:
                cid, pid = record['cid'], record['pid']
            out_d[data_id] = (CPIDPair((cid, pid)), score)
        except KeyError as e:
            print("Key error", e)
            print("data_id", data_id)
    return out_d
Beispiel #3
0
def collect_data_w_cpid(prediction_file, info: Dict, logit_to_score) \
        -> List[Dict]:
    """Return the info records of all predictions, enriched with prediction data.

    Each record found in ``info`` (keyed by ``str(data_id)``) gets three extra
    keys, ``'cpid'``, ``'score'`` and ``'confidence'``, and is appended to the
    result. The records are modified in place, so ``info`` sees the additions.
    Entries whose record is missing, or lacks ``'cid'``/``'pid'``, are
    reported on stdout and skipped.

    Args:
        prediction_file: Passed to ``EstimatorPredictionViewer``.
        info: Record lookup keyed by ``str(data_id)``.
        logit_to_score: Callable applied to each entry's ``"logits"`` vector.

    Returns:
        List of the enriched records, in prediction order.
    """
    viewer = EstimatorPredictionViewer(prediction_file)
    print("Num data ", viewer.data_len)
    records = []
    for entry in viewer:
        score = logit_to_score(entry.get_vector("logits"))
        data_id = entry.get_vector("data_id")[0]
        confidence = get_confidence_or_rel_score(entry)
        try:
            record = info[str(data_id)]
            cpid = CPIDPair((record['cid'], record['pid']))
        except KeyError:
            print("Key error")
            print("data_id", data_id)
            continue
        record['cpid'] = cpid
        record['score'] = score
        record['confidence'] = confidence
        records.append(record)
    return records
Beispiel #4
0
def get_claim_perspective_label_dict() -> Dict[CPIDPair, int]:
    """Build the gold label lookup for (claim, perspective) pairs.

    Every pair listed by ``get_claim_perspective_id_dict()`` is labelled 1.
    The result is a ``defaultdict(int)``, so indexing an unlisted pair yields 0
    (and inserts that pair).
    """
    labels = defaultdict(int)
    for claim_id, pid_groups in get_claim_perspective_id_dict().items():
        labels.update(
            (CPIDPair((claim_id, pid)), 1) for pid in flatten(pid_groups)
        )
    return labels
Beispiel #5
0
    def make_decisions(e: Tuple[int, List[Dict]]):
        """Turn one claim's candidate perspectives into binary decisions.

        ``e`` is ``(cid, p_list)``, where every item of ``p_list`` has a
        ``'pid'`` key. Scores are read from the enclosing ``score_d``; a pair
        without a score counts as 0. A decision is 1 when the score exceeds
        0.5, else 0.

        Returns ``(cid, [(cid, pid, decision), ...])``.
        """
        cid, p_list = e

        def decide(p):
            pid = int(p['pid'])
            pair = CPIDPair((cid, pid))
            score = score_d[pair] if pair in score_d else 0
            return cid, pid, (1 if score > 0.5 else 0)

        return cid, [decide(p) for p in p_list]
Beispiel #6
0
    def rank(e: Tuple[int, List[Dict]]):
        """Score one claim's candidate perspectives and keep the best ``top_k``.

        ``e`` is ``(cid, p_list)``. Each candidate dict gets a ``'score'`` key
        written in place: the value from the enclosing ``score_d`` when the
        pair is present, otherwise -2. Hits and misses are tallied on
        ``suc_count``.

        Returns ``(cid, candidates)`` with the candidates sorted by descending
        score and truncated to ``top_k`` items.
        """
        cid, p_list = e
        for candidate in p_list:
            pair = CPIDPair((cid, int(candidate['pid'])))
            has_score = pair in score_d
            if has_score:
                score = score_d[pair]
                suc_count.suc()
            else:
                score = -2
                suc_count.fail()
            candidate['score'] = score

        # sorted() is stable, exactly like list.sort() on a copy of p_list.
        ranked: List[Dict] = sorted(p_list, key=lambda x: x['score'], reverse=True)
        return cid, ranked[:top_k]
Beispiel #7
0
def main():
    """Interactively print stored relevance data for claim/perspective pairs.

    Loads the pickled ``pc_rel_dev_with_cpid`` mapping, builds a reverse index
    over it, then repeatedly reads a line of the form ``<cid> <pid>`` from
    stdin, clears the terminal and calls ``do_print`` for that pair. The loop
    ends when stdin reaches end-of-file. Lines that are not exactly two
    integers are reported and ignored.
    """
    # NOTE(review): the pickle looks like a cached result of
    #   combine_pc_rel_with_cpid(pjoin(output_path, "pc_rel_dev"),
    #                            load_from_pickle("pc_rel_dev_info_all"))
    # -- confirm before regenerating it that way.
    rel_info: Dict[DataID,
                   Tuple[CPIDPair, Logits,
                         Logits]] = load_from_pickle("pc_rel_dev_with_cpid")

    doc_index = reverse_index(rel_info)
    tokenizer = get_tokenizer()
    # 'cls' only exists on Windows; other platforms use 'clear'.
    clear_command = 'cls' if os.name == 'nt' else 'clear'

    while True:
        try:
            s = input()
        except EOFError:
            # stdin was closed: leave the loop instead of crashing.
            break
        os.system(clear_command)
        try:
            cid, pid = (int(token) for token in s.split())
        except ValueError:
            print("Expected two integers: <cid> <pid>")
            continue
        do_print(CPIDPair((cid, pid)), doc_index, tokenizer)
Beispiel #8
0
def collect_scores_and_confidence(prediction_file, info: Dict, logit_to_score) \
        -> Dict[DataID, Tuple[CPIDPair, float, float]]:
    """Map every predicted data id to its pair, score and confidence.

    Entries whose record is missing from ``info`` (keyed by ``str(data_id)``),
    or lacks ``'cid'``/``'pid'``, are reported on stdout and skipped.

    Args:
        prediction_file: Passed to ``EstimatorPredictionViewer``.
        info: Record lookup keyed by ``str(data_id)``.
        logit_to_score: Callable applied to each entry's ``"logits"`` vector.

    Returns:
        Dict from data id to ``(cpid, score, confidence)``, where confidence
        is whatever ``get_confidence_or_rel_score`` returns for the entry.
    """
    viewer = EstimatorPredictionViewer(prediction_file)
    print("Num data ", viewer.data_len)
    out_d: Dict[DataID, Tuple[CPIDPair, float, float]] = {}
    for entry in viewer:
        score = logit_to_score(entry.get_vector("logits"))
        data_id = entry.get_vector("data_id")[0]
        confidence = get_confidence_or_rel_score(entry)
        try:
            record = info[str(data_id)]
            cpid = CPIDPair((record['cid'], record['pid']))
        except KeyError:
            print("Key error")
            print("data_id", data_id)
            continue
        out_d[data_id] = (cpid, score, confidence)
    return out_d
Beispiel #9
0
def collect_info(prediction_file, info: Dict, logit_to_score) -> Dict[CPIDPair, List[Tuple[float, float, Dict]]]:
    """Group prediction results by (claim, perspective) pair.

    For each entry, the score (from ``logit_to_score``), the first element of
    the ``"rel_score"`` vector and the entry's info record are appended to the
    list of the pair named by the record's ``'cid'``/``'pid'``. Entries whose
    record is missing from ``info`` (keyed by ``str(data_id)``), or lacks
    those keys, are reported on stdout and skipped.

    Returns:
        A ``defaultdict(list)`` from pair to ``(score, rel_score, record)``
        tuples in prediction order.
    """
    viewer = EstimatorPredictionViewer(prediction_file)
    print("Num data ", viewer.data_len)
    grouped: Dict[CPIDPair, List[Tuple[float, float, Dict]]] = defaultdict(list)

    for entry in viewer:
        score = logit_to_score(entry.get_vector("logits"))
        rel_score = entry.get_vector("rel_score")[0]
        data_id = entry.get_vector("data_id")[0]
        try:
            record = info[str(data_id)]
            cpid = CPIDPair((record['cid'], record['pid']))
        except KeyError:
            print("Key error")
            print("data_id", data_id)
            continue
        grouped[cpid].append((score, rel_score, record))
    return grouped
Beispiel #10
0
def CPID_to_CPIDPair(cpid: CPID) -> CPIDPair:
    """Convert a ``"<cid>_<pid>"`` string into a ``CPIDPair`` of two ints.

    Raises ``ValueError`` when the string does not split into exactly two
    parts on ``"_"`` or when either part is not an integer literal.
    """
    claim_part, perspective_part = cpid.split("_")
    pair = (int(claim_part), int(perspective_part))
    return CPIDPair(pair)