Example #1
def eval(out_file, src_file, tgt_file, isDIn=False, num_pairs=500):
    """
        Given a filename, calculate the metric scores for that prediction file

        isDin: boolean value to check whether input file is DirectIn.txt
    """

    pairs = []

    with open(src_file, 'r') as infile:
        for line in infile:
            # strip only the trailing newline ('\n' or '\r\n'), keeping all content characters
            line = line.strip('\r\n')
            pair = {}
            pair['tokenized_sentence'] = line
            pairs.append(pair)

    with open(tgt_file, "r") as infile:
        cnt = 0
        for line in infile:
            line = line.strip('\r\n')
            pairs[cnt]['tokenized_question'] = line
            cnt += 1

    output = []
    with open(out_file, 'r') as infile:
        for line in infile:
            line = line[:-1]
            output.append(line)

    for idx, pair in enumerate(pairs):
        pair['prediction'] = output[idx]

    ## eval
    from collections import defaultdict
    from eval import QGEvalCap
    import json
    from json import encoder
    encoder.FLOAT_REPR = lambda o: format(o, '.4f')  # 4-decimal floats (Python 2 json hook)

    res = defaultdict(lambda: [])
    gts = defaultdict(lambda: [])
    for pair in pairs[:]:
        key = pair['tokenized_sentence']
        res[key] = [pair['prediction'].encode('utf-8')]

        ## gts
        gts[key].append(pair['tokenized_question'].encode('utf-8'))

    QGEval = QGEvalCap(gts, res)
    return QGEval.evaluate()
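Assuming the three files are line-aligned (one tokenized example per line), a minimal usage sketch could look like the following; the file names are hypothetical:

# Hypothetical, line-aligned input files: one example per line each.
scores = eval(out_file='predictions.txt',
              src_file='src-test.txt',
              tgt_file='tgt-test.txt')
print(scores)  # QGEvalCap.evaluate() returns the computed metric scores

Note that naming the function eval shadows Python's built-in eval; importing the module and calling the function through a qualified name avoids the collision.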
Example #2
def eval(out_file, src_file, tgt_file, isDIn=False, num_pairs=500):
    """
        Given a filename, calculate the metric scores for that prediction file

        isDin: boolean value to check whether input file is DirectIn.txt
    """

    pairs = []
    with open(src_file, 'r') as infile:
        for line in infile:
            pair = {}
            pair['tokenized_sentence'] = line[:-1].strip().lower()
            pairs.append(pair)

    with open(tgt_file, "r") as infile:
        cnt = 0
        for line in infile:
            pairs[cnt]['tokenized_question'] = line[:-1].strip()
            cnt += 1

    output = []
    with open(out_file, 'r') as infile:
        for line in infile:
            line = fix_tokenization(line[:-1].strip()).lower()
            output.append(line)

    for idx, pair in enumerate(pairs):
        pair['prediction'] = output[idx]

    # eval
    from collections import defaultdict
    from eval import QGEvalCap
    import json
    from json import encoder
    encoder.FLOAT_REPR = lambda o: format(o, '.4f')  # 4-decimal floats (Python 2 json hook)

    res = defaultdict(lambda: [])
    gts = defaultdict(lambda: [])

    for pair in pairs[:]:
        # The pair dict holds three keys: tokenized_sentence, tokenized_question, and prediction
        key = pair['tokenized_sentence']
        res[key] = [pair['prediction'].encode('utf-8')]

        # gts
        gts[key].append(pair['tokenized_question'].encode('utf-8'))

    QGEval = QGEvalCap(gts, res)
    return QGEval.evaluate()
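fix_tokenization is not defined in this snippet; it presumably undoes tokenizer artifacts in the decoded output before scoring. A minimal hypothetical stand-in, assuming WordPiece-style subwords and detached punctuation (the real helper lives elsewhere in the source repository):

import re

def fix_tokenization(text):
    # Hypothetical stand-in for the helper defined elsewhere in the repo.
    # Re-join WordPiece continuation pieces: "ques ##tion" -> "question".
    text = text.replace(' ##', '')
    # Re-attach punctuation that tokenization split off: "word ?" -> "word?".
    text = re.sub(r"\s+([?!.,'])", r'\1', text)
    return text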
Example #3
def eval(out_file, src_file, tgt_file, isDIn=False, num_pairs=500):
    """
        Given a prediction file and the corresponding source and target files,
        calculate the metric scores for that prediction file.

        isDIn: boolean flag indicating whether the input file is DirectIn.txt
    """

    pairs = []
    with open(src_file, 'r') as infile:
        for line in infile:
            pair = {}
            pair['tokenized_sentence'] = line[:-1]
            pairs.append(pair)

    with open(tgt_file, "r") as infile:
        cnt = 0
        for line in infile:
            pairs[cnt]['tokenized_question'] = line[:-1]
            cnt += 1

    output = []
    with open(out_file, 'r') as infile:
        for line in infile:
            line = line[:-1]
            output.append(line)


    for idx, pair in enumerate(pairs):
        pair['prediction'] = output[idx]


    ## eval
    from collections import defaultdict
    from eval import QGEvalCap
    import json
    from json import encoder
    encoder.FLOAT_REPR = lambda o: format(o, '.4f')  # 4-decimal floats (Python 2 json hook)

    res = defaultdict(lambda: [])
    gts = defaultdict(lambda: [])
    for pair in pairs[:]:
        key = pair['tokenized_sentence']
        res[key] = [pair['prediction'].encode('utf-8')]

        ## gts 
        gts[key].append(pair['tokenized_question'].encode('utf-8'))

    QGEval = QGEvalCap(gts, res)
    return QGEval.evaluate()
Example #4
def eval_squad(out_file, simplified_para_file, para_file, src_file, tgt_file, n_best, isDIn=False, num_pairs=500):
    """
        Given a prediction file and the corresponding paragraph, source, and
        target files, calculate the metric scores for that prediction file.
        isDIn: boolean flag indicating whether the input file is DirectIn.txt
    """
    from collections import defaultdict
    
    simplified_paragraphs = []
    with open(simplified_para_file, 'r') as infile:
        for line in infile:
            simplified_paragraphs.append(line[:-1])
    
    predictions = []
    with open(out_file, 'r') as infile:
        for i in range(len(simplified_paragraphs)):
            j = n_best
            array = []
            while j > 0:
                line = infile.readline()
                array.append(line[:-1])
                j -= 1
            predictions.append(array)
    
    assert len(simplified_paragraphs) == len(predictions)
    
    # Mapping between paragraphs and predictions
    para_prediction_map = {}
    for i in range(len(simplified_paragraphs)):
        para_prediction_map[simplified_paragraphs[i]] = predictions[i]
        
    paragraphs = []
    with open(para_file, 'r') as infile:
        for line in infile:
            paragraphs.append(line[:-1])

    sentences = []
    with open(src_file, 'r') as infile:
        for line in infile:
            sentences.append(line[:-1])

    questions = []
    with open(tgt_file, 'r') as infile:
        for line in infile:
            questions.append(line[:-1])
            
    pairs = defaultdict(lambda: {'sentences':set(), 'questions':[], 'predictions':[]})
    for paragraph, sentence, question in zip(paragraphs, sentences, questions):
        pairs[paragraph]['sentences'].add(sentence)
        pairs[paragraph]['questions'].append(question)
        pairs[paragraph]['predictions'] = para_prediction_map[paragraph]
    
    # Clean up redundant predictions     
    for paragraph in pairs.keys():
        while len(pairs[paragraph]['sentences']) != len(pairs[paragraph]['predictions']):
            del pairs[paragraph]['predictions'][-1]

    ## eval
    from eval import QGEvalCap
    import json
    from json import encoder
    encoder.FLOAT_REPR = lambda o: format(o, '.4f')

    res = defaultdict(lambda: [])
    gts = defaultdict(lambda: [])
    for paragraph in pairs.keys():        
        for prediction in pairs[paragraph]['predictions']:      
            res[paragraph].append(prediction.encode('utf-8'))
        ## gts
        for question in pairs[paragraph]['questions']:
            gts[paragraph].append(question.encode('utf-8'))

    QGEval = QGEvalCap(gts, res)
    return QGEval.evaluate()
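Here out_file is expected to hold n_best consecutive prediction lines for each line of simplified_para_file. A usage sketch with hypothetical file names:

# Hypothetical file names; predictions.txt holds n_best lines per paragraph.
scores = eval_squad(out_file='predictions.txt',
                    simplified_para_file='test.para.simplified.txt',
                    para_file='test.para.txt',
                    src_file='src-test.txt',
                    tgt_file='tgt-test.txt',
                    n_best=3)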
Example #5
def eval(out_file, src_file, tgt_file, isDIn=False, num_pairs=500):
    """
        Given a filename, calculate the metric scores for that prediction file

        isDin: boolean value to check whether input file is DirectIn.txt
    """
    #pairs:リスト、サイズはlen(sentence)、中身はpair
    #pair:sentence,question_target,question_predictが辞書の形で格納されている。

    pairs = []
    with open(src_file, 'r') as infile:
        for line in infile:
            pair = {}
            pair['tokenized_sentence'] = line[:-1]
            pairs.append(pair)

    with open(tgt_file, "r") as infile:
        cnt = 0
        for line in infile:
            pairs[cnt]['tokenized_question'] = line[:-1]
            cnt += 1

    output = []
    with open(out_file, 'r') as infile:
        for line in infile:
            line = line[:-1]
            output.append(line)

    for idx, pair in enumerate(pairs):
        pair['prediction'] = output[idx]

    ## eval
    from collections import defaultdict
    from eval import QGEvalCap
    import json
    from json import encoder
    encoder.FLOAT_REPR = lambda o: format(o, '.4f')  # 4-decimal floats (Python 2 json hook)

    # pair: sentence, prediction, tokenized_question

    # res: key = sentence, value = prediction
    # gts: key = sentence, value = question
    # Note: gts groups every question for the same sentence under that one
    # sentence, and only one prediction per sentence gets scored -> 4000 of
    # the 10000 sentences are never evaluated.

    # Evaluate every sentence instead (even with identical sources, answer-aware
    # models can produce different questions when the answers differ).

    # Disabled variant: use as targets the entries whose sentence and question phrase match
    if 0:
        target_dict = defaultdict(lambda: [])
        predict_dict = defaultdict(str)
        for i, pair in enumerate(pairs):
            s = pair["tokenized_sentence"]
            t = pair['tokenized_question'].encode('utf-8')
            p = pair['prediction'].encode('utf-8')
            target_dict[s].append(t)
            predict_dict[(p, s)] = s
        gts = {
            i: target_dict[predict_dict[(p["prediction"],
                                         p["tokenized_sentence"])]]
            for i, p in enumerate(pairs)
        }
        res = {i: [p["prediction"]] for i, p in enumerate(pairs)}

    if 0:  # disabled variant: key gts/res by sentence, one prediction per sentence
        res = defaultdict(lambda: [])
        gts = defaultdict(lambda: [])
        for i, pair in enumerate(pairs[:]):
            key = pair['tokenized_sentence']
            res[key] = [pair['prediction'].encode('utf-8')]
            gts[key].append(pair['tokenized_question'].encode('utf-8'))
        print(list(res.items())[0:5])
        print(list(gts.items())[0:5])

    # Active variant: keep only the pairs whose prediction contains no <UNK> token
    if 1:
        target_dict = defaultdict(lambda: [])
        predict_dict = defaultdict(str)
        for i, pair in enumerate(pairs):
            s = pair["tokenized_sentence"]
            t = pair['tokenized_question'].encode('utf-8')
            p = pair['prediction'].encode('utf-8')
            target_dict[s].append(t)
            predict_dict[(p, s)] = s
        pairs = [p for p in pairs if "<UNK>" not in p["prediction"]]
        gts = {i: [p["tokenized_question"]] for i, p in enumerate(pairs)}
        res = {i: [p["prediction"]] for i, p in enumerate(pairs)}

    print("size of items:{}".format(len(res.items())))

    QGEval = QGEvalCap(gts, res)
    return QGEval.evaluate()
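Unlike the earlier examples, the active branch keys gts and res by example index rather than by sentence, so every surviving pair is scored individually. A self-contained illustration of the resulting shapes, using made-up strings:

pairs = [
    {'tokenized_sentence': 's1', 'tokenized_question': 'q1', 'prediction': 'p1'},
    {'tokenized_sentence': 's1', 'tokenized_question': 'q2', 'prediction': '<UNK> p2'},
]
pairs = [p for p in pairs if '<UNK>' not in p['prediction']]
gts = {i: [p['tokenized_question']] for i, p in enumerate(pairs)}
res = {i: [p['prediction']] for i, p in enumerate(pairs)}
print(gts)  # {0: ['q1']}
print(res)  # {0: ['p1']}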