Example #1
else:
    # keep only the last path component as the model name
    model_name = model_name.split('/')[-1]
result_fname = args.result + '/' + model_name + '_to_' + args.test + '.txt'

print('##### model:', args.model)
print('##### test data:', args.test)
print('##### your result file:', result_fname)

# # Load data

# In[3]:

from koreanframenet import koreanframenet
kfn = koreanframenet.interface(version='1.2')

# English FrameNet splits
en_trn, en_dev, en_tst = dataio.load_data(srl=srl, language='en')

# Korean FrameNet splits, grouped by annotation source
ekfn_trn_d, ekfn_tst_d = kfn.load_data(source='efn')
jkfn_trn_d, jkfn_tst_d = kfn.load_data(source='jfn')
skfn_trn_d, skfn_unlabel_d, skfn_tst_d = kfn.load_data(source='sejong')
pkfn_trn_d, pkfn_unlabel_d, pkfn_tst_d = kfn.load_data(source='propbank')

# convert each split to target-marked data
ekfn_trn = dataio.data2tgt_data(ekfn_trn_d, mode='train')
ekfn_tst = dataio.data2tgt_data(ekfn_tst_d, mode='train')

jkfn_trn = dataio.data2tgt_data(jkfn_trn_d, mode='train')
jkfn_tst = dataio.data2tgt_data(jkfn_tst_d, mode='train')

skfn_trn = dataio.data2tgt_data(skfn_trn_d, mode='train')
skfn_unlabel = dataio.data2tgt_data(skfn_unlabel_d, mode='train')
skfn_tst = dataio.data2tgt_data(skfn_tst_d, mode='train')
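
If the converted splits share a common instance format, they can be pooled into a single Korean training and test set. The lines below are a minimal sketch of that step, not part of the original snippet; trn_ko and tst_ko are hypothetical names.

# hypothetical follow-up: pool the converted Korean FrameNet splits
trn_ko = ekfn_trn + jkfn_trn + skfn_trn
tst_ko = ekfn_tst + jkfn_tst + skfn_tst
print('##### Korean train instances:', len(trn_ko))
print('##### Korean test instances:', len(tst_ko))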
Example #2
# # Define task

# In[2]:

srl = 'framenet'
language = 'en'
fnversion = 1.7

# # Load data

# In[3]:

trn, dev, tst = dataio.load_data(srl=srl,
                                 language=language,
                                 fnversion=fnversion,
                                 exem=False,
                                 info=True)
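
A quick sanity check on the loaded splits can be useful here. The sketch below assumes each instance is a list of aligned token-level columns with frame labels in instance[2] and argument labels in instance[3], the layout relied on by the evaluation code in Examples #3 and #4; it is not part of the original snippet.

# sanity check on the loaded FrameNet 1.7 splits (assumed instance layout, see note above)
print('train/dev/test sizes:', len(trn), len(dev), len(tst))
sample = tst[0]
print('columns per instance:', len(sample))
print('gold frame of first test instance:', [i for i in sample[2] if i != '_'][0])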

# In[4]:

# Parsing Gold Data


def test_model(model_path, masking=True, language='en'):
    #     torch.cuda.set_device(device)
    model = frame_parser.FrameParser(srl=srl,
                                     gold_pred=True,
                                     fnversion=fnversion,
                                     model_path=model_path,
                                     masking=masking,
Example #3
def test(srl=False, masking=False, viterbi=False, language=False, model_path=False, 
         result_dir=False, train_lang=False, tgt=False,
         pretrained="bert-base-multilingual-cased"):
    if not result_dir:
        # default: put the results next to the model directory
        result_dir = '/disk/data/models/' + model_path.split('/')[-2] + '-result/'
    if result_dir[-1] != '/':
        result_dir = result_dir + '/'
        
    if not os.path.exists(result_dir):
        os.makedirs(result_dir)
        
    if not train_lang:
        train_lang = language
    
    fname = result_dir + train_lang + '_for_' + language
        
    if masking:
        fname = fname + '_with_masking_result.txt'
    else:
        fname = fname +'_result.txt'
        
    print('### Your result will be saved to:', fname)
        
    trn, dev, tst = dataio.load_data(srl=srl, language=language, exem=False)
    
    print('### EVALUATION')
    print('MODE:', srl)
    print('target LANGUAGE:', language)
    print('trained LANGUAGE:', train_lang)
    print('Viterbi:', viterbi)
    print('masking:', masking)
    print('using TGT token:', tgt)
    tic()    
    
    # evaluate every checkpoint directory found under model_path
    # (known-good single checkpoints, e.g. enModel-with-exemplar/9/, koModel/35/,
    #  mulModel-100/39/, can also be appended to `models` by hand)
    models = glob.glob(model_path + '*/')

    eval_result = []
    for m in models:
        print('### model dir:', m)
        print('### TARGET LANGUAGE:', language)
        torch.cuda.set_device(device)
        model = parser.ShallowSemanticParser(srl=srl,gold_pred=True, model_path=m, viterbi=viterbi, 
                                             masking=masking, language='multilingual', tgt=tgt,
                                             pretrained=pretrained)

        gold_senses, pred_senses, gold_args, pred_args = [],[],[],[]        
        gold_full_all, pred_full_all = [],[]

        for instance in tst:
            torch.cuda.set_device(device)
            result = model.parser(instance)

            # the frame (sense) label is the single non-'_' entry in column 2;
            # argument labels live in column 3 (gold rows marked 'X' are skipped)
            gold_sense = [i for i in instance[2] if i != '_'][0]
            pred_sense = [i for i in result[0][2] if i != '_'][0]

            gold_arg = [i for i in instance[3] if i != 'X']
            pred_arg = [i for i in result[0][3]]

            gold_senses.append(gold_sense)
            pred_senses.append(pred_sense)
            
            gold_args.append(gold_arg)
            pred_args.append(pred_arg)

            if srl == 'framenet':
                # full-structure evaluation: the frame label is counted twice and
                # the argument labels are re-weighted by weighting() before comparison
                gold_full = [gold_sense, gold_sense]
                gold_full += weighting(gold_sense, gold_arg)

                pred_full = [pred_sense, pred_sense]
                pred_full += weighting(pred_sense, pred_arg)

                gold_full_all.append(gold_full)
                pred_full_all.append(pred_full)

        acc = accuracy_score(gold_senses, pred_senses)
        arg_f1 = f1_score(gold_args, pred_args)
        arg_precision = precision_score(gold_args, pred_args)
        arg_recall = recall_score(gold_args, pred_args)
        

        # checkpoint directories are named per epoch (e.g. .../35/)
        epoch = m.split('/')[-2]
        print('# EPOCH:', epoch)
        print("SenseId Accuracy: {}".format(acc))
        print("ArgId Precision: {}".format(arg_precision))
        print("ArgId Recall: {}".format(arg_recall))
        print("ArgId F1: {}".format(arg_f1))
        if srl == 'framenet':
            full_f1 = f1_score(gold_full_all, pred_full_all)
            full_precision = precision_score(gold_full_all, pred_full_all)
            full_recall = recall_score(gold_full_all, pred_full_all)
            print("full-structure Precision: {}".format(full_precision))
            print("full-structure Recall: {}".format(full_recall))
            print("full-structure F1: {}".format(full_f1))
        print('-----processing time:', tac())
        print('')


        model_result = []
        model_result.append(epoch)
        model_result.append(acc)
        model_result.append(arg_precision)
        model_result.append(arg_recall)
        model_result.append(arg_f1)
        if srl == 'framenet':
            model_result.append(full_precision)
            model_result.append(full_recall)
            model_result.append(full_f1)
        model_result = [str(i) for i in model_result]
        eval_result.append(model_result)
    
    
    with open(fname, 'w') as f:
        if srl == 'framenet':
            header = ['epoch', 'SenseID', 'Arg_P', 'Arg_R', 'Arg_F1',
                      'full_P', 'full_R', 'full_F1']
        else:
            header = ['epoch', 'SenseID', 'Arg_P', 'Arg_R', 'Arg_F1']
        f.write('\t'.join(header) + '\n')
        for i in eval_result:
            f.write('\t'.join(i) + '\n')

        print('\n\t### Your result is saved at:', fname)
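
A hedged usage sketch for the test() function above; the model and result paths are placeholders, and the keyword values simply mirror the options visible in the function body.

# illustrative call only: paths below are placeholders, not paths from the original repository
test(srl='framenet',
     language='ko',
     train_lang='en',
     masking=True,
     viterbi=False,
     tgt=True,
     model_path='/disk/data/models/framenet/mulModel/',      # one sub-directory per checkpoint (globbed with '*/')
     result_dir='/disk/data/models/framenet/mulModel-result/')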
Example #4
def test(srl=False,
         masking=False,
         viterbi=False,
         language=False,
         model_path=False,
         result_dir=False,
         train_lang=False,
         tgt=False,
         pretrained="bert-base-multilingual-cased"):
    if not result_dir:
        # default: put the results next to the model directory
        result_dir = '/disk/data/models/' + model_path.split('/')[-2] + '-result/'
    if result_dir[-1] != '/':
        result_dir = result_dir + '/'

    if not train_lang:
        train_lang = language

    if not os.path.exists(result_dir):
        os.makedirs(result_dir)

    if viterbi:
        fname = result_dir + train_lang + '_for_' + language + '_with_viterbi'
    else:
        fname = result_dir + train_lang + '_for_' + language

    if masking:
        fname = fname + '_with_masking'

    if 'large' in pretrained:
        fname = fname + '_large_tgt_result.txt'
    else:
        fname = fname + '_tgt_result.txt'

    print('### Your result will be saved to:', fname)

    trn, dev, tst = dataio.load_data(srl=srl, language=language)
    print('### EVALUATION')
    print('MODE:', srl)
    print('target LANGUAGE:', language)
    print('trained LANGUAGE:', train_lang)
    print('Viterbi:', viterbi)
    print('masking:', masking)
    print('using TGT token:', tgt)
    tic()

    models = glob.glob(model_path + '*.pt')

    eval_result = []
    for m in models:

        print('model:', m)

        model = parser.ShallowSemanticParser(srl=srl,
                                             gold_pred=True,
                                             model_path=m,
                                             viterbi=viterbi,
                                             masking=masking,
                                             language=language,
                                             tgt=tgt,
                                             pretrained=pretrained)

        gold_senses, pred_senses, gold_args, pred_args = [], [], [], []
        gold_full_all, pred_full_all = [], []

        for instance in tst:

            result = model.parser(instance)

            gold_sense = [i for i in instance[2] if i != '_'][0]
            pred_sense = [i for i in result[0][2] if i != '_'][0]

            gold_arg = [i for i in instance[3] if i != 'X']
            pred_arg = [i for i in result[0][3]]

            gold_senses.append(gold_sense)
            pred_senses.append(pred_sense)

            gold_args.append(gold_arg)
            pred_args.append(pred_arg)

            if srl == 'framenet':
                # full-structure evaluation: the frame label is counted twice and
                # the argument labels are re-weighted by weighting() before comparison
                gold_full = [gold_sense, gold_sense]
                gold_full += weighting(gold_sense, gold_arg)

                pred_full = [pred_sense, pred_sense]
                pred_full += weighting(pred_sense, pred_arg)

                gold_full_all.append(gold_full)
                pred_full_all.append(pred_full)

        acc = accuracy_score(gold_senses, pred_senses)
        arg_f1 = f1_score(gold_args, pred_args)
        arg_precision = precision_score(gold_args, pred_args)
        arg_recall = recall_score(gold_args, pred_args)

        # checkpoint files are expected to be named like 'epoch-8-joint.pt'
        epoch = m.split('/')[-1].split('-')[1]
        print('# EPOCH:', epoch)
        print("SenseId Accuracy: {}".format(acc))
        print("ArgId Precision: {}".format(arg_precision))
        print("ArgId Recall: {}".format(arg_recall))
        print("ArgId F1: {}".format(arg_f1))
        if srl == 'framenet':
            full_f1 = f1_score(gold_full_all, pred_full_all)
            full_precision = precision_score(gold_full_all, pred_full_all)
            full_recall = recall_score(gold_full_all, pred_full_all)
            print("full-structure Precision: {}".format(full_precision))
            print("full-structure Recall: {}".format(full_recall))
            print("full-structure F1: {}".format(full_f1))
        print('-----processing time:', tac())
        print('')

        model_result = []
        model_result.append(epoch)
        model_result.append(acc)
        model_result.append(arg_precision)
        model_result.append(arg_recall)
        model_result.append(arg_f1)
        if srl == 'framenet':
            model_result.append(full_precision)
            model_result.append(full_recall)
            model_result.append(full_f1)
        model_result = [str(i) for i in model_result]
        eval_result.append(model_result)

    with open(fname, 'w') as f:
        if srl == 'framenet':
            header = ['epoch', 'SenseID', 'Arg_P', 'Arg_R', 'Arg_F1',
                      'full_P', 'full_R', 'full_F1']
        else:
            header = ['epoch', 'SenseID', 'Arg_P', 'Arg_R', 'Arg_F1']
        f.write('\t'.join(header) + '\n')
        for i in eval_result:
            f.write('\t'.join(i) + '\n')
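
For this variant, model_path is globbed for '*.pt' files and the epoch is parsed from the file name, so checkpoints are expected to follow a naming scheme like 'epoch-8-joint.pt' (the pattern that appears in a comment in Example #3). A hedged invocation sketch with placeholder paths:

# illustrative call only: paths below are placeholders
test(srl='framenet',
     language='en',
     masking=True,
     tgt=True,
     model_path='/disk/data/models/framenet/enModel-with-exemplar/',  # contains files like 'epoch-8-joint.pt'
     result_dir='./results/')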