Example #1
def test_formality_score(files=None):
    if files is None:
        files = {
            'rule_based': ['./data/Family_Relationships/bpe_outputs/formal.rule_based.bpe'],
            'pbmt': ['./data/Family_Relationships/bpe_outputs/formal.pbmt.bpe'],
            'nmt_baseline': ['./data/Family_Relationships/bpe_outputs/formal.nmt_baseline.bpe'],
            'nmt_copy': ['./data/Family_Relationships/bpe_outputs/formal.nmt_copy.bpe'],
            'nmt_combined': ['./data/Family_Relationships/bpe_outputs/formal.nmt_combined.bpe'],
        }
    embedding_path = './new_exp_fr/embedding/embedding.bpe.big.txt'
    embedding, vocab_hash = embedding_api.load_word_embedding(embedding_path)
    nn = NNModel(np.array(embedding),mode='eval')
    nn.batch_size = 128
    nn.build_basic_rnn_model()
    eval_log={}
    for key in files.keys():
        if isinstance(files[key], list):
            # list values (as in the defaults above) are already full .bpe paths
            fm_files = files[key]
        else:
            # a single base path: append the .bpe suffix and wrap it in a list
            fm_files = [files[key] + '.bpe']
        data=preprocess(informal_src_list=[],formal_src_list=fm_files,embedding_path=embedding_path,shuffle=False)
        result = nn.predict_prob([t.x for t in data], model_path='./new_exp_fr/classifier/model/1700model.ckpt')
        score=0
        for s in result:
            score+=s[1]
        print(key,score/len(data))
        eval_log[key]=score/len(data)
    return eval_log
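A minimal usage sketch for test_formality_score, assuming it runs inside the same module as NNModel, embedding_api and preprocess; the 'my_system' key and its output path are hypothetical placeholders:

# Score the five default system outputs, then one custom output file.
# List values are used as-is; a plain string value gets '.bpe' appended.
default_log = test_formality_score()
custom_log = test_formality_score(files={
    'my_system': ['./data/Family_Relationships/bpe_outputs/formal.my_system.bpe'],  # hypothetical path
})
print(custom_log['my_system'])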
Example #2
def test():
    test = pickle.load(open('./new_exp_fr/classifier/test.pkl', 'rb'))
    embedding_path = './new_exp_fr/embedding/corpus.fine_tune_embedding.epoch.10'
    embedding,vocab_hash = embedding_api.load_word_embedding(embedding_path)
    nn = NNModel(np.array(embedding),mode='eval')
    nn.build_basic_rnn_model()
    nn.evaluate([t.x for t in test],[t.y for t in test],model_path='')
Example #3
def predict(model_path,file_path='./new_exp_fr/classifier/val.pkl',embedding_path='./new_exp_fr/embedding/corpus.fine_tune_embedding.epoch.10'):
    test = pickle.load(open(file_path, 'rb'))
    embedding, vocab_hash = embedding_api.load_word_embedding(embedding_path)
    nn = NNModel(np.array(embedding),mode='predict')
    nn.batch_size=10000
    nn.build_basic_rnn_model()
    result=nn.predict_prob([t.x for t in test], model_path=model_path)
    return test,result
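A sketch of turning the (data, probabilities) pair returned above into a plain accuracy figure, assuming each Data object keeps its gold label in t.y (as in the training examples) and that column 1 of predict_prob is the formal-class probability; the checkpoint path is the one used in the other examples:

val_data, probs = predict(model_path='./new_exp_fr/classifier/model/1700model.ckpt')
# Predict 'formal' (label 1) when the formal probability exceeds 0.5.
correct = sum(1 for t, p in zip(val_data, probs) if int(p[1] > 0.5) == t.y)
print('val accuracy: %.4f' % (correct / len(val_data)))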
Example #4
def use_nn_model():
    train = pickle.load(open('./new_exp_fr/classifier/train.pkl', 'rb'))
    val = pickle.load(open('./new_exp_fr/classifier/val.pkl', 'rb'))
    embedding_path = './new_exp_fr/embedding/embedding.bpe.big.txt'
    embedding, vocab_hash = embedding_api.load_word_embedding(embedding_path)
    nn=NNModel(np.array(embedding),mode='train')
    nn.build_basic_rnn_model()
    nn.train_model([t.x for t in train],[t.y for t in train],[t.x for t in val],[t.y for t in val],
                   continue_train=False, previous_model_path='./new_exp_fr/classifier/model/990model.ckpt')
Example #5
def preprocess(informal_src_list,formal_src_list,embedding_path,output_path=None,shuffle=True):
    vectors,vocab_hash=embedding_api.load_word_embedding(embedding_path)
    all_data=[]
    for src in informal_src_list:
        with open(src,'r',encoding='utf-8') as f:
            for line in f:
                d=Data(line.strip().split(), 0, line.strip())
                d.str2index(vocab_hash,with_unk=False)
                all_data.append(d)
    for src in formal_src_list:
        with open(src,'r',encoding='utf-8') as f:
            for line in f:
                d=Data(line.strip().split(), 1, line.strip())
                d.str2index(vocab_hash,with_unk=False)
                all_data.append(d)
    if shuffle:
        random.shuffle(all_data)
    if output_path is not None:
        # protocol=True is interpreted by pickle as protocol 1
        with open(output_path, 'wb') as fw:
            pickle.dump(all_data, fw, protocol=True)
    return all_data
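A sketch of how preprocess could build the train.pkl split consumed by use_nn_model in Example #4; the raw corpus paths are hypothetical placeholders, only the embedding and output paths come from the other examples:

# Informal sentences are labelled 0, formal sentences 1; the shuffled result is pickled.
preprocess(informal_src_list=['./data/Family_Relationships/bpe_train/informal.bpe'],  # hypothetical
           formal_src_list=['./data/Family_Relationships/bpe_train/formal.bpe'],      # hypothetical
           embedding_path='./new_exp_fr/embedding/embedding.bpe.big.txt',
           output_path='./new_exp_fr/classifier/train.pkl')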
Example #6
def cal_formality_score_for_each_sentence(output_dir,files=None):
    if files is None:
        files = {
            'rule_based': ['./data/Family_Relationships/bpe_outputs/formal.rule_based.bpe'],
            'pbmt': ['./data/Family_Relationships/bpe_outputs/formal.pbmt.bpe'],
            'nmt_baseline': ['./data/Family_Relationships/bpe_outputs/formal.nmt_baseline.bpe'],
            'nmt_copy': ['./data/Family_Relationships/bpe_outputs/formal.nmt_copy.bpe'],
            'nmt_combined': ['./data/Family_Relationships/bpe_outputs/formal.nmt_combined.bpe'],
        }
    embedding_path = './new_exp_fr/embedding/embedding.bpe.big.txt'
    embedding, vocab_hash = embedding_api.load_word_embedding(embedding_path)
    nn = NNModel(np.array(embedding),mode='eval')
    nn.batch_size = 128
    nn.build_basic_rnn_model()
    for key in files.keys():
        data=preprocess(informal_src_list=[],formal_src_list=files[key],embedding_path=embedding_path,shuffle=False)
        result = nn.predict_prob([t.x for t in data], model_path='./new_exp_fr/classifier/model/1700model.ckpt')
        # files[key] is a list of paths; name the score file after the first one
        base_name = os.path.basename(files[key][0])
        with open(os.path.join(output_dir,base_name+'.formality_score'),'w',encoding='utf-8') as fw:
            for r in result:
                fw.write(str(r[1])+'\n')
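A usage sketch for the per-sentence scorer above with its default files dict; the output directory is a hypothetical placeholder. Each input produces a '<basename>.formality_score' file containing one formal-class probability per line:

out_dir = './new_exp_fr/per_sentence_scores'  # hypothetical output directory
os.makedirs(out_dir, exist_ok=True)
cal_formality_score_for_each_sentence(out_dir)
# e.g. ./new_exp_fr/per_sentence_scores/formal.rule_based.bpe.formality_score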
Example #7
def evaluate_one_formality(input_file_path,is_inf):
    embedding_path = './new_exp_fr/embedding/embedding.bpe.big.txt'
    embedding, vocab_hash = embedding_api.load_word_embedding(embedding_path)
    nn = NNModel(np.array(embedding), mode='eval')
    nn.batch_size = 128
    nn.build_basic_rnn_model()
    if is_inf:
        data = preprocess(informal_src_list=[input_file_path], formal_src_list=[], embedding_path=embedding_path,
                          shuffle=False)
    else:
        data = preprocess(informal_src_list=[], formal_src_list=[input_file_path], embedding_path=embedding_path,
                          shuffle=False)
    result = nn.predict_prob([t.x for t in data], model_path='./new_exp_fr/classifier/model/1700model.ckpt')
    score = 0
    if is_inf:
        for s in result:
            score += s[0]
    else:
        for s in result:
            score += s[1]
    print(score / len(data))
    return score/len(data)
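A usage sketch for evaluate_one_formality, scoring an informal source file against one of the system outputs listed in the earlier examples; the informal path is a hypothetical placeholder:

# For informal files the average of column 0 (informal probability) is returned,
# for formal files the average of column 1 (formal probability), as in the function body.
inf_score = evaluate_one_formality('./data/Family_Relationships/bpe_outputs/informal.bpe', is_inf=True)  # hypothetical path
out_score = evaluate_one_formality('./data/Family_Relationships/bpe_outputs/formal.nmt_combined.bpe', is_inf=False)
print('informal source: %.4f, nmt_combined output: %.4f' % (inf_score, out_score))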