Example #1
0
def train():
    """Train the siamese model and evaluate the best checkpoint on the test set.

    Loads pre-tokenized question pairs from ``model_data.pkl`` (train/dev/test
    splits), fits with checkpoint / early-stopping / LR-reduction callbacks,
    then prints loss and accuracy on the test split.

    Returns:
        tuple: ``(loss, accuracy)`` measured on the test set.
    """
    data = data_helper.load_pickle('model_data.pkl')

    train_q1 = data['train_q1']
    train_q2 = data['train_q2']
    train_y = data['train_label']

    dev_q1 = data['dev_q1']
    dev_q2 = data['dev_q2']
    dev_y = data['dev_label']

    test_q1 = data['test_q1']
    test_q2 = data['test_q2']
    test_y = data['test_label']

    model = siamese_model()
    # Persist only the best-val_acc weights.
    checkpoint = ModelCheckpoint(model_path, monitor='val_acc', verbose=1,
                                 save_best_only=True, mode='max', period=1)
    tensorboard = TensorBoard(log_dir=tensorboard_path)
    # restore_best_weights=True so the evaluation below really measures the
    # best model, matching the "Test best model" message (originally the
    # final-epoch weights were evaluated instead).
    earlystopping = EarlyStopping(monitor='val_acc', patience=10, verbose=0,
                                  mode='max', restore_best_weights=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_acc', patience=5, mode='max')
    callbackslist = [checkpoint, tensorboard, earlystopping, reduce_lr]

    model.fit([train_q1, train_q2], train_y,
              batch_size=512,
              epochs=200,
              validation_data=([dev_q1, dev_q2], dev_y),
              callbacks=callbackslist)

    loss, accuracy = model.evaluate([test_q1, test_q2], test_y,
                                    verbose=1, batch_size=256)
    print("Test best model =loss: %.4f, accuracy:%.4f" % (loss, accuracy))
    return loss, accuracy
Example #2
0
def pred(q1, q2):
    """Predict the similarity score for a pair of question strings.

    Args:
        q1: First question (whitespace-tokenizable string).
        q2: Second question.

    Returns:
        The raw model output for the pair (originally the score was only
        printed and then discarded; returning it lets callers use it
        programmatically).
    """
    tokenizer = data_helper.load_pickle('./w2v/tokenizer.pkl')
    # texts_to_sequences expects a list of token lists, hence the wrapping list.
    q1_ = tokenizer.texts_to_sequences([q1.split()])
    q2_ = tokenizer.texts_to_sequences([q2.split()])
    q1_ = pad_sequences(q1_, input_dim)
    q2_ = pad_sequences(q2_, input_dim)

    model = siamese_model()
    model.load_weights(model_path)

    pred_ = model.predict([q1_, q2_])

    print('q1:{}, q2:{}, sim:{}'.format(q1, q2, pred_))
    return pred_
Example #3
0
File: HiDR.py  Project: yurui12138/HiDR
def train(params):
    """Train a parameterised siamese model and print test-set metrics.

    Loads train/dev/test question pairs from ``./model_data.pkl``, fits with
    checkpointing, early stopping (best weights restored) and LR reduction,
    then evaluates on the test split.
    """
    bundle = data_helper.load_pickle('./model_data.pkl')

    train_inputs = [bundle['train_q1'], bundle['train_q2']]
    train_labels = bundle['train_label']
    dev_inputs = [bundle['dev_q1'], bundle['dev_q2']]
    dev_labels = bundle['dev_label']
    test_inputs = [bundle['test_q1'], bundle['test_q2']]
    test_labels = bundle['test_label']

    model = siamese_model(params)

    # All callbacks track validation accuracy: keep the best weights on disk,
    # stop early (restoring the best epoch) and shrink the LR on plateaus.
    callbacks = [
        ModelCheckpoint(model_path,
                        monitor='val_acc',
                        verbose=1,
                        save_best_only=True,
                        mode='max',
                        period=1),
        TensorBoard(log_dir=tensorboard_path),
        EarlyStopping(monitor='val_acc',
                      patience=10,
                      verbose=0,
                      mode='max',
                      restore_best_weights=True),
        ReduceLROnPlateau(monitor='val_acc', patience=3, mode='max'),
    ]

    model.fit(train_inputs,
              train_labels,
              batch_size=512,
              epochs=200,
              verbose=2,
              validation_data=(dev_inputs, dev_labels),
              callbacks=callbacks)

    loss, accuracy, precision, recall, f1_score = model.evaluate(
        test_inputs, test_labels, verbose=1, batch_size=256)
    print(
        "Test best model =loss: %.4f, accuracy:%.4f,precision:%.4f,recall:%.4f,f1_score:%.4f"
        % (loss, accuracy, precision, recall, f1_score))
Example #4
0
def train():
    """Fit the siamese model on the w2v-preprocessed data (no test step here)."""
    bundle = data_helper.load_pickle('./w2v/model_data.pkl')

    q1_train, q2_train = bundle['train_q1'], bundle['train_q2']
    y_train = bundle['train_label']
    q1_dev, q2_dev = bundle['dev_q1'], bundle['dev_q2']
    y_dev = bundle['dev_label']

    model = siamese_model()
    # NOTE(review): period=10 only considers a checkpoint every 10th epoch,
    # so a best epoch in between is never saved — confirm this is intended.
    callbacks = [
        ModelCheckpoint(model_path, monitor='val_acc', verbose=1,
                        save_best_only=True, mode='max', period=10),
        TensorBoard(log_dir=tensorboard_path),
    ]
    model.fit([q1_train, q2_train], y_train,
              batch_size=128,
              epochs=200,
              validation_data=([q1_dev, q2_dev], y_dev),
              callbacks=callbacks)
Example #5
0
from keras import backend as K
from keras.callbacks import ModelCheckpoint, TensorBoard,EarlyStopping, ReduceLROnPlateau
from keras.layers import Embedding, Input, Bidirectional, Lambda,LSTM,SimpleRNN,Dense,Activation,subtract,add,multiply,concatenate,dot,Dropout,BatchNormalization
from keras.models import Model,Sequential
from keras.optimizers import Adam,Adadelta,RMSprop
from keras.preprocessing.sequence import pad_sequences
import data_helper
from attention import AttentionWithContext, AttentionLayer

# LSTM hidden-state size used by the network builders below.
dim = 300
# Padded sequence length shared by both question inputs.
input_dim = data_helper.MAX_SEQUENCE_LENGTH
# Word-embedding dimensionality.
emb_dim = data_helper.EMB_DIM
# Checkpoint path written by training and read at prediction time.
model_path = './model/siameselstm.hdf5'
# TensorBoard log directory.
tensorboard_path = './model/ensembling'

# Pre-trained embedding matrix; row count defines the vocabulary size.
embedding_matrix = data_helper.load_pickle('embedding_matrix.pkl')

# Frozen (non-trainable) embedding lookup shared by both siamese branches.
embedding_layer = Embedding(embedding_matrix.shape[0],
                            emb_dim,
                            weights=[embedding_matrix],
                            input_length=input_dim,
                            trainable=False)

def base_network1(input_shape):
    """Build one encoding branch over a shared input.

    NOTE(review): this snippet appears truncated by the scrape — the body is
    cut off after the second embedding lookup, so the full contract (return
    value, second branch) cannot be documented from here.
    """
    input = Input(shape=input_shape)

    # Branch p: embed, encode with an LSTM (all timesteps kept), then pool
    # the sequence with an attention layer.
    p = embedding_layer(input)
    p = LSTM(dim, return_sequences=True, dropout=0.5,name='f_input')(p)
    p = AttentionWithContext()(p)

    # Branch q starts from the same embedded input (continuation not visible).
    q = embedding_layer(input)     
Example #6
0
File: train.py  Project: ZQpengyu/MAFFo
#config = tf.compat.v1.ConfigProto()
#config.gpu_options.allow_growth=True   # don't grab all GPU memory up front; allocate on demand
#sess = tf.compat.v1.Session(config=config)

from keras.utils import plot_model
import data_helper
from keras.layers import Embedding, Input, Bidirectional, LSTM, Concatenate, Add, Dropout, Dense, \
    BatchNormalization, Lambda, Activation, multiply, concatenate, Flatten, add, Dot,Permute
from keras.models import Model
import keras.backend as K
from keras.callbacks import *
from tensorflow.python.ops.nn import softmax

# Padded sequence length shared by both question inputs.
input_dim = data_helper.MAX_SEQUENCE_LENGTH
# Word-embedding dimensionality.
EMBDIM = data_helper.EMBDIM
# Pre-trained embedding matrix; row count defines the vocabulary size.
embedding_matrix = data_helper.load_pickle('embedding_matrix.pkl')
# Pre-tokenized train/dev/test splits used elsewhere in this script.
model_data = data_helper.load_pickle('model_data.pkl')
# Frozen (non-trainable) embedding lookup shared by both siamese branches.
embedding_layer = Embedding(embedding_matrix.shape[0],
                            EMBDIM,
                            weights=[embedding_matrix],
                            trainable=False)


def align(input_1, input_2):
    attention = Dot(axes=-1)([input_1, input_2])
    w_att_1 = Lambda(lambda x: softmax(x, axis=1))(attention)
    w_att_2 = Permute((2, 1))(Lambda(lambda x: softmax(x, axis=2))(attention))
    in1_aligned = Dot(axes=1)([w_att_1, input_1])
    in2_aligned = Dot(axes=1)([w_att_2, input_2])

    in1_aligned = add([in1_aligned, input_1])