def create_model():
    """Build the two-input sentence-similarity network (not compiled).

    Each input has shape ``(max_seq_length,)`` and dtype ``float32``. Both
    are encoded by the same module-level ``shared_model``. The two encodings
    are concatenated with the output of ``ManDist`` applied to them, and the
    result goes through Dense layers of width 16, 4, 2 and finally 1, where
    the last layer uses a sigmoid activation.

    NOTE(review): the three hidden Dense layers are created without an
    ``activation`` argument, so they use the Keras default (presumably
    linear) -- confirm this is intended before changing it, since trained
    weights depend on it.

    Returns:
        A ``Model`` mapping ``[left_input, right_input]`` to ``[similarity]``.
    """
    input_shape = (max_seq_length, )
    left_input = Input(shape=input_shape, dtype='float32')
    right_input = Input(shape=input_shape, dtype='float32')

    # The same encoder object is applied to both sides.
    left_encoded = shared_model(left_input)
    right_encoded = shared_model(right_input)

    # Distance feature between the two encodings, appended to the encodings.
    distance = ManDist()([left_encoded, right_encoded])
    features = concatenate([left_encoded, right_encoded, distance])

    # The layers are instantiated output-first, which is the order in which
    # the original nested expression created them; auto-generated layer names
    # depend on creation order, so this keeps them unchanged.
    output_layer = Dense(1, activation='sigmoid')
    dense_2 = Dense(2)
    dense_4 = Dense(4)
    dense_16 = Dense(16)
    similarity = output_layer(dense_2(dense_4(dense_16(features))))

    return Model(inputs=[left_input, right_input], outputs=[similarity])
if __name__ == '__main__':
    # Hyperparameters
    batch_size = 1024
    n_epoch = 9
    n_hidden = 50

    # Two inputs of identical shape, both encoded by the same shared_model.
    left_input = Input(shape=(max_seq_length,), dtype='float32')
    right_input = Input(shape=(max_seq_length,), dtype='float32')
    left_sen_representation = shared_model(left_input)
    right_sen_representation = shared_model(right_input)

    # Introduce the Manhattan distance: concatenate it with the original
    # sentence vectors, then pass the result through a multi-layer DNN for a
    # non-linear transformation and a sigmoid to obtain the similarity.
    # The Mahalanobis distance mentioned in
    # https://zhuanlan.zhihu.com/p/31638132 is not used; Manhattan distance,
    # dot product and cosine were tried, and Manhattan worked best.
    # NOTE(review): the hidden Dense layers below pass no `activation`, so
    # they use the Keras default (presumably linear) -- confirm intended.
    man_distance = ManDist()(
        [left_sen_representation, right_sen_representation])
    sen_representation = concatenate(
        [left_sen_representation, right_sen_representation, man_distance])
    similarity = Dense(1, activation='sigmoid')(
        Dense(2)(
            Dense(4)(
                Dense(16)(sen_representation))))

    model = Model(inputs=[left_input, right_input], outputs=[similarity])
    model.compile(
        loss='mean_squared_error',
        optimizer=keras.optimizers.Adam(),
        metrics=['accuracy'],
    )
    model.summary()

    # Train, timing the whole fit call.
    train_inputs = [X_train['left'], X_train['right']]
    validation_inputs = [X_validation['left'], X_validation['right']]
    training_start_time = time()
    malstm_trained = model.fit(
        train_inputs,
        Y_train,
        batch_size=batch_size,
        epochs=n_epoch,
        validation_data=(validation_inputs, Y_validation),
    )
    training_end_time = time()
    print("Training time finished.\n%d epochs in %12.2f" %
          (n_epoch, training_end_time - training_start_time))

    # Plot accuracy
# x.add(GlobalMaxPool1D()) # x.add(Dense(250, activation='relu')) # x.add(Dropout(0.3)) # x.add(Dense(1, activation='sigmoid')) # LSTM x.add(LSTM(n_hidden)) shared_model = x # The visible layer left_input = Input(shape=(max_seq_length, ), dtype='int32') right_input = Input(shape=(max_seq_length, ), dtype='int32') # Pack it all up into a Manhattan Distance model malstm_distance = ManDist()( [shared_model(left_input), shared_model(right_input)]) model = Model(inputs=[left_input, right_input], outputs=[malstm_distance]) model.compile(loss='mean_squared_error', optimizer=tf.keras.optimizers.Adam(), metrics=['accuracy']) model.summary() shared_model.summary() try: # Start trainings training_start_time = time() callbacks = [EarlyStopping(monitor='val_loss', patience=3)] malstm_trained = model.fit( [X_train['left'], X_train['right']],
embedding_dim, weights=[embeddings], input_length=max_seq_length, trainable=False) # Embedded version of the inputs encoded_left = embedding_layer(left_input) encoded_right = embedding_layer(right_input) # Since this is a siamese network, both sides share the same LSTM shared_lstm = LSTM(50) left_output = shared_lstm(encoded_left) right_output = shared_lstm(encoded_right) malstm_distance = ManDist()([left_output, right_output]) # Pack it all up into a model model = Model(inputs=[left_input, right_input], outputs=[malstm_distance]) if gpus >= 2: # `multi_gpu_model()` is a so quite buggy. it breaks the saved model. model = tf.keras.utils.multi_gpu_model(model, gpus=gpus) model.compile(loss='mean_squared_error', optimizer=tf.keras.optimizers.Adam(), metrics=['accuracy']) model.summary() # Start trainings training_start_time = time() malstm_trained = model.fit(
# x.add(Conv1D(250, kernel_size=5, activation='relu')) # x.add(GlobalMaxPool1D()) # x.add(Dense(250, activation='relu')) # x.add(Dropout(0.3)) # x.add(Dense(1, activation='sigmoid')) # LSTM x.add(LSTM(n_hidden)) shared_model = x # The visible layer left_input = Input(shape=(max_seq_length, ), dtype='int32') right_input = Input(shape=(max_seq_length, ), dtype='int32') # Pack it all up into a Manhattan Distance model malstm_distance = ManDist()([x(left_input), x(right_input)]) model = Model(inputs=[left_input, right_input], outputs=[malstm_distance]) if gpus >= 2: # `multi_gpu_model()` is a so quite buggy. it breaks the saved model. model = tf.keras.utils.multi_gpu_model(model, gpus=gpus) model.compile(loss='mean_squared_error', optimizer=tf.keras.optimizers.Adam(), metrics=['accuracy']) model.summary() shared_model.summary() # Start trainings training_start_time = time() malstm_trained = model.fit( [X_train['left'], X_train['right']],