def train(params):
    config_gpu()
    vocab, _ = get_vocab(model_path_trained)
    params['vocab_size'] = len(vocab)

    print("Building the model")
    model = Seq2Seq(params)

    checkpoint = tf.train.Checkpoint(Seq2Seq=model)
    check_point_manager = tf.train.CheckpointManager(checkpoint, checkpoint_dir,
                                                     max_to_keep=5)

    train_model(model, vocab, params, check_point_manager)
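# config_gpu() is referenced above but not defined in this snippet. A minimal
# sketch of what it likely does (an assumption, not the repo's actual code):
# enable memory growth so TensorFlow allocates GPU memory on demand instead
# of reserving it all at start-up.
import tensorflow as tf

def config_gpu():
    gpus = tf.config.experimental.list_physical_devices('GPU')
    for gpu in gpus:
        # Grow GPU memory usage as needed rather than grabbing it all upfront
        tf.config.experimental.set_memory_growth(gpu, True)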
            total_loss += batch_loss
            if batch % 5 == 0:
                print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1, batch,
                                                             batch_loss.numpy()))
        # saving (checkpoint) the model every 2 epochs
        if (epoch + 1) % 2 == 0:
            checkpoint.save(file_prefix=checkpoint_prefix)
        print('Epoch {} Loss {:.4f}'.format(epoch + 1, total_loss / steps_per_epoch))
        print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))


# Load the trained vocab
vocab, reverse_vocab = get_vocab()

if __name__ == '__main__':
    # Compute the vocab size
    vocab_size = len(vocab)
    # Load the dataset
    train_X, train_Y, test_X = load_dataset()
    # Embedding matrix pre-trained with gensim
    embedding_matrix = load_embedding_matrix()

    input_sequence_len = 300
    EPOCHS = 10
    BATCH_SIZE = 64
    embedding_dim = 300
    units = 1024

    encoder = Encoder(vocab_size, embedding_dim, embedding_matrix, units,
                      BATCH_SIZE)
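# batch_loss in the loop above comes from a per-batch training step that is
# not shown here. A hedged sketch of the usual pattern (assumed, not the
# repo's exact code): a teacher-forced forward pass under tf.GradientTape,
# with <PAD> positions masked out of the sparse categorical cross-entropy.
# `model` and `optimizer` are assumed to exist in the enclosing scope.
import tensorflow as tf

loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=False, reduction='none')

def loss_function(real, pred, pad_index=0):
    # Zero out the loss at padded target positions
    mask = tf.cast(tf.math.not_equal(real, pad_index), dtype=tf.float32)
    loss_ = loss_object(real, pred) * mask
    return tf.reduce_sum(loss_) / tf.reduce_sum(mask)

@tf.function
def train_step(inp, targ):
    with tf.GradientTape() as tape:
        predictions, _ = model(inp, targ)  # teacher-forced decode
        batch_loss = loss_function(targ[:, 1:], predictions)
    variables = model.trainable_variables
    gradients = tape.gradient(batch_loss, variables)
    optimizer.apply_gradients(zip(gradients, variables))
    return batch_loss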
    model.add(Embedding(vocab_size, embedding_dim,
                        weights=[embedding_matrix], trainable=False,
                        input_length=input_length))
    model.add(Bidirectional(GRU(300, return_sequences=False)))
    model.add(Dense(300, activation="relu"))
    model.add(RepeatVector(output_sequence_length))
    model.add(Bidirectional(GRU(300, return_sequences=True)))
    model.add(TimeDistributed(Dense(vocab_size, activation='softmax')))
    model.compile(loss=sparse_categorical_crossentropy, optimizer=Adam(1e-3))
    model.summary()
    return model


if __name__ == '__main__':
    # Load the trained vocab
    vocab, reverse_vocab = get_vocab(save_wv_model_path)
    # Compute the vocab size
    vocab_size = len(vocab)
    # Embedding matrix pre-trained with gensim
    embedding_matrix = load_word2vec_file(save_wv_model_path)

    input_sequence_len = 250
    BATCH_SIZE = 64
    embedding_dim = 300
    units = 1024

    # Build the encoder
    encoder = Encoder(vocab_size, embedding_dim, embedding_matrix, units,
                      BATCH_SIZE)
    # Example input batch
    example_input_batch = tf.ones(shape=(BATCH_SIZE, input_sequence_len),
                                  dtype=tf.int32)
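# Encoder is instantiated above but defined elsewhere. A minimal sketch,
# assumed from the constructor arguments used here: an Embedding layer frozen
# to the pre-trained word2vec matrix feeding a GRU that returns both the full
# output sequence and the final hidden state.
import tensorflow as tf

class Encoder(tf.keras.Model):
    def __init__(self, vocab_size, embedding_dim, embedding_matrix, enc_units, batch_size):
        super(Encoder, self).__init__()
        self.batch_size = batch_size
        self.enc_units = enc_units
        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim,
                                                   weights=[embedding_matrix],
                                                   trainable=False)
        self.gru = tf.keras.layers.GRU(enc_units,
                                       return_sequences=True,
                                       return_state=True,
                                       recurrent_initializer='glorot_uniform')

    def call(self, x, hidden):
        x = self.embedding(x)
        output, state = self.gru(x, initial_state=hidden)
        return output, state

    def initialize_hidden_state(self):
        # Zero initial hidden state for the GRU
        return tf.zeros((self.batch_size, self.enc_units))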
            # One decoding step with teacher forcing: predict, update the
            # attention context, then feed the ground-truth token back in
            prediction, dec_hidden = self.decoder(decoder_inp, dec_hidden,
                                                  enc_output, context_vector)
            context_vector, attn = self.attention_layer(dec_hidden, enc_output)
            decoder_inp = tf.expand_dims(dec_target[:, t], 1)

            predictions.append(prediction)
            attentions.append(attn)

        return tf.stack(predictions, 1), dec_hidden


if __name__ == '__main__':
    # GPU configuration
    # config_gpu()
    # Load the trained vocab
    vocab, reverse_vocab = get_vocab(model_path_trained)
    # Compute the vocab size
    vocab_size = len(vocab)

    batch_size = 128
    input_sequence_len = 200

    params = {}
    params["vocab_size"] = vocab_size
    params["embed_size"] = 300
    params["enc_units"] = 256
    params["attn_units"] = 512
    params["dec_units"] = 512
    params["batch_size"] = batch_size

    model = Seq2Seq(params)
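# self.attention_layer is called above with (dec_hidden, enc_output) and
# returns a context vector plus attention weights. A sketch of standard
# Bahdanau (additive) attention that matches this call shape (an assumption,
# not necessarily the repo's exact implementation):
import tensorflow as tf

class BahdanauAttention(tf.keras.layers.Layer):
    def __init__(self, units):
        super(BahdanauAttention, self).__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, dec_hidden, enc_output):
        # dec_hidden: (batch, dec_units); enc_output: (batch, src_len, enc_units)
        hidden_with_time_axis = tf.expand_dims(dec_hidden, 1)
        # Additive score over every encoder position: (batch, src_len, 1)
        score = self.V(tf.nn.tanh(self.W1(enc_output) +
                                  self.W2(hidden_with_time_axis)))
        attention_weights = tf.nn.softmax(score, axis=1)
        # Context vector is the attention-weighted sum of encoder outputs
        context_vector = tf.reduce_sum(attention_weights * enc_output, axis=1)
        return context_vector, tf.squeeze(attention_weights, -1)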
    # Concatenate Question and Dialogue into the model input column X
    train_df['X'] = train_df[['Question', 'Dialogue']].apply(lambda x: ' '.join(x), axis=1)
    test_df['X'] = test_df[['Question', 'Dialogue']].apply(lambda x: ' '.join(x), axis=1)

    # Pick a suitable maximum length for the input sequences
    train_x_max_len = get_max_len(train_df['X'])
    test_x_max_len = get_max_len(test_df['X'])
    x_max_len = max(train_x_max_len, test_x_max_len)

    # Pick a suitable maximum length for the target sequences
    train_y_max_len = get_max_len(train_df['Report'])

    vocab, _ = get_vocab(config.save_wv_model_path)

    # Process the training inputs
    train_df['X'] = train_df['X'].apply(
        lambda x: mark_proc(x, x_max_len, vocab))
    # Process the training targets
    train_df['Y'] = train_df['Report'].apply(
        lambda x: mark_proc(x, train_y_max_len, vocab))
    # Process the test inputs
    test_df['X'] = test_df['X'].apply(lambda x: mark_proc(x, x_max_len, vocab))

    # Save the intermediate results
    train_df['X'].to_csv(config.train_x_pad_path, index=None, header=False)
    train_df['Y'].to_csv(config.train_y_pad_path, index=None, header=False)
    test_df['X'].to_csv(config.test_x_pad_path, index=None, header=False)

    # After building the seq2seq dataset, retrain Word2Vec
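# mark_proc is applied above but defined elsewhere. A hedged sketch of the
# preprocessing it presumably performs (an assumption based on how it is
# called): truncate to max_len, map out-of-vocabulary tokens to <UNK>, wrap
# the sequence in <START>/<STOP> markers, and right-pad with <PAD>.
def mark_proc(sentence, max_len, vocab):
    words = sentence.strip().split(' ')[:max_len]
    # Replace tokens missing from the vocab with the <UNK> marker
    words = [word if word in vocab else '<UNK>' for word in words]
    # Add sentence-boundary markers, then pad to a fixed width
    words = ['<START>'] + words + ['<STOP>']
    words += ['<PAD>'] * (max_len + 2 - len(words))
    return ' '.join(words)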