import tensorflow as tf

# Project-local helpers (config_gpu, Vocab, PGN, batcher, beam_decode,
# greedy_decode, save_predict_result, train_model) are assumed to be
# importable from the project's own modules.


def train(params):
    # Configure GPU resources
    config_gpu(use_cpu=False, gpu_memory=params['gpu_memory'])

    # Load the vocabulary
    print("Building the vocab ...")
    vocab = Vocab(params["vocab_path"], params["max_vocab_size"])
    params['vocab_size'] = vocab.count

    # Build the model
    print("Building the model ...")
    # model = Seq2Seq(params)
    model = PGN(params)

    print("Creating the batcher ...")
    dataset = batcher(vocab, params)

    # Set up the checkpoint manager
    print("Creating the checkpoint manager")
    checkpoint = tf.train.Checkpoint(PGN=model)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint, params['checkpoint_dir'], max_to_keep=5)
    checkpoint.restore(checkpoint_manager.latest_checkpoint)
    if checkpoint_manager.latest_checkpoint:
        print("Restored from {}".format(checkpoint_manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")

    # Train the model
    print("Starting the training ...")
    train_model(model, dataset, params, checkpoint_manager)
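# Minimal sketch of the `params` dict these entry points read. The keys come
# from the code in this section; the concrete values and file paths are
# illustrative assumptions, not the project's actual configuration.
params = {
    "mode": "train",                       # 'train', 'test' or 'eval'
    "vocab_path": "data/vocab.txt",        # hypothetical path
    "max_vocab_size": 30000,
    "checkpoint_dir": "checkpoints/pgn",   # hypothetical path
    "gpu_memory": 8,                       # passed to config_gpu()
    "batch_size": 16,
    "beam_size": 16,                       # must equal batch_size in test/eval mode
    "epochs": 10,
    "max_dec_len": 40,
    "learning_rate": 0.001,
    "decode_mode": "beam",                 # 'beam' or 'greedy' in predict_result()
}
# train(params)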
def test(params):
    assert params["mode"].lower() in ["test", "eval"], "change training mode to 'test' or 'eval'"
    assert params["beam_size"] == params["batch_size"], "Beam size must be equal to batch_size, change the params"

    # Configure GPU resources (decoding runs on CPU here)
    config_gpu(use_cpu=True)

    print("Building the model ...")
    model = PGN(params)

    print("Creating the vocab ...")
    vocab = Vocab(params["vocab_path"], params["vocab_size"])
    params['vocab_size'] = vocab.count

    print("Creating the batcher ...")
    b = batcher(vocab, params)

    print("Creating the checkpoint manager")
    checkpoint = tf.train.Checkpoint(PGN=model)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint, params['checkpoint_dir'], max_to_keep=5)
    checkpoint.restore(checkpoint_manager.latest_checkpoint)
    if checkpoint_manager.latest_checkpoint:
        print("Restored from {}".format(checkpoint_manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")
    print("Model restored")

    # The assertion above already guarantees 'test' or 'eval' mode,
    # so decoding always goes through the generator branch.
    if params['mode'] == 'eval' or params['mode'] == 'test':
        for batch in b:
            yield beam_decode(model, batch, vocab, params)
    else:
        for batch in b:
            print(beam_decode(model, batch, vocab, params))
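# Hedged usage sketch: in 'test'/'eval' mode, test() is a generator that yields
# one beam-decoded result per batch, so results can be collected lazily.
# run_eval is an illustrative helper, not part of the project code.
def run_eval(params):
    params["mode"] = "eval"
    predictions = []
    for result in test(params):
        predictions.append(result)
    return predictions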
from tqdm import tqdm


def predict_result(model, params, vocab, result_save_path):
    dataset, _ = batcher(vocab, params)

    if params['decode_mode'] == 'beam':
        # Beam-search decoding: keep the best hypothesis of each batch
        results = []
        for batch in tqdm(dataset):
            best_hyp = beam_decode(model, batch, vocab, params, print_info=True)
            results.append(best_hyp.abstract)
    else:
        # Greedy decoding
        results = greedy_decode(model, dataset, vocab, params)

    # Remove the spaces between tokens
    results = list(map(lambda x: x.replace(" ", ""), results))
    # Save the results
    save_predict_result(results, result_save_path)
    return results
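# Hedged usage sketch for predict_result(): restore the latest checkpoint and
# run prediction, mirroring the checkpoint handling used above. The wrapper
# name and the result path are illustrative assumptions.
def predict(params):
    config_gpu(use_cpu=True)
    vocab = Vocab(params["vocab_path"], params["max_vocab_size"])
    params['vocab_size'] = vocab.count
    model = PGN(params)
    checkpoint = tf.train.Checkpoint(PGN=model)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint, params['checkpoint_dir'], max_to_keep=5)
    checkpoint.restore(checkpoint_manager.latest_checkpoint)
    return predict_result(model, params, vocab, "data/predict_result.csv")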
import numpy as np


def train(params):
    # Configure GPU resources
    config_gpu()

    # Load the vocabulary
    print("Building the vocab ...")
    vocab = Vocab(params["vocab_path"], params["max_vocab_size"])
    params['vocab_size'] = vocab.count

    # Build the model
    print("Building the model ...")
    # model = Seq2Seq(params)
    model = PGN(params)

    print("Creating the batcher ...")
    dataset, params['steps_per_epoch'] = batcher(vocab, params)

    # Set up the checkpoint manager
    print("Creating the checkpoint manager")
    checkpoint = tf.train.Checkpoint(PGN=model)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint, params['checkpoint_dir'], max_to_keep=5)
    checkpoint.restore(checkpoint_manager.latest_checkpoint)
    if checkpoint_manager.latest_checkpoint:
        print("Restored from {}".format(checkpoint_manager.latest_checkpoint))
        # Reads only the last character of the checkpoint name, so this
        # assumes fewer than 10 saved epochs.
        params["trained_epoch"] = int(checkpoint_manager.latest_checkpoint[-1])
    else:
        print("Initializing from scratch.")
        params["trained_epoch"] = 1

    # Learning-rate decay: shrink the base rate by a factor of 0.95 per trained epoch
    params["learning_rate"] *= np.power(0.95, params["trained_epoch"])
    print('learning_rate:{}'.format(params["learning_rate"]))

    # Train the model
    print("Starting the training ...")
    train_model(model, dataset, params, checkpoint_manager)
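# Worked example of the decay above (values illustrative): resuming after 5
# trained epochs with a base learning rate of 0.001 gives
# 0.001 * 0.95**5 ≈ 0.00077.
base_lr, trained_epoch = 0.001, 5
resumed_lr = base_lr * np.power(0.95, trained_epoch)
print(resumed_lr)  # ~0.000774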
def train(params):
    # Configure GPU resources directly with tf.config instead of config_gpu():
    # restrict TensorFlow to the first GPU and let memory allocation grow on demand.
    # config_gpu(use_cpu=False, gpu_memory=params['gpu_memory'])
    gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
    if gpus:
        tf.config.experimental.set_visible_devices(devices=gpus[0], device_type='GPU')
        tf.config.experimental.set_memory_growth(gpus[0], enable=True)

    # Load the vocabulary
    print("Building the vocab ...")
    vocab = Vocab(params["vocab_path"], params["max_vocab_size"])
    params['vocab_size'] = vocab.count

    # Build the model
    print("Building the model ...")
    # model = Seq2Seq(params)
    model = PGN(params)

    print("Creating the batcher ...")
    dataset = batcher(vocab, params)

    # Set up the checkpoint manager
    print("Creating the checkpoint manager")
    checkpoint = tf.train.Checkpoint(PGN=model)
    checkpoint_manager = tf.train.CheckpointManager(checkpoint, params['checkpoint_dir'], max_to_keep=5)
    checkpoint.restore(checkpoint_manager.latest_checkpoint)
    if checkpoint_manager.latest_checkpoint:
        print("Restored from {}".format(checkpoint_manager.latest_checkpoint))
    else:
        print("Initializing from scratch.")

    # Train the model
    print("Starting the training ...")
    train_model(model, dataset, params, checkpoint_manager)
import time


def train_model(model, vocab, params, checkpoint_manager):
    # Unlike the train() wrappers above, this version receives the vocab and
    # builds its own dataset with batcher() below.
    epochs = params['epochs']
    batch_size = params['batch_size']

    pad_index = vocab.word2id[vocab.PAD_TOKEN]
    start_index = vocab.word2id[vocab.START_DECODING]

    # Record the vocab size
    params['vocab_size'] = vocab.count

    optimizer = tf.keras.optimizers.Adam(name='Adam', learning_rate=0.01)
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

    # Loss function that masks out padded target positions
    def loss_function(real, pred):
        mask = tf.math.logical_not(tf.math.equal(real, pad_index))
        loss_ = loss_object(real, pred)
        mask = tf.cast(mask, dtype=loss_.dtype)
        loss_ *= mask
        return tf.reduce_mean(loss_)

    # One training step
    @tf.function(input_signature=(tf.TensorSpec(shape=[params["batch_size"], None], dtype=tf.int32),
                                  tf.TensorSpec(shape=[params["batch_size"], params["max_dec_len"]], dtype=tf.int32),
                                  tf.TensorSpec(shape=[params["batch_size"], params["max_dec_len"]], dtype=tf.int32)))
    def train_step(enc_inp, dec_target, dec_input):
        with tf.GradientTape() as tape:
            enc_output, enc_hidden = model.call_encoder(enc_inp)
            # Initial decoder hidden state is the final encoder hidden state
            dec_hidden = enc_hidden
            # Decode the target sequence step by step
            predictions, _ = model(dec_input, dec_hidden, enc_output, dec_target)
            batch_loss = loss_function(dec_target, predictions)

        variables = model.encoder.trainable_variables + model.decoder.trainable_variables + model.attention.trainable_variables
        gradients = tape.gradient(batch_loss, variables)
        optimizer.apply_gradients(zip(gradients, variables))
        return batch_loss

    dataset = batcher(vocab, params)

    for epoch in range(epochs):
        start = time.time()
        total_loss = 0
        batch = 0
        for encoder_batch_data, decoder_batch_data in dataset:
            inputs = encoder_batch_data['enc_input']
            target = decoder_batch_data['dec_target']
            dec_input = decoder_batch_data['dec_input']

            batch_loss = train_step(inputs, target, dec_input)
            total_loss += batch_loss
            if batch % 50 == 0:
                print('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1, batch, batch_loss.numpy()))
            batch += 1

        # Save a checkpoint every 2 epochs
        if (epoch + 1) % 2 == 0:
            ckpt_save_path = checkpoint_manager.save()
            print('Saving checkpoint for epoch {} at {}'.format(epoch + 1, ckpt_save_path))

        print('Epoch {} Loss {:.4f}'.format(epoch + 1, total_loss / batch))
        print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))
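# Self-contained sketch of the padding-masked loss used in train_step above,
# run on toy tensors. pad_index is assumed to be 0 here purely for illustration.
import tensorflow as tf

pad_index = 0
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')

def masked_loss(real, pred):
    mask = tf.math.logical_not(tf.math.equal(real, pad_index))  # False at PAD positions
    loss_ = loss_object(real, pred)
    loss_ *= tf.cast(mask, dtype=loss_.dtype)                   # zero out PAD losses
    return tf.reduce_mean(loss_)

# One "batch" of two target tokens over a 4-word vocab: the second target is
# padding, so only the first position contributes to the loss.
real = tf.constant([[2, 0]])        # (batch=1, seq_len=2)
pred = tf.random.normal((1, 2, 4))  # (batch, seq_len, vocab_size) logits
print(masked_loss(real, pred).numpy())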