import os

import torch

# Project-level pieces -- `bd` (the English/Chinese dictionaries and the demo
# sentence pairs), Dataset, Transformer and get_upper_triangular -- come from
# the earlier sections of this article.

src_sequence_size = 8
tgt_sequence_size = 8
step = 1000                    # assumed step count; likely defined earlier
total_loss = -1.0              # negative sentinel: EMA not yet initialized
save_folder = "./checkpoint"   # assumed paths; likely defined earlier
save_file = "transformer.pt"

if __name__ == "__main__":
    dataset = Dataset(bd.en_dict, bd.cn_dict, bd.sentence_pair_demo,
                      src_sequence_size, tgt_sequence_size)
    model = Transformer(src_vocab_size=len(bd.en_dict),
                        tgt_vocab_size=len(bd.cn_dict),
                        word_emb_dim=8,
                        tgt_sequence_size=8)
    loss_f = torch.nn.NLLLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    model.train()
    # Causal mask: each target position may only attend to itself and
    # earlier positions.
    upper_tri = get_upper_triangular(8)
    for i in range(step):
        optimizer.zero_grad()
        src, tgt_in, tgt_out, _, _ = dataset.get_batch(batch_size=1)
        output = model(src, tgt_in, tgt_mask=upper_tri)
        # The model outputs softmax probabilities, so log + NLLLoss together
        # amount to a cross-entropy loss.
        loss = loss_f(torch.log(output), tgt_out)
        # Exponential moving average of the loss for smoother logging.
        if total_loss < 0:
            total_loss = loss.detach().numpy()
        else:
            total_loss = total_loss * 0.95 + loss.detach().numpy() * 0.05
        loss.backward()
        optimizer.step()
        if (i + 1) % 100 == 0:
            print("step:", i + 1, "loss:", total_loss)

    if not os.path.exists(save_folder):
        os.mkdir(save_folder)
    save_path = os.path.join(save_folder, save_file)
    torch.save(model, save_path)
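The helper get_upper_triangular is defined in an earlier section; as a reminder of the idea, here is a minimal sketch of one common way to write it, assuming the usual PyTorch convention where True marks positions a query must not attend to. The article's own version may instead return a float mask with -inf at the blocked positions.

import torch

def get_upper_triangular(n: int) -> torch.Tensor:
    # True strictly above the diagonal: position i cannot see positions > i.
    return torch.triu(torch.ones(n, n, dtype=torch.bool), diagonal=1)

print(get_upper_triangular(4))
# tensor([[False,  True,  True,  True],
#         [False, False,  True,  True],
#         [False, False, False,  True],
#         [False, False, False, False]])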
The second training script runs the same loop, but instead of the causal mask it asks the dataset for key-padding masks, so that attention ignores the PAD tokens in both the source and the target sequences:

if __name__ == "__main__":
    dataset = Dataset(bd.en_dict, bd.cn_dict, bd.sentence_pair_demo,
                      src_sequence_size, tgt_sequence_size)
    model = Transformer(src_vocab_size=len(bd.en_dict),
                        tgt_vocab_size=len(bd.cn_dict),
                        word_emb_dim=8,
                        tgt_sequence_size=8,
                        num_encoder_layers=1,
                        num_decoder_layers=1,
                        dropout_prob=0)
    loss_f = torch.nn.NLLLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    model.train()
    total_loss = -1.0   # reset the EMA sentinel for this run
    for i in range(step):
        optimizer.zero_grad()
        # The batch now also carries masks flagging the PAD positions in the
        # source and target sequences.
        src, tgt_in, tgt_out, src_pad_mask, tgt_pad_mask = dataset.get_batch(
            batch_size=1, need_padding_mask=True)
        output = model(src, tgt_in,
                       src_key_padding_mask=src_pad_mask,
                       tgt_key_padding_mask=tgt_pad_mask)
        loss = loss_f(torch.log(output), tgt_out)
        if total_loss < 0:
            total_loss = loss.detach().numpy()
        else:
            total_loss = total_loss * 0.95 + loss.detach().numpy() * 0.05
        loss.backward()
        optimizer.step()
        if (i + 1) % 100 == 0:
            print("step:", i + 1, "loss:", total_loss)

    if not os.path.exists(save_folder):
        os.mkdir(save_folder)
    save_path = os.path.join(save_folder, save_file)
    torch.save(model, save_path)
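The padding masks come straight from the dataset. As a rough illustration of what get_batch(need_padding_mask=True) might produce, here is a hypothetical helper, assuming PAD_ID is the dictionaries' padding index and that True flags padded positions (the convention used by torch.nn.Transformer's *_key_padding_mask arguments); the Dataset class defined earlier may build its masks differently.

import torch

PAD_ID = 0  # assumed padding index

def make_key_padding_mask(token_ids: torch.Tensor) -> torch.Tensor:
    # token_ids: (batch, seq_len) -> bool mask of the same shape,
    # True wherever the token is padding.
    return token_ids == PAD_ID

batch = torch.tensor([[5, 7, 2, 0, 0]])  # one sequence padded to length 5
print(make_key_padding_mask(batch))      # tensor([[False, False, False,  True,  True]])

One note on the final line of both scripts: torch.save(model, save_path) pickles the entire module, so loading it later requires the same class definitions to be importable; saving model.state_dict() instead is the more portable choice.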