def test_translation():
    print("=====Translation Test Case======")

    french = Language(path='data/train.fr.txt')
    english = Language(path='data/train.en.txt')
    french.build_vocab()
    english.build_vocab()

    model = Seq2Seq(french, english, attention_type='dot')
    model.load_state_dict(torch.load("sanity_check.pth", map_location='cpu'))

    sentence = torch.Tensor([4, 6, 40, 41, 42, 43, 44, 13]).to(torch.long)
    translated, distributions = model.translate(sentence)

    # The first test: the greedy translation must match the reference token ids.
    assert translated.tolist() == [4, 16, 9, 56, 114, 51, 1, 14, 3], \
        "Your translation does not match the expected result."
    print("The first test passed!")

    # The second test: the attention distribution must match the reference values.
    expected_dist = torch.Tensor(
        [[9.98170257e-01, 1.74237683e-03, 7.48323873e-05],
         [1.94309454e-03, 9.82858062e-01, 4.87918453e-03],
         [2.26807110e-02, 7.29433298e-02, 3.17393959e-01]])
    assert distributions[:3, :3].allclose(expected_dist, atol=1e-7), \
        "Your attention distribution does not match the expected result."
    print("The second test passed!")

    # The third test: decoding must stop after max_len tokens.
    sentence = torch.Tensor([4, 6, 40, 41, 42, 43, 44, 13]).to(torch.long)
    translated, _ = model.translate(sentence, max_len=4)
    assert translated.tolist() == [4, 16, 9, 56], \
        "The max_len parameter does not work properly."
    print("The third test passed!")

    print("All 3 tests passed!")
def test_initializer_and_forward():
    print("=====Model Initializer & Forward Test Case======")

    french = Language(path='data/train.fr.txt')
    english = Language(path='data/train.en.txt')
    french.build_vocab()
    english.build_vocab()
    dataset = NmtDataset(src=french, trg=english)

    model = Seq2Seq(french, english, attention_type='dot')

    # The first test: the parameter names and shapes must match the reference checkpoint.
    try:
        model.load_state_dict(
            torch.load("sanity_check.pth", map_location='cpu'))
    except Exception as e:
        print("Your model initializer is wrong. Check the handout and comments "
              "in detail and implement the model precisely.")
        raise e
    print("The first test passed!")

    batch_size = 8
    max_pad_len = 5
    sentence_length = list(
        map(lambda pair: (len(pair[0]), len(pair[1])), dataset))
    batch_indices = [[0, 1, 2, 3, 4, 5, 6, 7]]
    dataloader = torch.utils.data.dataloader.DataLoader(
        dataset,
        collate_fn=collate_fn,
        num_workers=0,
        batch_sampler=batch_indices)
    batch = next(iter(dataloader))
    loss = model(batch[0], batch[1])

    # The second test: the forward pass must reproduce the reference loss.
    assert loss.detach().allclose(torch.tensor(3.03703070), atol=1e-7), \
        "Loss of the model does not match the expected result."
    print("The second test passed!")

    loss.backward()

    # The third test: backpropagation must reproduce the reference encoder gradients.
    expected_grad = torch.Tensor(
        [[-8.29117271e-05, -4.44278521e-05, -2.64967621e-05],
         [-3.89243884e-04, -1.29778590e-03, -4.56827343e-04],
         [-2.76966626e-03, -1.00148167e-03, -6.68873254e-05]])
    assert model.encoder.weight_ih_l0.grad[:3, :3].allclose(expected_grad, atol=1e-7), \
        "Gradient of the model does not match the expected result."
    print("The third test passed!")

    print("All 3 tests passed!")
def train():
    max_epoch = 200
    batch_size = 256

    french = Language(path='data/train.fr.txt')
    english = Language(path='data/train.en.txt')
    french.build_vocab()
    english.build_vocab()
    dataset = NmtDataset(src=french, trg=english)

    # Group sentence pairs of similar lengths into the same batch when bucketing is enabled.
    max_pad_len = 5
    sentence_length = list(
        map(lambda pair: (len(pair[0]), len(pair[1])), dataset))
    batch_sampler = bucketed_batch_indices(
        sentence_length, batch_size=batch_size,
        max_pad_len=max_pad_len) if bucketing else None

    model = Seq2Seq(french, english, attention_type=attention_type,
                    embedding_dim=embedding_dim, hidden_dim=hidden_dim).to(device)
    optimizer = torch.optim.Adam(model.parameters())
    dataloader = torch.utils.data.dataloader.DataLoader(
        dataset,
        collate_fn=collate_fn,
        num_workers=2,
        batch_size=1 if bucketing else batch_size,
        batch_sampler=batch_sampler,
        shuffle=not bucketing)

    loss_log = tqdm(total=0, bar_format='{desc}', position=2)
    for epoch in trange(max_epoch, desc="Epoch", position=0):
        for src_sentence, trg_sentence in tqdm(dataloader, desc="Iteration", position=1):
            optimizer.zero_grad()
            src_sentence, trg_sentence = src_sentence.to(device), trg_sentence.to(device)
            loss = model(src_sentence, trg_sentence, teacher_force=0.5)
            loss.backward()
            optimizer.step()
            des = 'Loss per non-<PAD> word: {:06.4f}'.format(loss.cpu())
            loss_log.set_description_str(des)

    torch.save(model.state_dict(), "seq2seq_" + attention_type + ".pth")
def translate():
    SOS = Language.SOS_TOKEN_IDX
    EOS = Language.EOS_TOKEN_IDX

    french_train = Language(path='data/train.fr.txt')
    english_train = Language(path='data/train.en.txt')
    french_train.build_vocab()
    english_train.build_vocab()

    model = Seq2Seq(french_train, english_train, attention_type=attention_type,
                    embedding_dim=embedding_dim, hidden_dim=hidden_dim).to(device)
    model.load_state_dict(
        torch.load("seq2seq_" + attention_type + ".pth", map_location=device))

    # The test set reuses the training vocabulary.
    french_test = Language(path='data/test.fr.txt')
    english_test = Language(path='data/test.en.txt')
    french_test.set_vocab(french_train.word2idx, french_train.idx2word)
    english_test.set_vocab(english_train.word2idx, english_train.idx2word)
    dataset = NmtDataset(src=french_test, trg=english_test)

    # Plot attention maps for a few sample sentences.
    samples = [dataset[0][0], dataset[1][0], dataset[2][0]]  # You may choose your own samples to plot
    for i, french in enumerate(samples):
        translated, attention = model.translate(
            torch.Tensor(french).to(dtype=torch.long, device=device))
        source_text = [french_train.idx2word[idx] for idx in french]
        translated_text = [english_train.idx2word[idx] for idx in translated]
        plot_attention(attention.cpu().detach(), translated_text, source_text,
                       name=attention_type + '_' + str(i))

    # Write human-readable translations and the plain predictions used for BLEU scoring.
    f = open('translated.txt', mode='w', encoding='utf-8')
    f_bleu = open('pred.en.txt', mode='w', encoding='utf-8')
    for french, english in tqdm(dataset, desc='Translated'):
        translated, attention = model.translate(
            torch.Tensor(french).to(dtype=torch.long, device=device))
        source_text = [french_train.idx2word[idx] for idx in french]
        target_text = [
            english_train.idx2word[idx] for idx in english
            if idx != SOS and idx != EOS
        ]
        translated_text = [
            english_train.idx2word[idx] for idx in translated if idx != EOS
        ]
        f.write('French : ' + ' '.join(source_text) + '\n')
        f.write('English : ' + ' '.join(target_text) + '\n')
        f.write('Translated: ' + ' '.join(translated_text) + '\n\n')
        f_bleu.write(' '.join(translated_text) + '\n')
    f.close()
    f_bleu.close()
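# Hedged sketch, not part of the original assignment code: once translate() has
# written 'pred.en.txt', a corpus BLEU score against the reference file
# 'data/test.en.txt' could be computed with the sacrebleu package, assuming it is
# installed. The file names simply mirror those used in translate() above.
def compute_bleu(pred_path='pred.en.txt', ref_path='data/test.en.txt'):
    import sacrebleu  # assumed dependency; imported lazily so the rest of the script runs without it
    with open(pred_path, encoding='utf-8') as f:
        hypotheses = [line.strip() for line in f]
    with open(ref_path, encoding='utf-8') as f:
        references = [line.strip() for line in f]
    # sacrebleu expects a list of hypothesis strings and a list of reference streams.
    return sacrebleu.corpus_bleu(hypotheses, [references]).score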
import torch
# tqdm/trange provide the progress bars used in train() and translate().
from tqdm import tqdm, trange

from dataset import Language, NmtDataset
# Assumption: collate_fn and bucketed_batch_indices are also defined in dataset.py;
# adjust this import if they live in another module of the project.
from dataset import collate_fn, bucketed_batch_indices
from model import Seq2Seq
from run import plot_attention

attention_type = 'concat'  # 'dot' or 'concat'
embedding_dim = 128
hidden_dim = 64
bucketing = True
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

if __name__ == "__main__":
    french_train = Language(path='data/train.fr.txt')
    english_train = Language(path='data/train.en.txt')
    french_train.build_vocab()
    english_train.build_vocab()

    model = Seq2Seq(french_train, english_train, attention_type=attention_type,
                    embedding_dim=embedding_dim, hidden_dim=hidden_dim).to(device)
    model.load_state_dict(
        torch.load("seq2seq_" + attention_type + ".pth", map_location=device))

    french_test = Language(path='data/test.fr.txt')
    english_test = Language(path='data/test.en.txt')
    french_test.set_vocab(french_train.word2idx, french_train.idx2word)
    english_test.set_vocab(english_train.word2idx, english_train.idx2word)
    dataset = NmtDataset(src=french_test, trg=english_test)

    samples = [dataset[0][0], dataset[1][0],