# NOTE(review): this chunk starts mid-function — the two statements below are
# the tail of a routine whose `def` line is outside this view (it appears to
# finish a Transformer decode/forward pass, returning the target sequence
# without its first (BOS) column plus three attention collections).
# Reformatted for readability only; no code tokens changed.
tgt_pos = tgt_pos.type(torch.LongTensor).to(device)
return tgt_seq[:, 1:], enc_slf_attns, dec_slf_attns, dec_enc_attns

# --- top-level data-preparation script ---

# Load the raw parallel corpora; train_X/train_Y are paired source/target
# sentence lists, test_X has no targets.
train_X, train_Y, test_X = load_dataset()

# Hold out 10% of the training pairs for validation; fixed seed keeps the
# split reproducible across runs.
train_X, valid_X, train_Y, valid_Y = train_test_split(
    train_X, train_Y, test_size=0.1, random_state=42)

# Special-token → id mapping used to seed both vocabularies.
word2id = {
    PAD_TOKEN: PAD,
    BOS_TOKEN: BOS,
    EOS_TOKEN: EOS,
    UNK_TOKEN: UNK,
}

# Separate vocabularies for the source (X) and target (Y) languages, built
# from the training split only; words rarer than MIN_COUNT presumably fall
# back to UNK — confirm against Vocab.build_vocab.
# NOTE(review): both Vocab instances receive the SAME word2id dict object —
# if build_vocab mutates it in place, the two vocabularies will share
# entries; verify that Vocab copies the mapping.
# NOTE(review): a later duplicate of this code passes `min_count` (lowercase)
# here — one of the two names is presumably stale; confirm which exists.
vocab_X = Vocab(word2id=word2id)
vocab_Y = Vocab(word2id=word2id)
vocab_X.build_vocab(train_X, min_count=MIN_COUNT)
vocab_Y.build_vocab(train_Y, min_count=MIN_COUNT)
vocab_size_X = len(vocab_X.id2word)
vocab_size_Y = len(vocab_Y.id2word)

# Convert every sentence from token strings to id sequences using the
# language-appropriate vocabulary.
train_X = [sentence_to_ids(vocab_X, sentence) for sentence in train_X]
train_Y = [sentence_to_ids(vocab_Y, sentence) for sentence in train_Y]
valid_X = [sentence_to_ids(vocab_X, sentence) for sentence in valid_X]
valid_Y = [sentence_to_ids(vocab_Y, sentence) for sentence in valid_Y]
# NOTE(review): this chunk largely duplicates the preceding one — the two
# look like overlapping extraction windows over the same file. Reformatted
# for readability only; no code tokens changed.

# Hold out 10% of the training pairs for validation; fixed seed keeps the
# split reproducible across runs.
train_X, valid_X, train_Y, valid_Y = train_test_split(
    train_X, train_Y, test_size=0.1, random_state=42)

# Special-token → id mapping used to seed both vocabularies.
word2id = {
    PAD_TOKEN: PAD,
    BOS_TOKEN: BOS,
    EOS_TOKEN: EOS,
    UNK_TOKEN: UNK,
}

# Separate source/target vocabularies built from the training split only.
# NOTE(review): both Vocab instances receive the SAME word2id dict object —
# if build_vocab mutates it in place, the two vocabularies will share
# entries; verify that Vocab copies the mapping.
# NOTE(review): the sibling chunk passes `MIN_COUNT` (uppercase) here — one
# of the two names is presumably stale; confirm which one is defined.
vocab_X = Vocab(word2id=word2id)
vocab_Y = Vocab(word2id=word2id)
vocab_X.build_vocab(train_X, min_count=min_count)
vocab_Y.build_vocab(train_Y, min_count=min_count)
vocab_size_X = len(vocab_X.id2word)
vocab_size_Y = len(vocab_Y.id2word)

# Convert every sentence from token strings to id sequences using the
# language-appropriate vocabulary.
train_X = [sentence_to_ids(vocab_X, sentence) for sentence in train_X]
train_Y = [sentence_to_ids(vocab_Y, sentence) for sentence in train_Y]
valid_X = [sentence_to_ids(vocab_X, sentence) for sentence in valid_X]
valid_Y = [sentence_to_ids(vocab_Y, sentence) for sentence in valid_Y]

# Batch iterators over the id sequences. NOTE(review): this DataLoader takes
# (X, Y, batch_size) positionally, so it is a project-local class, NOT
# torch.utils.data.DataLoader — confirm its signature before changing.
train_dataloader = DataLoader(train_X, train_Y, batch_size)
# Validation batches are not shuffled so evaluation order is deterministic.
valid_dataloader = DataLoader(valid_X, valid_Y, batch_size, shuffle=False)

# Model hyper-parameters. NOTE(review): this dict literal is cut off at the
# edge of the visible chunk — it continues past this view and is left open
# here intentionally.
model_args = {
    'input_size': vocab_size_X,