    def get_iterator(self, dataset):
        if self.cuda:
            iterator = data.BPTTIterator(dataset, sort_key=None,
                    bptt_len=self.seq_len, batch_size=self.batch_size)
        else:
            # device=-1 keeps the batches on the CPU (legacy torchtext convention)
            iterator = data.BPTTIterator(dataset, sort_key=None,
                    bptt_len=self.seq_len, batch_size=self.batch_size, device=-1)
        iterator.repeat = False

        print("Created Iterator with {num} batches".format(num=len(iterator)))
        return iterator
    def get_test_iter(self, file_path: str, batch_size: int) -> BatchIterator:
        """
        Get test data iterator from test data file.

        Args:
            file_path (str): Path to test data file.
            batch_size (int): Batch size

        Returns:
            BatchIterator: An instance of BatchIterator to iterate over the
                supplied test data file.
        """
        test_data = self.gen_dataset_from_path(file_path)
        return BatchIterator(
            textdata.BPTTIterator(
                test_data,
                batch_size=batch_size,
                bptt_len=self.bptt_len,
                device="cuda:{}".format(torch.cuda.curren_device())
                if cuda_utils.CUDA_ENABLED else "cpu",
                sort=True,
                repeat=False,
                train=False,
                sort_key=self.sort_key,
            ),
            self._postprocess_batch,
        )
    def _get_train_iter(
        self,
        train_dataset: textdata.Dataset,
        batch_size: int,
        rank: int = 0,
        world_size: int = 1,
    ) -> BatchIterator:
        dataset_shard, max_num_examples = self._get_dataset_shard(
            train_dataset, rank, world_size)
        # Compute the per-worker batch size
        assert (batch_size >= world_size
                ), "batch size needs to be >= the distributed world size"
        batch_size = batch_size // world_size

        return BatchIterator(
            textdata.BPTTIterator(
                dataset_shard,
                batch_size=batch_size,
                bptt_len=self.bptt_len,
                device="cuda:{}".format(torch.cuda.curren_device())
                if cuda_utils.CUDA_ENABLED else "cpu",
                sort_within_batch=True,
                repeat=False,
                sort_key=self.sort_key,
            ),
            self._postprocess_batch,
            num_batches=math.ceil(max_num_examples / float(batch_size)),
        )
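The `_get_dataset_shard` helper called above is not part of this snippet. A minimal sketch of a compatible method, assuming it simply slices the examples round-robin by rank (an assumption, not the actual PyText implementation):

    def _get_dataset_shard(self, train_dataset, rank, world_size):
        # Give each worker a disjoint, round-robin slice of the examples.
        examples = train_dataset.examples
        shard = examples[rank::world_size]
        # Every worker must run the same number of batches, so report the size
        # of the largest shard rather than this worker's shard.
        max_num_examples = math.ceil(len(examples) / float(world_size))
        dataset_shard = textdata.Dataset(shard, train_dataset.fields)
        return dataset_shard, max_num_examples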
Example #4
    def __init__(self, root_dir, batch_size=32, length=100):
        self.root_dir = root_dir
        self.field = data.Field(sequential=True, lower=False)
        all_datasets = datasets.PennTreebank.splits(
            text_field=self.field, root=self.root_dir)
        self.train, self.valid, self.test = all_datasets
        self.train_iter = data.BPTTIterator(
            dataset=self.train, batch_size=batch_size, bptt_len=length)
        self.field.build_vocab(self.train)
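Once constructed, the BPTTIterator yields Batch objects whose `text` and `target` tensors both have shape (bptt_len, batch_size), with `target` being `text` shifted one step along the token stream. A short usage sketch, calling the enclosing class `PTBCorpus` here because its real name is not shown:

corpus = PTBCorpus(root_dir='.data', batch_size=32, length=100)
for batch in corpus.train_iter:
    x, y = batch.text, batch.target  # each is a (length, batch_size) LongTensor
    # y[t] equals x[t + 1] within the contiguous token stream
    break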
Example #5
    def _make_iter(cls, dataset, batch_size, bptt_len):
        if dataset:
            _iter = data.BPTTIterator(
                dataset,
                batch_size=batch_size,
                bptt_len=bptt_len,  # this is where we specify the sequence length
                repeat=False,
                shuffle=True,
            )
        else:
            _iter = []
        return _iter
Example #6
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', default='data/ptb_char')
    parser.add_argument('--model', required=True)
    parser.add_argument('--config', required=True)
    parser.add_argument('--gpu', default=-1, type=int)
    args = parser.parse_args()

    with open(args.config, 'r') as f:
        config = yaml.safe_load(f)
    pprint(config)

    text_field = PTBCharTextField()
    train_dataset, test_dataset = PTBChar.splits(path=args.data,
                                                 validation=None,
                                                 text_field=text_field)
    text_field.build_vocab(train_dataset)

    test_loader = data.BPTTIterator(dataset=test_dataset,
                                    batch_size=1,
                                    bptt_len=2000,
                                    train=False,
                                    device=args.gpu)

    model = PTBModel(num_chars=len(text_field.vocab), **config['model'])
    model.load_state_dict(torch.load(args.model))
    print(model)
    num_params = sum(p.numel() for p in model.parameters())
    print(f'Total parameters: {num_params}')

    if args.gpu > -1:
        model.cuda(args.gpu)

    model.eval()

    state = hyper_state = None
    test_bpc_sum = test_bpc_denom = 0
    for test_batch in tqdm(test_loader):
        test_inputs = test_batch.text
        test_targets = test_batch.target
        test_logits, state, hyper_state = model(inputs=test_inputs,
                                                state=state,
                                                hyper_state=hyper_state)
        test_loss = sequence_cross_entropy(logits=test_logits,
                                           targets=test_targets)
        test_bpc_sum += (test_loss.item() / np.log(2)) * test_inputs.size(0)
        test_bpc_denom += test_inputs.size(0)
    test_bpc = test_bpc_sum / test_bpc_denom

    print(f'Test BPC = {test_bpc:.6f}')
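`sequence_cross_entropy` is referenced above but not defined in this snippet. A minimal sketch of a compatible helper, assuming logits of shape (seq_len, batch, vocab) and integer targets of shape (seq_len, batch); dividing the result by ln(2), as the loop does, converts nats to bits per character:

import torch.nn.functional as F

def sequence_cross_entropy(logits, targets):
    # Average per-token cross-entropy (in nats) over the whole batch.
    vocab_size = logits.size(-1)
    return F.cross_entropy(logits.reshape(-1, vocab_size), targets.reshape(-1))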
Example #7
    def init_dataloaders(self):
        print('Initializing dataloaders')

        project_path = self.config.firelab.project_path
        data_path_train = os.path.join(project_path, self.config.data.train)
        data_path_val = os.path.join(project_path, self.config.data.val)

        data_train = open(data_path_train).read().splitlines()
        data_val = open(
            data_path_val).read().splitlines()[:self.config.val_set_size]

        self.eos = '|'
        field = Field(eos_token=self.eos,
                      batch_first=True,
                      tokenize=char_tokenize)

        train_examples = [
            Example.fromlist([self.eos.join(data_train)], [('text', field)])
        ]
        val_examples = [
            Example.fromlist([s], [('text', field)]) for s in data_val
        ]

        self.train_ds = Dataset(train_examples, [('text', field)])
        self.val_ds = Dataset(val_examples, [('text', field)])

        field.build_vocab(self.train_ds)

        self.vocab = field.vocab
        self.train_dataloader = data.BPTTIterator(self.train_ds,
                                                  self.config.hp.batch_size,
                                                  self.config.hp.batch_len,
                                                  repeat=False)
        self.val_dataloader = data.BucketIterator(self.val_ds,
                                                  1,
                                                  shuffle=False,
                                                  repeat=False)

        print('Dataloaders initialized!')
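`char_tokenize` is passed to the Field above but not defined in this snippet; for character-level modelling it is presumably just splitting the string into characters (an assumed implementation):

def char_tokenize(text):
    # One token per character, so the BPTTIterator streams characters.
    return list(text)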
Example #8
    def __init__(self, config, lm_config, device):
        # define all fields
        TEXT = data.ReversibleField(sequential=True, tokenize=self.tokenizer,
                                    lower=False, include_lengths=False)
        POS = data.ReversibleField(sequential=True, lower=False, include_lengths=True)
        NER = data.ReversibleField(sequential=True, lower=False, include_lengths=True)
        LABEL = data.Field(sequential=False, use_vocab=False)
        IN_Q = data.Field(sequential=True, use_vocab=False, include_lengths=True,
                          postprocessing=self.to_numeric)
        IN_C = data.Field(sequential=True, use_vocab=False, include_lengths=True,
                          postprocessing=self.to_numeric)
        LEMMA_IN_Q = data.Field(sequential=True, use_vocab=False, include_lengths=True,
                                postprocessing=self.to_numeric)
        LEMMA_IN_C = data.Field(sequential=True, use_vocab=False, include_lengths=True,
                                postprocessing=self.to_numeric)
        TF = data.Field(sequential=True, use_vocab=False, include_lengths=True,
                        postprocessing=self.to_numeric)
        REL = data.ReversibleField(sequential=True, lower=False, include_lengths=True)

        # load lm data first
        lm_train = datasets.LanguageModelingDataset(os.path.join(lm_config.file_path, lm_config.train_f),
                                                    TEXT, newline_eos=False)
        lm_dev = datasets.LanguageModelingDataset(os.path.join(lm_config.file_path, lm_config.dev_f),
                                                  TEXT, newline_eos=False)

        # load actual data
        # we have keys: 'id', 'd_words', 'd_pos', 'd_ner', 'q_words', 'q_pos', 'c_words',
        #       'label', 'in_q', 'in_c', 'lemma_in_q', 'lemma_in_c', 'tf', 'p_q_relation', 'p_c_relation'
        train, val, test = data.TabularDataset.splits(
            path=config.data_dir, train=config.train_fname,
            validation=config.dev_fname, test=config.test_fname, format='json',
            fields={'d_words': ('d_words', TEXT),
                    'd_pos':   ('d_pos', POS),
                    'd_ner':   ('d_ner', NER),
                    'q_words': ('q_words', TEXT),
                    'q_pos':   ('q_pos', POS),
                    'c_words': ('c_words', TEXT),
                    'label': ('label', LABEL),
                    'in_q': ('in_q', IN_Q),
                    'in_c': ('in_c', IN_C),
                    'lemma_in_q': ('lemma_in_q', LEMMA_IN_Q),
                    'lemma_in_c': ('lemma_in_c', LEMMA_IN_C),
                    'tf': ('tf', TF),
                    'p_q_relation': ('p_q_relation', REL),
                    'p_c_relation': ('p_c_relation', REL)
                    })

        print('train: %d, val: %d, test: %d' % (len(train), len(val), len(test)))

        # construct vocabulary
        TEXT.build_vocab(train, val, test, lm_train, lm_dev, vectors=config.vectors)
        POS.build_vocab(train, val, test)
        NER.build_vocab(train, val, test)
        REL.build_vocab(train, val, test)

        print('vocab size: %d' % len(TEXT.vocab))
        print('pos size: %d' % len(POS.vocab))
        print('ner size: %d' % len(NER.vocab))
        print('rel size: %d' % len(REL.vocab))

        self.TEXT = TEXT

        # iterators
        self.lm_train_iter = data.BPTTIterator(lm_train, batch_size=lm_config.batch_size,
                                               bptt_len=lm_config.bptt_len, repeat=False)
        self.lm_dev_iter = data.BPTTIterator(lm_dev, batch_size=lm_config.batch_size,
                                             bptt_len=lm_config.bptt_len, repeat=False)

        print('lm train batch num: %d, lm dev batch num: %d' %
              (len(self.lm_train_iter), len(self.lm_dev_iter)))

        self.train_iter = data.BucketIterator(dataset=train, batch_size=config.batch_size_train,
                                              sort_key=lambda x: len(x.d_words), device=device, shuffle=True,
                                              sort_within_batch=False, repeat=False)

        self.val_iter = data.Iterator(dataset=val, batch_size=config.batch_size_eval,
                                      sort_key=lambda x: len(x.d_words),
                                      train=False, shuffle=False, sort_within_batch=False, device=device,
                                      repeat=False)

        self.test_iter = data.Iterator(dataset=test, batch_size=config.batch_size_test,
                                       sort_key=lambda x: len(x.d_words), train=False, shuffle=False,
                                       sort_within_batch=False, device=device, repeat=False)

        print('train batch num: %d, dev batch num: %d' %
              (len(self.train_iter), len(self.val_iter)))

        # Create embeddings
        embedding = nn.Embedding(len(TEXT.vocab), config.embed_dim)
        embedding.weight.data.copy_(TEXT.vocab.vectors)
        embedding.weight.requires_grad = False
        self.embedding = embedding.to(device)

        embedding_pos = nn.Embedding(len(POS.vocab), config.embed_dim_pos)
        embedding_pos.weight.data.normal_(0, 0.1)
        self.embedding_pos = embedding_pos.to(device)

        embedding_ner = nn.Embedding(len(NER.vocab), config.embed_dim_ner)
        embedding_ner.weight.data.normal_(0, 0.1)
        self.embedding_ner = embedding_ner.to(device)

        embedding_rel = nn.Embedding(len(REL.vocab), config.embed_dim_rel)
        embedding_rel.weight.data.normal_(0, 0.1)
        self.embedding_rel = embedding_rel.to(device)

        print('embedding', self.embedding)
        print('embedding_pos', self.embedding_pos)
        print('embedding_ner', self.embedding_ner)
        print('embedding_rel', self.embedding_rel)

        self.vocab_size = len(TEXT.vocab)
        print('vocab_size is', self.vocab_size)
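`self.tokenizer` and `self.to_numeric` are used above but not shown. Legacy torchtext calls a Field's `postprocessing` with the padded batch and the vocab (None when `use_vocab=False`), so a sketch of `to_numeric` might look like the following; this is an assumption, not the original helper:

    def to_numeric(self, batch, vocab):
        # Cast the already-numeric string tokens to floats, mapping the pad
        # token to zero so the batch can be turned into a tensor.
        return [[0.0 if tok == '<pad>' else float(tok) for tok in ex]
                for ex in batch]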
Example #9
def train(flags):
    my_tok = spacy.load('en')

    def spacy_tok(x):
        return [tok.text for tok in my_tok.tokenizer(x)]

    TEXT = data.Field(lower=True, tokenize=spacy_tok)

    dataset = torchtext.datasets.LanguageModelingDataset(
        flags.train_file, TEXT)
    dataset[0].text = dataset[0].text[::-1]

    if flags.custom_embeddings:
        custom_embeddings = torchtext.vocab.Vectors(
            name=os.path.abspath(flags.custom_embeddings))
        TEXT.build_vocab(dataset, vectors=custom_embeddings)
    else:
        TEXT.build_vocab(dataset, vectors="glove.6B.300d")

    weight_matrix = TEXT.vocab.vectors
    vocab = TEXT.vocab

    os.makedirs(flags.save_dir, exist_ok=True)
    with open(os.path.join(flags.save_dir, 'vocab.pkl'), 'wb') as vocab_file:
        pickle.dump(vocab, vocab_file)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    train_iter = data.BPTTIterator(
        dataset,
        batch_size=flags.batch_size,
        bptt_len=flags.seq_size,  # this is where we specify the sequence length
        device=device,  # fall back to the CPU when no GPU is available
        repeat=False)

    n_vocab, emb_size = weight_matrix.shape

    net = LSTMModel(n_vocab, emb_size, flags.lstm_size, flags.lstm_layers)
    net.embedding.weight.data.copy_(weight_matrix)
    net.set_vocab(vocab)

    net = net.to(device)

    criterion, optimizer = get_loss_and_train_op(net, flags.learning_rate)

    iteration = 0

    for e in range(flags.n_epoch):
        state_h, state_c = net.zero_state(flags.batch_size)

        # Transfer data to GPU
        state_h = state_h.to(device)
        state_c = state_c.to(device)

        for batch in train_iter:
            x, y = batch.text, batch.target

            iteration += 1

            # Tell it we are in training mode
            net.train()

            # Reset all gradients
            optimizer.zero_grad()

            # Transfer data to GPU
            x = x.to(device)
            y = y.to(device)

            logits, (state_h, state_c) = net(x, (state_h, state_c))
            loss = criterion(logits.transpose(1, 2), y)

            state_h = state_h.detach()
            state_c = state_c.detach()

            loss_value = loss.item()

            # Perform back-propagation
            loss.backward()

            _ = torch.nn.utils.clip_grad_norm_(net.parameters(),
                                               flags.gradients_norm)

            # Update the network's parameters
            optimizer.step()

            if iteration % 100 == 0:
                print('Epoch: {}/{}'.format(e, flags.n_epoch),
                      'Iteration: {}'.format(iteration),
                      'Loss: {}'.format(loss_value))

            if iteration % 1000 == 0:
                predict(device, net, ['the end'], vocab, top_k=2)
                torch.save(net.state_dict(),
                           os.path.join(flags.save_dir, 'model-last.pth'))
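`get_loss_and_train_op` and `net.zero_state` are not included in this snippet. `zero_state(batch_size)` presumably returns two zero tensors of shape (lstm_layers, batch_size, lstm_size); a sketch of the former, under the assumption that it pairs a plain cross-entropy loss with Adam:

def get_loss_and_train_op(net, lr):
    # CrossEntropyLoss accepts the (seq, vocab, batch) logits / (seq, batch)
    # targets layout produced by the transpose in the training loop above.
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)
    return criterion, optimizer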
Example #10
TEXT = data.Field(eos_token=EOS_TOKEN,
                  init_token=BOS_TOKEN,
                  unk_token=UNK_TOKEN,
                  batch_first=False)
train_data = datasets.LanguageModelingDataset(path=os.path.join(
    DATA_BASE_PATH, DATA_DIR, DATA_TRAIN_FILE_NAME),
                                              text_field=TEXT)
valid_data = datasets.LanguageModelingDataset(path=os.path.join(
    DATA_BASE_PATH, DATA_DIR, DATA_VALID_FILE_NAME),
                                              text_field=TEXT)
test_data = datasets.LanguageModelingDataset(path=os.path.join(
    DATA_BASE_PATH, DATA_DIR, DATA_TEST_FILE_NAME),
                                             text_field=TEXT)
TEXT.build_vocab(train_data)
train_iter = data.BPTTIterator(dataset=train_data,
                               batch_size=BATCH_SIZE,
                               bptt_len=BPTT_LEN,
                               device=device)
valid_iter = data.BPTTIterator(dataset=valid_data,
                               batch_size=BATCH_SIZE,
                               bptt_len=BPTT_LEN,
                               device=device)
test_iter = data.BPTTIterator(dataset=test_data,
                              batch_size=BATCH_SIZE,
                              bptt_len=BPTT_LEN,
                              device=device)

# build model

MODEL_SAVE_BASE_PATH = '/home/ubuntu/likun/nlp-practice/language_model'
MODEL_NAME = "PTB-RNN-KERNEL.pt"
MODEL_SAVE_PATH = os.path.join(MODEL_SAVE_BASE_PATH, 'save_models', MODEL_NAME)
train_ptb, valid_ptb, test_ptb = datasets.PennTreebank.splits(
    TEXT, root="treebank.data")
print("PTB datasets constructed.")

TEXT.build_vocab(train_ptb, valid_ptb, test_ptb, train_tweets,
                 valid_tweets)  #9.733 with only PTB, 27.780 in total
print("Vobabulary built.")

# create model
model = Mikolov(len(TEXT.vocab))
#last_checkpoint = torch.load("saved/treebank_for_20_epochs.pt")
#model.load_state_dict(last_checkpoint['model_state_dict'])

# create iterators for training
# here we train on the Twitter dataset, but training on PTB is equivalent
train_iter = data.BPTTIterator(train_tweets, batch_size=1, bptt_len=64)
valid_iter = data.BPTTIterator(valid_tweets, batch_size=1, bptt_len=64)

epochs = 0
valid_losses = []  # validation
while True:

    print("Training epoch #" + str(epochs + 1) + " starts.")
    bptt_trainer(model, train_iter)
    epochs += 1
    print("Epoch #" + str(epochs) + " completed.")

    valid_loss = validator(model, valid_iter)
    print("Averaged loss on validation set: " + str(valid_loss))
    valid_losses.append(valid_loss)
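`bptt_trainer` and `validator` are defined elsewhere. A rough sketch of a validator compatible with the loop above, assuming `Mikolov` maps a (bptt_len, batch) LongTensor of token ids to (bptt_len, batch, vocab) logits; the real forward signature is not shown here:

def validator(model, valid_iter):
    model.eval()
    criterion = torch.nn.CrossEntropyLoss()
    total, n_batches = 0.0, 0
    with torch.no_grad():
        for batch in valid_iter:
            logits = model(batch.text)  # assumed output: (bptt_len, batch, vocab)
            loss = criterion(logits.reshape(-1, logits.size(-1)),
                             batch.target.reshape(-1))
            total += loss.item()
            n_batches += 1
    return total / max(n_batches, 1)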
Example #12

# Fields are added by column, left to right, in the underlying table
fields=[('name', NAMES), ('label', LABELS), ('text', TEXT)]

train, dev, test = data.TabularDataset.splits(
    path='.', format='CSV', fields=fields,
    train='train.csv', validation='dev.csv', test='test.csv')

TEXT.build_vocab(train)
# TEXT.vocab.itos[1] ... '<pad>'
# TEXT.vocab.itos[0] ... '<unk>'
LABELS.build_vocab(train)


# note: BPTTIterator only streams dataset.examples[0].text, so it really
# expects a language-modelling dataset with a single long text example
a = next(iter(data.BPTTIterator(train, 20, 20)))


train_iter, dev_iter, test_iter = data.BPTTIterator.splits(
    (train, dev, test),
    bptt_len=13,
    batch_size=7,
    sort_key=lambda x: len(x.text),
    device='cpu')




# https://mlexplained.com/2018/02/15/language-modeling-tutorial-in-torchtext-practical-torchtext-part-2/
from torchtext.datasets import WikiText2
train, valid, test = WikiText2.splits(TEXT)  # load the WikiText2 data
# Other Stuff
TEST_TEXT = data.Field(lower=True, tokenize=spacy_tok)
trainSet, valid, test = datasets.WikiText2.splits(TEST_TEXT)
myTestSet = datasets.LanguageModelingDataset(
    path=r"F:\Πτυχιακη\Lab4A\.data\wikitext-2\wikitext-2\TestMine.tokens",
    text_field=TEST_TEXT)

# The vocabulary is constructed from the dataset, so we need to load the large one for more variety
TEST_TEXT.build_vocab(trainSet, vectors="glove.6B.200d")
myWeight_matrix = TEST_TEXT.vocab.vectors

# Create an iterator over my input
myTestIter = data.BPTTIterator(dataset=myTestSet,
                               batch_size=batchSize,
                               bptt_len=1,
                               device=torch.device("cuda:0"),
                               repeat=False)

# Load model
#model = RNNModel(myWeight_matrix.size(0), myWeight_matrix.size()[1], 200, 1, 1)

# Model for the special case in the tokenizer
model = RNN_GRUModel(28869, myWeight_matrix.size()[1], 201, 1, batchSize)

## Model for no special case in the tokenizer
#model = RNNModel(28870, myWeight_matrix.size()[1], 200, 1, bsz=1)
#
model.encoder.weight.data.copy_(myWeight_matrix)

# Comment if no model exists
Example #14
from nntoolbox.metrics import *

MAX_VOCAB_SIZE = 25000
BATCH_SIZE = 16

TEXT = data.Field(tokenize='spacy')
LABEL = data.LabelField(dtype=torch.float)

# train_iterator, val_iterator, test_iterator = WikiText2.iters()
# for tmp in train_iterator:
#     print(tmp)

train_data, val_data, test_data = WikiText2.splits(TEXT)
train_iterator = data.BPTTIterator(train_data,
                                   batch_size=BATCH_SIZE,
                                   sort_within_batch=True,
                                   device=get_device(),
                                   bptt_len=35,
                                   shuffle=True)

val_iterator = data.BPTTIterator(val_data,
                                 batch_size=BATCH_SIZE,
                                 sort_within_batch=True,
                                 device=get_device(),
                                 bptt_len=35,
                                 shuffle=True)

TEXT.build_vocab(train_data, max_size=MAX_VOCAB_SIZE, vectors="glove.6B.100d")
embedding = AdditiveContextEmbedding(num_embeddings=len(TEXT.vocab),
                                     embedding_dim=100)
load_embedding(embedding, TEXT.vocab.vectors)
Example #15
def main(args):
    device = torch.device('cuda' if args.gpu else 'cpu')

    if args.re_training is None:
        TEXT = data.Field(
            lower=True, 
            init_token='<bos>', 
            eos_token='<eos>'
        )
    else: 
        basedir, _ = os.path.split(args.re_training)
        path = os.path.join(basedir, 'text.field')
        TEXT = utils.load_field(path)

    fields = [('text', TEXT)] if args.task in monolingual_tasks \
                else [('src', TEXT), ('tgt', TEXT)]

    slen_filter = lambda x: args.src_minlen <= len(x.src) <= args.src_maxlen \
                         and args.tgt_minlen <= len(x.tgt) <= args.tgt_maxlen

    # load training data
    if args.task == 'translation':
        train_data = data.TabularDataset(
                path=args.train,
                format='tsv',
                fields=fields,
                filter_pred=slen_filter,
        )
    else: # `causal`, `masked`
        train_data = datasets.LanguageModelingDataset(
            path=args.train, 
            text_field=TEXT, 
            newline_eos=True
        )

    # set Vocabulary object
    if args.re_training is None:
        TEXT.build_vocab(
            train_data, 
            min_freq=args.min_freq, 
            specials=['<sep>', '<mask>'], 
        )
        if args.embed_path:
            vectors = utils.load_vector(args.embed_path)
            TEXT.vocab.load_vectors(vectors)

    if not os.path.exists(args.savedir):
        os.mkdir(args.savedir)

    # save a field object
    with open(os.path.join(args.savedir, 'text.field'), 'wb') as fout:
        dill.dump(TEXT, fout)
    utils.save_vocab(args.savedir, TEXT)

    # set training iterator
    if args.task == 'translation':
        train_iter = data.BucketIterator(
            train_data, 
            batch_size=args.batch_size,
            sort_within_batch=True,
            sort_key= lambda x: len(x.src),
            repeat=False,
        )
    else: # `causal`, `masked`
        train_iter = data.BPTTIterator(
            train_data, 
            batch_size=args.batch_size, 
            bptt_len=args.bptt_len,
            train=True, 
            repeat=False, 
            shuffle=True,
        )

    print(f'| [text] Dictionary: {len(TEXT.vocab.itos)} types')
    print('')

    print(f'train: {args.train}')
    for name, field in fields:
        n_tokens, n_unk = utils.get_statics(train_iter, name, field)
        print(f'| [{name}] {n_tokens} tokens,', end='')
        print(f' coverage: {100*(n_tokens-n_unk)/n_tokens:.{4}}%')
    print('')

    # build a model
    model_class = get_model(args.task)

    if  args.re_training is None:
        epoch = 1
        iteration = 0
        best_loss = math.inf
        model = model_class(TEXT, args).to(device)
    else:
        load_vars = torch.load(args.re_training)
        epoch = load_vars['epoch'] + 1
        iteration = load_vars['iteration']
        best_loss = load_vars['best_loss']
        lm_args, lm_weights = load_vars['args'], load_vars['weights']
        model = model_class(TEXT, lm_args)
        model.load_state_dict(lm_weights)
        model.to(device)

    criterion = nn.CrossEntropyLoss(ignore_index=TEXT.vocab.stoi['<pad>'])
    optimizer_fn = utils.get_optimizer(args.optimizer)
    optimizer = optimizer_fn(model.parameters(), lr=args.lr)
    trainer = Trainer(model, criterion, optimizer, args.clip, iteration)

    # show the details of model and optimizer
    print('=============== MODEL ===============')
    print(model)
    print('')
    print('=============== OPTIMIZER ===============')
    print(optimizer)
    print('')

    max_epoch = args.max_epoch or math.inf
    max_update = args.max_update or math.inf
    assert not(max_epoch == math.inf and max_update == math.inf), \
        'Please set `--max-epoch` or `--max-update`.'
 
    while epoch <= max_epoch and trainer.n_updates <= max_update:
        # training
        with tqdm(train_iter, dynamic_ncols=True) as pbar:
            train_loss = 0.0
            trainer.model.train()
            for samples in pbar:
                if args.task in monolingual_tasks:
                    srcs = samples.text.to(device)
                    tgts = None
                    refs = None if args.task == 'masked' \
                            else samples.target.to(device)
                else:
                    srcs = samples.src.to(device)
                    tgts = samples.tgt.to(device)
                    refs = None
                loss = trainer.step(srcs, tgts, refs)
                train_loss += loss.item()

                # setting of progressbar
                pbar.set_description(f'epoch {str(epoch).zfill(3)}')
                progress_state = OrderedDict(
                    task=args.task,
                    loss=loss.item(),
                    ppl=math.exp(loss.item()),
                    bsz=srcs.size(1),
                    lr=trainer.get_lr(), 
                    clip=args.clip, 
                    num_updates=trainer.n_updates)
                pbar.set_postfix(progress_state)
        train_loss /= len(train_iter)

        print(f'| epoch {str(epoch).zfill(3)} | train ', end='') 
        print(f'| loss {train_loss:.{4}} ', end='')
        print(f'| ppl {math.exp(train_loss):.{4}} ', end='')
        print(f'| lr {trainer.get_lr():.1e} ', end='')
        print(f'| clip {args.clip} ', end='')
        print(f'| num_updates {trainer.n_updates} |')
        
        # saving model
        save_vars = {
            'epoch': epoch,
            'iteration': trainer.n_updates,
            'best_loss': train_loss if train_loss < best_loss else best_loss,
            'args': args, 
            'weights': model.state_dict()
        }

        if train_loss < best_loss:
            best_loss = train_loss
            filename = os.path.join(args.savedir, 'checkpoint_best.pt') 
            torch.save(save_vars, filename)
        if epoch % args.save_epoch == 0:
            filename = os.path.join(args.savedir, f'checkpoint_{epoch}.pt') 
            torch.save(save_vars, filename)
        filename = os.path.join(args.savedir, 'checkpoint_last.pt') 
        torch.save(save_vars, filename)

        # update
        epoch += 1
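`utils.load_field`, used above when `--re_training` is set, is not included in this snippet; since the Field is saved with `dill.dump`, a matching loader is presumably just (an assumption about the original helper):

import dill

def load_field(path):
    # Counterpart to the dill.dump(...) call in main(): restore the saved Field.
    with open(path, 'rb') as fin:
        return dill.load(fin)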