Example 1
def evaluate(opts, device, corpus, model, criterion, epoch):
    """
    Parameters
    ----------
        opts: command line arguments
        device: device type
        corpus: Corpus
        model: Model
        criterion: loss function
        epoch: current epoch
    Return
    ------
        total_loss: float
    """
    epoch_start_time = time.time()
    # Turn on evaluation mode which disables dropout.
    model.eval()
    val_loss = Metric('val_loss')
    # Disable gradient tracking during evaluation
    with torch.no_grad():
        for batch_id, batch in enumerate(
                data.data2batch(corpus.valid,
                                corpus.dictionary,
                                opts.batch_size,
                                flag_shuf=True)):
            hidden = model.init_hidden(batch)
            # Cut the computation graph (Initialize)
            hidden = models.repackage_hidden(hidden)
            # LongTensor of token_ids [seq_len, batch_size]
            input = model.batch2input(batch, device)
            # target_flat: LongTensor of token_ids [seq_len*batch_size]
            target_flat = model.batch2flat(batch, device)
            # clear previous gradients
            model.zero_grad()
            # output: [seq_len, nbatch, ntoken], hidden: [nlayer, nbatch, nhid]
            output, hidden = model(input, hidden)
            # output_flat: FloatTensor of logits [seq_len*batch_size, ntoken]
            output_flat = output.view(-1, output.shape[2])
            val_loss.update(criterion(output_flat, target_flat))
    total_loss = val_loss.avg.item()
    if getattr(opts, "verbose", 1) == 1:
        print('-' * 89)
        try:
            print(
                '| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
                'valid ppl {:8.2f}'.format(epoch,
                                           (time.time() - epoch_start_time),
                                           total_loss, math.exp(total_loss)))
        except OverflowError:
            print("Warning: math error")
        print('-' * 89)
    return total_loss
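The `Metric` accumulator behind `val_loss` is not part of this snippet; a minimal sketch with the same update/avg interface is shown below (the class body, including whether averages are reduced across workers as in Horovod-style training, is an assumption):

import torch

class Metric:
    """Running average of a scalar loss tensor (interface inferred from the usage above)."""

    def __init__(self, name):
        self.name = name
        self.sum = torch.tensor(0.0)
        self.count = 0

    def update(self, val):
        # Detach and move to CPU so the accumulator does not hold the graph or GPU memory.
        self.sum += val.detach().cpu()
        self.count += 1

    @property
    def avg(self):
        # Returns a tensor, matching the `val_loss.avg.item()` call above.
        return self.sum / max(self.count, 1)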
Example 2
def pred2batch(dictionary, model, word_ids, bsz, device):
    """Build a one-batch model input from a list of word ids.

    Each entry pairs a word id with the character ids of its surface form,
    so both the word-level and character-level inputs are available.
    Returns the input dict and a freshly initialized, detached hidden state.
    """
    input_data = [[[word_ids[sid]],
                   [[
                       dictionary.char_conv2id(char)
                       for char in dictionary.conv2word(word_ids[sid])
                   ]]] for sid in range(bsz)]
    # All word_ids fit into a single batch, so return after the first iteration.
    for input_batch in data.data2batch(input_data, dictionary, bsz):
        hidden = model.init_hidden(input_batch)
        hidden = models.repackage_hidden(hidden)
        input_dict = model.word2input(input_batch, device)
        return input_dict, hidden
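A hedged usage sketch built on top of pred2batch; the helper below is hypothetical, and conv2id is an assumed name for a word-to-id lookup on the dictionary (only char_conv2id and conv2word appear in the snippet above):

import torch

def predict_next_words(dictionary, model, seed_words, device):
    """Hypothetical usage of pred2batch: greedy one-step next-word prediction."""
    # conv2id is an assumed method name for the word-to-id lookup.
    word_ids = [dictionary.conv2id(w) for w in seed_words]
    input_dict, hidden = pred2batch(dictionary, model, word_ids, len(seed_words), device)
    with torch.no_grad():
        # output: [seq_len, bsz, ntoken]
        output, hidden = model(input_dict, hidden)
    # Pick the most likely next token for each seed word.
    next_ids = output[-1].argmax(dim=-1)
    return [dictionary.conv2word(int(i)) for i in next_ids]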
Example 3
def evaluate(opts, corpus, input_texts, model, criterion, device):
    """
    Parameters
    ----------
        opts: command line arguments
        corpus: Corpus
        input_texts: texts to score
        model: Model
        criterion: loss function (must return per-token losses, e.g. reduction='none')
        device: device type
    Return
    ------
        results: list of float, one perplexity per sentence
    """
    # Turn on evaluation mode which disables dropout.
    model.eval()
    results = []
    # Do not propagate gradients
    with torch.no_grad():
        for batch_id, batch in enumerate(
                data.data2batch(input_texts,
                                corpus.dictionary,
                                opts.batch_size,
                                flag_shuf=False)):
            hidden = model.init_hidden(batch)
            # Cut the computation graph (Initialize)
            hidden = models.repackage_hidden(hidden)
            # LongTensor of token_ids [seq_len, batch_size]
            input = model.batch2input(batch, device)
            seq_len = input["word"].shape[0]
            batch_size = input["word"].shape[1]
            # target_flat: LongTensor of token_ids [seq_len*batch_size]
            target_flat = model.batch2flat(batch, device)
            # clear previous gradients
            model.zero_grad()
            # output: [seq_len, nbatch, ntoken], hidden: [nlayer, nbatch, nhid]
            output, hidden = model(input, hidden)
            # output_flat: FloatTensor of logits [seq_len*batch_size, ntoken]
            output_flat = output.view(-1, output.shape[2])
            # batch_loss: per-token losses [seq_len*batch_size]
            batch_loss = criterion(output_flat, target_flat)
            # batch_loss: per-token losses [seq_len, batch_size]
            batch_loss = batch_loss.reshape(seq_len, batch_size)
            # batch_loss: mean loss per sentence [batch_size]
            batch_loss = torch.mean(batch_loss, 0)
            for sent_loss in batch_loss:
                ppl = math.exp(sent_loss.item())
                results.append(ppl)
    return results
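The reshape of `batch_loss` to [seq_len, batch_size] only works if the criterion returns one loss per token rather than a scalar; a minimal sketch of a compatible criterion (nn.CrossEntropyLoss is an assumption about the loss actually used here):

import torch.nn as nn

# Per-token losses, shape [seq_len*batch_size], so they can be reshaped per sentence.
criterion = nn.CrossEntropyLoss(reduction='none')

The other evaluate()/train() examples, which accumulate a single scalar per batch, would instead rely on the default reduction='mean'.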
Example 4
def train(opts, device, corpus, model, criterion, optimizer, lr, epoch):
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0.
    start_time = time.time()
    for batch_id, batch in enumerate(
            data.data2batch(corpus.train,
                            corpus.dictionary,
                            opts.batch_size,
                            flag_shuf=True)):

        input = model.batch2input(batch, device)

        target = model.batch2target(batch, device)
        # clear previous gradients
        model.zero_grad()

        output = model(input)

        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        total_loss += loss.item()

        if batch_id % opts.log_interval == 0 and batch_id > 0:
            cur_loss = total_loss / opts.log_interval
            elapsed = time.time() - start_time
            print(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                'loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, batch_id,
                    len(corpus.train) // opts.batch_size, lr,
                    elapsed * 1000 / opts.log_interval, cur_loss,
                    math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
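The `criterion` and `optimizer` passed into this training loop are constructed by the caller; a minimal sketch of a compatible setup, assuming a cross-entropy loss and plain SGD (the original project may use something else), could be:

import torch
import torch.nn as nn

def build_training_objects(model, lr):
    """Hypothetical factory for the criterion/optimizer consumed by train() above."""
    criterion = nn.CrossEntropyLoss()                       # mean loss over the batch
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)  # plain SGD; another optimizer may be used
    return criterion, optimizer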
Example 5
def evaluate(opts, device, corpus, model, criterion, epoch):
    """Evaluate on corpus.valid and return the average validation loss (float)."""
    epoch_start_time = time.time()
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss = 0.0

    with torch.no_grad():
        for batch_id, batch in enumerate(
                data.data2batch(corpus.valid,
                                corpus.dictionary,
                                opts.batch_size,
                                flag_shuf=True)):

            input = model.batch2input(batch, device)

            target = model.batch2target(batch, device)

            model.zero_grad()

            output = model(input)

            total_loss += criterion(output, target).item()

            total_num = batch_id + 1
    total_loss /= total_num
    print('-' * 89)
    try:
        print('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | '
              'valid ppl {:8.2f}'.format(epoch,
                                         (time.time() - epoch_start_time),
                                         total_loss, math.exp(total_loss)))
    except OverflowError:
        print("Warning: math error")
    print('-' * 89)
    return total_loss
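A hedged sketch of an outer epoch loop that could tie this evaluate() to the matching train(); opts.epochs and the anneal-by-4 schedule (borrowed from PyTorch's word-language-model recipe) are assumptions:

def run_training(opts, device, corpus, model, criterion, optimizer, lr):
    """Hypothetical driver wiring train() and evaluate() together across epochs."""
    best_val_loss = None
    for epoch in range(1, opts.epochs + 1):   # opts.epochs is an assumed command-line option
        train(opts, device, corpus, model, criterion, optimizer, lr, epoch)
        val_loss = evaluate(opts, device, corpus, model, criterion, epoch)
        if best_val_loss is None or val_loss < best_val_loss:
            best_val_loss = val_loss
        else:
            # Anneal the learning rate when validation loss stops improving.
            lr /= 4.0
            for group in optimizer.param_groups:
                group['lr'] = lr
    return best_val_loss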
Example 6
def train(opts, device, corpus, model, criterion, optimizer, lr, epoch):
    """
    Parameters
    ----------
        opts: command line arguments
        device: device type
        corpus: Corpus
        model: Model
        criterion: loss function
        optimizer: optimizer
        lr: learning rate (float)
        epoch: current epoch
    """
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0.
    start_time = time.time()
    for batch_id, batch in enumerate(
            data.data2batch(corpus.train,
                            corpus.dictionary,
                            opts.batch_size,
                            flag_shuf=True)):
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        # batch[0].shape[1]: nbatch, hidden: [nlayer, nbatch, nhid]
        hidden = model.init_hidden(batch)
        # Cut the computation graph (Initialize)
        hidden = models.repackage_hidden(hidden)
        # LongTensor of token_ids [seq_len, batch_size]
        input = model.batch2input(batch, device)
        # target_flat: LongTensor of token_ids [seq_len*batch_size]
        target_flat = model.batch2flat(batch, device)
        # clear previous gradients
        model.zero_grad()
        # output: [seq_len, nbatch, ntoken], hidden: [nlayer, nbatch, nhid]
        output, hidden = model(input, hidden)
        # output_flat: FloatTensor of logits [seq_len*batch_size, ntoken]
        output_flat = output.view(-1, output.shape[2])
        # Calculate the mean loss over all tokens in the batch.
        # loss: scalar tensor
        loss = criterion(output_flat, target_flat)
        # Do backpropagation
        loss.backward()
        # Gradient clipping
        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), opts.clip)
        # Update weights
        """
        # SGD
        for p in model.parameters():
            p.data.add_(-lr, p.grad.data)
        """
        optimizer.step()
        optimizer.zero_grad()
        total_loss += loss.item()

        if batch_id % opts.log_interval == 0 and batch_id > 0:
            cur_loss = total_loss / opts.log_interval
            elapsed = time.time() - start_time
            print(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                'loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, batch_id,
                    len(corpus.train) // opts.batch_size, lr,
                    elapsed * 1000 / opts.log_interval, cur_loss,
                    math.exp(cur_loss)))
            total_loss = 0
            start_time = time.time()
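`models.repackage_hidden`, called at the top of every recurrent loop above, is not shown in these examples; the canonical implementation from PyTorch's word-language-model example, which simply detaches the hidden state so gradients stop at the batch boundary, looks like this (the exact version in `models` may differ):

import torch

def repackage_hidden(h):
    """Detach hidden states from their history so backprop stops at the batch boundary."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    # LSTMs return a tuple of (h, c); detach each element recursively.
    return tuple(repackage_hidden(v) for v in h)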