Example 1
def evaluate(split):
    # Turn on evaluation mode which disables dropout.
    global ntokens

    model.eval()
    total_loss, nbatches = 0, 0
    # ntokens = len(corpus.dictionary.idx2word) if not args.lm1b else ntokens
    hidden = model.init_hidden(args.eval_batch_size)

    if not args.lm1b:
        data_gen = corpus.iter(split, args.eval_batch_size, args.bptt, use_cuda=args.cuda)
    else:
        data_gen = test_corpus.batch_generator(seq_length=args.bptt, batch_size=args.eval_batch_size, shuffle=False)

    for item in data_gen:

        if args.lm1b:
            source, target, word_cnt, batch_num = get_batch(item)
        else:
            source, target = item

        model.softmax.set_target(target.data.view(-1))

        output, hidden = model(source, hidden)

        total_loss += criterion(output, target.view(-1)).data.sum()

        hidden = repackage_hidden(hidden)
        nbatches += 1
    return total_loss / nbatches
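
All four examples rely on a repackage_hidden helper (not shown) to cut the autograd graph at batch boundaries, as the comments in the training loops explain. Below is a minimal sketch of such a helper for current PyTorch; the original snippets appear to target an older release, where a Variable-based variant was common instead.

import torch

def repackage_hidden(h):
    # Detach hidden states from their history so that backpropagation
    # stops at the current batch instead of reaching back to the start
    # of the dataset.
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)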
Example 2
def train():
    global lr, best_val_loss
    # Turn on training mode which enables dropout.
    model.train()
    total_loss, nbatches = 0, 0
    start_time = time.time()
    hidden = model.init_hidden(args.batch_size)

    if not args.lm1b:
        data_gen = corpus.iter('train', args.batch_size, args.bptt, use_cuda=args.cuda)
    else:
        data_gen = train_corpus.batch_generator(seq_length=args.bptt, batch_size=args.batch_size)

    for b, batch in enumerate(data_gen):
        model.train()
        if args.lm1b:
            source, target, word_cnt, batch_len = get_batch(batch)
        else:
            source, target = batch
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        model.zero_grad()  # optimizer.zero_grad()
        model.softmax.set_target(target.data.view(-1))
        output, hidden = model(source, hidden)
        loss = criterion(output, target.view(-1))
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        optimizer.step()
        # for p in model.parameters():
        #     if p.grad is not None:
        #         p.data.add_(-lr, p.grad.data)

        total_loss += loss.data.cpu()
        # logging.info(total_loss)

        if b % args.log_interval == 0 and b > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            if not args.valid_per_epoch:
                val_loss = evaluate('valid')
                logging.info('| epoch {:3d} | batch {:5d} | lr {:02.5f} | ms/batch {:5.2f} | '
                        'loss {:5.2f} | ppl {:8.2f} | valid loss {:5.2f} | valid ppl {:8.2f}'.format(
                    epoch, b, lr,
                    elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss),
                    val_loss, math.exp(val_loss)))
            else:
                logging.info('| epoch {:3d} | batch {:5d} | lr {:02.5f} | ms/batch {:5.2f} | '
                      'loss {:5.2f} | ppl {:8.2f} '.format(
                    epoch, b, lr,
                    elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss)))

            total_loss = 0
            start_time = time.time()
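
Example 2 expects an outer loop that calls train() once per epoch and, when args.valid_per_epoch is set, validates and checkpoints afterwards (mirroring what Example 3 does inline). That driver is not part of the snippet; the following is a rough sketch under those assumptions, where args.epochs and the optimizer variable are hypothetical names not taken from the examples.

best_val_loss = None
for epoch in range(1, args.epochs + 1):  # args.epochs is assumed, not shown above
    train()
    if args.valid_per_epoch:
        val_loss = evaluate('valid')
        logging.info('| end of epoch {:3d} | valid loss {:5.2f} | valid ppl {:8.2f}'.format(
            epoch, val_loss, math.exp(val_loss)))
        if best_val_loss is None or val_loss < best_val_loss:
            with open(args.save, 'wb') as f:
                torch.save(model, f)  # keep the best checkpoint, as in Example 3
            best_val_loss = val_loss
        else:
            lr *= args.ar  # anneal the learning rate, as in Example 3
            for group in optimizer.param_groups:
                group['lr'] = lr  # propagate the new rate to the optimizer used in train()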
Example 3
def train():
    global lr, best_val_loss
    # Turn on training mode which enables dropout.
    model.train()
    total_loss, nbatches = 0, 0
    start_time = time.time()
    ntokens = len(corpus.dictionary.idx2word)
    hidden = model.init_hidden(args.batch_size)
    for b, batch in enumerate(
            corpus.iter('train',
                        args.batch_size,
                        args.bptt,
                        use_cuda=args.cuda)):
        model.train()
        source, target = batch
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        model.zero_grad()
        model.softmax.set_target(target.data.view(-1))
        output, hidden = model(source, hidden)
        loss = criterion(output, target.view(-1))
        loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs.
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        for p in model.parameters():
            if p.grad is not None:
                p.data.add_(-lr, p.grad.data)

        total_loss += loss.data.cpu()

        if b % args.log_interval == 0 and b > 0:
            cur_loss = total_loss[0] / args.log_interval
            elapsed = time.time() - start_time
            val_loss = evaluate('valid')
            print(
                '| epoch {:3d} | batch {:5d} | lr {:02.5f} | ms/batch {:5.2f} | '
                'loss {:5.2f} | ppl {:8.2f} | valid loss {:5.2f} | valid ppl {:8.2f}'
                .format(epoch, b, lr, elapsed * 1000 / args.log_interval,
                        cur_loss, math.exp(cur_loss), val_loss,
                        math.exp(val_loss)))

            # Save the model if the validation loss is the best we've seen so far.
            if not best_val_loss or val_loss < best_val_loss:
                with open(args.save, 'wb') as f:
                    torch.save(model, f)
                best_val_loss = val_loss
            else:
                # Anneal the learning rate if no improvement has been seen in the validation dataset.
                lr *= args.ar

            total_loss = 0
            start_time = time.time()
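
Example 3 updates parameters by hand: p.data.add_(-lr, p.grad.data) is plain SGD without momentum, and the annealed lr takes effect immediately because the loop reads it on every step. A sketch of the equivalent optimizer-based update (the style used in Example 2), assuming current PyTorch where the clipping function is named clip_grad_norm_:

# Optimizer-based equivalent of the manual SGD update in Example 3 (a sketch).
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

# Inside the batch loop, replacing the explicit parameter loop:
optimizer.zero_grad()
output, hidden = model(source, hidden)
loss = criterion(output, target.view(-1))
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
optimizer.step()

With an optimizer, annealing lr alone is not enough: the new value has to be written back into optimizer.param_groups, as sketched after Example 2.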
Example 4
def evaluate(split):
    # Turn on evaluation mode which disables dropout.
    model.eval()
    total_loss, nbatches = 0, 0
    ntokens = len(corpus.dictionary.idx2word)
    hidden = model.init_hidden(args.eval_batch_size)
    for source, target in corpus.iter(split, args.eval_batch_size, args.bptt, use_cuda=args.cuda):
        model.softmax.set_target(target.data.view(-1))
        output, hidden = model(source, hidden)
        total_loss += criterion(output, target.view(-1)).data.sum()
        hidden = repackage_hidden(hidden)
        nbatches += 1
    return total_loss / nbatches
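
evaluate returns the average cross-entropy per batch, and the perplexities logged during training are math.exp of that value. A minimal final test pass under the same assumptions, where a 'test' split is presumed to exist in the corpus:

# Hypothetical final evaluation on the test split.
test_loss = evaluate('test')
print('| end of training | test loss {:5.2f} | test ppl {:8.2f}'.format(
    test_loss, math.exp(test_loss)))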