import logging
import os

import torch

from model import RNNModel  # model definition module; module name assumed
import data                 # Corpus/dictionary helpers; module name assumed

logger = logging.getLogger(__name__)


def model_fn(model_dir):
    logger.info('Loading the model.')
    # The hyperparameters needed to rebuild the network are saved separately.
    with open(os.path.join(model_dir, 'model_info.pth'), 'rb') as f:
        model_info = torch.load(f)
    logger.info('model_info: {}'.format(model_info))
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    logger.info('Current device: {}'.format(device))
    model = RNNModel(rnn_type=model_info['rnn_type'], ntoken=model_info['ntoken'],
                     ninp=model_info['ninp'], nhid=model_info['nhid'], nlayers=model_info['nlayers'],
                     dropout=model_info['dropout'], tie_weights=model_info['tie_weights'])
    with open(os.path.join(model_dir, 'model.pth'), 'rb') as f:
        model.load_state_dict(torch.load(f))
        # After loading, the RNN parameters are no longer a contiguous chunk
        # of memory; flattening them restores that and speeds up the forward pass.
        model.rnn.flatten_parameters()
    model.to(device).eval()
    logger.info('Loading the data.')
    corpus = data.Corpus(model_dir)
    logger.info('Done loading model and corpus. Corpus dictionary size: {}'.format(len(corpus.dictionary)))
    return {'model': model, 'corpus': corpus}
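The dict returned by model_fn is what the SageMaker PyTorch container passes back into the other serving hooks. As a rough illustration of how it might be consumed, here is a hypothetical single-step predict_fn; the init_hidden call and the word2idx/idx2word lookups assume the standard PyTorch word-language-model RNNModel and Corpus, and are not part of the original example.

def predict_fn(input_word, model_artifacts):
    # Unpack the dict returned by model_fn above.
    model = model_artifacts['model']
    corpus = model_artifacts['corpus']
    device = next(model.parameters()).device

    # Encode the seed word; word2idx is assumed from the standard Corpus.
    idx = corpus.dictionary.word2idx[input_word]
    inp = torch.tensor([[idx]], dtype=torch.long, device=device)  # shape: (seq_len=1, batch=1)
    hidden = model.init_hidden(1)  # init_hidden(bsz) assumed from the standard RNNModel

    with torch.no_grad():
        output, hidden = model(inp, hidden)

    # Greedily pick the most likely next token.
    next_idx = output.view(-1).argmax().item()
    return corpus.dictionary.idx2word[next_idx]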
    # Save the model if the validation loss is the best seen so far.
    if not best_val_loss or val_loss < best_val_loss:
        best_val_loss = val_loss
        best_state = {
            'epoch': epoch,
            'lr': lr,
            'val_loss': val_loss,
            'val_ppl': math.exp(val_loss),
        }
        print('Saving the best model: {}'.format(best_state))
        with open(checkpoint_path, 'wb') as f:
            torch.save(model.state_dict(), f)
        with open(checkpoint_state_path, 'w') as f:
            f.write('epoch {:3d} | lr: {:5.2f} | valid loss {:5.2f} | '
                    'valid ppl {:8.2f}'.format(epoch, lr, val_loss,
                                               math.exp(val_loss)))
    else:
        # Anneal the learning rate if no improvement has been seen in the validation dataset.
        lr /= 4.0

# Load the best saved model.
with open(checkpoint_path, 'rb') as f:
    model.load_state_dict(torch.load(f))
    # After loading, the RNN parameters are no longer a contiguous chunk
    # of memory; flattening them restores that and speeds up the forward pass.
    model.rnn.flatten_parameters()

# Run on test data.
test_loss = evaluate(test_data)
print('=' * 89)
print('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format(
    test_loss, math.exp(test_loss)))
print('=' * 89)

# Move the best model to the CPU and save it again.
with open(model_path, 'wb') as f:
    torch.save(model.cpu().state_dict(), f)
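evaluate(test_data) is called above but not defined in this fragment. A minimal sketch in the style of the upstream PyTorch word-language-model script, assuming its usual get_batch, repackage_hidden, criterion, bptt, and eval_batch_size:

def evaluate(data_source):
    # Switch to evaluation mode, which disables dropout.
    model.eval()
    total_loss = 0.0
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(eval_batch_size)
    with torch.no_grad():
        for i in range(0, data_source.size(0) - 1, bptt):
            data, targets = get_batch(data_source, i)
            output, hidden = model(data, hidden)
            # Detach the hidden state so graphs from previous batches are freed.
            hidden = repackage_hidden(hidden)
            total_loss += len(data) * criterion(output.view(-1, ntokens), targets).item()
    return total_loss / (len(data_source) - 1)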