def sample(model: CharRNN, char2int: dict, prime='The', num_chars=1000, top_k=5):
    """ Given a network and a char2int map, generate ``num_chars`` characters.

    The hidden state is first warmed up on the ``prime`` string; afterwards the
    last generated character is fed back into the model repeatedly.

    :param model: trained CharRNN; inference runs on its parameters' device
    :param char2int: mapping from character to integer id
    :param prime: non-empty seed text used to initialise the hidden state
    :param num_chars: number of characters to generate after the prime
    :param top_k: sample each next character from the k most likely candidates
    :return: the prime text followed by the generated characters
    """
    device = next(model.parameters()).device.type
    int2char = {ii: ch for ch, ii in char2int.items()}

    # set our model to evaluation mode, we use dropout after all
    model.eval()

    # First off, run through the prime characters to warm up the hidden state.
    # BUGFIX: the original appended to ``chars`` inside ``for ch in chars``,
    # which grows the list being iterated — an endless loop — and would also
    # have recorded one prediction per prime character.  Only the prediction
    # produced after the *last* prime character is kept.
    chars = [char2int[ch] for ch in prime]
    h = model.init_hidden(1, device)
    for ch in chars[:]:  # iterate a snapshot; ``chars`` is extended below
        char, h = predict(model, ch, h, top_k, device)
    chars.append(char)

    # Now pass in the previous character and get a new one
    for _ in range(num_chars):
        char, h = predict(model, chars[-1], h, top_k, device)
        chars.append(char)

    return ''.join(int2char[c] for c in chars)
def validate(args, model: CharRNN, criterion, char_to_id, pbar=False):
    """Evaluate ``model`` on the validation corpus.

    Slides a fixed-size window over the corpus text, feeds it one character
    at a time, and accumulates per-character cross-entropy; word-level losses
    are then derived from the character losses via ``word_loss``.

    :param args: namespace providing ``valid_corpus`` (path to the text) and
        ``valid_chars`` (optional cap on how many characters to evaluate)
    :param model: the character RNN under evaluation
    :param criterion: per-step loss function
    :param char_to_id: mapping from character to integer id
    :param pbar: show a tqdm progress bar when True
    :return: dict with the mean character loss and the word perplexity
    """
    model.eval()
    corpus = Path(args.valid_corpus).read_text(encoding='utf8')
    batch_size, window = 1, 4096
    # hidden state is threaded across windows so context is never reset
    hidden = model.init_hidden(batch_size)
    char_loss_sum, char_count = 0, 0
    word_loss_sum, word_count = 0, 0
    make_range = tqdm.trange if pbar else range
    limit = min(args.valid_chars or len(corpus), len(corpus) - 1)
    for start in make_range(0, limit, window):
        # one extra character so inputs/targets are shifted by one
        chunk = corpus[start:start + window + 1]
        inputs = variable(char_tensor(chunk[:-1], char_to_id).unsqueeze(0),
                          volatile=True)
        targets = variable(char_tensor(chunk[1:], char_to_id).unsqueeze(0))
        step_losses = []
        for step in range(inputs.size(1)):
            output, hidden = model(inputs[:, step], hidden)
            step_loss = criterion(output.view(batch_size, -1), targets[:, step])
            step_losses.append(step_loss.data[0])
            char_count += 1
        char_loss_sum += np.sum(step_losses)
        per_word = word_loss(chunk, step_losses)
        word_loss_sum += np.sum(per_word)
        word_count += len(per_word)
    mean_loss = char_loss_sum / char_count
    mean_word_perplexity = np.exp(word_loss_sum / word_count)
    print('Validation loss: {:.3}, word perplexity: {:.1f}'.format(
        mean_loss, mean_word_perplexity))
    return {
        'valid_loss': mean_loss,
        'valid_word_perplexity': mean_word_perplexity,
    }
def train_model(model: CharRNN, criterion, optimizer, inputs: Variable,
                targets: Variable) -> float:
    """Run a single optimisation step over one batch of character windows.

    The window is consumed one character at a time while the hidden state is
    carried forward; the per-step losses are summed, backpropagated once over
    the full window, and the optimizer is applied.

    :param inputs: (batch, window) tensor of input character ids
    :param targets: (batch, window) tensor of next-character ids
    :return: mean per-character loss for this batch
    """
    batch_size, window_size = inputs.size(0), inputs.size(1)
    hidden = cuda(model.init_hidden(batch_size))
    model.zero_grad()
    total = 0
    for step in range(window_size):
        output, hidden = model(inputs[:, step], hidden)
        total = total + criterion(output.view(batch_size, -1), targets[:, step])
    total.backward()
    optimizer.step()
    return total.data[0] / window_size
validation_data = CharacterDataset(validation_text, vocabulary, batch_size=batch_size, seq_length=seq_length, device=device) # and make our data loaders # batch size is exactly 1 character by default, which is exactly what we need train_loader = DataLoader(train_data) validation_loader = DataLoader(validation_data) # Part 3: modelling # we create our model model = CharRNN(num_chars).to(device) # and the initial hidden state (a tensor of zeros) initial_state = model.init_hidden(batch_size, device) # we evaluate the capability of our model # a character to parameter ratio approaching 1 is optimal # too many parameters and the model may overfit # too few and the model may underfit char_param_ratio = len(text) / count_parameters(model) print("Character to model parameter ratio: %f\n" % char_param_ratio) # Part 4: training train(model, initial_state, train_loader=train_loader, validation_loader=validation_loader, epochs=100)