def validate(args, model: CharRNN, criterion, char_to_id, pbar=False):
    """Score ``model`` on the validation corpus one character at a time.

    The text at ``args.valid_corpus`` is walked in windows of 4096
    characters. The hidden state is carried over from one window to the
    next. Per-character losses are summed directly and also handed to
    ``word_loss`` to obtain the word-level losses.

    Args:
        args: Object providing ``valid_corpus`` (path to the text file) and
            ``valid_chars`` (upper bound on window start offsets; a falsy
            value means "use the corpus length").
        model: Network to evaluate; it is switched to eval mode.
        criterion: Callable applied to ``(output, target)`` for each step.
        char_to_id: Mapping passed through to ``char_tensor``.
        pbar: When true, iterate the windows with ``tqdm.trange``.

    Returns:
        Dict with ``valid_loss`` (mean per-character loss) and
        ``valid_word_perplexity`` (exp of the mean word loss).
    """
    model.eval()
    corpus = Path(args.valid_corpus).read_text(encoding='utf8')
    batch_size = 1
    window_size = 4096
    hidden = model.init_hidden(batch_size)

    total_loss = 0
    n_chars = 0
    total_word_loss = 0
    n_words = 0

    make_range = tqdm.trange if pbar else range
    # The last usable start offset leaves at least one character as a target.
    stop = min(args.valid_chars or len(corpus), len(corpus) - 1)

    for start in make_range(0, stop, window_size):
        # One extra character so inputs and targets are offset by one.
        chunk = corpus[start:start + window_size + 1]
        input_ids = char_tensor(chunk[:-1], char_to_id).unsqueeze(0)
        inputs = variable(input_ids, volatile=True)
        target_ids = char_tensor(chunk[1:], char_to_id).unsqueeze(0)
        targets = variable(target_ids)

        step_losses = []
        for pos in range(inputs.size(1)):
            output, hidden = model(inputs[:, pos], hidden)
            step_loss = criterion(output.view(batch_size, -1), targets[:, pos])
            step_losses.append(step_loss.data[0])
        n_chars += len(step_losses)
        total_loss += np.sum(step_losses)

        per_word = word_loss(chunk, step_losses)
        total_word_loss += np.sum(per_word)
        n_words += len(per_word)

    mean_loss = total_loss / n_chars
    mean_word_perplexity = np.exp(total_word_loss / n_words)
    summary = 'Validation loss: {:.3}, word perplexity: {:.1f}'
    print(summary.format(mean_loss, mean_word_perplexity))
    return {
        'valid_loss': mean_loss,
        'valid_word_perplexity': mean_word_perplexity,
    }
def sample(model: CharRNN, char2int: dict, prime='The', num_chars=1000, top_k=5):
    """Generate text from ``model``, seeded with the string ``prime``.

    The prime characters are fed through the network first to warm up the
    hidden state. The prediction made after the last prime character
    becomes the first generated character, and ``num_chars`` further
    characters are then generated, each from the previous one.

    Args:
        model: Network to sample from; it is switched to eval mode.
        char2int: Mapping from character to integer id.
        prime: Non-empty seed string; every character must be a key of
            ``char2int``.
        num_chars: Number of generation steps run after priming.
        top_k: Passed through to ``predict``.

    Returns:
        The prime string followed by the generated characters.

    Raises:
        ValueError: If ``prime`` is empty, since there is then nothing to
            condition the first prediction on.
        KeyError: If ``prime`` contains a character missing from
            ``char2int``.
    """
    device = next(model.parameters()).device.type
    int2char = {ii: ch for ch, ii in char2int.items()}

    # Evaluation mode: the model uses dropout, which must be off here.
    model.eval()

    prime_ids = [char2int[ch] for ch in prime]
    if not prime_ids:
        raise ValueError("prime must contain at least one character")

    # Warm up the hidden state on a fixed snapshot of the prime ids. The
    # output list is built separately so priming can never extend the
    # sequence it is iterating over.
    h = model.init_hidden(1, device)
    for ch in prime_ids:
        char, h = predict(model, ch, h, top_k, device)

    chars = list(prime_ids)
    chars.append(char)

    # Feed the previous character back in to get the next one.
    for _ in range(num_chars):
        char, h = predict(model, chars[-1], h, top_k, device)
        chars.append(char)

    return ''.join(int2char[c] for c in chars)
def main(
        representation,
        train=None,
        generate=None,
        temperature=DEFAULT_TEMPERATURE,
        max_generate_len=DEFAULT_MAX_GEN_LEN,
        generator_prime_str=FILE_START,
        window_size=DEFAULT_WINDOW_SIZE,
        batch_size=DEFAULT_BATCH_SIZE,
        disable_cuda=DEFAULT_DISABLE_CUDA,
        learning_rate=DEFAULT_LEARNING_RATE,
        num_epochs=DEFAULT_NUM_EPOCHS,
        patience=DEFAULT_PATIENCE,
        recurrent_type=DEFAULT_RECURRENT_TYPE,
        hidden_size=DEFAULT_RECURRENT_HIDDEN_SIZE,
        recurrent_layers=DEFAULT_RECURRENT_LAYERS,
        recurrent_dropout=DEFAULT_RECURRENT_DROPOUT,
        print_every_iter=DEFAULT_PRINT_EVERY_ITER,
        log_level=DEFAULT_LOG_LEVEL,
):
    """Build the character model, then either train it or generate from it.

    Only ``representation == "char"`` is handled here; any other value
    sets up logging and returns without doing further work. When ``train``
    is truthy the model is trained with ``train_full``. Otherwise, when
    ``generate`` is truthy, the saved weights are loaded and
    ``generate_charseq`` is run.

    Args:
        representation: Data representation selector (``"char"``).
        train: Truthy to train the model.
        generate: Truthy to generate; only consulted when ``train`` is falsy.
        temperature, max_generate_len, generator_prime_str: Forwarded to
            ``generate_charseq``.
        window_size: Forwarded as ``max_window_size`` to training and
            generation. ``None`` together with ``batch_size != 1`` triggers
            a warning.
        batch_size, learning_rate, num_epochs, patience, print_every_iter:
            Forwarded to ``train_full``.
        disable_cuda: Truthy to force CPU even when CUDA is available.
        recurrent_type, hidden_size, recurrent_layers, recurrent_dropout:
            Forwarded to the ``CharRNN`` constructor.
        log_level: Level applied to the module logger.

    Raises:
        FileNotFoundError: In generate mode, if no saved weights exist at
            the model's state-dict path.
    """
    # https://github.com/pytorch/pytorch/issues/13775
    torch.multiprocessing.set_start_method("spawn")

    logger.addHandler(logging.StreamHandler(sys.stdout))
    logger.setLevel(log_level)

    use_cuda = torch.cuda.is_available()
    if disable_cuda:
        use_cuda = False

    if representation == "char":
        # Create the neural network structure
        logger.info("Constructing the neural network architecture...")
        n_chars = len(CHARACTERS)
        model = CharRNN(n_chars, n_chars,
                        hidden_size=hidden_size,
                        recurrent_type=recurrent_type,
                        recurrent_layers=recurrent_layers,
                        recurrent_dropout=recurrent_dropout,
                        use_cuda=use_cuda)
        if use_cuda:
            model.cuda()

        if train:
            # Without a window size, sequences in a batch may differ in
            # length, so warn unless batches hold a single sequence.
            # Compare by value: `is not 1` tests object identity.
            if window_size is None and batch_size != 1:
                logger.warning("~" * 40)
                logger.warning(
                    "WARN: Undefined window_size with batch_size: {}".format(
                        batch_size))
                logger.warning(
                    "\tBatches may not have equal sequence lengths!")
                logger.warning(
                    "\tWindow size should be defined when batch_size > 1.")
                logger.warning("~" * 40)

            # Train our model
            train_full(model,
                       max_window_size=window_size,
                       learning_rate=learning_rate,
                       n_epochs=num_epochs,
                       patience_threshold=patience,
                       batch_size=batch_size,
                       print_every=print_every_iter,
                       use_cuda=use_cuda)
        elif generate:
            # NOTE(review): the result is unused here; the call is kept in
            # case get_progress_path() has side effects -- confirm and drop.
            progress_path = model.get_progress_path()

            # Load our model
            logger.info("Loading the model weights...")
            path = model.get_state_dict_path()
            if not os.path.isfile(path):
                raise FileNotFoundError(
                    ("Model does not exist at {}. " +
                     "Manual model renaming required.").format(path))

            # Remap storages to CPU when CUDA is not in use, so weights
            # saved from a GPU run still load on a CPU-only run.
            # NOTE: torch.load unpickles the file; only load trusted
            # checkpoints.
            state_dict = torch.load(
                path, map_location=None if use_cuda else "cpu")
            model.load_state_dict(state_dict)
            model = model.eval()

            generate_charseq(model,
                             prime_str=generator_prime_str,
                             max_window_size=window_size,
                             max_generate_len=max_generate_len,
                             temperature=temperature)