Example #1
def sample(model: CharRNN,
           char2int: dict,
           prime='The',
           num_chars=1000,
           top_k=5):
    """
    Given a network and a char2int map, predict the next 1000 characters
    """

    # run sampling on whatever device the model parameters live on
    device = next(model.parameters()).device

    int2char = {ii: ch for ch, ii in char2int.items()}

    # put the model in evaluation mode so dropout is disabled
    model.eval()

    # First off, run through the prime characters
    chars = [char2int[ch] for ch in prime]
    h = model.init_hidden(1, device)
    for ch in chars:
        char, h = predict(model, ch, h, top_k, device)

    chars.append(char)

    # Now pass in the previous character and get a new one
    for ii in range(num_chars):
        char, h = predict(model, chars[-1], h, top_k, device)
        chars.append(char)

    return ''.join(int2char[c] for c in chars)
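
The sample function above relies on a predict helper that is not shown. A minimal sketch of such a helper, assuming the model emits logits over the vocabulary and that predict performs top-k sampling (the name and argument order come from the call site; the body is an assumption):

import numpy as np
import torch
import torch.nn.functional as F

@torch.no_grad()
def predict(model, char, h, top_k, device):
    # feed one encoded character and sample the next from the top k choices
    x = torch.tensor([[char]], device=device)          # shape (1, 1)
    out, h = model(x, h)
    p = F.softmax(out.squeeze(), dim=-1)               # distribution over vocab
    p, idx = p.topk(top_k)
    p, idx = p.cpu().numpy(), idx.cpu().numpy()
    next_char = np.random.choice(idx, p=p / p.sum())   # renormalize the top k
    return int(next_char), h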
Example #2
@torch.no_grad()
def validate(args, model: CharRNN, criterion, char_to_id, pbar=False):
    model.eval()
    valid_corpus = Path(args.valid_corpus).read_text(encoding='utf8')
    batch_size = 1
    window_size = 4096
    hidden = model.init_hidden(batch_size)
    total_loss = n_chars = 0
    total_word_loss = n_words = 0
    r = tqdm.trange if pbar else range
    for idx in r(
            0, min(args.valid_chars or len(valid_corpus),
                   len(valid_corpus) - 1), window_size):
        chunk = valid_corpus[idx:idx + window_size + 1]
        inputs = char_tensor(chunk[:-1], char_to_id).unsqueeze(0)
        targets = char_tensor(chunk[1:], char_to_id).unsqueeze(0)
        losses = []
        for c in range(inputs.size(1)):
            output, hidden = model(inputs[:, c], hidden)
            loss = criterion(output.view(batch_size, -1), targets[:, c])
            losses.append(loss.item())
            n_chars += 1
        total_loss += np.sum(losses)
        word_losses = word_loss(chunk, losses)
        total_word_loss += np.sum(word_losses)
        n_words += len(word_losses)
    mean_loss = total_loss / n_chars
    mean_word_perplexity = np.exp(total_word_loss / n_words)
    print('Validation loss: {:.3}, word perplexity: {:.1f}'.format(
        mean_loss, mean_word_perplexity))
    return {
        'valid_loss': mean_loss,
        'valid_word_perplexity': mean_word_perplexity
    }
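
validate calls a word_loss helper that is not included in the example; from the surrounding code it apparently groups per-character losses into per-word totals so that exp(mean word loss) yields a word-level perplexity. A minimal sketch under that assumption (the real implementation may tokenize differently):

def word_loss(chunk, losses):
    # losses[i] is the loss of predicting chunk[i + 1] given chunk[i];
    # sum the losses of the characters in each whitespace-delimited word
    word_losses = []
    current, in_word = 0.0, False
    for ch, loss in zip(chunk[1:], losses):
        if ch.isspace():
            if in_word:
                word_losses.append(current)
            current, in_word = 0.0, False
        else:
            current += loss
            in_word = True
    if in_word:
        word_losses.append(current)
    return word_losses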
Example #3
def train_model(model: CharRNN, criterion, optimizer, inputs: torch.Tensor,
                targets: torch.Tensor) -> float:
    batch_size = inputs.size(0)
    window_size = inputs.size(1)
    hidden = cuda(model.init_hidden(batch_size))
    model.zero_grad()
    loss = 0
    for c in range(window_size):
        output, hidden = model(inputs[:, c], hidden)
        loss += criterion(output.view(batch_size, -1), targets[:, c])
    loss.backward()
    optimizer.step()
    return loss.item() / window_size
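
A typical call site for train_model might look like the following; vocab_size, num_epochs and training_batches are illustrative placeholders, not names from the original project, and cuda is the same helper the example uses to move tensors to the GPU:

import torch
import torch.nn as nn

model = cuda(CharRNN(vocab_size))
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

for epoch in range(num_epochs):
    for inputs, targets in training_batches:   # each of shape (batch, window)
        mean_loss = train_model(model, criterion, optimizer, inputs, targets)
    print('epoch {}: mean loss {:.3f}'.format(epoch, mean_loss))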
Example #4
    validation_data = CharacterDataset(validation_text,
                                       vocabulary,
                                       batch_size=batch_size,
                                       seq_length=seq_length,
                                       device=device)

    # and make our data loaders
    # DataLoader's default batch_size of 1 is exactly what we need: each
    # CharacterDataset item is already a full (batch_size x seq_length) batch
    train_loader = DataLoader(train_data)
    validation_loader = DataLoader(validation_data)

    # Part 3: modelling
    # we create our model
    model = CharRNN(num_chars).to(device)
    # and the initial hidden state (a tensor of zeros)
    initial_state = model.init_hidden(batch_size, device)

    # sanity-check the model's capacity:
    # a character-to-parameter ratio approaching 1 is a reasonable target;
    # with many more parameters than characters the model may overfit,
    # with many fewer it may underfit
    char_param_ratio = len(text) / count_parameters(model)
    print("Character to model parameter ratio: %f\n" % char_param_ratio)

    # Part 4: training
    train(model,
          initial_state,
          train_loader=train_loader,
          validation_loader=validation_loader,
          epochs=100)
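
count_parameters is not defined in this snippet; the conventional implementation, assumed here, counts only trainable parameters:

def count_parameters(model):
    # count only the parameters the optimizer will actually update
    return sum(p.numel() for p in model.parameters() if p.requires_grad)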