def predict(self, char, h=None, cuda=False, top_k=None):
    ''' Given a character, predict the next character.
        Returns the predicted character and the hidden state.
    '''
    if cuda:
        self.cuda()
    else:
        self.cpu()

    if h is None:
        h = self.init_hidden(1)

    # One-hot encode the input character and make it a Torch tensor
    x = np.array([[self.char2int[char]]])
    x = one_hot_encode(x, len(self.chars))
    inputs = torch.from_numpy(x)
    if cuda:
        inputs = inputs.cuda()

    # Detach the hidden state from its history and run the forward pass
    # without tracking gradients, since this is inference only
    h = tuple([each.data for each in h])
    with torch.no_grad():
        out, h = self.forward(inputs, h)

    # Turn the network output into a probability distribution over characters
    p = F.softmax(out, dim=1).data
    if cuda:
        p = p.cpu()

    # Sample from the whole vocabulary, or only from the top_k most likely characters
    if top_k is None:
        top_ch = np.arange(len(self.chars))
    else:
        p, top_ch = p.topk(top_k)
        top_ch = top_ch.numpy().squeeze()

    p = p.numpy().squeeze()
    char = np.random.choice(top_ch, p=p / p.sum())

    return self.int2char[char], h
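
# For context, a sampling routine built on top of `predict` might look like the
# sketch below. It is a minimal illustration, not part of the original code:
# the name `sample` and the `prime`/`size` arguments are hypothetical, and it
# assumes a network instance exposing `predict` and `init_hidden` as above.
def sample(net, size, prime='The', top_k=None, cuda=False):
    ''' Generate `size` characters from the network, seeded with the `prime` string. '''
    net.eval()

    # Run the priming characters through the network to build up a hidden state
    chars = [ch for ch in prime]
    h = net.init_hidden(1)
    for ch in prime:
        char, h = net.predict(ch, h, cuda=cuda, top_k=top_k)
    chars.append(char)

    # Feed the last predicted character back in to generate the next one
    for _ in range(size):
        char, h = net.predict(chars[-1], h, cuda=cuda, top_k=top_k)
        chars.append(char)

    return ''.join(chars)
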
def train(net, data, epochs=10, n_seqs=10, n_steps=50, lr=0.001, clip=5,
          val_frac=0.1, cuda=False, print_every=10):
    ''' Train a network

        Arguments
        ---------
        net: CharRNN network
        data: text data to train the network on
        epochs: Number of epochs to train for
        n_seqs: Number of mini-sequences per mini-batch, aka batch size
        n_steps: Number of character steps per mini-batch
        lr: Learning rate
        clip: Gradient clipping threshold
        val_frac: Fraction of data to hold out for validation
        cuda: Train with CUDA on a GPU
        print_every: Number of steps between printing the training and validation loss
    '''
    net.train()
    opt = torch.optim.Adam(net.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    # Create training and validation data
    val_idx = int(len(data) * (1 - val_frac))
    data, val_data = data[:val_idx], data[val_idx:]

    if cuda:
        net.cuda()

    counter = 0
    n_chars = len(net.chars)
    for e in range(epochs):
        h = net.init_hidden(n_seqs)
        for x, y in get_batches(data, n_seqs, n_steps):
            counter += 1

            # One-hot encode our data and make them Torch tensors
            x = one_hot_encode(x, n_chars)
            inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
            if cuda:
                inputs, targets = inputs.cuda(), targets.cuda()

            # Detach the hidden state from its history, otherwise we'd
            # backprop through the entire training history
            h = tuple([each.data for each in h])

            net.zero_grad()
            output, h = net.forward(inputs, h)
            loss = criterion(output, targets.view(n_seqs * n_steps))
            loss.backward()

            # `clip_grad_norm_` helps prevent the exploding gradient problem
            # in RNNs / LSTMs
            nn.utils.clip_grad_norm_(net.parameters(), clip)
            opt.step()

            if counter % print_every == 0:
                # Get validation loss
                val_h = net.init_hidden(n_seqs)
                val_losses = []
                net.eval()
                with torch.no_grad():
                    for x, y in get_batches(val_data, n_seqs, n_steps):
                        # One-hot encode our data and make them Torch tensors
                        x = one_hot_encode(x, n_chars)
                        inputs, targets = torch.from_numpy(x), torch.from_numpy(y)
                        if cuda:
                            inputs, targets = inputs.cuda(), targets.cuda()

                        # Detach the hidden state from its history
                        val_h = tuple([each.data for each in val_h])

                        output, val_h = net.forward(inputs, val_h)
                        val_loss = criterion(output, targets.view(n_seqs * n_steps))
                        val_losses.append(val_loss.item())
                net.train()

                print("Epoch: {}/{}...".format(e + 1, epochs),
                      "Step: {}...".format(counter),
                      "Loss: {:.4f}...".format(loss.item()),
                      "Val Loss: {:.4f}".format(np.mean(val_losses)))

    return np.mean(val_losses)
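
# A hedged end-to-end usage sketch, not part of the original code. The file
# path, the hyperparameter values, and the CharRNN constructor signature
# (n_hidden, n_layers) are assumptions inferred from how `train` uses the
# network; it also assumes the imports and helpers (np, torch, one_hot_encode,
# get_batches, CharRNN) defined elsewhere in this file.
if __name__ == '__main__':
    with open('data/input.txt', 'r') as f:  # hypothetical path
        text = f.read()

    # Build the character vocabulary and encode the text as integers
    chars = tuple(set(text))
    int2char = dict(enumerate(chars))
    char2int = {ch: ii for ii, ch in int2char.items()}
    encoded = np.array([char2int[ch] for ch in text])

    # The constructor arguments below are a plausible guess, not taken from this code
    net = CharRNN(chars, n_hidden=512, n_layers=2)

    train(net, encoded, epochs=25, n_seqs=128, n_steps=100, lr=0.001,
          cuda=torch.cuda.is_available(), print_every=10)

    # Generate some text with the sampling sketch above
    print(sample(net, 500, prime='The', top_k=5, cuda=torch.cuda.is_available()))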