import time

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.autograd import Variable


def generate(model, vocab, cuda):
    """Sample word sequences from a trained language model."""
    samples = []
    max_len = 50
    inverted_vocab = invert_vocab(vocab)
    device = torch.device("cuda" if cuda else "cpu")
    for _ in range(1):  # generate a single sample for now
        sample = []
        i = 0
        hidden = model.initHidden(1, device)
        # Prime the model with the start-of-sequence token.
        input_tensor = encode_word('<start>', vocab)
        if cuda:
            input_tensor = input_tensor.cuda()
            hidden = (hidden[0].cuda(), hidden[1].cuda())
        outputs, hidden = model(input_tensor, hidden)
        # Seed the sample with a random word rather than the model's first
        # prediction, so repeated calls don't all start identically.
        word, input_tensor = random_word(vocab)
        sample.append(word)
        while i < max_len:
            if cuda:
                input_tensor = input_tensor.cuda()
                hidden = (hidden[0].cuda(), hidden[1].cuda())
            outputs, hidden = model(input_tensor, hidden)
            word = decode_word(outputs[0], inverted_vocab)
            sample.append(word)
            # Feed the predicted word back in as the next input.
            input_tensor = encode_word(word, vocab)
            i += 1
        samples.append(sample)
    return [' '.join(sample) for sample in samples]
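
# generate() relies on invert_vocab, encode_word, decode_word and random_word,
# which this file does not define. The versions below are a minimal sketch of
# what they might look like, assuming vocab maps word -> index and the model
# emits scores over the vocabulary; they are illustrative stand-ins, not the
# original implementations.
import random


def invert_vocab(vocab):
    # Build an index -> word map from a word -> index map.
    return {index: word for word, index in vocab.items()}


def encode_word(word, vocab):
    # A (1, 1) LongTensor holding the word's vocabulary index,
    # shaped as a single-step, single-example batch.
    return torch.LongTensor([[vocab[word]]])


def decode_word(output, inverted_vocab):
    # Greedy decode: pick the highest-scoring vocabulary entry
    # from one step of model output.
    _, top_index = output.topk(1)
    return inverted_vocab[int(top_index[0])]


def random_word(vocab):
    # Draw a uniformly random word to seed a sample.
    word = random.choice(list(vocab.keys()))
    return word, encode_word(word, vocab)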
def evaluate(model, data, criterion, bsz):
    loss = 0
    hidden = model.initHidden(bsz)
    # Loop over validation data.
    for i in range(0, data.size(0) - 1, bptt):
        seq_len = min(bptt, data.size(0) - 1 - i)
        output, hidden = model(
            Variable(data[i:i + seq_len], requires_grad=False), hidden)
        targets = data[i + 1:i + seq_len + 1].view(-1)
        # Weight each chunk by its actual length (the last chunk can be
        # shorter than bptt), so the running total averages correctly.
        loss += seq_len * criterion(output.view(seq_len * bsz, -1),
                                    Variable(targets, requires_grad=False)).data
        # Detach the hidden state so evaluation doesn't build a graph
        # spanning the whole validation set.
        hidden = repackageHidden(hidden)
    return loss[0] / data.size(0)
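
# evaluate() and the training loop below assume the token stream has already
# been laid out as a (num_steps, bsz) matrix, with one independent sequence
# per column. The batchify sketch below mirrors the usual word-language-model
# recipe; the helper actually used here is not shown in this file.
def batchify(tokens, bsz):
    # Trim off any leftover tokens that don't fill a full column,
    # then reshape so each column is one contiguous slice of the corpus.
    nbatch = tokens.size(0) // bsz
    tokens = tokens.narrow(0, 0, nbatch * bsz)
    return tokens.view(bsz, -1).t().contiguous()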
# We want to carry the hidden state across batches,
# but don't want to backprop gradients across bptt intervals.
# So we have to rewrap the hidden state in a fresh Variable.
def repackageHidden(h):
    if type(h) == Variable:
        return Variable(h.data)
    else:
        return tuple(repackageHidden(v) for v in h)


# Loop over epochs.
prev_loss = None
for epoch in range(1, args.maxepoch + 1):
    # Start each epoch with a fresh hidden state and counters.
    hidden = model.initHidden(bsz)
    total_loss = 0
    model.zero_grad()
    start_time = epoch_start_time = time.time()
    ntokens = corpus.dic.ntokens()
    # Loop over the training data in chunks of bptt steps.
    for batch, i in enumerate(range(0, train.size(0) - 1, bptt)):
        seq_len = min(bptt, train.size(0) - 1 - i)
        output, hidden = model(
            Variable(train[i:i + seq_len], requires_grad=False), hidden)
        # Targets are the inputs shifted one step ahead, flattened.
        targets = train[i + 1:i + seq_len + 1].view(-1)
        loss = criterion(output.view(-1, ntokens),
                         Variable(targets, requires_grad=False))
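        # The original snippet stops at the loss computation. The continuation
        # below is a sketch following the standard word-language-model recipe,
        # not code from this file: backprop, clip gradients, take a plain SGD
        # step, and detach the hidden state between bptt chunks. It assumes a
        # learning rate is available as args.lr, alongside args.maxepoch.
        model.zero_grad()
        loss.backward()
        # Clip to a modest norm (0.25 is a common default) to tame
        # exploding gradients in the recurrent updates.
        torch.nn.utils.clip_grad_norm(model.parameters(), 0.25)
        for p in model.parameters():
            p.data.add_(-args.lr, p.grad.data)
        hidden = repackageHidden(hidden)
        total_loss += loss.data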
def train(model, train_loader):
    filename = "rnn_state.pt"
    # Resume from a checkpoint if one exists; otherwise start fresh.
    try:
        state = torch.load(filename)
        model.load_state_dict(state["state_dict"])
        # optimizer.load_state_dict(state["optimizer_dict"])
    except (IOError, RuntimeError):
        print("Could not load model file")
        state = {}
        state["train_loss_history"] = []
        state["test_loss_history"] = []
        state["epoch"] = 0

    criterion = nn.NLLLoss()
    lr = 0.005
    plot_every = 1000
    n_epoch = 50
    train_loss = 0.0
    count = 0

    while state["epoch"] < n_epoch:
        n_batch = len(train_loader)
        model.train()
        for i_batch, batch_data in enumerate(train_loader, 0):
            name_tensor = Variable(batch_data["name_tensor"])
            lang_tensor = Variable(batch_data["lang_tensor"])
            # Drop the DataLoader's batch dimension (batch size is 1).
            name_tensor = name_tensor.view(name_tensor.size()[1:])
            lang_tensor = lang_tensor.view(1)

            model.zero_grad()
            hidden = model.initHidden()
            # Feed the name through the RNN one letter at a time and
            # classify from the final output.
            n_letters = name_tensor.size()[0]
            for i in range(n_letters):
                output, hidden = model(name_tensor[i], hidden)
            loss = criterion(output, lang_tensor)
            loss.backward()
            train_loss += loss.data[0]

            # Manual SGD step on the raw gradients.
            for p in model.parameters():
                p.data.add_(-lr, p.grad.data)

            if count % plot_every == 0:
                train_loss_avg = train_loss / plot_every
                print("Epoch: %i/%i, Batch: %i/%i, Loss: %f, %s"
                      % (state["epoch"], n_epoch, i_batch, n_batch,
                         train_loss_avg, batch_data["lang"]))
                state["train_loss_history"].append(train_loss_avg)
                train_loss = 0.0
                # Live plot of the running loss curves.
                plt.cla()
                plt.plot(state["train_loss_history"])
                plt.plot(state["test_loss_history"])
                plt.draw()
                plt.pause(0.1)
            count += 1

        # Checkpoint at the end of every epoch.
        print("\nEpoch: %i/%i Saved!" % (state["epoch"], n_epoch))
        state["state_dict"] = model.state_dict()
        # state["optimizer_dict"] = optimizer.state_dict()
        state["epoch"] += 1
        torch.save(state, filename)
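
# train() assumes a step-wise character RNN exposing initHidden() and
# returning log-probabilities (to match nn.NLLLoss). The class below is a
# minimal sketch in the style of the classic char-RNN classification
# tutorial; the model definition actually used is not part of this file.
class CharRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharRNN, self).__init__()
        self.hidden_size = hidden_size
        self.i2h = nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(input_size + hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, letter, hidden):
        # One time step: combine the current letter with the running state,
        # then produce both the next state and a class distribution.
        combined = torch.cat((letter, hidden), 1)
        hidden = self.i2h(combined)
        output = self.softmax(self.i2o(combined))
        return output, hidden

    def initHidden(self):
        return Variable(torch.zeros(1, self.hidden_size))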