Example #1
import torch


def generate(model, vocab, cuda, num_samples=1, max_len=50):
    """Sample word sequences from a trained RNN language model."""
    samples = []
    inverted_vocab = invert_vocab(vocab)
    device = torch.device("cuda" if cuda else "cpu")
    for _ in range(num_samples):
        sample = []
        # initHidden is expected to place the state on `device` already.
        hidden = model.initHidden(1, device)
        # Prime the network with the start-of-sequence token.
        input_tensor = encode_word('<start>', vocab).to(device)
        outputs, hidden = model(input_tensor, hidden)
        # Seed the sample with a random word rather than the model's own
        # first prediction, so repeated calls give varied samples.
        word, input_tensor = random_word(vocab)
        sample.append(word)
        for _ in range(max_len):
            outputs, hidden = model(input_tensor.to(device), hidden)
            word = decode_word(outputs[0], inverted_vocab)
            sample.append(word)
            input_tensor = encode_word(word, vocab)
        samples.append(sample)

    return [' '.join(sample) for sample in samples]
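generate relies on four helpers that are not shown: invert_vocab, encode_word, decode_word, and random_word. The sketch below is one plausible shape for them, assuming vocab maps word to index and the model emits a score per vocabulary entry; none of these definitions come from the original snippet.

import random
import torch

def invert_vocab(vocab):
    # index -> word, for turning model outputs back into words.
    return {idx: word for word, idx in vocab.items()}

def encode_word(word, vocab):
    # Shape (1, 1): one time step, batch size one.
    return torch.LongTensor([[vocab[word]]])

def decode_word(output, inverted_vocab):
    # Greedy decoding: pick the highest-scoring word.
    _, idx = output.max(dim=-1)
    return inverted_vocab[int(idx)]

def random_word(vocab):
    word = random.choice(list(vocab.keys()))
    return word, encode_word(word, vocab)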
Example #2
import torch
from torch.autograd import Variable


def evaluate(model, data, criterion, bsz):
    # Put the model in eval mode (disables dropout etc.).
    model.eval()
    total_loss = 0
    hidden = model.initHidden(bsz)
    # Loop over validation data in bptt-sized chunks (`bptt` is a global).
    for i in range(0, data.size(0) - 1, bptt):
        seq_len = min(bptt, data.size(0) - 1 - i)
        output, hidden = model(
            Variable(data[i:i + seq_len], requires_grad=False), hidden)
        targets = data[i + 1:i + seq_len + 1].view(-1)
        # Weight each chunk by its actual length: the final chunk may be
        # shorter than bptt, so multiplying by bptt would overweight it.
        total_loss += seq_len * criterion(
            output.view(seq_len * bsz, -1),
            Variable(targets, requires_grad=False)).data
        # Detach the hidden state so the graph doesn't grow across chunks.
        hidden = repackageHidden(hidden)

    return total_loss[0] / data.size(0)
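evaluate assumes data is a 2-D tensor of shape (num_steps, bsz), one token stream per column. A minimal sketch of how such a tensor is typically built from a flat sequence of token ids; batchify is an assumed helper name, not part of the original snippet.

import torch

def batchify(ids, bsz):
    # Drop tokens that don't divide evenly into bsz streams, then
    # reshape so each column is one contiguous stream of tokens.
    nbatch = ids.size(0) // bsz
    ids = ids.narrow(0, 0, nbatch * bsz)
    return ids.view(bsz, -1).t().contiguous()

# e.g. val_data = batchify(corpus.valid, bsz), then evaluate(model, val_data, criterion, bsz)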
Example #3
# We want to carry the hidden state across bptt intervals,
# but don't want to backprop gradients across them.
# So we have to rewrap the hidden state in a fresh Variable.
def repackageHidden(h):
    if isinstance(h, Variable):
        return Variable(h.data)
    else:
        return tuple(repackageHidden(v) for v in h)
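In PyTorch 0.4 and later, where Variable was merged into Tensor, the same cut in the autograd history is usually made with detach(); a sketch of the equivalent (the snake_case name is just a rename for the sketch):

import torch

def repackage_hidden(h):
    # Detach h from its history so backprop stops at the bptt boundary.
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)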


# Loop over epochs.
prev_loss = None
for epoch in range(1, args.maxepoch + 1):
    total_loss = 0
    start_time = epoch_start_time = time.time()
    # Start with a fresh hidden state at the top of each epoch.
    hidden = model.initHidden(bsz)
    ntokens = corpus.dic.ntokens()
    # Loop over the training data in bptt-sized chunks.
    for batch, i in enumerate(range(0, train.size(0) - 1, bptt)):
        seq_len = min(bptt, train.size(0) - 1 - i)
        # Detach the hidden state carried over from the previous chunk,
        # and clear gradients before each backward pass.
        hidden = repackageHidden(hidden)
        model.zero_grad()
        output, hidden = model(
            Variable(train[i:i + seq_len], requires_grad=False), hidden)
        targets = train[i + 1:i + seq_len + 1].view(-1)
        loss = criterion(output.view(-1, ntokens),
                         Variable(targets, requires_grad=False))
        loss.backward()
        # Clip gradients and apply a plain SGD update (`lr` and
        # `args.clip` are assumed to be defined elsewhere in the script).
        torch.nn.utils.clip_grad_norm(model.parameters(), args.clip)
        for p in model.parameters():
            p.data.add_(-lr, p.grad.data)
        total_loss += loss.data
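prev_loss is initialized but never used in the excerpt; in training scripts of this shape it typically drives learning-rate annealing at the end of each epoch. A hedged sketch of that pattern, assuming an evaluate function and a validation tensor val_data like those in Example #2 (the annealing factor is an assumption):

# At the end of each epoch, inside the epoch loop above:
val_loss = evaluate(model, val_data, criterion, bsz)
if prev_loss is not None and val_loss > prev_loss:
    # Validation loss got worse: anneal the learning rate.
    lr /= 4.0
prev_loss = val_loss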
Example #4
import torch
import torch.nn as nn
from torch.autograd import Variable
import matplotlib.pyplot as plt


def train(model, train_loader):

    filename = "rnn_state.pt"
    try:
        # Resume from a previous checkpoint if one exists.
        state = torch.load(filename)
        model.load_state_dict(state["state_dict"])
        # optimizer.load_state_dict(state["optimizer_dict"])
    except (IOError, KeyError):
        # No checkpoint (or an incompatible one): start from scratch.
        print("Could not load model file")
        state = {
            "train_loss_history": [],
            "test_loss_history": [],
            "epoch": 0,
        }

    criterion = nn.NLLLoss()
    lr = 0.005

    plot_every = 1000
    n_epoch = 50
    train_loss = 0.0
    count = 0
    while state["epoch"] < n_epoch:

        n_batch = len(train_loader)

        model.train()
        for i_batch, batch_data in enumerate(train_loader):
            name_tensor = Variable(batch_data["name_tensor"])
            lang_tensor = Variable(batch_data["lang_tensor"])

            # Strip the leading batch dimension added by the DataLoader
            # (batch size is 1).
            name_tensor = name_tensor.view(name_tensor.size()[1:])
            lang_tensor = lang_tensor.view(1)

            model.zero_grad()
            hidden = model.initHidden()
            # Feed the name one letter at a time; keep the last output.
            n_letters = name_tensor.size(0)
            for i in range(n_letters):
                output, hidden = model(name_tensor[i], hidden)

            loss = criterion(output, lang_tensor)
            loss.backward()

            train_loss += loss.data[0]

            # Manual SGD step: p <- p - lr * grad.
            for p in model.parameters():
                p.data.add_(-lr, p.grad.data)

            # Log a running average every plot_every batches; skip
            # count == 0, which would average one batch over plot_every.
            if count % plot_every == 0 and count > 0:
                train_loss_avg = train_loss / plot_every
                print("Epoch: %i/%i, Batch: %i/%i, Loss: %f, %s" %
                      (state["epoch"], n_epoch, i_batch, n_batch,
                       train_loss_avg, batch_data["lang"]))
                state["train_loss_history"].append(train_loss_avg)
                train_loss = 0.0
                plt.cla()
                plt.plot(state["train_loss_history"])
                plt.plot(state["test_loss_history"])
                plt.draw()
                plt.pause(0.1)

            count += 1

        print("\nEpoch: %i/%i Saved!" % (state["epoch"], n_epoch))
        state["state_dict"] = model.state_dict()
        # state["optimizer_dict"] = optimizer.state_dict()
        state["epoch"] += 1
        torch.save(state, filename)
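train assumes each batch is a dict with name_tensor, lang_tensor, and lang keys, delivered one sample at a time. The sketch below is one compatible Dataset, following the classic character-level classification setup; the class name, ALL_LETTERS, and the one-hot encoding are all assumptions, not taken from the original snippet.

import torch
from torch.utils.data import Dataset, DataLoader

ALL_LETTERS = "abcdefghijklmnopqrstuvwxyz"

class NameDataset(Dataset):
    def __init__(self, names, langs, lang_to_idx):
        self.samples = list(zip(names, langs))
        self.lang_to_idx = lang_to_idx

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        name, lang = self.samples[idx]
        # One-hot encode the name: shape (n_letters, 1, len(ALL_LETTERS)).
        tensor = torch.zeros(len(name), 1, len(ALL_LETTERS))
        for i, ch in enumerate(name):
            tensor[i][0][ALL_LETTERS.index(ch)] = 1
        return {"name_tensor": tensor,
                "lang_tensor": torch.LongTensor([self.lang_to_idx[lang]]),
                "lang": lang}

# e.g. loader = DataLoader(NameDataset(names, langs, lang_to_idx),
#                          batch_size=1, shuffle=True)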