Example #1
import numpy as np

# (Snippet assumes torch, `model`, `train_set`, `args`, `max_sent_length`,
# `w2v_map`, `label_to_ix`, and a `data` module are defined in the enclosing script.)
iteration = 0  # step counter; renamed from `iter`, which shadows the builtin
header = '  Time Epoch Iteration     Loss   Train/Acc.   Val/Acc.'
print(header)
# Splitting the field specs on ',' and re-joining with ' ' turns the
# comma-separated format string into a space-separated one.
log_template = ' '.join('{:>6.0f},{:>5.0f},{:>9.0f},{:>9.6f}'.split(','))
dev_log_template = ' '.join(
    '{:>6.0f},{:>5.0f},{:>9.0f},{:>9.6f},{:9.6f},{:11.6f}'.split(','))

model.train()
for epoch in range(args.epochs):
    # shuffle the dataset and create batches (truncate the last batch if not of equal size)
    shuffled_indices = np.random.permutation(len(train_set))
    num_batches = len(shuffled_indices) // args.batch_size
    # np.split at each multiple of batch_size yields equal-size batches plus a
    # possibly smaller final one, which the loop below skips (the truncation
    # mentioned above).
    batch_indices = np.split(
        shuffled_indices,
        range(args.batch_size, len(shuffled_indices), args.batch_size))
    model.hidden = model.init_hidden()
    for batch_ix in range(num_batches):
        iteration += 1
        batch = train_set[batch_indices[batch_ix]]
        inputs, targets = data.create_tensorized_batch(batch, max_sent_length,
                                                       w2v_map, label_to_ix)
        # print("inputs size: {}".format(inputs.size()))
        # print("targets size: {}".format(targets.size()))

        # clear out gradients and hidden states of the model
        model.zero_grad()
        model.hidden = repackage_hidden(model.hidden)

        # prepare inputs for LSTM model and run forward pass
        scores = model(inputs)
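
The snippet above calls a repackage_hidden helper that is not shown. A minimal
sketch, following the standard PyTorch idiom of detaching the hidden state so
backpropagation stops at the batch boundary:

import torch

def repackage_hidden(h):
    # Detach hidden states from the graph of the previous batch; handles both
    # plain tensors (GRU/RNN) and tuples of tensors (LSTM's (h, c) pair).
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)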
Example #2
    # (Snippet from inside a training function; assumes module-level imports:
    # torch, torch.nn as nn, torch.optim as optim.)
    print('Character to id:\n', char2id)
    print("Number of distinct boys' names:\n", len(boy_names))

    model = BiLSTM(input_size=len(char2id), hidden_size=args.hidden_size, output_size=len(char2id))
    loss_func = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    batch_size = args.batch_size

    avg_loss = 0
    for epoch in range(args.num_epoch):
        batches = yield_batch(boy_names, batch_size, char2id)
        for i, (input_, target_, seq_lengths) in enumerate(batches):
            loss = 0
            seq_len, batch_size_ = input_.shape  # input_ is (seq_len, batch)
            hidden = model.init_hidden(batch_size_)
            model.zero_grad()

            for x in range(seq_len):
                output, hidden = model(input_[x, :], hidden)
                loss += loss_func(output, target_[x, :])
            loss.backward()
            optimizer.step()
            avg_loss += loss.item() / seq_len  # accumulate mean per-character loss
            if i % 100 == 0:
                print('epoch: {}, batch: {}, loss: {}'.format(epoch, i, loss.item()))

    # Note: this pickles the whole model object; saving model.state_dict() is
    # the more portable PyTorch idiom.
    torch.save(model, './name.pt')
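
The yield_batch helper and the BiLSTM class are not shown. A hypothetical
sketch of yield_batch under the assumptions the loop implies (tensors shaped
(seq_len, batch), targets shifted left by one character for next-character
prediction); the exact padding and ordering in the original may differ:

import torch

def yield_batch(names, batch_size, char2id, pad_id=0):
    # Hypothetical helper: converts a list of names into padded id tensors.
    for start in range(0, len(names), batch_size):
        chunk = names[start:start + batch_size]
        ids = [[char2id[c] for c in name] for name in chunk]
        lengths = [len(seq) for seq in ids]
        max_len = max(lengths)
        # Pad to (batch, max_len + 1), then transpose to (seq_len, batch).
        padded = torch.full((len(ids), max_len + 1), pad_id, dtype=torch.long)
        for row, seq in enumerate(ids):
            padded[row, :len(seq)] = torch.tensor(seq, dtype=torch.long)
        input_ = padded[:, :-1].t().contiguous()   # all but the last position
        target_ = padded[:, 1:].t().contiguous()   # input shifted by one step
        yield input_, target_, lengths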