import numpy as np

# NOTE: assumes `model`, `args`, `train_set`, `max_sent_length`, `w2v_map`,
# `label_to_ix`, the `data` module, and `repackage_hidden` are defined elsewhere.
iter = 0
header = '  Time Epoch Iteration     Loss Train/Acc.   Val/Acc.'
print(header)
log_template = ' '.join('{:>6.0f},{:>5.0f},{:>9.0f},{:>9.6f}'.split(','))
dev_log_template = ' '.join(
    '{:>6.0f},{:>5.0f},{:>9.0f},{:>9.6f},{:9.6f},{:11.6f}'.split(','))

model.train()
for epoch in range(args.epochs):
    # Shuffle the dataset and create batches. `num_batches` uses floor
    # division, so a trailing partial batch is silently dropped.
    shuffled_indices = np.random.permutation(len(train_set))
    num_batches = len(shuffled_indices) // args.batch_size
    batch_indices = np.split(
        shuffled_indices,
        range(args.batch_size, len(shuffled_indices), args.batch_size))
    model.hidden = model.init_hidden()

    for batch_ix in range(num_batches):
        iter += 1
        batch = train_set[batch_indices[batch_ix]]
        inputs, targets = data.create_tensorized_batch(
            batch, max_sent_length, w2v_map, label_to_ix)

        # Clear out gradients and detach the hidden state from the previous
        # batch so gradients do not flow across batch boundaries.
        model.zero_grad()
        model.hidden = repackage_hidden(model.hidden)

        # Run the forward pass; `scores` holds the unnormalized class scores.
        scores = model(inputs)
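# The loop above calls `repackage_hidden`, which is not defined in this
# snippet. Below is a minimal sketch in the style of PyTorch's word language
# model example, assuming the hidden state is a tensor or a (possibly nested)
# tuple of tensors: detaching cuts the autograd graph so backpropagation is
# truncated at the batch boundary.
import torch

def repackage_hidden(h):
    # Detach a single tensor from its computation history, or recurse over a
    # tuple such as an LSTM's (h, c) pair.
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)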
import torch
import torch.nn as nn
import torch.optim as optim

# NOTE: assumes `char2id`, `boy_names`, `args`, `BiLSTM`, and `yield_batch`
# are defined elsewhere.
print('Character to id:\n', char2id)
print("Distinct boys' name count:\n", len(boy_names))

model = BiLSTM(input_size=len(char2id), hidden_size=args.hidden_size,
               output_size=len(char2id))
loss_func = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=args.lr)

batch_size = args.batch_size
avg_loss = 0
for epoch in range(args.num_epoch):
    batches = yield_batch(boy_names, batch_size, char2id)
    for i, (input_, target_, seq_lengths) in enumerate(batches):
        loss = 0
        # input_ is shaped (seq_len, batch); the last batch may be smaller,
        # so re-derive the batch size from the tensor itself.
        seq_len = input_.size(0)
        batch_size_ = input_.size(1)
        hidden = model.init_hidden(batch_size_)
        model.zero_grad()

        # Step through the sequence one character at a time, accumulating
        # the cross-entropy loss over all time steps.
        for x in range(seq_len):
            output, hidden = model(input_[x, :], hidden)
            loss += loss_func(output, target_[x, :])

        loss.backward()
        optimizer.step()
        avg_loss += loss.item() / seq_len
        if i % 100 == 0:
            print('epoch: {}, batch: {}, loss: {}'.format(epoch, i, loss.item()))

torch.save(model, './name.pt')
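# `BiLSTM` and `yield_batch` are not defined in this snippet. `yield_batch` is
# assumed to yield (input_, target_, seq_lengths) triples where input_ and
# target_ are (seq_len, batch) tensors of character indices. The class below
# is one illustrative way to satisfy the calling convention of the training
# loop, not the original implementation; despite the name, the loop drives
# the model one time step at a time, so a single LSTMCell is sketched here.
import torch
import torch.nn as nn

class BiLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.embed = nn.Embedding(input_size, hidden_size)  # char index -> vector
        self.cell = nn.LSTMCell(hidden_size, hidden_size)   # one recurrence step
        self.out = nn.Linear(hidden_size, output_size)      # hidden -> char scores

    def init_hidden(self, batch_size):
        # Fresh (h, c) pair of zeros for a new batch of sequences.
        zeros = torch.zeros(batch_size, self.hidden_size)
        return (zeros, zeros.clone())

    def forward(self, x, hidden):
        # x: (batch,) character indices for a single time step. Returns
        # unnormalized scores of shape (batch, output_size), which is what
        # nn.CrossEntropyLoss expects, plus the updated hidden state.
        h, c = self.cell(self.embed(x), hidden)
        return self.out(h), (h, c)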