optimizer = torch.optim.Adam(model.parameters(), lr=flags.learning_rate)

# Fresh hidden and cell states before the first batch
state_h, state_c = model.zero_state(flags.batch_size)
state_h = state_h.to(device)
state_c = state_c.to(device)

epoch = 1
iterator = 0
for i in range(flags.epochs * flags.max_batch):
    # Reset the LSTM state at each epoch boundary
    if iterator >= epoch * flags.max_batch:
        epoch += 1
        state_h, state_c = model.zero_state(flags.batch_size)
        state_h = state_h.to(device)
        state_c = state_c.to(device)
    iterator += 1

    inputs, targets = dataset()
    model.train()
    optimizer.zero_grad()

    x = torch.tensor(inputs).to(device)
    y = torch.tensor(targets).to(device)

    logits, (state_h, state_c) = model(x, (state_h, state_c))
    # Swap the (batch, seq_len, vocab) logits to (batch, vocab, seq_len) so
    # the class dimension lands on dim 1, as cross-entropy over sequences expects
    loss = criterion(logits.transpose(1, 2), y)
    loss_value = loss.item()

    loss.backward()

    # Detach the states so gradients do not propagate across batch
    # boundaries (truncated backpropagation through time)
    state_h = state_h.detach()
    state_c = state_c.detach()

    # Apply the parameter update for this batch
    optimizer.step()
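# A minimal sketch of the zero_state helper the loop above relies on; the
# method itself is not shown in this snippet, and the num_layers / lstm_size
# attribute names are assumptions. An LSTM's initial hidden and cell states
# are zero tensors of shape (num_layers, batch_size, hidden_size), created on
# CPU here and moved to the right device by the caller.
def zero_state(self, batch_size):
    return (torch.zeros(self.num_layers, batch_size, self.lstm_size),
            torch.zeros(self.num_layers, batch_size, self.lstm_size))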
# Saves a checkpoint of the trained DeepSets model and its optimizer
torch.save({
    'state_dict': deepsets.state_dict(),
    'optimizer': optimizer.state_dict(),
}, 'model_deepsets.pth.tar')

print("Finished training for DeepSets model")
print()

# Initializes LSTM model and optimizer
lstm = LSTM(n_digits, embedding_dim, hidden_dim).to(device)
optimizer = optim.Adam(lstm.parameters(), lr=learning_rate)
loss_function = nn.L1Loss()

# Trains the LSTM model
for epoch in range(epochs):
    t = time.time()
    lstm.train()
    train_loss = 0
    count = 0
    # Shuffle the training set at the start of each epoch
    idx = np.random.permutation(n_train)
    for i in range(0, n_train, batch_size):
        ############## Task 5
        ##################
        # your code here #
        ##################

        # Slice out one shuffled mini-batch of digit sequences and targets
        x_batch = X_train[idx[i:min(n_train, i + batch_size)], :]
        y_batch = y_train[idx[i:min(n_train, i + batch_size)]]
        x_batch = torch.LongTensor(x_batch).to(device)
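        # The snippet above ends mid-batch; what follows is a minimal sketch
        # of how such a step typically continues, assuming the LSTM model
        # emits one scalar prediction per sequence to match the L1 loss set
        # up above (illustrative, not the lab's reference solution).
        y_batch = torch.FloatTensor(y_batch).to(device)
        optimizer.zero_grad()
        output = lstm(x_batch).squeeze(-1)           # (batch_size,)
        loss = loss_function(output, y_batch)        # mean absolute error
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * x_batch.size(0)  # running sum for the epoch average
        count += x_batch.size(0)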