def init_hidden(self, batch_size):
    '''
    Initialize the hidden state of an LSTM/GRU
    :param batch_size: The batch_size of the hidden state
    :return: hidden state of dims (n_layers, batch_size, hidden_dim)
    '''
    # Implement function
    weight = next(self.parameters()).data

    # initialize hidden state (and, for an LSTM, cell state) with zero weights,
    # and move to GPU if available
    if train_on_gpu:
        hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().cuda(),
                  weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().cuda())
    else:
        hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_(),
                  weight.new(self.n_layers, batch_size, self.hidden_dim).zero_())

    return hidden


tests.test_rnn(RNN, train_on_gpu)


def forward_back_prop(rnn, optimizer, criterion, inp, target, hidden):
    """
    Forward and backward propagation on the neural network
    :param rnn: The PyTorch Module that holds the neural network
    :param optimizer: The PyTorch optimizer for the neural network
    :param criterion: The PyTorch loss function
    :param inp: A batch of input to the neural network
    :param target: The target output for the batch of input
    :return: The loss and the latest hidden state Tensor
    """
    # move data to GPU, if available
    if train_on_gpu:
        inp, target = inp.cuda(), target.cuda()

    # perform backpropagation and optimization
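    # The original snippet stops at the comment above. What follows is a minimal sketch of one
    # way to finish the step: detach the hidden state from its history, zero the gradients, run
    # a forward pass, backpropagate the loss, clip gradients, and take an optimizer step. It
    # assumes `import torch` at module level, assumes the RNN's forward(inp, hidden) returns
    # (output, hidden), and the clipping max_norm of 5 is an assumed, typical value, not taken
    # from the source.
    hidden = tuple(each.data for each in hidden)          # detach hidden state from its history
    rnn.zero_grad()                                       # clear gradients from the previous batch
    output, hidden = rnn(inp, hidden)                     # forward pass through the network
    loss = criterion(output, target)                      # compute loss against the targets
    loss.backward()                                       # backpropagate
    torch.nn.utils.clip_grad_norm_(rnn.parameters(), 5)   # clip gradients to avoid exploding gradients
    optimizer.step()                                      # update the network weights

    return loss.item(), hidden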
def test_rnn_structure(self):
    problem_unittests.test_rnn(RNN=lost_episode.RNN, train_on_gpu=self.train_on_gpu)
def run_tests():
    tests.test_rnn(RNN, train_on_gpu)
    tests.test_forward_back_prop(RNN, forward_back_prop, train_on_gpu)
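# A small usage sketch: calling run_tests() from a script entry point so both checks run when
# the file is executed directly. The __main__ guard is an assumed convention here, not shown in
# the original snippets.
if __name__ == '__main__':
    run_tests()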
def init_hidden(self, batch_size):
    '''
    Initialize the hidden state of an LSTM/GRU
    :param batch_size: The batch_size of the hidden state
    :return: hidden state of dims (n_layers, batch_size, hidden_dim)
    '''
    # Implement function
    # initialize hidden state with zero weights, and move to GPU if available
    # Create two new tensors with sizes n_layers x batch_size x n_hidden,
    # initialized to zero, for hidden state and cell state of LSTM
    weight = next(self.parameters()).data

    if train_on_gpu:
        hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().cuda(),
                  weight.new(self.n_layers, batch_size, self.hidden_dim).zero_().cuda())
    else:
        hidden = (weight.new(self.n_layers, batch_size, self.hidden_dim).zero_(),
                  weight.new(self.n_layers, batch_size, self.hidden_dim).zero_())

    return hidden


from problem_unittests import test_rnn
test_rnn(RNN, False)
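# A hedged sketch of how init_hidden and forward_back_prop fit together in a training loop.
# The names rnn, optimizer, criterion, train_loader, batch_size, and n_epochs are assumed
# placeholders for objects constructed elsewhere; they are not defined in the snippets above.
for epoch in range(n_epochs):
    # re-initialize the hidden state at the start of each epoch
    hidden = rnn.init_hidden(batch_size)

    for inp, target in train_loader:
        # skip incomplete final batches so the hidden state's batch dimension stays consistent
        if inp.size(0) != batch_size:
            continue

        # one forward/backward pass; carry the returned hidden state into the next batch
        loss, hidden = forward_back_prop(rnn, optimizer, criterion, inp, target, hidden)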