def evaluate(self, rollout: StepSequence, hidden_states_name: str = 'hidden_states') -> to.Tensor:
    """
    Re-evaluate the given rollout and return a derivable action tensor.
    This method makes sure that the gradient is propagated through the hidden state.

    :param rollout: complete rollout
    :param hidden_states_name: name of hidden states rollout entry, used for recurrent networks.
                               Change this string for value functions.
    :return: actions with gradient data
    """
    actions = []
    for sub_ro in rollout.iterate_rollouts():
        # Seed the hidden state from the recorded rollout if present, else let the network pick its default
        hidden = sub_ro[0][hidden_states_name] if hidden_states_name in rollout.data_names else None
        # Feed the steps one at a time so the hidden state (and thus the gradient) is threaded through
        for step in sub_ro:
            action, hidden = self(step.observation, hidden)
            actions.append(action)
    return to.stack(actions)
def evaluate(self, rollout: StepSequence, hidden_states_name: str = 'hidden_states') -> to.Tensor:
    """
    Re-evaluate the given rollout and return a derivable action tensor.
    The whole observation sequence of each rollout is passed through the RNN in one call.

    :param rollout: complete rollout; must be continuous and hold PyTorch tensors
    :param hidden_states_name: name of hidden states rollout entry, used for recurrent networks.
                               Change this string for value functions.
    :return: actions with gradient data
    :raises pyrado.TypeErr: if the rollout data is not stored as PyTorch tensors
    :raises pyrado.ValueErr: if the rollout is not continuous
    """
    # Validate with explicit raises instead of `assert`, which is silently stripped under `python -O`,
    # and to stay consistent with the other evaluate() implementations in this file
    if not rollout.data_format == 'torch':
        raise pyrado.TypeErr(msg='The rollout data passed to evaluate() must be of type torch.Tensor!')
    if not rollout.continuous:
        raise pyrado.ValueErr(msg='The rollout data passed to evaluate() must come from a continuous rollout!')

    # The passed sample collection might contain multiple rollouts.
    # Note:
    # While we *could* try to convert this to a PackedSequence, allowing us to only call the network once, that
    # would require a lot of reshaping on the result. So let's not. If performance becomes an issue, revisit here.
    act_list = []
    for ro in rollout.iterate_rollouts():
        if hidden_states_name in rollout.data_names:
            # Get initial hidden state from first step
            init_hs = self._unpack_hidden(ro[0][hidden_states_name])
        else:
            # Let the network pick the default hidden state
            init_hs = None

        # Reshape observations to match torch's rnn sequence protocol (seq_len, batch=1, obs_dim)
        obs = ro.get_data_values('observations', True).unsqueeze(1).to(self.device)

        # Run them through the network
        output, _ = self.rnn_layers(obs, init_hs)

        # And through the output layer
        act = self.output_layer(output.squeeze(1))
        if self._output_nonlin is not None:
            act = self._output_nonlin(act)

        # Collect the actions
        act_list.append(act)

    return to.cat(act_list)
def evaluate(self, rollout: StepSequence, hidden_states_name: str = 'hidden_states') -> to.Tensor:
    """
    Re-evaluate the given rollout step-by-step and return a derivable action tensor.
    The policy is temporarily switched to evaluation mode for the forward passes.

    :param rollout: complete rollout; must be continuous and hold PyTorch tensors
    :param hidden_states_name: name of hidden states rollout entry, used for recurrent networks.
                               Change this string for value functions.
    :return: actions with gradient data
    :raises pyrado.TypeErr: if the rollout data is not stored as PyTorch tensors
    :raises pyrado.ValueErr: if the rollout is not continuous
    """
    if not rollout.data_format == 'torch':
        raise pyrado.TypeErr(msg='The rollout data passed to evaluate() must be of type torch.Tensor!')
    if not rollout.continuous:
        # Fixed wording: the original message was missing the verb ("... evaluate() from a ...")
        raise pyrado.ValueErr(msg='The rollout data passed to evaluate() must come from a continuous rollout!')

    # Set policy, i.e. PyTorch nn.Module, to evaluation mode
    self.eval()

    act_list = []
    for ro in rollout.iterate_rollouts():
        if hidden_states_name in rollout.data_names:
            # Get initial hidden state from first step
            hidden = ro[0][hidden_states_name]
        else:
            # Let the network pick the default hidden state
            hidden = None
        # Run steps consecutively reusing the hidden state
        for step in ro:
            act, hidden = self(step.observation, hidden)
            act_list.append(act)

    # Set policy, i.e. PyTorch nn.Module, back to training mode
    self.train()

    return to.stack(act_list)
def evaluate(self, rollout: StepSequence, hidden_states_name: str = 'hidden_states') -> Tuple[to.Tensor, to.Tensor]:
    """
    Re-evaluate the given rollout step-by-step and return derivable tensors for both network heads.

    :param rollout: complete rollout
    :param hidden_states_name: name of hidden states rollout entry, used for recurrent networks.
                               Change this string for value functions.
    :return: tuple of (actions, second head outputs), both with gradient data
    """
    primary_outputs, secondary_outputs = [], []
    for sub_ro in rollout.iterate_rollouts():
        # Seed the hidden state from the recorded rollout if present, else let the network pick its default
        hidden = sub_ro[0][hidden_states_name] if hidden_states_name in rollout.data_names else None
        # Feed the steps one at a time, threading the hidden state through all forward passes
        for step in sub_ro:
            primary, secondary, hidden = self(step.observation, hidden)
            primary_outputs.append(primary)
            secondary_outputs.append(secondary)
    return to.stack(primary_outputs), to.stack(secondary_outputs)
def evaluate(self, rollout: StepSequence, hidden_states_name: str = 'hidden_states') -> to.Tensor:
    """
    Re-evaluate the given rollout and return a derivable action tensor.
    The whole observation sequence of each rollout is passed through the RNN in one call, and the policy
    is temporarily switched to evaluation mode for the forward passes.

    :param rollout: complete rollout; must be continuous and hold PyTorch tensors
    :param hidden_states_name: name of hidden states rollout entry, used for recurrent networks.
                               Change this string for value functions.
    :return: actions with gradient data
    :raises pyrado.TypeErr: if the rollout data is not stored as PyTorch tensors
    :raises pyrado.ValueErr: if the rollout is not continuous
    """
    if not rollout.data_format == 'torch':
        raise pyrado.TypeErr(msg='The rollout data passed to evaluate() must be of type torch.Tensor!')
    if not rollout.continuous:
        # Fixed wording: the original message was missing the verb ("... evaluate() from a ...")
        raise pyrado.ValueErr(msg='The rollout data passed to evaluate() must come from a continuous rollout!')

    # Set policy, i.e. PyTorch nn.Module, to evaluation mode
    self.eval()

    # The passed sample collection might contain multiple rollouts.
    # Note:
    # While we *could* try to convert this to a PackedSequence, allowing us to only call the network once, that
    # would require a lot of reshaping on the result. So let's not. If performance becomes an issue, revisit here.
    act_list = []
    for ro in rollout.iterate_rollouts():
        if hidden_states_name in rollout.data_names:
            # Get initial hidden state from first step
            hidden = self._unpack_hidden(ro[0][hidden_states_name])
        else:
            # Let the network pick the default hidden state
            hidden = None

        # Reshape observations to match PyTorch's RNN sequence protocol (seq_len, batch=1, obs_dim)
        obs = ro.get_data_values('observations', True).unsqueeze(1).to(self.device)

        # Pass the input through hidden RNN layers
        out, _ = self.rnn_layers(obs, hidden)

        # And through the output layer
        act = self.output_layer(out.squeeze(1))
        if self.output_nonlin is not None:
            act = self.output_nonlin(act)

        # Collect the actions
        act_list.append(act)

    # Set policy, i.e. PyTorch nn.Module, back to training mode
    self.train()

    return to.cat(act_list)
def evaluate(self, rollout: StepSequence, hidden_states_name: str = "hidden_states") -> to.Tensor:
    """
    Re-evaluate the given rollout(s) in a single batched RNN forward pass.
    All contained rollouts are padded and packed so the network is only called once; the policy is
    temporarily switched to evaluation mode for the forward pass.

    :param rollout: complete rollout(s); must be continuous and hold PyTorch tensors
    :param hidden_states_name: name of hidden states rollout entry, used for recurrent networks.
                               Change this string for value functions.
    :return: actions with gradient data, concatenated over all rollouts
    :raises pyrado.TypeErr: if the rollout data is not stored as PyTorch tensors
    :raises pyrado.ValueErr: if the rollout is not continuous
    """
    if not rollout.data_format == "torch":
        raise pyrado.TypeErr(msg="The rollout data passed to evaluate() must be of type torch.Tensor!")
    if not rollout.continuous:
        # Fixed wording: the original message was missing the verb ("... evaluate() from a ...")
        raise pyrado.ValueErr(msg="The rollout data passed to evaluate() must come from a continuous rollout!")

    # Set policy, i.e. PyTorch nn.Module, to evaluation mode
    self.eval()

    # Get a list of all observation sequences and their respective lengths
    obs_list = []
    lengths = []
    hidden_list = []
    for ro in rollout.iterate_rollouts():
        if hidden_states_name in rollout.data_names:
            # Get initial hidden state from first step, but do not unpack it yet
            hidden_list.append(ro[0][hidden_states_name])
        else:
            # If no hidden state is given, let it be None
            hidden_list.append(None)

        # Reshape observations to match PyTorch's RNN sequence protocol
        obs = ro.get_data_values("observations", True)
        obs = obs.to(device=self.device)

        # Get all observation sequences and their respective lengths
        obs_list.append(obs)
        lengths.append(len(obs))

    # Pad and then pack observations
    obs_padded = to.nn.utils.rnn.pad_sequence(obs_list)
    obs_packed = to.nn.utils.rnn.pack_padded_sequence(obs_padded, lengths=lengths, enforce_sorted=False)

    # Check whether no hidden state was provided
    if all(h is None for h in hidden_list):
        hidden = None
    else:
        # Use the first not-None hidden state as a template, so the zero fill-ins below inherit its
        # dtype and device (plain `to.zeros(shape)` would land on the CPU default and mismatch `obs`)
        template = next(h for h in hidden_list if h is not None)
        # Exchange all Nones through zero tensors (PyTorch default initial hidden state)
        hidden_list = [to.zeros_like(template) if h is None else h for h in hidden_list]
        # Stack the hidden states and move them to the same device as the observations
        batch_size = len(hidden_list)
        hidden = to.stack(hidden_list, dim=0).to(device=self.device)
        hidden = self._unpack_hidden(hidden, batch_size=batch_size)

    # Pass packed observation through RNN, result is also packed
    out_packed, _ = self.rnn_layers(obs_packed, hidden)

    # Unpack result and drop the padding, keeping only each sequence's valid prefix
    out, lens = to.nn.utils.rnn.pad_packed_sequence(out_packed)
    out = to.cat([out[:l, i] for i, l in enumerate(lens)], dim=0)

    # Then pass all of it through the output layer
    act = self.output_layer(out)
    if self.output_nonlin is not None:
        act = self.output_nonlin(act)

    # Set policy, i.e. PyTorch nn.Module, back to training mode
    self.train()

    return act