Example #1
    def evaluate(self,
                 rollout: StepSequence,
                 hidden_states_name: str = 'hidden_states') -> to.Tensor:
        """
        Re-evaluate the given rollout and return a differentiable action tensor.
        This method makes sure that the gradient is propagated through the hidden state.

        :param rollout: complete rollout
        :param hidden_states_name: name of hidden states rollout entry, used for recurrent networks.
                                   Change this string for value functions.
        :return: actions with gradient data
        """
        act_list = []
        for ro in rollout.iterate_rollouts():
            if hidden_states_name in rollout.data_names:
                # Get initial hidden state from first step
                hs = ro[0][hidden_states_name]
            else:
                # Let the network pick the default hidden state
                hs = None

            # Run each step separately
            for step in ro:
                act, hs = self(step.observation, hs)
                act_list.append(act)

        return to.stack(act_list)
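A minimal usage sketch (not part of the source) of how the differentiable actions returned by evaluate() might enter a loss; policy, rollout, and the squared-error criterion are placeholders for illustration only:

    # Hypothetical setup: `policy` is a recurrent Pyrado policy, `rollout` a recorded StepSequence
    acts = policy.evaluate(rollout)   # one action row per step, with gradient tracking
    loss = acts.pow(2).mean()         # any scalar criterion built from `acts` would do
    loss.backward()                   # gradients propagate back through the hidden states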
Example #2
    def evaluate(self,
                 rollout: StepSequence,
                 hidden_states_name: str = 'hidden_states') -> to.Tensor:
        assert rollout.continuous
        assert rollout.data_format == 'torch'

        # The passed sample collection might contain multiple rollouts.
        # Note:
        # While we *could* try to convert this to a PackedSequence, allowing us to only call the network once, that
        # would require a lot of reshaping on the result. So let's not. If performance becomes an issue, revisit here.
        act_list = []
        for ro in rollout.iterate_rollouts():
            if hidden_states_name in rollout.data_names:
                # Get initial hidden state from first step
                init_hs = self._unpack_hidden(ro[0][hidden_states_name])
            else:
                # Let the network pick the default hidden state
                init_hs = None

            # Reshape observations to match PyTorch's RNN sequence protocol
            obs = ro.get_data_values('observations', True).unsqueeze(1).to(self.device)

            # Run them through the network
            output, _ = self.rnn_layers(obs, init_hs)

            # And through the output layer
            act = self.output_layer(output.squeeze(1))
            if self._output_nonlin is not None:
                act = self._output_nonlin(act)

            # Collect the actions
            act_list.append(act)

        return to.cat(act_list)
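Examples #2, #5, and #6 rely on a helper _unpack_hidden() that is not shown on this page. A plausible sketch of what it does, assuming the rollout stores one flat hidden vector per step and that the policy exposes num_recurrent_layers and hidden_size (both attribute names are assumptions):

    def _unpack_hidden(self, hidden: to.Tensor, batch_size: int = 1) -> to.Tensor:
        # Assumed storage: a flat hidden vector per step (possibly a batch of them), while
        # torch's RNN modules expect the shape (num_layers, batch_size, hidden_size)
        hidden = hidden.view(batch_size, self.num_recurrent_layers, self.hidden_size)
        return hidden.permute(1, 0, 2).contiguous()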
Example #3
    def evaluate(self,
                 rollout: StepSequence,
                 hidden_states_name: str = 'hidden_states') -> to.Tensor:
        if not rollout.data_format == 'torch':
            raise pyrado.TypeErr(
                msg='The rollout data passed to evaluate() must be of type torch.Tensor!')
        if not rollout.continuous:
            raise pyrado.ValueErr(
                msg='The rollout data passed to evaluate() must come from a continuous rollout!')

        # Set policy, i.e. PyTorch nn.Module, to evaluation mode
        self.eval()

        act_list = []
        for ro in rollout.iterate_rollouts():
            if hidden_states_name in rollout.data_names:
                # Get initial hidden state from first step
                hidden = ro[0][hidden_states_name]
            else:
                # Let the network pick the default hidden state
                hidden = None

            # Run steps consecutively reusing the hidden state
            for step in ro:
                act, hidden = self(step.observation, hidden)
                act_list.append(act)

        # Set policy, i.e. PyTorch nn.Module, back to training mode
        self.train()

        return to.stack(act_list)
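Compared to Example #1, this variant toggles the module into evaluation mode for the re-run and back to training mode afterwards. The toggle changes the behaviour of layers such as dropout and batch norm but does not disable gradient tracking; a tiny self-contained illustration:

    import torch as to

    drop = to.nn.Dropout(p=0.5)
    x = to.ones(4, requires_grad=True)

    drop.train()
    print(drop(x))   # roughly half the entries zeroed, the rest scaled by 1 / (1 - p) = 2
    drop.eval()
    print(drop(x))   # identity mapping: dropout is disabled, gradients still flow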
Example #4
    def evaluate(self, rollout: StepSequence, hidden_states_name: str = 'hidden_states') -> Tuple[to.Tensor, to.Tensor]:
        act_list = []
        head2_list = []
        for ro in rollout.iterate_rollouts():
            if hidden_states_name in rollout.data_names:
                # Get initial hidden state from first step
                hs = ro[0][hidden_states_name]
            else:
                # Let the network pick the default hidden state.
                hs = None

            # Run each step separately
            for step in ro:
                act, head2, hs = self(step.observation, hs)
                act_list.append(act)
                head2_list.append(head2)

        return to.stack(act_list), to.stack(head2_list)
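This example unpacks three values per step, which implies a policy with two output heads. A rough, purely hypothetical sketch of a matching forward() (head_1 and head_2 are invented names; the real class also takes care of packing the hidden state, which is omitted here):

    def forward(self, obs: to.Tensor, hidden: to.Tensor = None):
        obs = obs.view(1, 1, -1)                     # (seq_len=1, batch=1, obs_dim)
        out, hidden = self.rnn_layers(obs, hidden)   # reuse the hidden state across steps
        out = out.view(-1)
        return self.head_1(out), self.head_2(out), hidden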
Example #5
    def evaluate(self, rollout: StepSequence, hidden_states_name: str = 'hidden_states') -> to.Tensor:
        if not rollout.data_format == 'torch':
            raise pyrado.TypeErr(msg='The rollout data passed to evaluate() must be of type torch.Tensor!')
        if not rollout.continuous:
            raise pyrado.ValueErr(msg='The rollout data passed to evaluate() must come from a continuous rollout!')

        # Set policy, i.e. PyTorch nn.Module, to evaluation mode
        self.eval()

        # The passed sample collection might contain multiple rollouts.
        # Note:
        # While we *could* try to convert this to a PackedSequence, allowing us to only call the network once, that
        # would require a lot of reshaping on the result. So let's not. If performance becomes an issue, revisit here.
        act_list = []
        for ro in rollout.iterate_rollouts():
            if hidden_states_name in rollout.data_names:
                # Get initial hidden state from first step
                hidden = self._unpack_hidden(ro[0][hidden_states_name])
            else:
                # Let the network pick the default hidden state
                hidden = None

            # Reshape observations to match PyTorch's RNN sequence protocol
            obs = ro.get_data_values('observations', True).unsqueeze(1).to(self.device)

            # Pass the input through hidden RNN layers
            out, _ = self.rnn_layers(obs, hidden)

            # And through the output layer
            act = self.output_layer(out.squeeze(1))
            if self.output_nonlin is not None:
                act = self.output_nonlin(act)

            # Collect the actions
            act_list.append(act)

        # Set policy, i.e. PyTorch nn.Module, back to training mode
        self.train()

        return to.cat(act_list)
Example #6
    def evaluate(self,
                 rollout: StepSequence,
                 hidden_states_name: str = "hidden_states") -> to.Tensor:
        if not rollout.data_format == "torch":
            raise pyrado.TypeErr(
                msg="The rollout data passed to evaluate() must be of type torch.Tensor!")
        if not rollout.continuous:
            raise pyrado.ValueErr(
                msg="The rollout data passed to evaluate() must come from a continuous rollout!")

        # Set policy, i.e. PyTorch nn.Module, to evaluation mode
        self.eval()

        # Collect all observation sequences, their lengths, and the initial hidden states
        obs_list = []
        lengths = []
        hidden_list = []

        for ro in rollout.iterate_rollouts():
            if hidden_states_name in rollout.data_names:
                # Get initial hidden state from first step, but do not unpack it yet
                hidden_list.append(ro[0][hidden_states_name])
            else:
                # If no hidden state is given, let it be None
                hidden_list.append(None)

            # Reshape observations to match PyTorch's RNN sequence protocol
            obs = ro.get_data_values("observations", True)
            obs = obs.to(device=self.device)

            # Store the observation sequence and its length
            obs_list.append(obs)
            lengths.append(len(obs))

        # Pad and then pack observations
        obs_padded = to.nn.utils.rnn.pad_sequence(obs_list)
        obs_packed = to.nn.utils.rnn.pack_padded_sequence(obs_padded,
                                                          lengths=lengths,
                                                          enforce_sorted=False)

        # Check whether no hidden state was provided
        if all(h is None for h in hidden_list):
            hidden = None
        else:
            # Get the shape of the hidden state from the first non-None element of the list
            shape_hidden = next(h for h in hidden_list if h is not None).shape

            # Replace all Nones with zero tensors (PyTorch's default initial hidden state)
            hidden_list = [
                to.zeros(shape_hidden) if h is None else h for h in hidden_list
            ]

            # Stack the initial hidden states into a batch and bring them into the RNN's layout
            batch_size = len(hidden_list)
            hidden = to.stack(hidden_list, dim=0)
            hidden = self._unpack_hidden(hidden, batch_size=batch_size)

        # Pass packed observation through RNN, result is also packed
        out_packed, _ = self.rnn_layers(obs_packed, hidden)

        # Unpack result
        out, lens = to.nn.utils.rnn.pad_packed_sequence(out_packed)
        out = to.cat([out[:l, i] for i, l in enumerate(lens)], dim=0)

        # Then pass all of it through the output layer
        act = self.output_layer(out)
        if self.output_nonlin is not None:
            act = self.output_nonlin(act)

        # Set policy, i.e. PyTorch nn.Module, back to training mode
        self.train()

        return act
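The padding/packing logic above can be checked in isolation. A minimal sketch with plain torch (independent of the policy class) that mirrors the batching for two rollouts of different length:

    import torch as to

    rnn = to.nn.RNN(input_size=3, hidden_size=5)
    seqs = [to.randn(4, 3), to.randn(2, 3)]                  # two "rollouts" of length 4 and 2
    lengths = [len(s) for s in seqs]

    padded = to.nn.utils.rnn.pad_sequence(seqs)              # shape (max_len, batch, 3)
    packed = to.nn.utils.rnn.pack_padded_sequence(padded, lengths=lengths, enforce_sorted=False)

    out_packed, _ = rnn(packed)
    out, lens = to.nn.utils.rnn.pad_packed_sequence(out_packed)

    # Re-concatenate only the valid steps of each sequence, as evaluate() does above
    flat = to.cat([out[:l, i] for i, l in enumerate(lens)], dim=0)
    print(flat.shape)                                        # torch.Size([6, 5]) == (4 + 2, hidden_size)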