Beispiel #1
0
    def experience(self, states, actions, terminal, reward, internals=None):
        """
        Feed experience traces.

        See the [act-experience-update script](https://github.com/tensorforce/tensorforce/blob/master/examples/act_experience_update_interface.py)
        for an example application as part of the act-experience-update interface, which is an
        alternative to the act-observe interaction pattern.

        Args:
            states (dict[array[state]]): Dictionary containing arrays of states
                (<span style="color:#C00000"><b>required</b></span>).
            actions (dict[array[action]]): Dictionary containing arrays of actions
                (<span style="color:#C00000"><b>required</b></span>).
            terminal (array[bool]): Array of terminals
                (<span style="color:#C00000"><b>required</b></span>).
            reward (array[float]): Array of rewards
                (<span style="color:#C00000"><b>required</b></span>).
            internals (dict[state]): Dictionary containing arrays of internal agent states
                (<span style="color:#C00000"><b>required</b></span> if agent has internal states).

        Raises:
            TensorforceError: If called while any terminal buffer is non-empty (mid-episode), if
                `internals` or `actions` does not match the input structure inferred from
                `states`, if the number of internals/actions/terminal/reward entries differs from
                the number of states, or if the last terminal value is 0 (incomplete episode).
        """
        if any(len(buffer) > 0 for buffer in self.terminal_buffer):
            raise TensorforceError(
                message="Calling agent.experience is not possible mid-episode."
            )

        # Process states input and infer batching structure
        states, batched, num_instances, is_iter_of_dicts = self._process_states_input(
            states=states, function_name='Agent.experience')

        # Default internals are created with a leading axis of size num_instances already, so
        # they must be skipped by the "expand inputs if not batched" step below
        is_internals_default = (internals is None)

        def stack_dicts(dicts):
            # Turn a sequence of dicts into one ArrayDict with values stacked along a new axis 0
            dicts = [ArrayDict(d) for d in dicts]
            return dicts[0].fmap(
                function=(lambda *xs: np.stack(xs, axis=0)),
                zip_values=dicts[1:])

        def default_internals():
            # Replicate the initial internals once per instance along a new leading axis.
            # np.repeat along axis 0 yields shape (num_instances,) + x.shape for any rank,
            # whereas np.tile with reps=(num_instances,) would tile the last axis instead.
            initial = ArrayDict(self.initial_internals())
            return initial.fmap(function=(lambda x: np.repeat(
                np.expand_dims(x, axis=0), repeats=num_instances, axis=0)))

        if is_iter_of_dicts:
            # Input structure iter[dict[input]]

            # Internals
            if is_internals_default:
                internals = default_internals()
            elif not isinstance(internals, (tuple, list)):
                raise TensorforceError.type(name='Agent.experience',
                                            argument='internals',
                                            dtype=type(internals),
                                            hint='is not tuple/list')
            else:
                internals = stack_dicts(internals)

            # Actions
            if isinstance(actions, np.ndarray):
                actions = ArrayDict(singleton=actions)
            elif not isinstance(actions, (tuple, list)):
                raise TensorforceError.type(name='Agent.experience',
                                            argument='actions',
                                            dtype=type(actions),
                                            hint='is not tuple/list')
            elif not isinstance(actions[0], dict):
                # Sequence of plain action values: treat as a single unnamed action
                actions = ArrayDict(singleton=np.asarray(actions))
            else:
                actions = stack_dicts(actions)

        else:
            # Input structure dict[iter[input]]

            # Internals
            if is_internals_default:
                internals = default_internals()
            elif not isinstance(internals, dict):
                raise TensorforceError.type(name='Agent.experience',
                                            argument='internals',
                                            dtype=type(internals),
                                            hint='is not dict')
            else:
                internals = ArrayDict(internals)

            # Actions: a bare array is a single unnamed action, otherwise a dict is required
            if isinstance(actions, np.ndarray):
                actions = ArrayDict(singleton=actions)
            elif not isinstance(actions, dict):
                raise TensorforceError.type(name='Agent.experience',
                                            argument='actions',
                                            dtype=type(actions),
                                            hint='is not dict')
            else:
                actions = ArrayDict(actions)

        # Expand inputs if not batched
        if not batched:
            if not is_internals_default:
                internals = internals.fmap(
                    function=(lambda x: np.expand_dims(x, axis=0)))
            actions = actions.fmap(
                function=(lambda x: np.expand_dims(x, axis=0)))
            terminal = np.asarray([terminal])
            reward = np.asarray([reward])
        else:
            terminal = np.asarray(terminal)
            reward = np.asarray(reward)

        # Check number of inputs
        for name, internal in internals.items():
            if internal.shape[0] != num_instances:
                raise TensorforceError.value(
                    name='Agent.experience',
                    argument='len(internals[{}])'.format(name),
                    value=internal.shape[0],
                    hint='!= len(states)')
        for name, action in actions.items():
            if action.shape[0] != num_instances:
                raise TensorforceError.value(
                    name='Agent.experience',
                    argument='len(actions[{}])'.format(name),
                    value=action.shape[0],
                    hint='!= len(states)')
        if terminal.shape[0] != num_instances:
            raise TensorforceError.value(name='Agent.experience',
                                         argument='len(terminal)',
                                         value=terminal.shape[0],
                                         hint='!= len(states)')
        if reward.shape[0] != num_instances:
            raise TensorforceError.value(name='Agent.experience',
                                         argument='len(reward)',
                                         value=reward.shape[0],
                                         hint='!= len(states)')

        def build_auxiliary(name, spec):
            # Per-action auxiliary inputs; currently only the mask for bounded int actions
            auxiliary = ArrayDict()
            if self.config.enable_int_action_masking and spec.type == 'int' and \
                    spec.num_values is not None:
                if name is None:
                    name = 'action'
                # Mask, either part of states or default all true
                auxiliary['mask'] = states.pop(
                    name + '_mask',
                    np.ones(shape=(num_instances, ) + spec.shape +
                            (spec.num_values, ),
                            dtype=spec.np_type()))
            return auxiliary

        # Note: this pops any '<action>_mask' entries out of states as a side effect
        auxiliaries = self.actions_spec.fmap(function=build_auxiliary,
                                             cls=ArrayDict,
                                             with_names=True)
        if self.states_spec.is_singleton() and not states.is_singleton():
            states[None] = states.pop('state')

        # Convert terminal to int if necessary. The dtype is compared by equality, since an
        # identity check between an array's dtype and a type specification is unreliable.
        if terminal.dtype == util.np_dtype(dtype='bool'):
            zeros = np.zeros_like(terminal, dtype=util.np_dtype(dtype='int'))
            ones = np.ones_like(terminal, dtype=util.np_dtype(dtype='int'))
            terminal = np.where(terminal, ones, zeros)

        if terminal[-1] == 0:
            raise TensorforceError(
                message="Agent.experience() requires full episodes as input.")

        # Split experiences into episodes (each ending at a non-zero terminal) and feed them to
        # the model one episode at a time
        last = 0
        for index, terminal_value in enumerate(terminal, start=1):
            if terminal_value == 0:
                continue

            episode = slice(last, index)
            states_batch = states.fmap(function=(lambda x: x[episode]))
            internals_batch = internals.fmap(function=(lambda x: x[episode]))
            auxiliaries_batch = auxiliaries.fmap(function=(lambda x: x[episode]))
            actions_batch = actions.fmap(function=(lambda x: x[episode]))
            terminal_batch = terminal[episode]
            reward_batch = reward[episode]
            last = index

            # Inputs to tensors
            states_batch = self.states_spec.to_tensor(
                value=states_batch,
                batched=True,
                name='Agent.experience states')
            internals_batch = self.internals_spec.to_tensor(
                value=internals_batch,
                batched=True,
                recover_empty=True,
                name='Agent.experience internals')
            auxiliaries_batch = self.auxiliaries_spec.to_tensor(
                value=auxiliaries_batch,
                batched=True,
                name='Agent.experience auxiliaries')
            actions_batch = self.actions_spec.to_tensor(
                value=actions_batch,
                batched=True,
                name='Agent.experience actions')
            terminal_batch = self.terminal_spec.to_tensor(
                value=terminal_batch,
                batched=True,
                name='Agent.experience terminal')
            reward_batch = self.reward_spec.to_tensor(
                value=reward_batch,
                batched=True,
                name='Agent.experience reward')

            # Model.experience()
            timesteps, episodes = self.model.experience(
                states=states_batch,
                internals=internals_batch,
                auxiliaries=auxiliaries_batch,
                actions=actions_batch,
                terminal=terminal_batch,
                reward=reward_batch)
            self.timesteps = timesteps.numpy().item()
            self.episodes = episodes.numpy().item()

        if self.model.saver is not None:
            self.model.save()
Beispiel #2
0
    def act(self,
            states,
            internals=None,
            parallel=0,
            independent=False,
            deterministic=True,
            **kwargs):
        """
        Returns action(s) for the given state(s) by delegating to `self.fn_act`.

        Args:
            states: State(s) to act on, passed through `self._process_states_input` to infer
                the batching structure.
            internals: Internal agent state(s); only valid if `independent` is true, and
                required in that case if `self.internals_spec` is non-empty.
            parallel: Parallel execution index/indices; must be left at 0 if `independent` is
                true.
            independent (bool): If true, the timestep-completed bookkeeping and recording
                buffers are skipped.
            deterministic (bool): Forwarded unchanged to `self.fn_act` when `self._is_agent`
                is true.
            **kwargs: Accepted but not used by this method.

        Returns:
            The action(s), unbatched to mirror the structure of `states`; additionally the
            next internals as second return value if `independent` is true and `internals` was
            given.

        Raises:
            TensorforceError: On invalid argument combinations or types, on a mismatch between
                the number of internals/parallel entries and the number of states, if a
                preceding non-independent `act()` was not completed, or on batched input when
                `self._is_agent` is false.
        """
        # Independent and internals
        is_internals_none = (internals is None)
        if independent:
            if parallel != 0:
                raise TensorforceError.invalid(name='Agent.act',
                                               argument='parallel',
                                               condition='independent is true')
            if is_internals_none and len(self.internals_spec) > 0:
                raise TensorforceError.required(
                    name='Agent.act',
                    argument='internals',
                    condition='independent is true')
        else:
            if not is_internals_none:
                raise TensorforceError.invalid(
                    name='Agent.act',
                    argument='internals',
                    condition='independent is false')

        # Process states input and infer batching structure
        states, batched, num_parallel, is_iter_of_dicts = self._process_states_input(
            states=states, function_name='Agent.act')

        if independent:
            # Independent mode: handle internals argument (nothing to do for internals=None)
            if not is_internals_none:
                if is_iter_of_dicts or isinstance(internals, (tuple, list)):
                    # Input structure iter[dict[internal]]
                    if not isinstance(internals, (tuple, list)):
                        raise TensorforceError.type(name='Agent.act',
                                                    argument='internals',
                                                    dtype=type(internals),
                                                    hint='is not tuple/list')
                    internals = [ArrayDict(internal) for internal in internals]
                    internals = internals[0].fmap(
                        function=(lambda *xs: np.stack(xs, axis=0)),
                        zip_values=internals[1:])

                else:
                    # Input structure dict[iter[internal]]
                    if not isinstance(internals, dict):
                        raise TensorforceError.type(name='Agent.act',
                                                    argument='internals',
                                                    dtype=type(internals),
                                                    hint='is not dict')
                    internals = ArrayDict(internals)

                # Expand inputs if not batched
                if not batched:
                    internals = internals.fmap(
                        function=(lambda x: np.expand_dims(x, axis=0)))

                # Check number of inputs
                for name, internal in internals.items():
                    if internal.shape[0] != num_parallel:
                        raise TensorforceError.value(
                            name='Agent.act',
                            argument='len(internals[{}])'.format(name),
                            value=internal.shape[0],
                            hint='!= len(states)')

        else:
            # Non-independent mode: handle parallel input.
            # The default must be checked before the generic batched case, otherwise a batched
            # call with parallel=0 yields a 0-d array and the length check below fails. The
            # ndim guard keeps the comparison unambiguous for array-like parallel input.
            if np.ndim(parallel) == 0 and parallel == 0:
                # Default input parallel=0
                if batched:
                    assert num_parallel == self.parallel_interactions
                    parallel = np.asarray(list(range(num_parallel)))
                else:
                    parallel = np.asarray([parallel])

            elif batched:
                # Batched input
                parallel = np.asarray(parallel)

            else:
                # Expand input if not batched
                parallel = np.asarray([parallel])

            # Check number of inputs
            if parallel.shape[0] != num_parallel:
                raise TensorforceError.value(name='Agent.act',
                                             argument='len(parallel)',
                                             value=len(parallel),
                                             hint='!= len(states)')

        # If not independent, check whether previous timesteps were completed
        if not independent:
            if not self.timestep_completed[parallel].all():
                raise TensorforceError(
                    message=
                    "Calling agent.act must be preceded by agent.observe for training, or "
                    "agent.act argument 'independent' must be passed as True.")
            self.timestep_completed[parallel] = False

        # Recording is active only for non-independent calls once the start episode is reached
        is_recording = (
            self.recorder is not None and not independent and
            self.num_episodes >= self.recorder.get('start', 0))

        # Buffer inputs for recording
        if is_recording:
            for n in range(num_parallel):
                for name in self.states_spec:
                    self.buffers['states'][name][parallel[n]].append(
                        states[name][n])

        # fn_act()
        if self._is_agent:
            actions, internals = self.fn_act(
                states=states,
                internals=internals,
                parallel=parallel,
                independent=independent,
                deterministic=deterministic,
                is_internals_none=is_internals_none,
                num_parallel=num_parallel)
        else:
            if batched:
                # Explicit error instead of `assert False`, which is stripped under -O and
                # would then fall through to an undefined `actions`
                raise TensorforceError(
                    message="Agent.act does not support batched states if the act function "
                            "is not an agent.")
            # Strip the batch axis and call the act function with plain keyword arguments
            states = states.fmap(function=(
                lambda x: x[0].item() if x.shape == (1, ) else x[0]))
            actions = self.fn_act(states.to_kwargs())
            # Re-add the batch axis so that the output handling below is uniform
            if self.actions_spec.is_singleton():
                actions = ArrayDict(singleton=np.asarray([actions]))
            else:
                actions = ArrayDict(actions)
                actions = actions.fmap(
                    function=(lambda x: np.asarray([x])))

        # Buffer outputs for recording
        if is_recording:
            for n in range(num_parallel):
                for name in self.actions_spec:
                    self.buffers['actions'][name][parallel[n]].append(
                        actions[name][n])

        # Unbatch actions
        if batched:
            # If inputs were batched, turn dict of lists into list of dicts
            unbatch = (lambda x: x.item() if x.shape == () else x)
            # TODO: recursive
            if self.actions_spec.is_singleton():
                actions = actions.singleton()
                if is_iter_of_dicts:
                    actions = [
                        unbatch(actions[n]) for n in range(num_parallel)
                    ]
            else:
                if is_iter_of_dicts:
                    actions = [
                        OrderedDict(((name, unbatch(x[n]))
                                     for name, x in actions.items()))
                        for n in range(num_parallel)
                    ]
                else:
                    actions = OrderedDict(actions.items())

            if independent and not is_internals_none:
                if is_iter_of_dicts:
                    # TODO: recursive
                    internals = [
                        OrderedDict(((name, unbatch(x[n]))
                                     for name, x in internals.items()))
                        for n in range(num_parallel)
                    ]
                else:
                    internals = OrderedDict(internals.items())

        else:
            # If inputs were not batched, unbatch outputs
            unbatch = (lambda x: x.item() if x.shape == (1, ) else x[0])
            if self.actions_spec.is_singleton():
                actions = unbatch(actions.singleton())
            else:
                actions = actions.fmap(function=unbatch, cls=OrderedDict)
            if independent and not is_internals_none:
                internals = internals.fmap(function=unbatch, cls=OrderedDict)

        if independent and not is_internals_none:
            return actions, internals
        else:
            return actions
Beispiel #3
0
    def act(
        self, states, internals=None, parallel=0, independent=False,
        # Deprecated
        deterministic=None, evaluation=None
    ):
        """
        Returns action(s) for the given state(s), needs to be followed by `observe()` unless
        independent mode.

        Args:
            states (dict[state] | iter[dict[state]]): Dictionary containing state(s) to be acted on
                (<span style="color:#C00000"><b>required</b></span>).
            internals (dict[internal] | iter[dict[internal]]): Dictionary containing current
                internal agent state(s), either given by `initial_internals()` at the beginning of
                an episode or as return value of the preceding `act()` call
                (<span style="color:#C00000"><b>required</b></span> if independent mode and agent
                has internal states).
            parallel (int | iter[int]): Parallel execution index
                (<span style="color:#00C000"><b>default</b></span>: 0).
            independent (bool): Whether act is not part of the main agent-environment interaction,
                and this call is thus not followed by observe
                (<span style="color:#00C000"><b>default</b></span>: false).
            deterministic: Deprecated, passing any value other than None raises an error; use
                `independent` instead.
            evaluation: Deprecated, passing any value other than None raises an error; use
                `independent` instead.

        Returns:
            dict[action] | iter[dict[action]], dict[internal] | iter[dict[internal]] if `internals`
            argument given: Dictionary containing action(s), dictionary containing next internal
            agent state(s) if independent mode.

        Raises:
            TensorforceError: If a deprecated argument is passed, on invalid argument
                combinations or types, on a mismatch between the number of internals/parallel
                entries and the number of states, or if a preceding non-independent `act()` was
                not followed by `observe()`.
        """
        if deterministic is not None:
            raise TensorforceError.deprecated(
                name='Agent.act', argument='deterministic', replacement='independent'
            )
        if evaluation is not None:
            raise TensorforceError.deprecated(
                name='Agent.act', argument='evaluation', replacement='independent'
            )

        # Independent and internals
        # (bound unconditionally so that later checks never depend on short-circuit evaluation)
        is_internals_none = (internals is None)
        if independent:
            if parallel != 0:
                raise TensorforceError.invalid(
                    name='Agent.act', argument='parallel', condition='independent is true'
                )
            if is_internals_none and len(self.internals_spec) > 0:
                raise TensorforceError.required(
                    name='Agent.act', argument='internals', condition='independent is true'
                )
        else:
            if not is_internals_none:
                raise TensorforceError.invalid(
                    name='Agent.act', argument='internals', condition='independent is false'
                )

        # Process states input and infer batching structure
        states, batched, num_parallel, is_iter_of_dicts, input_type = self._process_states_input(
            states=states, function_name='Agent.act'
        )

        if independent:
            # Independent mode: handle internals argument (nothing to do for internals=None)

            if not is_internals_none:
                if is_iter_of_dicts:
                    # Input structure iter[dict[internal]]
                    if not isinstance(internals, (tuple, list)):
                        raise TensorforceError.type(
                            name='Agent.act', argument='internals', dtype=type(internals),
                            hint='is not tuple/list'
                        )
                    internals = [ArrayDict(internal) for internal in internals]
                    internals = internals[0].fmap(
                        function=(lambda *xs: np.stack(xs, axis=0)), zip_values=internals[1:]
                    )

                else:
                    # Input structure dict[iter[internal]]
                    if not isinstance(internals, dict):
                        raise TensorforceError.type(
                            name='Agent.act', argument='internals', dtype=type(internals),
                            hint='is not dict'
                        )
                    internals = ArrayDict(internals)

                # Expand inputs if not batched
                if not batched:
                    internals = internals.fmap(function=(lambda x: np.expand_dims(x, axis=0)))

                # Check number of inputs
                for name, internal in internals.items():
                    if internal.shape[0] != num_parallel:
                        raise TensorforceError.value(
                            name='Agent.act', argument='len(internals[{}])'.format(name),
                            value=internal.shape[0], hint='!= len(states)'
                        )

        else:
            # Non-independent mode: handle parallel input

            # The ndim guard keeps the default check unambiguous for array-like parallel input
            if np.ndim(parallel) == 0 and parallel == 0:
                # Default input parallel=0
                if batched:
                    assert num_parallel == self.parallel_interactions
                    parallel = np.asarray(list(range(num_parallel)))
                else:
                    parallel = np.asarray([parallel])

            elif batched:
                # Batched input
                parallel = np.asarray(parallel)

            else:
                # Expand input if not batched
                parallel = np.asarray([parallel])

            # Check number of inputs
            if parallel.shape[0] != num_parallel:
                raise TensorforceError.value(
                    name='Agent.act', argument='len(parallel)', value=len(parallel),
                    hint='!= len(states)'
                )

        def build_auxiliary(name, spec):
            # Per-action auxiliary inputs; currently only the mask for bounded int actions
            auxiliary = ArrayDict()
            if self.config.enable_int_action_masking and spec.type == 'int' and \
                    spec.num_values is not None:
                # Mask, either part of states or default all true
                auxiliary['mask'] = states.pop(name + '_mask', np.ones(
                    shape=(num_parallel,) + spec.shape + (spec.num_values,), dtype=spec.np_type()
                ))
            return auxiliary

        # Note: this pops any '<action>_mask' entries out of states as a side effect
        auxiliaries = self.actions_spec.fmap(
            function=build_auxiliary, cls=ArrayDict, with_names=True
        )

        # If not independent, check whether previous timesteps were completed
        if not independent:
            if not self.timestep_completed[parallel].all():
                raise TensorforceError(
                    message="Calling agent.act must be preceded by agent.observe."
                )
            self.timestep_completed[parallel] = False

        # Recording is active only for non-independent calls once the start episode is reached
        is_recording = (
            self.recorder_spec is not None and not independent and
            self.episodes >= self.recorder_spec.get('start', 0)
        )

        # Buffer inputs for recording
        if is_recording:
            for n in range(num_parallel):
                for name in self.states_spec:
                    self.buffers['states'][name][parallel[n]].append(states[name][n])
                for name in self.auxiliaries_spec:
                    self.buffers['auxiliaries'][name][parallel[n]].append(auxiliaries[name][n])

        # Inputs to tensors
        states = self.states_spec.to_tensor(value=states, batched=True)
        if independent and not is_internals_none:
            internals = self.internals_spec.to_tensor(value=internals, batched=True)
        auxiliaries = self.auxiliaries_spec.to_tensor(value=auxiliaries, batched=True)
        parallel_tensor = self.parallel_spec.to_tensor(value=parallel, batched=True)

        # Model.act()
        if not independent:
            actions, timesteps = self.model.act(
                states=states, auxiliaries=auxiliaries, parallel=parallel_tensor
            )
            self.timesteps = timesteps.numpy().item()

        elif len(self.internals_spec) > 0:
            if len(self.auxiliaries_spec) > 0:
                actions_internals = self.model.independent_act(
                    states=states, internals=internals, auxiliaries=auxiliaries
                )
            else:
                assert len(auxiliaries) == 0
                actions_internals = self.model.independent_act(states=states, internals=internals)
            actions_internals = TensorDict(actions_internals)
            actions = actions_internals['actions']
            internals = actions_internals['internals']

        else:
            if len(self.auxiliaries_spec) > 0:
                actions = self.model.independent_act(states=states, auxiliaries=auxiliaries)
            else:
                assert len(auxiliaries) == 0
                actions = self.model.independent_act(states=states)
            actions = TensorDict(actions)

        # Outputs from tensors
        # NOTE(review): only actions are converted back via from_tensor here; internals are
        # passed to the unbatching below as returned by the model -- confirm this is intended.
        actions = self.actions_spec.from_tensor(tensor=actions, batched=True)

        # Buffer outputs for recording
        if is_recording:
            for n in range(num_parallel):
                for name in self.actions_spec:
                    self.buffers['actions'][name][parallel[n]].append(actions[name][n])

        # Unbatch actions
        if batched:
            # If inputs were batched, turn dict of lists into list of dicts
            unbatch = (lambda x: x.item() if x.shape == () else x)
            if self.single_action:
                actions = input_type(unbatch(actions['action'][n]) for n in range(num_parallel))
            else:
                # TODO: recursive
                actions = input_type(
                    OrderedDict(((name, unbatch(x[n])) for name, x in actions.items()))
                    for n in range(num_parallel)
                )

            if independent and not is_internals_none and is_iter_of_dicts:
                # TODO: recursive
                internals = input_type(
                    OrderedDict(((name, unbatch(x[n])) for name, x in internals.items()))
                    for n in range(num_parallel)
                )

        else:
            # If inputs were not batched, unbatch outputs
            unbatch = (lambda x: x.item() if x.shape == (1,) else x[0])
            if self.single_action:
                actions = unbatch(actions['action'])
            else:
                actions = actions.fmap(function=unbatch, cls=OrderedDict)
            if independent and not is_internals_none:
                internals = internals.fmap(function=unbatch, cls=OrderedDict)

        if self.model.saver is not None:
            self.model.save()

        if independent and not is_internals_none:
            return actions, internals
        else:
            return actions