Example #1
    # Nested helper from Agent.experience(); 'self', 'states' and 'num_instances'
    # come from the enclosing scope.
    def function(name, spec):
        auxiliary = ArrayDict()
        if self.config.enable_int_action_masking and spec.type == 'int' and \
                spec.num_values is not None:
            if name is None:
                name = 'action'
            # Mask, either part of states or default all true
            auxiliary['mask'] = states.pop(
                name + '_mask',
                np.ones(shape=(num_instances, ) + spec.shape +
                        (spec.num_values, ),
                        dtype=spec.np_type()))
        return auxiliary
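
As a standalone illustration, a minimal sketch of the default all-true mask this helper builds when no '<name>_mask' entry is present in states (the spec values below are hypothetical):

import numpy as np

# Hypothetical int action spec: shape (2,), 5 possible values per entry.
num_instances = 3   # batch size
spec_shape = (2,)
num_values = 5

# Default mask allowing every action, mirroring the helper's np.ones(...) call.
mask = np.ones(shape=(num_instances,) + spec_shape + (num_values,), dtype=bool)
print(mask.shape)  # (3, 2, 5)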
Example #2
    def __init__(self, *, states, actions, l2_regularization,
                 parallel_interactions, config, saver, summarizer, tracking):
        # Initialize global registries
        setattr(Module, '_MODULE_STACK', list())
        setattr(Layer, '_REGISTERED_LAYERS', OrderedDict())

        # Tensorforce config
        self._config = config

        Module._MODULE_STACK.clear()
        Module._MODULE_STACK.append(self.__class__)

        super().__init__(device=self._config.device,
                         l2_regularization=l2_regularization,
                         name=self._config.name)

        assert self.l2_regularization is not None
        self.is_trainable = True
        self.is_saved = True

        # Keep track of tensor names to check for collisions
        self.value_names = set()

        # Terminal specification
        self.terminal_spec = TensorSpec(type='int', shape=(), num_values=3)
        self.value_names.add('terminal')

        # Reward specification
        self.reward_spec = TensorSpec(type='float', shape=())
        self.value_names.add('reward')

        # Parallel specification
        self.parallel_spec = TensorSpec(type='int',
                                        shape=(),
                                        num_values=parallel_interactions)
        self.value_names.add('parallel')

        # Deterministic specification
        self.deterministic_spec = TensorSpec(type='bool', shape=())
        self.value_names.add('deterministic')

        # State space specification
        self.states_spec = states
        for name, spec in self.states_spec.items():
            name = ('' if name is None else ' ' + name)
            if spec.type != 'float':
                continue
            elif spec.min_value is None:
                logging.warning(
                    "No min_value bound specified for state{}.".format(name))
            elif np.isinf(spec.min_value).any():
                logging.warning(
                    "Infinite min_value bound for state{}.".format(name))
            elif spec.max_value is None:
                logging.warning(
                    "No max_value bound specified for state{}.".format(name))
            elif np.isinf(spec.max_value).any():
                logging.warning(
                    "Infinite max_value bound for state{}.".format(name))

        # Check for name collisions
        if self.states_spec.is_singleton():
            if 'state' in self.value_names:
                raise TensorforceError.exists(name='value name', value='state')
            self.value_names.add('state')
        else:
            for name in self.states_spec:
                if name in self.value_names:
                    raise TensorforceError.exists(name='value name',
                                                  value=name)
                self.value_names.add(name)

        # Action space specification
        self.actions_spec = actions
        for name, spec in self.actions_spec.items():
            name = ('' if name is None else ' ' + name)
            if spec.type != 'float':
                continue
            elif spec.min_value is None:
                logging.warning(
                    "No min_value specified for action{}.".format(name))
            elif np.isinf(spec.min_value).any():
                raise TensorforceError(
                    "Infinite min_value bound for action{}.".format(name))
            elif spec.max_value is None:
                logging.warning(
                    "No max_value specified for action{}.".format(name))
            elif np.isinf(spec.max_value).any():
                raise TensorforceError(
                    "Infinite max_value bound for action{}.".format(name))

        # Check for name collisions
        if self.actions_spec.is_singleton():
            if 'action' in self.value_names:
                raise TensorforceError.exists(name='value name', value='action')
            self.value_names.add('action')
        else:
            for name in self.actions_spec:
                if name in self.value_names:
                    raise TensorforceError.exists(name='value name',
                                                  value=name)
                self.value_names.add(name)

        # Internal state space specification
        self.internals_spec = TensorsSpec()
        self.initial_internals = ArrayDict()

        # Auxiliary value space specification
        self.auxiliaries_spec = TensorsSpec()
        for name, spec in self.actions_spec.items():
            if self.config.enable_int_action_masking and spec.type == 'int' and \
                    spec.num_values is not None:
                self.auxiliaries_spec[name] = TensorsSpec(mask=TensorSpec(
                    type='bool', shape=(spec.shape + (spec.num_values, ))))

        # Parallel interactions
        assert isinstance(parallel_interactions,
                          int) and parallel_interactions >= 1
        self.parallel_interactions = parallel_interactions

        # Saver
        if isinstance(saver, str):
            saver = dict(directory=saver)
        if saver is None:
            self.saver = None
        elif not all(key in ('directory', 'filename', 'frequency', 'load',
                             'max_checkpoints', 'max_hour_frequency', 'unit')
                     for key in saver):
            raise TensorforceError.value(
                name='agent',
                argument='saver',
                value=list(saver),
                hint=
                'not from {directory,filename,frequency,load,max_checkpoints,'
                'max_hour_frequency,unit}')
        elif 'directory' not in saver:
            raise TensorforceError.required(name='agent',
                                            argument='saver[directory]')
        else:
            self.saver = dict(saver)

        # Summarizer
        if isinstance(summarizer, str):
            summarizer = dict(directory=summarizer)
        if summarizer is None:
            self.summarizer = None
            self.summaries = frozenset()
        elif not all(key in ('directory', 'filename', 'flush', 'max_summaries',
                             'summaries') for key in summarizer):
            raise TensorforceError.value(
                name='agent',
                argument='summarizer',
                value=list(summarizer),
                hint=
                'not from {directory,filename,flush,max_summaries,summaries}')
        elif 'directory' not in summarizer:
            raise TensorforceError.required(name='agent',
                                            argument='summarizer[directory]')
        else:
            self.summarizer = dict(summarizer)

            # Summary labels
            summaries = summarizer.get('summaries')
            if summaries is None or summaries == 'all':
                self.summaries = 'all'
            elif not all(isinstance(label, str) for label in summaries):
                raise TensorforceError.value(name='agent',
                                             argument='summarizer[summaries]',
                                             value=summaries)
            else:
                self.summaries = frozenset(summaries)

        # Tracking
        if tracking is None:
            self.tracking = frozenset()
        elif tracking == 'all':
            self.tracking = 'all'
        else:
            self.tracking = frozenset(tracking)
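
For reference, a sketch of saver and summarizer arguments that would pass the validation above (directory names are placeholders):

# Accepted: a dict restricted to the allowed keys, with 'directory' present.
saver = dict(directory='checkpoints', frequency=100, max_checkpoints=5)

# 'summaries' may be omitted, 'all', or a list of label strings.
summarizer = dict(directory='summaries', flush=10, summaries='all')

# A plain string is shorthand for dict(directory=...).
saver = 'checkpoints'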
Example #3
    def _process_states_input(self, states, function_name):
        if self.states_spec.is_singleton() and not isinstance(
                states, dict) and not (util.is_iterable(x=states)
                                       and isinstance(states[0], dict)):
            # Single state
            states = np.asarray(states)
            if states.shape == self.states_spec.value().shape:
                # Single state is not batched
                states = ArrayDict(singleton=np.expand_dims(states, axis=0))
                batched = False
                num_instances = 1
                is_iter_of_dicts = None

            else:
                # Single state is batched, iter[state]
                assert states.shape[1:] == self.states_spec.value().shape
                assert type(states) in (tuple, list, np.ndarray)
                num_instances = states.shape[0]
                states = ArrayDict(singleton=states)
                batched = True
                is_iter_of_dicts = True  # Default

        elif util.is_iterable(x=states):
            # States is batched, iter[dict[state]]
            batched = True
            num_instances = len(states)
            is_iter_of_dicts = True
            assert type(states) in (tuple, list)
            if num_instances == 0:
                raise TensorforceError.value(name=function_name,
                                             argument='len(states)',
                                             value=num_instances,
                                             hint='= 0')
            for n, state in enumerate(states):
                if not isinstance(state, dict):
                    raise TensorforceError.type(
                        name=function_name,
                        argument='states[{}]'.format(n),
                        dtype=type(state),
                        hint='is not dict')
            # Turn iter of dicts into dict of arrays
            # (Doesn't use self.states_spec since states also contains auxiliaries)
            states = [ArrayDict(state) for state in states]
            states = states[0].fmap(
                function=(lambda *xs: np.stack(xs, axis=0)),
                zip_values=states[1:])

        elif isinstance(states, dict):
            # States is dict, turn into arrays
            states = ArrayDict(states)
            name, spec = self.states_spec.item()
            if name is None:
                name = 'state'

            if states[name].shape == spec.shape:
                # States is not batched, dict[state]
                states = states.fmap(
                    function=(lambda state: np.expand_dims(state, axis=0)))
                batched = False
                num_instances = 1
                is_iter_of_dicts = None

            else:
                # States is batched, dict[iter[state]]
                assert states[name].shape[1:] == spec.shape
                assert type(states[name]) in (tuple, list, np.ndarray)
                batched = True
                num_instances = states[name].shape[0]
                is_iter_of_dicts = False
                if num_instances == 0:
                    raise TensorforceError.value(name=function_name,
                                                 argument='len(states)',
                                                 value=num_instances,
                                                 hint='= 0')

        else:
            raise TensorforceError.type(name=function_name,
                                        argument='states',
                                        dtype=type(states),
                                        hint='is not array/tuple/list/dict')

        # Check number of inputs
        if any(state.shape[0] != num_instances for state in states.values()):
            raise TensorforceError.value(
                name=function_name,
                argument='len(states)',
                value=[state.shape[0] for state in states.values()],
                hint='inconsistent')

        return states, batched, num_instances, is_iter_of_dicts
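
The iter[dict[state]] branch above folds a list of per-instance dicts into a single dict of stacked arrays; the same transformation with plain numpy (state name hypothetical):

import numpy as np

states = [dict(obs=np.zeros(4)), dict(obs=np.ones(4))]  # iter[dict[state]]
stacked = {name: np.stack([s[name] for s in states], axis=0)
           for name in states[0]}
print(stacked['obs'].shape)  # (2, 4) -- batched dict[array[state]]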
Example #4
    def act(self,
            states,
            internals=None,
            parallel=0,
            independent=False,
            deterministic=True,
            **kwargs):
        # Independent and internals
        is_internals_none = (internals is None)
        if independent:
            if parallel != 0:
                raise TensorforceError.invalid(name='Agent.act',
                                               argument='parallel',
                                               condition='independent is true')
            if is_internals_none and len(self.internals_spec) > 0:
                raise TensorforceError.required(
                    name='Agent.act',
                    argument='internals',
                    condition='independent is true')
        else:
            if not is_internals_none:
                raise TensorforceError.invalid(
                    name='Agent.act',
                    argument='internals',
                    condition='independent is false')

        # Process states input and infer batching structure
        states, batched, num_parallel, is_iter_of_dicts = self._process_states_input(
            states=states, function_name='Agent.act')

        if independent:
            # Independent mode: handle internals argument
            if is_internals_none:
                # Default input internals=None
                pass

            elif is_iter_of_dicts or isinstance(internals, (tuple, list)):
                # Input structure iter[dict[internal]]
                if not isinstance(internals, (tuple, list)):
                    raise TensorforceError.type(name='Agent.act',
                                                argument='internals',
                                                dtype=type(internals),
                                                hint='is not tuple/list')
                internals = [ArrayDict(internal) for internal in internals]
                internals = internals[0].fmap(
                    function=(lambda *xs: np.stack(xs, axis=0)),
                    zip_values=internals[1:])

            else:
                # Input structure dict[iter[internal]]
                if not isinstance(internals, dict):
                    raise TensorforceError.type(name='Agent.act',
                                                argument='internals',
                                                dtype=type(internals),
                                                hint='is not dict')
                internals = ArrayDict(internals)

            if not is_internals_none:
                # Expand inputs if not batched
                if not batched:
                    internals = internals.fmap(
                        function=(lambda x: np.expand_dims(x, axis=0)))

                # Check number of inputs
                for name, internal in internals.items():
                    if internal.shape[0] != num_parallel:
                        raise TensorforceError.value(
                            name='Agent.act',
                            argument='len(internals[{}])'.format(name),
                            value=internal.shape[0],
                            hint='!= len(states)')

        else:
            # Non-independent mode: handle parallel input
            if parallel == 0:
                # Default input parallel=0
                if batched:
                    assert num_parallel == self.parallel_interactions
                    parallel = np.asarray(list(range(num_parallel)))
                else:
                    parallel = np.asarray([parallel])

            elif batched:
                # Batched input
                parallel = np.asarray(parallel)

            else:
                # Expand input if not batched
                parallel = np.asarray([parallel])

            # Check number of inputs
            if parallel.shape[0] != num_parallel:
                raise TensorforceError.value(name='Agent.act',
                                             argument='len(parallel)',
                                             value=len(parallel),
                                             hint='!= len(states)')

        # If not independent, check whether previous timesteps were completed
        if not independent:
            if not self.timestep_completed[parallel].all():
                raise TensorforceError(
                    message=
                    "Calling agent.act must be preceded by agent.observe for training, or "
                    "agent.act argument 'independent' must be passed as True.")
            self.timestep_completed[parallel] = False

        # Buffer inputs for recording
        if self.recorder is not None and not independent and \
                self.num_episodes >= self.recorder.get('start', 0):
            for n in range(num_parallel):
                for name in self.states_spec:
                    self.buffers['states'][name][parallel[n]].append(
                        states[name][n])

        # fn_act()
        if self._is_agent:
            actions, internals = self.fn_act(
                states=states,
                internals=internals,
                parallel=parallel,
                independent=independent,
                deterministic=deterministic,
                is_internals_none=is_internals_none,
                num_parallel=num_parallel)
        else:
            if batched:
                # Batched input is not supported when acting via a plain callable
                assert False
            else:
                states = states.fmap(function=(
                    lambda x: x[0].item() if x.shape == (1, ) else x[0]))
                actions = self.fn_act(states.to_kwargs())
                if self.actions_spec.is_singleton():
                    actions = ArrayDict(singleton=np.asarray([actions]))
                else:
                    actions = ArrayDict(actions)
                    actions = actions.fmap(
                        function=(lambda x: np.asarray([x])))

        # Buffer outputs for recording
        if self.recorder is not None and not independent and \
                self.num_episodes >= self.recorder.get('start', 0):
            for n in range(num_parallel):
                for name in self.actions_spec:
                    self.buffers['actions'][name][parallel[n]].append(
                        actions[name][n])

        # Unbatch actions
        if batched:
            # If inputs were batched, turn dict of lists into list of dicts
            function = (lambda x: x.item() if x.shape == () else x)
            # TODO: recursive
            if self.actions_spec.is_singleton():
                actions = actions.singleton()
                if is_iter_of_dicts:
                    actions = [
                        function(actions[n]) for n in range(num_parallel)
                    ]
            else:
                if is_iter_of_dicts:
                    actions = [
                        OrderedDict(((name, function(x[n]))
                                     for name, x in actions.items()))
                        for n in range(num_parallel)
                    ]
                else:
                    actions = OrderedDict(actions.items())

            if independent and not is_internals_none:
                if is_iter_of_dicts:
                    # TODO: recursive
                    internals = [
                        OrderedDict(((name, function(x[n]))
                                     for name, x in internals.items()))
                        for n in range(num_parallel)
                    ]
                else:
                    internals = OrderedDict(internals.items())

        else:
            # If inputs were not batched, unbatch outputs
            function = (lambda x: x.item() if x.shape == (1, ) else x[0])
            if self.actions_spec.is_singleton():
                actions = function(actions.singleton())
            else:
                actions = actions.fmap(function=function, cls=OrderedDict)
            if independent and not is_internals_none:
                internals = internals.fmap(function=function, cls=OrderedDict)

        if independent and not is_internals_none:
            return actions, internals
        else:
            return actions
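
In non-independent mode, each act() call must be matched by an observe() call; a sketch of the standard act-observe loop, assuming agent and environment objects created elsewhere:

# Standard act-observe interaction (agent/environment setup assumed).
states = environment.reset()
terminal = False
while not terminal:
    actions = agent.act(states=states)
    states, terminal, reward = environment.execute(actions=actions)
    agent.observe(terminal=terminal, reward=reward)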
Example #5
    def pretrain(self,
                 directory,
                 num_iterations,
                 num_traces=1,
                 num_updates=1,
                 extension='.npz'):
        """
        Simple pretraining approach combining `experience()` and `update()`, akin to behavioral
        cloning, using experience traces obtained e.g. by recording agent interactions
        ([see documentation](https://tensorforce.readthedocs.io/en/latest/basics/features.html#record-pretrain)).

        For the given number of iterations, load the given number of trace files (each of which
        contains `recorder[frequency]` episodes), feed the experience to the agent's internal memory, and
        subsequently trigger the given number of updates (which will use the experience in the
        internal memory, fed in this or potentially previous iterations).

        See the [record-and-pretrain script](https://github.com/tensorforce/tensorforce/blob/master/examples/record_and_pretrain.py)
        for an example application.

        Args:
            directory (path): Directory with experience traces, e.g. obtained via recorder; episode
                length has to be consistent with agent configuration
                (<span style="color:#C00000"><b>required</b></span>).
            num_iterations (int > 0): Number of iterations consisting of loading new traces and
                performing multiple updates
                (<span style="color:#C00000"><b>required</b></span>).
            num_traces (int > 0): Number of traces to load per iteration; has to at least satisfy
                the update batch size
                (<span style="color:#00C000"><b>default</b></span>: 1).
            num_updates (int > 0): Number of updates per iteration
                (<span style="color:#00C000"><b>default</b></span>: 1).
            extension (str): Traces file extension to filter the given directory for
                (<span style="color:#00C000"><b>default</b></span>: ".npz").
        """
        if not os.path.isdir(directory):
            raise TensorforceError.value(name='agent.pretrain',
                                         argument='directory',
                                         value=directory)
        files = sorted(
            os.path.join(directory, f) for f in os.listdir(directory)
            if os.path.isfile(os.path.join(directory, f))
            and os.path.splitext(f)[1] == extension)
        indices = list(range(len(files)))

        for _ in range(num_iterations):
            shuffle(indices)
            if num_traces is None:
                selection = indices
            else:
                selection = indices[:num_traces]

            batch = None
            for index in selection:
                trace = ArrayDict(np.load(files[index]))
                if batch is None:
                    batch = trace
                else:
                    batch = batch.fmap(
                        function=(lambda x, y: np.concatenate([x, y], axis=0)),
                        zip_values=(trace, ))

            for name, value in batch.pop('auxiliaries', dict()).items():
                assert name.endswith('/mask')
                batch['states'][name[:-5] + '_mask'] = value

            self.experience(**batch.to_kwargs())
            for _ in range(num_updates):
                self.update()
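
A usage sketch, assuming traces were previously written by an agent configured with the recorder argument (directory and counts are placeholders):

# Load one trace file per iteration and trigger ten updates each time.
agent.pretrain(directory='traces', num_iterations=30, num_traces=1, num_updates=10)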
Example #6
    def experience(self, states, actions, terminal, reward, internals=None):
        """
        Feed experience traces.

        See the [act-experience-update script](https://github.com/tensorforce/tensorforce/blob/master/examples/act_experience_update_interface.py)
        for an example application as part of the act-experience-update interface, which is an
        alternative to the act-observe interaction pattern.

        Args:
            states (dict[array[state]]): Dictionary containing arrays of states
                (<span style="color:#C00000"><b>required</b></span>).
            actions (dict[array[action]]): Dictionary containing arrays of actions
                (<span style="color:#C00000"><b>required</b></span>).
            terminal (array[bool]): Array of terminals
                (<span style="color:#C00000"><b>required</b></span>).
            reward (array[float]): Array of rewards
                (<span style="color:#C00000"><b>required</b></span>).
            internals (dict[array[internal]]): Dictionary containing arrays of internal agent states
                (<span style="color:#C00000"><b>required</b></span> if agent has internal states).
        """
        if not all(len(buffer) == 0 for buffer in self.terminal_buffer):
            raise TensorforceError(
                message="Calling agent.experience is not possible mid-episode."
            )

        # Process states input and infer batching structure
        states, batched, num_instances, is_iter_of_dicts = self._process_states_input(
            states=states, function_name='Agent.experience')

        if is_iter_of_dicts:
            # Input structure iter[dict[input]]

            # Internals
            if internals is None:
                internals = ArrayDict(self.initial_internals())
                internals = internals.fmap(function=(lambda x: np.repeat(
                    np.expand_dims(x, axis=0), repeats=num_instances, axis=0)))
            elif not isinstance(internals, (tuple, list)):
                raise TensorforceError.type(name='Agent.experience',
                                            argument='internals',
                                            dtype=type(internals),
                                            hint='is not tuple/list')
            else:
                internals = [ArrayDict(internal) for internal in internals]
                internals = internals[0].fmap(
                    function=(lambda *xs: np.stack(xs, axis=0)),
                    zip_values=internals[1:])

            # Actions
            if isinstance(actions, np.ndarray):
                actions = ArrayDict(singleton=actions)
            elif not isinstance(actions, (tuple, list)):
                raise TensorforceError.type(name='Agent.experience',
                                            argument='actions',
                                            dtype=type(actions),
                                            hint='is not tuple/list')
            elif not isinstance(actions[0], dict):
                actions = ArrayDict(singleton=np.asarray(actions))
            else:
                actions = [ArrayDict(action) for action in actions]
                actions = actions[0].fmap(
                    function=(lambda *xs: np.stack(xs, axis=0)),
                    zip_values=actions[1:])

        else:
            # Input structure dict[iter[input]]

            # Internals
            if internals is None:
                internals = ArrayDict(self.initial_internals())
                internals = internals.fmap(function=(lambda x: np.tile(
                    np.expand_dims(x, axis=0), reps=(num_instances, ))))
            elif not isinstance(internals, dict):
                raise TensorforceError.type(name='Agent.experience',
                                            argument='internals',
                                            dtype=type(internals),
                                            hint='is not dict')
            else:
                internals = ArrayDict(internals)

            # Actions
            if isinstance(actions, np.ndarray):
                actions = ArrayDict(singleton=actions)
            elif not isinstance(actions, dict):
                raise TensorforceError.type(name='Agent.experience',
                                            argument='actions',
                                            dtype=type(actions),
                                            hint='is not dict')
            else:
                actions = ArrayDict(actions)

        # Expand inputs if not batched
        if not batched:
            internals = internals.fmap(
                function=(lambda x: np.expand_dims(x, axis=0)))
            actions = actions.fmap(
                function=(lambda x: np.expand_dims(x, axis=0)))
            terminal = np.asarray([terminal])
            reward = np.asarray([reward])
        else:
            terminal = np.asarray(terminal)
            reward = np.asarray(reward)

        # Check number of inputs
        for name, internal in internals.items():
            if internal.shape[0] != num_instances:
                raise TensorforceError.value(
                    name='Agent.experience',
                    argument='len(internals[{}])'.format(name),
                    value=internal.shape[0],
                    hint='!= len(states)')
        for name, action in actions.items():
            if action.shape[0] != num_instances:
                raise TensorforceError.value(
                    name='Agent.experience',
                    argument='len(actions[{}])'.format(name),
                    value=action.shape[0],
                    hint='!= len(states)')
        if terminal.shape[0] != num_instances:
            raise TensorforceError.value(name='Agent.experience',
                                         argument='len(terminal)',
                                         value=terminal.shape[0],
                                         hint='!= len(states)')
        if reward.shape[0] != num_instances:
            raise TensorforceError.value(name='Agent.experience',
                                         argument='len(reward)',
                                         value=reward.shape[0],
                                         hint='!= len(states)')

        def function(name, spec):
            auxiliary = ArrayDict()
            if self.config.enable_int_action_masking and spec.type == 'int' and \
                    spec.num_values is not None:
                if name is None:
                    name = 'action'
                # Mask, either part of states or default all true
                auxiliary['mask'] = states.pop(
                    name + '_mask',
                    np.ones(shape=(num_instances, ) + spec.shape +
                            (spec.num_values, ),
                            dtype=spec.np_type()))
            return auxiliary

        auxiliaries = self.actions_spec.fmap(function=function,
                                             cls=ArrayDict,
                                             with_names=True)
        if self.states_spec.is_singleton() and not states.is_singleton():
            states[None] = states.pop('state')

        # Convert terminal to int if necessary
        if terminal.dtype is util.np_dtype(dtype='bool'):
            zeros = np.zeros_like(terminal, dtype=util.np_dtype(dtype='int'))
            ones = np.ones_like(terminal, dtype=util.np_dtype(dtype='int'))
            terminal = np.where(terminal, ones, zeros)

        if terminal[-1] == 0:
            raise TensorforceError(
                message="Agent.experience() requires full episodes as input.")

        # Batch experiences split into episodes and at most size buffer_observe
        last = 0
        for index in range(1, len(terminal) + 1):
            if terminal[index - 1] == 0:
                continue

            function = (lambda x: x[last:index])
            states_batch = states.fmap(function=function)
            internals_batch = internals.fmap(function=function)
            auxiliaries_batch = auxiliaries.fmap(function=function)
            actions_batch = actions.fmap(function=function)
            terminal_batch = function(terminal)
            reward_batch = function(reward)
            last = index

            # Inputs to tensors
            states_batch = self.states_spec.to_tensor(
                value=states_batch,
                batched=True,
                name='Agent.experience states')
            internals_batch = self.internals_spec.to_tensor(
                value=internals_batch,
                batched=True,
                recover_empty=True,
                name='Agent.experience internals')
            auxiliaries_batch = self.auxiliaries_spec.to_tensor(
                value=auxiliaries_batch,
                batched=True,
                name='Agent.experience auxiliaries')
            actions_batch = self.actions_spec.to_tensor(
                value=actions_batch,
                batched=True,
                name='Agent.experience actions')
            terminal_batch = self.terminal_spec.to_tensor(
                value=terminal_batch,
                batched=True,
                name='Agent.experience terminal')
            reward_batch = self.reward_spec.to_tensor(
                value=reward_batch,
                batched=True,
                name='Agent.experience reward')

            # Model.experience()
            timesteps, episodes = self.model.experience(
                states=states_batch,
                internals=internals_batch,
                auxiliaries=auxiliaries_batch,
                actions=actions_batch,
                terminal=terminal_batch,
                reward=reward_batch)
            self.timesteps = timesteps.numpy().item()
            self.episodes = episodes.numpy().item()

        if self.model.saver is not None:
            self.model.save()
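
A sketch of feeding one full recorded episode through experience() followed by update(), assuming an agent with a single float state of shape (4,) and a single float action of shape (2,); note that terminal[-1] must be non-zero:

import numpy as np

agent.experience(
    states=np.random.randn(3, 4).astype(np.float32),   # array[state], 3 timesteps
    actions=np.random.randn(3, 2).astype(np.float32),  # array[action]
    terminal=np.asarray([0, 0, 1]),                    # episode must end with a terminal
    reward=np.asarray([0.0, 0.0, 1.0]),
)
agent.update()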
Example #7
    def internals_init(self):
        return ArrayDict()
Example #8
    def pretrain(self,
                 directory,
                 num_iterations,
                 num_traces=1,
                 num_updates=1,
                 extension='.npz'):
        """
        Naive pretraining approach combining `experience()` and `update()`, using experience
        traces obtained e.g. via the recorder argument.

        See the [record-and-pretrain script](https://github.com/tensorforce/tensorforce/blob/master/examples/record_and_pretrain.py)
        for an illustrative example.

        Args:
            directory (path): Directory with experience traces, e.g. obtained via recorder; episode
                length has to be consistent with agent configuration
                (<span style="color:#C00000"><b>required</b></span>).
            num_iterations (int > 0): Number of iterations consisting of loading new traces and
                performing multiple updates
                (<span style="color:#C00000"><b>required</b></span>).
            num_traces (int > 0): Number of traces to load per iteration; has to at least satisfy
                the update batch size
                (<span style="color:#00C000"><b>default</b></span>: 1).
            num_updates (int > 0): Number of updates per iteration
                (<span style="color:#00C000"><b>default</b></span>: 1).
            extension (str): Traces file extension to filter the given directory for
                (<span style="color:#00C000"><b>default</b></span>: ".npz").
        """
        if not os.path.isdir(directory):
            raise TensorforceError.value(name='agent.pretrain',
                                         argument='directory',
                                         value=directory)
        files = sorted(
            os.path.join(directory, f) for f in os.listdir(directory)
            if os.path.isfile(os.path.join(directory, f))
            and os.path.splitext(f)[1] == extension)
        indices = list(range(len(files)))

        for _ in range(num_iterations):
            shuffle(indices)
            if num_traces is None:
                selection = indices
            else:
                selection = indices[:num_traces]

            batch = None
            for index in selection:
                trace = ArrayDict(np.load(files[index]))
                if batch is None:
                    batch = trace
                else:
                    batch = batch.fmap(
                        function=(lambda x, y: np.concatenate([x, y], axis=0)),
                        zip_values=(trace, ))

            for name, value in batch.pop('auxiliaries', dict()).items():
                assert name.endswith('/mask')
                batch['states'][name[:-5] + '_mask'] = value

            self.experience(**batch.to_kwargs())
            for _ in range(num_updates):
                self.update()
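
The loop above remaps recorded auxiliary entries from 'name/mask' keys to the 'name_mask' state keys expected by experience(); the same renaming in isolation (keys and array hypothetical):

import numpy as np

auxiliaries = {'action/mask': np.ones((3, 5), dtype=bool)}
states_extra = {name[:-5] + '_mask': value
                for name, value in auxiliaries.items()}
# -> {'action_mask': array of shape (3, 5)}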
Example #9
    def act(
        self, states, internals=None, parallel=0, independent=False,
        # Deprecated
        deterministic=None, evaluation=None
    ):
        """
        Returns action(s) for the given state(s); needs to be followed by `observe()` unless in
        independent mode.

        Args:
            states (dict[state] | iter[dict[state]]): Dictionary containing state(s) to be acted on
                (<span style="color:#C00000"><b>required</b></span>).
            internals (dict[internal] | iter[dict[internal]]): Dictionary containing current
                internal agent state(s), either given by `initial_internals()` at the beginning of
                an episode or as return value of the preceding `act()` call
                (<span style="color:#C00000"><b>required</b></span> if independent mode and agent
                has internal states).
            parallel (int | iter[int]): Parallel execution index
                (<span style="color:#00C000"><b>default</b></span>: 0).
            independent (bool): Whether act is not part of the main agent-environment interaction,
                and this call is thus not followed by observe
                (<span style="color:#00C000"><b>default</b></span>: false).

        Returns:
            dict[action] | iter[dict[action]], plus dict[internal] | iter[dict[internal]] if the
            `internals` argument was given: Dictionary containing action(s), and a dictionary
            containing the next internal agent state(s) in independent mode.
        """
        if deterministic is not None:
            raise TensorforceError.deprecated(
                name='Agent.act', argument='deterministic', replacement='independent'
            )
        if evaluation is not None:
            raise TensorforceError.deprecated(
                name='Agent.act', argument='evaluation', replacement='independent'
            )

        # Independent and internals
        if independent:
            if parallel != 0:
                raise TensorforceError.invalid(
                    name='Agent.act', argument='parallel', condition='independent is true'
                )
            is_internals_none = (internals is None)
            if is_internals_none and len(self.internals_spec) > 0:
                raise TensorforceError.required(
                    name='Agent.act', argument='internals', condition='independent is true'
                )
        else:
            if internals is not None:
                raise TensorforceError.invalid(
                    name='Agent.act', argument='internals', condition='independent is false'
                )

        # Process states input and infer batching structure
        states, batched, num_parallel, is_iter_of_dicts, input_type = self._process_states_input(
            states=states, function_name='Agent.act'
        )

        if independent:
            # Independent mode: handle internals argument

            if is_internals_none:
                # Default input internals=None
                pass

            elif is_iter_of_dicts:
                # Input structure iter[dict[internal]]
                if not isinstance(internals, (tuple, list)):
                    raise TensorforceError.type(
                        name='Agent.act', argument='internals', dtype=type(internals),
                        hint='is not tuple/list'
                    )
                internals = [ArrayDict(internal) for internal in internals]
                internals = internals[0].fmap(
                    function=(lambda *xs: np.stack(xs, axis=0)), zip_values=internals[1:]
                )

            else:
                # Input structure dict[iter[internal]]
                if not isinstance(internals, dict):
                    raise TensorforceError.type(
                        name='Agent.act', argument='internals', dtype=type(internals),
                        hint='is not dict'
                    )
                internals = ArrayDict(internals)

            if not is_internals_none:
                # Expand inputs if not batched
                if not batched:
                    internals = internals.fmap(function=(lambda x: np.expand_dims(x, axis=0)))

                # Check number of inputs
                for name, internal in internals.items():
                    if internal.shape[0] != num_parallel:
                        raise TensorforceError.value(
                            name='Agent.act', argument='len(internals[{}])'.format(name),
                            value=internal.shape[0], hint='!= len(states)'
                        )

        else:
            # Non-independent mode: handle parallel input

            if parallel == 0:
                # Default input parallel=0
                if batched:
                    assert num_parallel == self.parallel_interactions
                    parallel = np.asarray(list(range(num_parallel)))
                else:
                    parallel = np.asarray([parallel])

            elif batched:
                # Batched input
                parallel = np.asarray(parallel)

            else:
                # Expand input if not batched
                parallel = np.asarray([parallel])

            # Check number of inputs
            if parallel.shape[0] != num_parallel:
                raise TensorforceError.value(
                    name='Agent.act', argument='len(parallel)', value=len(parallel),
                    hint='!= len(states)'
                )

        def function(name, spec):
            auxiliary = ArrayDict()
            if self.config.enable_int_action_masking and spec.type == 'int' and \
                    spec.num_values is not None:
                # Mask, either part of states or default all true
                auxiliary['mask'] = states.pop(name + '_mask', np.ones(
                    shape=(num_parallel,) + spec.shape + (spec.num_values,), dtype=spec.np_type()
                ))
            return auxiliary

        auxiliaries = self.actions_spec.fmap(function=function, cls=ArrayDict, with_names=True)

        # If not independent, check whether previous timesteps were completed
        if not independent:
            if not self.timestep_completed[parallel].all():
                raise TensorforceError(
                    message="Calling agent.act must be preceded by agent.observe."
                )
            self.timestep_completed[parallel] = False

        # Buffer inputs for recording
        if self.recorder_spec is not None and not independent and \
                self.episodes >= self.recorder_spec.get('start', 0):
            for n in range(num_parallel):
                for name in self.states_spec:
                    self.buffers['states'][name][parallel[n]].append(states[name][n])
                for name in self.auxiliaries_spec:
                    self.buffers['auxiliaries'][name][parallel[n]].append(auxiliaries[name][n])

        # Inputs to tensors
        states = self.states_spec.to_tensor(value=states, batched=True)
        if independent and not is_internals_none:
            internals = self.internals_spec.to_tensor(value=internals, batched=True)
        auxiliaries = self.auxiliaries_spec.to_tensor(value=auxiliaries, batched=True)
        parallel_tensor = self.parallel_spec.to_tensor(value=parallel, batched=True)

        # Model.act()
        if not independent:
            actions, timesteps = self.model.act(
                states=states, auxiliaries=auxiliaries, parallel=parallel_tensor
            )
            self.timesteps = timesteps.numpy().item()

        elif len(self.internals_spec) > 0:
            if len(self.auxiliaries_spec) > 0:
                actions_internals = self.model.independent_act(
                    states=states, internals=internals, auxiliaries=auxiliaries
                )
            else:
                assert len(auxiliaries) == 0
                actions_internals = self.model.independent_act(states=states, internals=internals)
            actions_internals = TensorDict(actions_internals)
            actions = actions_internals['actions']
            internals = actions_internals['internals']

        else:
            if len(self.auxiliaries_spec) > 0:
                actions = self.model.independent_act(states=states, auxiliaries=auxiliaries)
            else:
                assert len(auxiliaries) == 0
                actions = self.model.independent_act(states=states)
            actions = TensorDict(actions)

        # Outputs from tensors
        actions = self.actions_spec.from_tensor(tensor=actions, batched=True)

        # Buffer outputs for recording
        if self.recorder_spec is not None and not independent and \
                self.episodes >= self.recorder_spec.get('start', 0):
            for n in range(num_parallel):
                for name in self.actions_spec:
                    self.buffers['actions'][name][parallel[n]].append(actions[name][n])

        # Unbatch actions
        if batched:
            # If inputs were batched, turn list of dicts into dict of lists
            function = (lambda x: x.item() if x.shape == () else x)
            if self.single_action:
                actions = input_type(function(actions['action'][n]) for n in range(num_parallel))
            else:
                # TODO: recursive
                actions = input_type(
                    OrderedDict(((name, function(x[n])) for name, x in actions.items()))
                    for n in range(num_parallel)
                )

            if independent and not is_internals_none and is_iter_of_dicts:
                # TODO: recursive
                internals = input_type(
                    OrderedDict(((name, function(x[n])) for name, x in internals.items()))
                    for n in range(num_parallel)
                )

        else:
            # If inputs were not batched, unbatch outputs
            function = (lambda x: x.item() if x.shape == (1,) else x[0])
            if self.single_action:
                actions = function(actions['action'])
            else:
                actions = actions.fmap(function=function, cls=OrderedDict)
            if independent and not is_internals_none:
                internals = internals.fmap(function=function, cls=OrderedDict)

        if self.model.saver is not None:
            self.model.save()

        if independent and not is_internals_none:
            return actions, internals
        else:
            return actions
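
In independent mode with internal states, the returned internals must be threaded into the next call; a sketch of an evaluation loop under that assumption:

# Independent-mode evaluation: internals are passed in and returned each step.
internals = agent.initial_internals()
states = environment.reset()
terminal = False
while not terminal:
    actions, internals = agent.act(states=states, internals=internals, independent=True)
    states, terminal, reward = environment.execute(actions=actions)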