def act(
    self, states, internals=None, parallel=0, independent=False, deterministic=False,
    evaluation=False, query=None, **kwargs
):
    """
    Returns action(s) for the given state(s), needs to be followed by `observe(...)` unless
    independent mode set via `independent`/`evaluation`.

    Args:
        states (dict[state] | iter[dict[state]]): Dictionary containing state(s) to be acted
            on
            (<span style="color:#C00000"><b>required</b></span>).
        internals (dict[internal] | iter[dict[internal]]): Dictionary containing current
            internal agent state(s)
            (<span style="color:#C00000"><b>required</b></span> if independent mode).
        parallel (int | iter[int]): Parallel execution index
            (<span style="color:#00C000"><b>default</b></span>: 0).
        independent (bool): Whether act is not part of the main agent-environment
            interaction, and this call is thus not followed by observe
            (<span style="color:#00C000"><b>default</b></span>: false).
        deterministic (bool): If independent mode, whether to act deterministically, so no
            exploration and sampling
            (<span style="color:#00C000"><b>default</b></span>: false).
        evaluation (bool): Whether the agent is currently evaluated, implies independent and
            deterministic
            (<span style="color:#00C000"><b>default</b></span>: false).
        query (list[str]): Names of tensors to retrieve
            (<span style="color:#00C000"><b>default</b></span>: none).
        kwargs: Additional input values, for instance, for dynamic hyperparameters.

    Returns:
        dict[action] | iter[dict[action]], if independent mode dict[internal] |
        iter[dict[internal]], plus optional list[str]: Dictionary containing action(s),
        dictionary containing next internal agent state(s) if independent mode, plus queried
        tensor values if requested. Internals are only returned if `internals` was passed.

    Raises:
        TensorforceError: If `deterministic`/`independent` are combined with `evaluation`, if
            `internals`/`deterministic` are given without independent mode, if `parallel` is
            a zero-length iterable, or if a non-independent call was not preceded by
            `observe(...)` for one of the given parallel indices.
    """
    assert util.reduce_all(predicate=util.not_nan_inf, xs=states)

    # Evaluation implies both flags, so passing them explicitly as well is rejected
    if evaluation:
        if deterministic:
            raise TensorforceError.invalid(
                name='agent.act', argument='deterministic', condition='evaluation = true'
            )
        if independent:
            raise TensorforceError.invalid(
                name='agent.act', argument='independent', condition='evaluation = true'
            )
        deterministic = independent = True

    # Internals and deterministic are only meaningful in independent mode
    if not independent:
        if internals is not None:
            raise TensorforceError.invalid(
                name='agent.act', argument='internals', condition='independent = false'
            )
        if deterministic:
            raise TensorforceError.invalid(
                name='agent.act', argument='deterministic', condition='independent = false'
            )

    # Remember whether the caller supplied internals: decides batching and the return shape
    internals_is_none = (internals is None)
    if independent and internals_is_none:
        internals = OrderedDict()

    # Batch states
    batched = (not isinstance(parallel, int))
    if batched:
        # Materialize first so that one-shot iterables (e.g. generators) support len()
        parallel = list(parallel)
        if len(parallel) == 0:
            raise TensorforceError.value(
                name='agent.act', argument='parallel', value=parallel, hint='zero-length'
            )
        parallel = np.asarray(parallel)
        if isinstance(states[0], dict):
            # Sequence of per-instance dicts -> one dict of stacked arrays
            states = OrderedDict(
                (name, np.asarray([states[n][name] for n in range(len(parallel))]))
                for name in states[0]
            )
        else:
            states = np.asarray(states)
        # Only stack internals the caller actually passed; the empty placeholder dict used
        # for `internals=None` has no per-instance entries (indexing it raised KeyError).
        if independent and not internals_is_none:
            internals = OrderedDict(
                (name, np.asarray([internals[n][name] for n in range(len(parallel))]))
                for name in internals[0]
            )
    else:
        parallel = np.asarray([parallel])
        states = util.fmap(
            function=(lambda x: np.asarray([x])), xs=states,
            depth=int(isinstance(states, dict))
        )
        if independent:
            internals = util.fmap(
                function=(lambda x: np.asarray([x])), xs=internals, depth=1
            )

    if not independent and not all(self.timestep_completed[n] for n in parallel):
        raise TensorforceError(
            message="Calling agent.act must be preceded by agent.observe."
        )

    # Auxiliaries: split "<action>_mask" entries of int actions off the states
    auxiliaries = OrderedDict()
    if isinstance(states, dict):
        states = dict(states)
        for name, spec in self.actions_spec.items():
            if spec['type'] == 'int' and name + '_mask' in states:
                auxiliaries[name + '_mask'] = states.pop(name + '_mask')

    # Normalize states dictionary
    states = util.normalize_values(
        value_type='state', values=states, values_spec=self.states_spec
    )

    # Model.act()
    if independent:
        if query is None:
            actions, internals = self.model.independent_act(
                states=states, internals=internals, auxiliaries=auxiliaries,
                parallel=parallel, deterministic=deterministic, **kwargs
            )
        else:
            actions, internals, queried = self.model.independent_act(
                states=states, internals=internals, auxiliaries=auxiliaries,
                parallel=parallel, deterministic=deterministic, query=query, **kwargs
            )
    else:
        if query is None:
            actions, self.timesteps = self.model.act(
                states=states, auxiliaries=auxiliaries, parallel=parallel, **kwargs
            )
        else:
            actions, self.timesteps, queried = self.model.act(
                states=states, auxiliaries=auxiliaries, parallel=parallel, query=query,
                **kwargs
            )

    # A regular act opens a timestep which has to be completed by observe
    if not independent:
        for n in parallel:
            self.timestep_completed[n] = False

    # Record states, actions and action masks into the per-parallel buffers
    if self.recorder_spec is not None and not independent and \
            self.episodes >= self.recorder_spec.get('start', 0):
        for n, p in enumerate(parallel):
            index = self.buffer_indices[p]
            for name in self.states_spec:
                self.states_buffers[name][p, index] = states[name][n]
            for name, spec in self.actions_spec.items():
                self.actions_buffers[name][p, index] = actions[name][n]
                if spec['type'] == 'int':
                    mask_name = name + '_mask'
                    if mask_name in auxiliaries:
                        self.states_buffers[mask_name][p, index] = auxiliaries[mask_name][n]
                    else:
                        # No mask given: record an all-true mask
                        shape = (1,) + spec['shape'] + (spec['num_values'],)
                        self.states_buffers[mask_name][p, index] = np.full(
                            shape=shape, fill_value=True, dtype=util.np_dtype(dtype='bool')
                        )

    # Reverse normalized actions dictionary
    actions = util.unpack_values(
        value_type='action', values=actions, values_spec=self.actions_spec
    )

    # Unbatch actions
    if batched:
        if isinstance(actions, dict):
            actions = [
                OrderedDict((name, actions[name][n]) for name in actions)
                for n in range(len(parallel))
            ]
        # NOTE(review): internals are returned as received from the model in batched mode,
        # i.e. they are not split per parallel index here - confirm this is intended.
    else:
        actions = util.fmap(
            function=(lambda x: x[0]), xs=actions, depth=int(isinstance(actions, dict))
        )
        if independent:
            internals = util.fmap(function=(lambda x: x[0]), xs=internals, depth=1)

    # Internals are only handed back if the caller passed some in
    if independent and not internals_is_none:
        if query is None:
            return actions, internals
        else:
            return actions, internals, queried
    else:
        if query is None:
            return actions
        else:
            return actions, queried
def act(
    self, states, parallel=0, deterministic=False, independent=False, evaluation=False,
    query=None, **kwargs
):
    """
    Returns action(s) for the given state(s), needs to be followed by `observe(...)` unless
    `independent` is true.

    Args:
        states (dict[state]): Dictionary containing state(s) to be acted on
            (<span style="color:#C00000"><b>required</b></span>).
        parallel (int): Parallel execution index
            (<span style="color:#00C000"><b>default</b></span>: 0).
        deterministic (bool): Whether to act deterministically, i.e. without exploration and
            sampling
            (<span style="color:#00C000"><b>default</b></span>: false).
        independent (bool): Whether action is not remembered, and this call is thus not
            followed by observe
            (<span style="color:#00C000"><b>default</b></span>: false).
        evaluation (bool): Whether the agent is currently evaluated, implies deterministic
            and independent, and must not be combined with either of them
            (<span style="color:#00C000"><b>default</b></span>: false).
        query (list[str]): Names of tensors to retrieve
            (<span style="color:#00C000"><b>default</b></span>: none).
        kwargs: Additional input values, for instance, for dynamic hyperparameters.

    Returns:
        (dict[action], plus optional list[str]): Dictionary containing action(s), plus
        queried tensor values if requested.
    """
    assert util.reduce_all(predicate=util.not_nan_inf, xs=states)

    # Evaluation switches on both flags itself, so neither may be preset by the caller
    if evaluation:
        if deterministic or independent:
            raise TensorforceError.unexpected()
        deterministic = True
        independent = True

    # Split "<action>_mask" entries of int actions off the states into the auxiliaries
    auxiliaries = OrderedDict()
    if isinstance(states, dict):
        states = dict(states)
        for action_name, action_spec in self.actions_spec.items():
            if action_spec['type'] != 'int':
                continue
            mask_name = action_name + '_mask'
            if mask_name in states:
                auxiliaries[mask_name] = states.pop(mask_name)

    # Bring the states into the normalized dictionary form
    states = util.normalize_values(
        value_type='state', values=states, values_spec=self.states_spec
    )

    # Prepend a batch axis of size one to every state and auxiliary value
    def add_batch_axis(value):
        return np.asarray([value])

    states = util.fmap(function=add_batch_axis, xs=states, depth=1)
    auxiliaries = util.fmap(function=add_batch_axis, xs=auxiliaries, depth=1)

    # Model.act(): the query argument is only forwarded when tensors were requested
    with_query = query is not None
    model_inputs = dict(
        states=states, auxiliaries=auxiliaries, parallel=parallel,
        deterministic=deterministic, independent=independent
    )
    if with_query:
        model_inputs['query'] = query
    model_inputs.update(kwargs)
    model_outputs = self.model.act(**model_inputs)
    if with_query:
        actions, self.timesteps, queried = model_outputs
    else:
        actions, self.timesteps = model_outputs

    # Record the (batched) states, actions and action masks once recording has started
    recording = (
        self.recorder_spec is not None and not independent and
        self.episodes >= self.recorder_spec.get('start', 0)
    )
    if recording:
        slot = self.buffer_indices[parallel]
        for state_name in self.states_spec:
            self.states_buffers[state_name][parallel, slot] = states[state_name][0]
        for action_name, action_spec in self.actions_spec.items():
            self.actions_buffers[action_name][parallel, slot] = actions[action_name][0]
            if action_spec['type'] != 'int':
                continue
            mask_name = action_name + '_mask'
            if mask_name in auxiliaries:
                mask_value = auxiliaries[mask_name][0]
            else:
                # No mask was supplied: record an all-true mask instead
                mask_shape = (1,) + action_spec['shape'] + (action_spec['num_values'],)
                mask_value = np.full(
                    shape=mask_shape, fill_value=True, dtype=util.np_dtype(dtype='bool')
                )
            self.states_buffers[mask_name][parallel, slot] = mask_value

    # Strip the batch axis again, then undo the action normalization
    actions = util.fmap(function=(lambda batch: batch[0]), xs=actions, depth=1)
    actions = util.unpack_values(
        value_type='action', values=actions, values_spec=self.actions_spec
    )

    if with_query:
        return actions, queried
    return actions
def act(self, states, parallel=0, deterministic=False, independent=False, query=None, **kwargs):
    """
    Return action(s) for given state(s). States preprocessing and exploration are applied if
    configured accordingly.

    Args:
        states (any): One state (usually a value tuple) or dict of states if multiple states
            are expected.
        parallel (int): Parallel index, forwarded unchanged to the model (default: 0).
        deterministic (bool): If true, no exploration and sampling is applied.
        independent (bool): If true, action is not followed by observe (and hence not
            included in updates).
        query (list): Optional names of tensors to fetch (default: none).
        kwargs: Additional keyword arguments, forwarded unchanged to the model.

    Returns:
        Scalar value of the action or dict of multiple actions the agent wants to execute.
        If `query` is given, a tuple of the action(s) and the fetched tensor values as
        returned by the model.
    """
    # Bring the states into the normalized dictionary form and wrap each value in a
    # one-element batch
    normalized = util.normalize_values(
        value_type='state', values=states, values_spec=self.states_spec
    )
    batched_states = util.fmap(function=(lambda value: [value]), xs=normalized)

    # Model.act(): the query argument is only forwarded when tensors were requested
    with_query = query is not None
    model_inputs = dict(
        states=batched_states, parallel=parallel, deterministic=deterministic,
        independent=independent
    )
    if with_query:
        model_inputs['query'] = query
    model_inputs.update(kwargs)
    model_outputs = self.model.act(**model_inputs)
    if with_query:
        batched_actions, self.timestep, fetched = model_outputs
    else:
        batched_actions, self.timestep = model_outputs

    # Take the single batch entry, then undo the action normalization
    single_actions = util.fmap(function=(lambda batch: batch[0]), xs=batched_actions)
    actions = util.unpack_values(
        value_type='action', values=single_actions, values_spec=self.actions_spec
    )

    if with_query:
        return actions, fetched
    return actions