def stack_specs(*specs):
  # All specs being stacked must agree on shape, dtype and name.
  for spec in specs[1:]:
    assert spec.shape == specs[0].shape
    assert spec.dtype == specs[0].dtype
    assert spec.name == specs[0].name

  if isinstance(specs[0], BoundedArraySpec):
    # Widen the bounds so the batched spec covers every input spec.
    min_min = specs[0].minimum
    max_max = specs[0].maximum
    for spec in specs:
      min_min = min(min_min, spec.minimum)
      max_max = max(max_max, spec.maximum)
    spec = BoundedArraySpec(
        (len(specs),) + specs[0].shape,
        specs[0].dtype,
        min_min,
        max_max,
        name='batched_' + ('spec' if specs[0].name is None else specs[0].name))
  else:
    # Non-bounded specs only gain a leading batch dimension.
    specs[0].expand_dims(len(specs), axis=0)
    spec = specs[0]
  return spec
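# A minimal usage sketch of stack_specs (not from the repo). It assumes
# BoundedArraySpec accepts the positional (shape, dtype, minimum, maximum)
# arguments used in stack_specs above, and that numpy is imported as np.
spec_a = BoundedArraySpec((4,), np.int32, 0, 9, name='action_spec')
spec_b = BoundedArraySpec((4,), np.int32, 0, 5, name='action_spec')

batched = stack_specs(spec_a, spec_b)
print(batched.shape)    # (2, 4): batch dimension prepended
print(batched.minimum)  # 0: smallest minimum across the inputs
print(batched.maximum)  # 9: largest maximum across the inputs
print(batched.name)     # 'batched_action_spec'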
def action_spec(self):
  if self.config.muldi_actions:
    # Multi-action mode: the agent picks up to `max_k` node indices per step.
    if self.config.k_schedule.enable:
      max_k = max(self.config.k_schedule.values)
    else:
      max_k = self.k
    return BoundedArraySpec((max_k,),
                            np.int32,
                            minimum=0,
                            maximum=self._max_nodes - 1,
                            name='action_spec')
  else:
    # Single-action mode: one scalar node index per step.
    return BoundedArraySpec((),
                            np.int32,
                            minimum=0,
                            maximum=self._max_nodes - 1,
                            name='action_spec')
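# Illustrative sketch (not from the repo): drawing a uniformly random action
# that satisfies whichever variant of the spec above is active. `env` is a
# hypothetical instance of the class defining action_spec.
spec = env.action_spec()
action = np.random.randint(low=spec.minimum,
                           high=spec.maximum + 1,
                           size=spec.shape,
                           dtype=np.int32)
# With muldi_actions enabled the result is a length-max_k vector of node
# indices; otherwise it is a single scalar index in [0, max_nodes - 1].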
def _make_step_spec(self, obs_spec):
  # step_type is bounded to the three step types (0 = first, 1 = mid, 2 = last).
  self._step_spec = dict(
      step_type=BoundedArraySpec(dtype=np.int8,
                                 shape=(),
                                 minimum=0,
                                 maximum=2,
                                 name='batched_env_step_type_spec'),
      reward=ArraySpec(dtype=np.float32,
                       shape=(),
                       name='batched_env_reward_spec'),
      discount=ArraySpec(dtype=np.float32,
                         shape=(),
                         name='batched_env_discount_spec'),
      observation=obs_spec)
  return self._step_spec
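# Illustrative sketch (not from the repo): a batched wrapper could combine
# per-environment observation specs with stack_specs and then build the step
# spec above. `batched_env`, `child_envs` and `observation_spec()` are assumed
# names; only stack_specs and _make_step_spec come from the code in this section.
obs_spec = stack_specs(*[env.observation_spec() for env in child_envs])
step_spec = batched_env._make_step_spec(obs_spec)
# The returned dict mirrors a time step: step_type, reward, discount and
# observation, each described by its spec.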
def action_spec(self):
  return BoundedArraySpec((),
                          np.int32,
                          minimum=0,
                          maximum=len(self._graph_features.nodes) - 1,
                          name='action_spec')
def action_spec(self):
  return BoundedArraySpec((),
                          np.int32,
                          minimum=0,
                          maximum=len(self._variable_nodes) - 1,
                          name='action_spec')
def action_spec(self):
  return BoundedArraySpec((),
                          np.int32,
                          minimum=0,
                          maximum=len(self._nx_graph) - 1,
                          name='action_spec')
def action_spec(self):
  return BoundedArraySpec((),
                          np.int32,
                          minimum=0,
                          maximum=1,
                          name='action_spec')