def _make_step_spec(self, obs_spec):
    """Build the per-step spec dictionary, cache it and return it.

    Args:
      obs_spec: Observation spec; stored unchanged under the
        'observation' key.

    Returns:
      The dict stored on `self._step_spec` with the keys `step_type`,
      `reward`, `discount` and `observation`.
    """
    scalar_shape = ()

    # step_type is an int8 scalar bounded to the inclusive range [0, 2].
    step_type_spec = BoundedArraySpec(dtype=np.int8,
                                      shape=scalar_shape,
                                      minimum=0,
                                      maximum=2,
                                      name='batched_env_step_type_spec')
    reward_spec = ArraySpec(dtype=np.float32,
                            shape=scalar_shape,
                            name='batched_env_reward_spec')
    discount_spec = ArraySpec(dtype=np.float32,
                              shape=scalar_shape,
                              name='batched_env_discount_spec')

    self._step_spec = {
        'step_type': step_type_spec,
        'reward': reward_spec,
        'discount': discount_spec,
        'observation': obs_spec,
    }
    return self._step_spec
def __init__(self, obs_spec, step_output_spec):
    """Start with no stored trajectories and build the trajectory spec.

    Args:
      obs_spec: Nested structure of observation specs; every leaf is
        passed through `expand_spec`.
      step_output_spec: Nested structure of step-output specs; every leaf
        is passed through `expand_spec`.
    """
    self._trajs = None

    # Don't use shape in the spec since it's unknown
    unknown_2d = (None, None)
    # NOTE(review): expand_spec is defined elsewhere; presumably it widens
    # each leaf spec with the same unknown leading dimensions -- confirm.
    self._traj_spec = {
        'step_type': ArraySpec(dtype=np.int8,
                               shape=unknown_2d,
                               name='traj_step_type_spec'),
        'reward': ArraySpec(dtype=np.float32,
                            shape=unknown_2d,
                            name='traj_reward_spec'),
        'discount': ArraySpec(dtype=np.float32,
                              shape=unknown_2d,
                              name='traj_discount_spec'),
        'observation': nest.map_structure(expand_spec, obs_spec),
        'step_output': nest.map_structure(expand_spec, step_output_spec),
    }
def __init__(self, obs_spec, step_output_spec, batch_size, discount_factor,
             traj_length):
    """Set up bookkeeping for a batch of trajectories.

    Args:
      obs_spec: Nested structure of observation specs. It is deep-copied
        and the copy receives an extra 'bootstrap_value' entry, so it must
        support item assignment.
      step_output_spec: Nested structure of step-output specs.
      batch_size: Number of `BaseTrajectory` helpers to create.
      discount_factor: Stored as `self._discount_factor`.
      traj_length: Stored as `self._traj_len`.
    """
    self._batch_size = batch_size
    self._traj_len = traj_length
    self._discount_factor = discount_factor

    # Don't use shape in the spec since it's unknown
    unknown_2d = (None, None)
    self._traj_spec = {
        'step_type': ArraySpec(dtype=np.int8,
                               shape=unknown_2d,
                               name='traj_step_type_spec'),
        'reward': ArraySpec(dtype=np.float32,
                            shape=unknown_2d,
                            name='traj_reward_spec'),
        'discount': ArraySpec(dtype=np.float32,
                              shape=unknown_2d,
                              name='traj_discount_spec'),
        'observation': nest.map_structure(expand_spec, obs_spec),
        'step_output': nest.map_structure(expand_spec, step_output_spec),
    }

    # self._trajs[i] = trajectory of the ith experience in the batch.
    self._trajs = None

    # List of timesteps that have been backtracked and are ready to be
    # split into chunks to be shipped out.
    # _finished_timesteps[i] = finished timesteps for the ith batch item.
    self._finished_timesteps = None

    # Used to chop the trajectory into chunks. The caller's obs_spec is
    # left untouched; only the copy gains the 'bootstrap_value' entry.
    chop_obs_spec = copy.deepcopy(obs_spec)
    chop_obs_spec['bootstrap_value'] = ArraySpec(dtype=np.float32,
                                                 shape=(None, ),
                                                 name='bootstrap_value_spec')
    # Every helper receives the same chop_obs_spec object.
    chopping_trajs = []
    for _ in range(batch_size):
        chopping_trajs.append(BaseTrajectory(chop_obs_spec, step_output_spec))
    self._chopping_trajs = chopping_trajs

    self._len = 0
def mk_spec(path_tuple, np_arr):
    """Create an ArraySpec for `np_arr`, named after its path.

    Args:
      path_tuple: Iterable of path components. Each component is converted
        with `str()` before joining, so non-string components (for example
        integer indices) are accepted instead of raising `TypeError`.
      np_arr: Object exposing `.shape` and `.dtype`.

    Returns:
      An ArraySpec built from the array's shape and dtype whose name is the
      path components joined by '_' followed by the suffix '_spec'.
    """
    # str.join only accepts strings; stringify so mixed key/index paths work.
    path_name = '_'.join(str(component) for component in path_tuple)
    return ArraySpec(np_arr.shape, np_arr.dtype, name=path_name + '_spec')
def observation_spec(self):
    """Return the observation spec dictionary.

    Returns:
      A dict with the single key 'features', mapped to a float32 ArraySpec
      of shape (2,) named 'features_spec'.
    """
    return {
        'features': ArraySpec((2, ), np.float32, name='features_spec'),
    }
def mk_spec(tensor):
    """Create an ArraySpec that mirrors `tensor`.

    Args:
      tensor: Object exposing `.dtype.as_numpy_dtype`, `.shape` and `.name`
        (presumably a TensorFlow tensor -- confirm against the caller).

    Returns:
      An ArraySpec with the tensor's numpy dtype, its shape and its name.
    """
    numpy_dtype = tensor.dtype.as_numpy_dtype
    tensor_shape = tensor.shape
    tensor_name = tensor.name
    return ArraySpec(dtype=numpy_dtype, shape=tensor_shape, name=tensor_name)