def __init__(self, env):
    """Wrap `env` and precompute a flattened observation spec.

    Every observation other than `MANIP_PIXELS_KEY` contributes its element
    count to a single flat float32 `features` spec. If `MANIP_PIXELS_KEY` is
    present it is exposed as `pixels` with its leading dimension dropped. A
    `state` spec is sized from `env.physics.get_state()`.

    Args:
        env: Environment exposing `observation_spec()` and
            `physics.get_state()`.
    """
    self._env = env
    self._obs_spec = OrderedDict()
    wrapped_obs_spec = env.observation_spec().copy()

    dim = 0
    for key, spec in wrapped_obs_spec.items():
        if key == MANIP_PIXELS_KEY:
            continue
        assert spec.dtype == np.float64
        # Exact type match on purpose: subclasses of `specs.Array` are
        # rejected here, which `isinstance` would not do.
        assert type(spec) == specs.Array
        # `np.prod(())` is the float 1.0, so cast to keep `dim` an int even
        # when a scalar spec is encountered.
        dim += int(np.prod(spec.shape))

    self._obs_spec['features'] = specs.Array(shape=(dim, ),
                                             dtype=np.float32,
                                             name='features')

    if MANIP_PIXELS_KEY in wrapped_obs_spec:
        spec = wrapped_obs_spec[MANIP_PIXELS_KEY]
        # The leading axis of the pixel spec is dropped.
        self._obs_spec['pixels'] = specs.BoundedArray(shape=spec.shape[1:],
                                                      dtype=spec.dtype,
                                                      minimum=spec.minimum,
                                                      maximum=spec.maximum,
                                                      name='pixels')

    self._obs_spec['state'] = specs.Array(
        shape=self._env.physics.get_state().shape,
        dtype=np.float32,
        name='state')
def observation_spec(self, *args, **kwargs):
    """Return the wrapped observation spec rewritten per object type.

    Starting from a copy of the underlying environment's spec, this adds one
    entry per name in `_OBJECT_TYPE_NAMES`, replaces `"Segmentation"` (when
    present) with per-type `"SegmentationMasks<name>"` boolean entries, and
    replaces `"Contacts"` with `"ContactPairs"` and `"ContactFeatures"`.

    Args:
        *args: Unused.
        **kwargs: Unused.

    Returns:
        The modified copy of the underlying observation spec.
    """
    new_spec = self._unity_environment.observation_spec().copy()

    # The block observation is exactly as we get it
    block_spec = new_spec[constants.BLOCK]
    block_obs_shape = [0, block_spec.shape[1]]
    block_obs_dtype = block_spec.dtype

    # We know the observation is the same for all block types.
    for name in _OBJECT_TYPE_NAMES:
        new_spec[name] = specs.Array(block_obs_shape,
                                     dtype=block_obs_dtype,
                                     name=name)

    if "Segmentation" in new_spec:
        segmentation_resolution = new_spec["Segmentation"].shape[:2]
        segmentation_obs_shape = (0, ) + segmentation_resolution
        for name in _OBJECT_TYPE_NAMES:
            obs_name = "SegmentationMasks" + name
            # `np.bool` was removed from NumPy; builtin `bool` yields the
            # same dtype.
            new_spec[obs_name] = specs.Array(segmentation_obs_shape,
                                             dtype=bool,
                                             name=obs_name)
        del new_spec["Segmentation"]

    new_spec.update({
        "ContactPairs":
        specs.Array([0, 2], dtype=np.int32, name="ContactPairs")
    })
    # Read the dtype of "Contacts" before that entry is removed below.
    new_spec.update({
        "ContactFeatures":
        specs.Array([0, 1],
                    dtype=new_spec["Contacts"].dtype,
                    name="ContactFeatures")
    })
    del new_spec["Contacts"]

    return new_spec
def _spec_from_observation(observation):
    """Derive an `Array` spec (or ordered dict of specs) from an observation.

    Only `collections.OrderedDict` instances are treated as mappings; any
    other observation, including a plain `dict`, is read through its own
    `shape` and `dtype` attributes.

    Args:
        observation: An array-like with `shape`/`dtype`, or an `OrderedDict`
            of such values.

    Returns:
        A single `specs.Array`, or an `OrderedDict` with one named
        `specs.Array` per key of `observation`.
    """
    if isinstance(observation, collections.OrderedDict):
        return collections.OrderedDict(
            (key, specs.Array(value.shape, value.dtype, name=key))
            for key, value in observation.items())
    return specs.Array(observation.shape, observation.dtype)
def observation_spec(self):
    """Return the ordered observation spec of this environment.

    Returns:
        An `OrderedDict` with a float32 `canvas` of shape
        `(canvas_width, canvas_width, 3)`, scalar int32 `episode_step` and
        `episode_length`, and an `action_mask` whose structure mirrors
        `self._action_masks["move"]` with a scalar float32 spec at each leaf.
    """

    def _scalar_float_spec(_):
        return specs.Array(shape=(), dtype=np.float32)

    mask_spec = nest.map_structure(_scalar_float_spec,
                                   self._action_masks["move"])

    side = self._canvas_width
    spec = collections.OrderedDict()
    spec["canvas"] = specs.Array(shape=(side, side, 3), dtype=np.float32)
    spec["episode_step"] = specs.Array(shape=(), dtype=np.int32)
    spec["episode_length"] = specs.Array(shape=(), dtype=np.int32)
    spec["action_mask"] = mask_spec
    return spec
def observation_spec(self):
    """Returns the observation spec.

    Returns:
        A 3-tuple of boolean `specs.Array`s: `adjacency_matrix` with the
        shape of `self.adj_mat`, and `burned` and `defended`, each a vector
        with one entry per row of `self.adj_mat`.
    """
    num_rows = self.adj_mat.shape[0]
    # Builtin `bool` replaces the `np.bool` alias, which NumPy has removed.
    return (
        specs.Array(shape=self.adj_mat.shape,
                    dtype=bool,
                    name="adjacency_matrix"),
        specs.Array(shape=(num_rows, ), dtype=bool, name="burned"),
        specs.Array(shape=(num_rows, ), dtype=bool, name="defended"),
    )
def test_buffer(self):
    """Exercise fill, overflow, drain and partial-drain of `sequence.Buffer`."""
    # Given a buffer and some dummy data...
    max_sequence_length = 10
    obs_shape = (3, 3)
    # Builtin `float`/`int` replace the `np.float`/`np.int` aliases, which
    # NumPy has removed; the resulting dtypes are unchanged.
    buffer = sequence.Buffer(
        obs_spec=specs.Array(obs_shape, dtype=float),
        action_spec=specs.Array((), dtype=int),
        max_sequence_length=max_sequence_length)
    dummy_step = dm_env.transition(observation=np.zeros(obs_shape),
                                   reward=0.)

    # If we add `max_sequence_length` items to the buffer...
    for _ in range(max_sequence_length):
        buffer.append(dummy_step, 0, dummy_step)

    # Then the buffer should now be full.
    self.assertTrue(buffer.full())

    # Any further appends should throw an error.
    with self.assertRaises(ValueError):
        buffer.append(dummy_step, 0, dummy_step)

    # If we now drain this trajectory from the buffer...
    trajectory = buffer.drain()

    # The `observations` sequence should have length `T + 1`.
    self.assertLen(trajectory.observations, max_sequence_length + 1)

    # All other sequences should have length `T`.
    self.assertLen(trajectory.actions, max_sequence_length)
    self.assertLen(trajectory.rewards, max_sequence_length)
    self.assertLen(trajectory.discounts, max_sequence_length)

    # The buffer should now be empty.
    self.assertTrue(buffer.empty())

    # A second call to drain() should throw an error, since the buffer is
    # empty.
    with self.assertRaises(ValueError):
        buffer.drain()

    # If we now append another transition...
    buffer.append(dummy_step, 0, dummy_step)

    # And immediately drain the buffer...
    trajectory = buffer.drain()

    # We should have a valid partial trajectory of length T=1.
    self.assertLen(trajectory.observations, 2)
    self.assertLen(trajectory.actions, 1)
    self.assertLen(trajectory.rewards, 1)
    self.assertLen(trajectory.discounts, 1)
def __init__(self, environment, goal):
    """Wrap `environment` and expose a `state`/`goal` observation spec.

    Args:
        environment: Wrapped environment; its `observation_spec()` must
            return a single spec with `shape` and `dtype`.
        goal: Goal array; its `shape` and `dtype` define the `goal` spec.
    """
    self._environment = environment
    self._goal = goal
    # Use the attribute assigned above rather than `self.environment`, which
    # this constructor never sets and which would only resolve if a property
    # of that name is defined elsewhere on the class.
    original_spec = self._environment.observation_spec()
    self._observation_spec = {
        'state':
        dm_env_specs.Array(shape=original_spec.shape,
                           dtype=original_spec.dtype,
                           name='state'),
        'goal':
        dm_env_specs.Array(shape=self._goal.shape,
                           dtype=self._goal.dtype,
                           name='goal')
    }
def test_no_nested_specs(self):
    """With `nested_tensors=False` the specs keep flat, dotted names."""
    env = dm_env_adaptor.DmEnvAdaptor(connection=mock.MagicMock(),
                                      specs=_SAMPLE_NESTED_SPECS,
                                      nested_tensors=False)

    def _flat_specs():
        # Actions and observations are expected to share this layout.
        return {
            'foo.bar': specs.Array(shape=(), dtype=np.int32, name='foo.bar'),
            'baz': specs.Array(shape=(), dtype=np.str_, name='baz'),
        }

    expected_actions = _flat_specs()
    expected_observations = _flat_specs()

    self.assertSameElements(expected_actions, env.action_spec())
    self.assertSameElements(expected_observations, env.observation_spec())
def observation_spec(self, *args, **kwargs):
    """Return the wrapped observation spec extended along a dynamics axis.

    Starting from a copy of the underlying environment's spec, this adds one
    entry per name in `_OBJECT_TYPE_NAMES`, prepends an axis of size
    `self._length_dynamics` to `"RGB"`/`"ObserverRGB"` when present,
    replaces `"Segmentation"` (when present) with per-type boolean
    `"SegmentationMasks<name>"` entries, adds boolean `"SimulationOn"` and
    `"TimeMask"` vectors, and removes `"Contacts"`, `"SpawnCollisionCount"`,
    `"CollisionStop"` and `"ElapsedTime"`.

    Args:
        *args: Unused.
        **kwargs: Unused.

    Returns:
        The modified copy of the underlying observation spec.
    """
    new_spec = self._unity_environment.observation_spec().copy()

    # The block observation is exactly as we get it
    blocks_spec = new_spec["Blocks"]
    block_obs_shape = [0, self._length_dynamics, blocks_spec.shape[1]]
    block_obs_dtype = blocks_spec.dtype

    # We know the observation is the same for all block types.
    for name in _OBJECT_TYPE_NAMES:
        new_spec[name] = specs.Array(block_obs_shape,
                                     dtype=block_obs_dtype,
                                     name=name)

    for key in ["RGB", "ObserverRGB"]:
        if key in new_spec:
            prev_spec = new_spec[key]
            new_spec[key] = specs.Array(
                (self._length_dynamics, ) + prev_spec.shape,
                dtype=prev_spec.dtype,
                name=prev_spec.name)

    # `np.bool` was removed from NumPy; builtin `bool` yields the same dtype.
    if "Segmentation" in new_spec:
        segmentation_resolution = new_spec["Segmentation"].shape[:2]
        segmentation_obs_shape = (
            0, self._length_dynamics) + segmentation_resolution
        for name in _OBJECT_TYPE_NAMES:
            obs_name = "SegmentationMasks" + name
            new_spec[obs_name] = specs.Array(segmentation_obs_shape,
                                             dtype=bool,
                                             name=obs_name)
        del new_spec["Segmentation"]

    new_spec.update({
        "SimulationOn":
        specs.Array([self._length_dynamics], dtype=bool,
                    name="SimulationOn"),
        "TimeMask":
        specs.Array([self._length_dynamics], dtype=bool, name="TimeMask"),
    })

    del new_spec["Contacts"]
    del new_spec["SpawnCollisionCount"]
    del new_spec["CollisionStop"]
    del new_spec["ElapsedTime"]

    return new_spec
def __init__(self,
             image_size=(64, 64),
             anti_aliasing=1,
             bg_color=None,
             color_to_rgb=None):
    """Construct PIL renderer.

    Args:
        image_size: Int tuple (height, width). Size of output of .render().
        anti_aliasing: Int. Anti-aliasing factor. Linearly scales the size
            of the internal canvas.
        bg_color: None or 3-tuple of ints in [0, 255]. Background color. If
            None, background is (0, 0, 0).
        color_to_rgb: Callable converting a tuple (c1, c2, c3) to a uint8
            tuple (r, g, b) in [0, 255]. If None, colors are passed through
            unchanged.
    """
    self._image_size = image_size
    self._anti_aliasing = anti_aliasing

    # The internal canvas is the output size scaled by the anti-aliasing
    # factor along both axes.
    first_side, second_side = image_size[0], image_size[1]
    self._canvas_size = (anti_aliasing * first_side,
                         anti_aliasing * second_side)

    self._color_to_rgb = (color_to_rgb if color_to_rgb is not None else
                          (lambda x: x))

    background = (0, 0, 0) if bg_color is None else bg_color
    self._canvas_bg = Image.new('RGB', self._canvas_size, background)

    self._observation_spec = specs.Array(shape=self._image_size + (3, ),
                                         dtype=np.uint8)

    self._canvas = Image.new('RGB', self._canvas_size)
    self._draw = ImageDraw.Draw(self._canvas)
def observation_spec():
    """Observation spec for all TCV environments.

    Returns:
        A dict with 1-D `references`, `measurements` and `last_action`
        specs of dtype `ENVIRONMENT_DATA_TYPE`, sized by `REF_RANGES`,
        `TCV_MEASUREMENT_RANGES` and `TCV_ACTION_RANGES` respectively.
    """
    entry_sizes = (
        ('references', REF_RANGES.size),
        ('measurements', TCV_MEASUREMENT_RANGES.size),
        ('last_action', TCV_ACTION_RANGES.size),
    )
    return {
        entry_name: specs.Array(shape=(entry_size, ),
                                dtype=ENVIRONMENT_DATA_TYPE,
                                name=entry_name)
        for entry_name, entry_size in entry_sizes
    }
def observation_spec(self):
    """Returns the observation spec.

    Returns:
        A `specs.Array` named `market_observations` of shape `(10,)` with
        the dtype of Python's builtin `float`.
    """
    # Builtin `float` replaces the `np.float` alias, which NumPy has removed.
    return specs.Array(
        shape=(10,),
        dtype=float,
        name="market_observations"
    )
def test_reset(self):
    """Ensure that noop starts `reset` steps the environment multiple times."""
    noop_action, noop_max, seed = 0, 10, 24

    scalar_reward_spec = specs.Array(dtype=np.float64, shape=())
    base_env = fakes.DiscreteEnvironment(action_dtype=np.int64,
                                         obs_dtype=np.int64,
                                         reward_spec=scalar_reward_spec)
    step_mock = mock.MagicMock()

    # An identically seeded generator gives the count this test compares
    # against the number of `step` calls made during `reset`.
    reference_rng = np.random.RandomState(seed)
    expected_num_step_calls = reference_rng.randint(noop_max + 1)

    with mock.patch.object(base_env, 'step', step_mock):
        env = wrappers.NoopStartsWrapper(
            base_env,
            noop_action=noop_action,
            noop_max=noop_max,
            seed=seed,
        )
        env.reset()

        # Test environment step called with noop action as part of
        # wrapper.reset
        step_mock.assert_called_with(noop_action)
        self.assertEqual(step_mock.call_count, expected_num_step_calls)
        self.assertEqual(step_mock.call_args, ((noop_action, ), {}))
def _init_observation_spec(self):
    """Computes the observation spec for the pixel observations.

    The dtype is `float` when `self._to_float` is set and `np.uint8`
    otherwise; the shape is 2-D for grayscale and has a trailing
    `NUM_COLOR_CHANNELS` axis for RGB. The spec is then passed through the
    frame stacker's `update_spec`.

    Returns:
        An `Array` specification for the pixel observations, or, when lives
        are exposed, a tuple of that spec followed by all but the first
        element of the wrapped environment's observation spec.
    """
    dtype = float if self._to_float else np.uint8

    if self._grayscaling:
        shape, name = (self._height, self._width), "grayscale"
    else:
        shape = (self._height, self._width, NUM_COLOR_CHANNELS)
        name = "RGB"

    stacked_spec = self._frame_stacker.update_spec(
        specs.Array(shape=shape, dtype=dtype, name=name))

    if not self._expose_lives_observation:
        return stacked_spec
    return (stacked_spec, ) + self._environment.observation_spec()[1:]
def _update_spec(self, base_spec):
    """Return a copy of `base_spec` with an added `embeddings` entry.

    The embedding's shape and dtype are found by running the distance
    function once on zero-filled `state` and `goal` observations built from
    `base_spec`.

    Args:
        base_spec: Mapping of observation specs containing at least `state`
            and `goal`.

    Returns:
        A new dict with the entries of `base_spec` plus `embeddings`.
    """
    zero_obs = utils.zeros_like(base_spec)
    # Only the first of the two returned values is needed here.
    embedding, _unused = self._distance_fn(zero_obs['state'],
                                           zero_obs['goal'])
    embedding_spec = dm_env_specs.Array(shape=embedding.shape,
                                        dtype=embedding.dtype)
    return dict(base_spec, embeddings=embedding_spec)
def __init__(self, observable, physics, random_state,
             strip_singleton_buffer_dim, pad_with_initial_value):
    """Bind an observable to its callable, sampling settings and buffer.

    Args:
        observable: The observable being enabled.
        physics: Physics instance handed to the observation callable.
        random_state: Random state handed to the observation callable and
            to each attribute binding.
        strip_singleton_buffer_dim: Forwarded to the observation buffer.
        pad_with_initial_value: Forwarded to the observation buffer.
    """
    self.observable = observable
    self.observation_callable = observable.observation_callable(
        physics, random_state)

    # Bind in this fixed order; each call receives `random_state`.
    attribute_defaults = (
        ('update_interval', DEFAULT_UPDATE_INTERVAL),
        ('delay', DEFAULT_DELAY),
        ('buffer_size', DEFAULT_BUFFER_SIZE),
    )
    for attribute_name, default_value in attribute_defaults:
        self._bind_attribute_from_observable(attribute_name, default_value,
                                             random_state)

    declared_spec = self.observable.array_spec
    if declared_spec is not None:
        buffer_spec = declared_spec
    else:
        # We take an observation to determine the shape and dtype of the
        # array. This occurs outside of an episode and doesn't affect
        # environment behavior. At this point the physics state is not
        # guaranteed to be valid, so we might get a `PhysicsError` if the
        # observation callable calls `physics.forward`. We suppress such
        # errors since they do not matter as far as the shape and dtype of
        # the observation are concerned.
        with physics.suppress_physics_errors():
            sample = self.observation_callable()
        sample = np.asarray(sample)
        buffer_spec = specs.Array(shape=sample.shape, dtype=sample.dtype)

    self.buffer = obs_buffer.Buffer(
        buffer_size=self.buffer_size,
        shape=buffer_spec.shape,
        dtype=buffer_spec.dtype,
        pad_with_initial_value=pad_with_initial_value,
        strip_singleton_buffer_dim=strip_singleton_buffer_dim)
    self.update_schedule = collections.deque()
def tensor_spec_to_dm_env_spec(
        tensor_spec: dm_env_rpc_pb2.TensorSpec) -> specs.Array:
    """Returns a dm_env spec given a dm_env_rpc TensorSpec.

    Without `min`/`max` fields the result is a `StringArray` for STRING
    tensors and a plain `Array` otherwise. With bounds, a scalar integer
    tensor whose minimum is 0 and which has an explicit `max` becomes a
    `DiscreteArray`; everything else becomes a `BoundedArray`.

    Args:
        tensor_spec: A dm_env_rpc TensorSpec protobuf.

    Returns:
        Either a DiscreteArray, BoundedArray, StringArray or Array,
        depending on the content of the TensorSpec.
    """
    np_type = tensor_utils.data_type_to_np_type(tensor_spec.dtype)
    has_bounds = tensor_spec.HasField('min') or tensor_spec.HasField('max')

    if not has_bounds:
        if tensor_spec.dtype == dm_env_rpc_pb2.DataType.STRING:
            return specs.StringArray(shape=tensor_spec.shape,
                                     name=tensor_spec.name)
        return specs.Array(shape=tensor_spec.shape,
                           dtype=np_type,
                           name=tensor_spec.name)

    bounds = tensor_spec_utils.bounds(tensor_spec)
    is_discrete = (not tensor_spec.shape
                   and np.issubdtype(np_type, np.integer)
                   and bounds.min == 0 and tensor_spec.HasField('max'))
    if is_discrete:
        return specs.DiscreteArray(num_values=bounds.max + 1,
                                   dtype=np_type,
                                   name=tensor_spec.name)
    return specs.BoundedArray(shape=tensor_spec.shape,
                              dtype=np_type,
                              name=tensor_spec.name,
                              minimum=bounds.min,
                              maximum=bounds.max)
def chem_observation_spec(self) -> Dict[str, specs.Array]:
    """Return one flat float32 spec per entry of `self.see_chemistries`.

    Returns:
        A dict keyed like `self.see_chemistries`; each value is a 1-D
        float32 `specs.Array` whose length is that entry's `obs_size()` and
        whose name is the key.
    """
    chem_specs = {}
    for chem_name, chemistry in self.see_chemistries.items():
        chem_specs[chem_name] = specs.Array(shape=(chemistry.obs_size(), ),
                                            dtype=np.float32,
                                            name=chem_name)
    return chem_specs
def testReplace(self, arg_name, new_value):
    """`replace` returns a new spec differing only in the replaced field."""
    original = specs.Array([1, 5], np.float32, "test")
    replaced = original.replace(**{arg_name: new_value})

    self.assertIsNot(original, replaced)
    self.assertEqual(getattr(replaced, arg_name), new_value)

    # Every attribute that was not replaced must carry over unchanged.
    untouched_attrs = {"shape", "dtype", "name"} - {arg_name}
    for attr_name in untouched_attrs:
        self.assertEqual(getattr(replaced, attr_name),
                         getattr(original, attr_name))
def update_spec(self, spec: dm_env_specs.Array) -> dm_env_specs.Array:
    """Return `spec` with its shape adjusted for frame stacking.

    When flattening, the last axis is multiplied by the number of frames;
    otherwise a new trailing axis of that size is appended.

    Args:
        spec: Spec of a single, unstacked frame.

    Returns:
        A new `Array` spec with the stacked shape and the same dtype and
        name as `spec`.
    """
    if self._flatten:
        stacked_last_axis = self._num_frames * spec.shape[-1]
        stacked_shape = spec.shape[:-1] + (stacked_last_axis, )
    else:
        stacked_shape = spec.shape + (self._num_frames, )
    return dm_env_specs.Array(shape=stacked_shape,
                              dtype=spec.dtype,
                              name=spec.name)
def make_observation_spec_dict(enabled_dict):
    """Makes a dict of enabled observation specs from of observables.

    For an observable with an aggregator, the spec's shape and dtype come
    from applying the aggregator to a zero array shaped like the buffer;
    otherwise they are taken from the buffer itself.

    Args:
        enabled_dict: Mapping from name to enabled observable, each exposing
            `observable.aggregator` and `buffer`.

    Returns:
        A new mapping of the same type as `enabled_dict`, from name to a
        named `specs.Array`.
    """
    spec_dict = type(enabled_dict)()
    for name, enabled in enabled_dict.items():
        aggregator = enabled.observable.aggregator
        if aggregator:
            probe = aggregator(
                np.zeros(enabled.buffer.shape, dtype=enabled.buffer.dtype))
            shape, dtype = probe.shape, probe.dtype
        else:
            shape, dtype = enabled.buffer.shape, enabled.buffer.dtype
        spec_dict[name] = specs.Array(shape=shape, dtype=dtype, name=name)
    return spec_dict
def test_no_bounds_gives_arrayspec(self):
    """A TensorSpec without min/max converts to a plain `specs.Array`."""
    tensor_spec = dm_env_rpc_pb2.TensorSpec(
        name='foo',
        shape=[3],
        dtype=dm_env_rpc_pb2.DataType.UINT32)

    actual = dm_env_utils.tensor_spec_to_dm_env_spec(tensor_spec)

    expected = specs.Array(shape=[3], dtype=np.uint32)
    self.assertEqual(expected, actual)
    # The name is asserted separately from the spec equality above.
    self.assertEqual('foo', actual.name)
def reward_spec(self):
    """Describes the reward returned by the environment.

    The default implementation declares a single scalar float named
    `reward`.

    Returns:
        An `Array` spec, or a nested dict, list or tuple of `Array` specs.
    """
    scalar_shape = ()
    return specs.Array(shape=scalar_shape, dtype=float, name='reward')
def testNotEqualOtherClass(self):
    """An `Array` spec is unequal to non-spec objects, in either order."""
    array_spec = specs.Array((1, 2, 3), np.int32)
    for other in (None, ()):
        self.assertNotEqual(array_spec, other)
        self.assertNotEqual(other, array_spec)
def testValidateDtype(self, value, is_valid):
    """`validate` accepts matching dtypes and rejects others with a message."""
    spec = specs.Array((1, 2), np.int32)

    if not is_valid:
        expected_message = specs._INVALID_DTYPE % (spec.dtype, value.dtype)
        with self.assertRaisesWithLiteralMatch(ValueError,
                                               expected_message):
            spec.validate(value)
        return

    # Should not raise any exception.
    spec.validate(value)
def test_buffer(self):
    """Exercise fill, overflow, drain and partial-drain of `blib.Buffer`."""

    def check_trajectory(trajectory, num_transitions):
        # Observations hold one more entry than the other sequences.
        self.assertLen(trajectory.observations, num_transitions + 1)
        self.assertLen(trajectory.actions, num_transitions)
        self.assertLen(trajectory.rewards, num_transitions)
        self.assertLen(trajectory.discounts, num_transitions)

    # Initialize the buffer.
    capacity = 10
    observation_shape = (3, 3)
    buffer = blib.Buffer(
        observation_spec=specs.Array(observation_shape, dtype=np.float32),
        action_spec=specs.Array((), dtype=np.int32),
        max_trajectory_length=capacity)
    dummy_step = base.transition(action=0.,
                                 reward=0.,
                                 observation=np.zeros(observation_shape))

    # Fill the buffer.
    for _ in range(capacity):
        buffer.append(dummy_step, dummy_step)
    self.assertTrue(buffer.full())

    # Any further appends should fail.
    with self.assertRaises(ValueError):
        buffer.append(dummy_step, dummy_step)

    # Drain the buffer.
    check_trajectory(buffer.drain(), capacity)
    self.assertTrue(buffer.empty())

    # Draining an empty buffer should fail.
    with self.assertRaises(ValueError):
        buffer.drain()

    # Add an entry and immediately drain the buffer.
    buffer.append(dummy_step, dummy_step)
    check_trajectory(buffer.drain(), 1)
def test_spec(self):
    """`dm_env_spec` maps each managed TensorSpec to an `Array` by name."""
    fuzz_spec = dm_env_rpc_pb2.TensorSpec(
        name='fuzz', shape=[3], dtype=dm_env_rpc_pb2.DataType.FLOAT)
    foo_spec = dm_env_rpc_pb2.TensorSpec(
        name='foo', shape=[2], dtype=dm_env_rpc_pb2.DataType.INT32)
    manager = spec_manager.SpecManager({54: fuzz_spec, 55: foo_spec})

    expected = {
        'foo': specs.Array(shape=[2], dtype=np.int32),
        'fuzz': specs.Array(shape=[3], dtype=np.float32)
    }
    actual = dm_env_utils.dm_env_spec(manager)
    self.assertDictEqual(expected, actual)
def __init__(self, env):
    """Wrap `env` and precompute a flattened observation spec.

    Every wrapped observation must be a float64 `specs.Array`. Their element
    counts are summed into a single flat float32 `features` spec, and a
    `state` spec is sized from `env.physics.get_state()`.

    Args:
        env: Environment exposing `observation_spec()` and
            `physics.get_state()`.
    """
    self._env = env
    self._obs_spec = OrderedDict()
    wrapped_obs_spec = env.observation_spec().copy()

    for spec in wrapped_obs_spec.values():
        assert spec.dtype == np.float64
        # Exact type match on purpose: subclasses of `specs.Array` are
        # rejected here, which `isinstance` would not do.
        assert type(spec) == specs.Array

    # Builtin `int` replaces the `np.int` alias, which NumPy has removed;
    # the cast also turns the float 1.0 that `np.prod(())` returns into 1.
    dim = sum(int(np.prod(spec.shape)) for spec in wrapped_obs_spec.values())

    self._obs_spec['features'] = specs.Array(shape=(dim, ),
                                             dtype=np.float32,
                                             name='features')
    self._obs_spec['state'] = specs.Array(
        shape=self._env.physics.get_state().shape,
        dtype=np.float32,
        name='state')
def test_list_property(self):
    """Listing `baz` yields one read-only, non-listable `baz.fiz` spec."""
    expected_spec = specs.Array(shape=(2, 2), dtype=np.uint32)

    with _create_mock_connection() as connection:
        extension = properties.PropertiesExtension(connection)
        listed = extension.specs('baz')
        self.assertLen(listed, 1)

        fiz = listed['baz.fiz']
        self.assertTrue(fiz.readable)
        self.assertFalse(fiz.writable)
        self.assertFalse(fiz.listable)
        self.assertEqual(expected_spec, fiz.spec)
def make_spec(obs):
    """Build the spec expected for observable `obs`.

    The observable's callable is invoked with `None` for both of its
    arguments to obtain a sample. A leading axis of size 1 is prepended to
    the sample's shape unless the observable has an aggregator.

    Args:
        obs: Observable exposing `observation_callable` and `aggregator`.

    Returns:
        A `BoundedArray` carrying the bounds of `obs.array_spec` when `obs`
        is a `BoundedGeneric` whose aggregator is not the `'sum'`
        aggregator; otherwise a plain `Array`.
    """
    sample = np.array(obs.observation_callable(None, None)())
    if obs.aggregator:
        shape = sample.shape
    else:
        shape = (1, ) + sample.shape

    keeps_bounds = (isinstance(obs, BoundedGeneric) and
                    obs.aggregator is not observable.base.AGGREGATORS['sum'])
    if not keeps_bounds:
        return specs.Array(shape=shape, dtype=sample.dtype)
    return specs.BoundedArray(shape=shape,
                              dtype=sample.dtype,
                              minimum=obs.array_spec.minimum,
                              maximum=obs.array_spec.maximum)