Esempio n. 1
0
    def action_spec(self) -> t.Tuple[specs.DiscreteArray, specs.DiscreteArray]:
        """Return the two discrete action specs as a `(P-action, D-action)` tuple.

        Both specs allow nine int8 values, one per cell of the 1-hop
        neighbourhood.
        """
        neighbourhood_size = 9  # 1-hop neighbourhood

        p_action = specs.DiscreteArray(
            num_values=neighbourhood_size, dtype=np.int8, name='P-action')
        d_action = specs.DiscreteArray(
            num_values=neighbourhood_size, dtype=np.int8, name='D-action')
        return p_action, d_action
Esempio n. 2
0
    def __init__(self,
                 scaling_factor=1.,
                 action_layers='agent',
                 control_velocity=False,
                 momentum=0.):
        """Set up the action space and its discrete action spec.

        Args:
            scaling_factor: Scalar. The action is multiplied by this factor.
            action_layers: String, or list/tuple of strings. Each element (or
                the value itself if it is not a list/tuple) must be a key in
                the environment state. All sprites in these layers are acted
                upon by this action space. A value that is not a list or
                tuple is wrapped into a one-element tuple.
            control_velocity: Bool. True to control velocity, False to control
                force.
            momentum: Float in [0, 1]. Discount factor for the previous
                action. Should be zero when control_velocity is False, since
                imparting forces already gives momentum to the controlled
                agent(s). With control_velocity True, a value above zero gives
                the controlled agent(s) momentum; the velocity is still
                clipped at scaling_factor, so the agent only retains momentum
                when stopping or changing direction and does not accelerate.
        """
        # Normalise a single layer name into a one-element tuple.
        layers = (action_layers
                  if isinstance(action_layers, (list, tuple))
                  else (action_layers, ))

        self._scaling_factor = scaling_factor
        self._action_layers = layers
        self._control_velocity = control_velocity
        self._momentum = momentum

        # One discrete value per entry of the class-level action table.
        num_actions = len(self._ACTIONS)
        self._action_spec = specs.DiscreteArray(num_actions)
Esempio n. 3
0
def tensor_spec_to_dm_env_spec(
        tensor_spec: dm_env_rpc_pb2.TensorSpec) -> specs.Array:
    """Converts a dm_env_rpc TensorSpec into the matching dm_env spec.

    Args:
        tensor_spec: A dm_env_rpc TensorSpec protobuf.

    Returns:
        A DiscreteArray when the spec is bounded, has an empty shape, an
        integer dtype, a minimum equal to 0 and an explicit `max` field.
        A BoundedArray for any other spec with a `min` or `max` field.
        A StringArray for an unbounded spec of STRING dtype, and a plain
        Array for every other unbounded spec.
    """
    np_type = tensor_utils.data_type_to_np_type(tensor_spec.dtype)
    has_max = tensor_spec.HasField('max')
    is_bounded = tensor_spec.HasField('min') or has_max

    if not is_bounded:
        if tensor_spec.dtype == dm_env_rpc_pb2.DataType.STRING:
            return specs.StringArray(shape=tensor_spec.shape,
                                     name=tensor_spec.name)
        return specs.Array(shape=tensor_spec.shape,
                           dtype=np_type,
                           name=tensor_spec.name)

    bounds = tensor_spec_utils.bounds(tensor_spec)

    # The scalar/integer check comes first so the bound comparison is only
    # evaluated for scalar specs.
    is_integer_scalar = (not tensor_spec.shape
                         and np.issubdtype(np_type, np.integer))
    if is_integer_scalar and bounds.min == 0 and has_max:
        return specs.DiscreteArray(num_values=bounds.max + 1,
                                   dtype=np_type,
                                   name=tensor_spec.name)

    return specs.BoundedArray(shape=tensor_spec.shape,
                              dtype=np_type,
                              name=tensor_spec.name,
                              minimum=bounds.min,
                              maximum=bounds.max)
Esempio n. 4
0
 def testProperties(self):
     """Check minimum, maximum, default dtype and num_values of a spec."""
     expected_count = 5
     discrete = specs.DiscreteArray(num_values=expected_count)
     self.assertEqual(discrete.minimum, 0)
     self.assertEqual(discrete.maximum, expected_count - 1)
     self.assertEqual(discrete.dtype, np.int32)
     self.assertEqual(discrete.num_values, expected_count)
Esempio n. 5
0
 def testReplace(self, arg_name, new_value):
     """Check that `replace` changes only the requested attribute.

     Args:
       arg_name: Name of the attribute to replace.
       new_value: Value the attribute should take in the new spec.
     """
     original = specs.DiscreteArray(2, np.int32, "test")
     replaced = original.replace(**{arg_name: new_value})

     # `replace` must hand back a new object carrying the new value.
     self.assertIsNot(original, replaced)
     self.assertEqual(getattr(replaced, arg_name), new_value)

     # Every other constructor attribute must be carried over unchanged.
     untouched = {"num_values", "dtype", "name"} - {arg_name}
     for attr_name in untouched:
         self.assertEqual(getattr(replaced, attr_name),
                          getattr(original, attr_name))
Esempio n. 6
0
  def __init__(self, step_size=0.05, motion_cost=0.):
    """Constructor.

    Builds a two-part discrete action spec (2 values, then 4 values, both
    int64) and a table mapping action indices 0-3 to 2D displacement arrays
    of magnitude `step_size`.

    Args:
      step_size: Fraction of the arena width the sprite moves for each step.
      motion_cost: Each step incurs cost motion_cost * step_size.
    """
    self._step_size = step_size
    self._motion_cost = motion_cost
    self._action_spec = [
        specs.DiscreteArray(num_values=count, dtype=np.int64)
        for count in (2, 4)
    ]

    step = self._step_size
    displacements = (
        [0, -step],
        [-step, 0],
        [0, step],
        [step, 0],
    )
    self.action_to_motion = {
        index: np.array(delta) for index, delta in enumerate(displacements)
    }
Esempio n. 7
0
    def __init__(self,
                 episode_length,
                 canvas_width,
                 grid_width,
                 brush_sizes,
                 rewards=None,
                 discount=1.,
                 shaders_basedir=""):
        """Set up the "fluid_paint" environment state and its pyfluid wrapper.

        Args:
            episode_length: Stored as the episode length.
            canvas_width: Width passed (twice) to the wrapper's `Setup` call.
            grid_width: Side of the location grid; the "control" and "end"
                actions each have `grid_width * grid_width` values.
            brush_sizes: Brush sizes to offer; `None` selects
                `[10.0, 30.0, 50.0]`.
            rewards: Stored as-is.
            discount: Stored as the discount.
            shaders_basedir: Third argument passed to the wrapper's `Setup`.
        """
        self._name = "fluid_paint"

        self._brush_sizes = ([10.0, 30.0, 50.0]
                             if brush_sizes is None else brush_sizes)

        self._canvas_width = canvas_width
        self._grid_width = grid_width
        self._grid_size = grid_width * grid_width
        self._rewards = rewards

        # Build action specification and action masks.
        value_counts = (
            ("control", self._grid_size),
            ("end", self._grid_size),
            ("flag", 2),
            ("speed", len(self.STROKES_PER_STEP)),
            ("size", len(self._brush_sizes)),
            ("red", len(self.R_VALUES)),
            ("green", len(self.G_VALUES)),
            ("blue", len(self.B_VALUES)),
            ("alpha", len(self.A_VALUES)),
        )
        self._action_spec = collections.OrderedDict(
            (key, specs.DiscreteArray(count)) for key, count in value_counts)
        # Deep copy so per-instance edits never touch the class-level masks.
        self._action_masks = copy.deepcopy(self.ACTION_MASKS)

        self._brush_params = None
        self._prev_reward = 0

        # The wrapper is built from a default-constructed, serialized config.
        serialized_config = config_pb2.Config().SerializeToString()
        self._wrapper = pyfluid.Wrapper(serialized_config)
        self._wrapper.Setup(self._canvas_width, self._canvas_width,
                            shaders_basedir)

        self._episode_step = 0
        self._episode_length = episode_length
        self._prev_step_type = None
        self._discount = discount
Esempio n. 8
0
def space2spec(space: gym.Space, name: str = None):
    """Converts an OpenAI Gym space to a dm_env spec or nested structure of specs.

    Box, MultiBinary and MultiDiscrete Gym spaces are converted to
    BoundedArray specs. Discrete Gym spaces are converted to DiscreteArray
    specs. Tuple and Dict spaces are recursively converted to tuples and
    dictionaries of specs.

    Args:
        space: The Gym space to convert.
        name: Optional name to apply to all returned spec(s).

    Returns:
        A dm_env spec or nested structure of specs, corresponding to the
        input space.

    Raises:
        ValueError: If `space` is not one of the supported Gym space types.
    """
    if isinstance(space, spaces.Discrete):
        return specs.DiscreteArray(num_values=space.n,
                                   dtype=space.dtype,
                                   name=name)

    if isinstance(space, spaces.Box):
        return specs.BoundedArray(shape=space.shape,
                                  dtype=space.dtype,
                                  minimum=space.low,
                                  maximum=space.high,
                                  name=name)

    if isinstance(space, spaces.MultiBinary):
        return specs.BoundedArray(shape=space.shape,
                                  dtype=space.dtype,
                                  minimum=0.0,
                                  maximum=1.0,
                                  name=name)

    if isinstance(space, spaces.MultiDiscrete):
        # `nvec` holds the number of categories per component, so the valid
        # values are 0..nvec-1. BoundedArray bounds are inclusive, hence the
        # upper bound is nvec - 1 rather than nvec.
        return specs.BoundedArray(shape=space.shape,
                                  dtype=space.dtype,
                                  minimum=np.zeros(space.shape),
                                  maximum=space.nvec - 1,
                                  name=name)

    if isinstance(space, spaces.Tuple):
        return tuple(space2spec(s, name) for s in space.spaces)

    if isinstance(space, spaces.Dict):
        return {
            key: space2spec(value, name)
            for key, value in space.spaces.items()
        }

    raise ValueError('Unexpected gym space: {}'.format(space))
Esempio n. 9
0
    def observation_spec(
        self
    ) -> t.Tuple[specs.Array, specs.DiscreteArray, specs.DiscreteArray,
                 t.Optional[specs.DiscreteArray]]:
        """Return the observation specs as a 4-tuple.

        The tuple holds, in order: a `(num_zones, 2)` int16 array spec named
        'Zones', two discrete int16 specs over the zones named 'P-zone' and
        'D-zone', and a discrete int16 spec named 'Lives' with
        `max_n_lives + 1` values. The last entry is `None` when the
        simulator's `max_n_lives` is falsy.
        """
        num_zones = len(self._sim.zone_dict)
        max_lives = self._sim.max_n_lives

        zones = specs.Array(shape=(num_zones, 2), dtype=np.int16, name='Zones')
        p_zone = specs.DiscreteArray(num_values=num_zones,
                                     dtype=np.int16,
                                     name='P-zone')
        d_zone = specs.DiscreteArray(num_values=num_zones,
                                     dtype=np.int16,
                                     name='D-zone')

        lives = None
        if max_lives:
            lives = specs.DiscreteArray(num_values=max_lives + 1,
                                        dtype=np.int16,
                                        name='Lives')

        return zones, p_zone, d_zone, lives
Esempio n. 10
0
    def action_spec(self, *args, **kwargs):
        """Return the action spec as a dict with four entries.

        "Horizontal" and "Vertical" reuse the wrapped unity environment's
        "SetPosX" and "SetPosY" specs. "Sticky" is a two-valued discrete
        spec. "Selector" is a scalar float32 spec bounded by
        +/- `self._display_limit`. The positional and keyword arguments are
        accepted but not used.
        """
        # The action spec of the unity_environment is documented in
        # unity/environment.py.
        unity_action_spec = self._unity_environment.action_spec()

        return {
            "Horizontal": unity_action_spec["SetPosX"],
            "Vertical": unity_action_spec["SetPosY"],
            "Sticky": specs.DiscreteArray(num_values=2),
            "Selector": specs.BoundedArray(
                [],
                dtype=np.float32,
                minimum=-self._display_limit,
                maximum=self._display_limit),
        }
Esempio n. 11
0
    def test_scalar_with_0_n_bounds_gives_discrete_array(self):
        """A scalar UINT32 spec bounded by [0, 9] converts to a DiscreteArray."""
        upper_bound = 9

        proto = dm_env_rpc_pb2.TensorSpec()
        proto.dtype = dm_env_rpc_pb2.DataType.UINT32
        proto.name = 'foo'
        proto.min.uint32s.array[:] = [0]
        proto.max.uint32s.array[:] = [upper_bound]

        expected = specs.DiscreteArray(num_values=upper_bound + 1,
                                       dtype=np.uint32,
                                       name='foo')
        actual = dm_env_utils.tensor_spec_to_dm_env_spec(proto)

        self.assertEqual(expected, actual)
        self.assertEqual(0, actual.minimum)
        self.assertEqual(upper_bound, actual.maximum)
        self.assertEqual('foo', actual.name)
Esempio n. 12
0
 def action_spec(self) -> specs.DiscreteArray:
     """Return a discrete spec with nine values, named "action"."""
     num_actions = 9
     return specs.DiscreteArray(num_actions, name="action")
Esempio n. 13
0
 def action_spec(self) -> specs.DiscreteArray:
     """Returns the action spec.

     The spec is discrete with one value per entry of the module-level
     `_ACTIONS` collection, uses the builtin `int` dtype and is named
     "action".
     """
     # `np.int` was only an alias of the builtin `int`; it was deprecated in
     # NumPy 1.20 and removed in 1.24, so the builtin is used directly.
     return specs.DiscreteArray(dtype=int,
                                num_values=len(_ACTIONS),
                                name="action")
Esempio n. 14
0
 def action_spec(self):
     """Return a discrete spec with two values, named 'action'."""
     num_actions = 2
     return specs.DiscreteArray(num_actions, name='action')
Esempio n. 15
0
 def testDtypeNotIntegral(self, dtype):
     """Check that `dtype` is rejected with the not-integral ValueError."""
     expected_message = specs._DTYPE_NOT_INTEGRAL.format(dtype)
     with self.assertRaisesWithLiteralMatch(ValueError, expected_message):
         specs.DiscreteArray(num_values=5, dtype=dtype)
 def action_spec(self) -> dm_specs.DiscreteArray:
     """Returns the action spec.

     The spec is discrete with `self.max_moves` values, uses the builtin
     `int` dtype and is named "action".
     """
     # `np.int` was only an alias of the builtin `int`; it was deprecated in
     # NumPy 1.20 and removed in 1.24, so the builtin is used directly.
     return dm_specs.DiscreteArray(self.max_moves,
                                   dtype=int,
                                   name="action")
Esempio n. 17
0
 def action_spec(self):
     """Return a discrete spec with three values, named 'action'."""
     # `np.int` was only an alias of the builtin `int`; it was deprecated in
     # NumPy 1.20 and removed in 1.24, so the builtin is used directly.
     return specs.DiscreteArray(dtype=int, num_values=3, name='action')
Esempio n. 18
0
 def testSerialization(self):
     """Check that a spec survives a pickle round trip unchanged."""
     original = specs.DiscreteArray(2, np.int32, "test")
     round_tripped = pickle.loads(pickle.dumps(original))
     self.assertEqual(round_tripped, original)
 def observation_spec(self):
     """Returns the observation spec.

     The spec is discrete with two values, uses the builtin `int` dtype and
     is named "board".
     """
     board_spec = specs.DiscreteArray(num_values=2, dtype=int, name="board")
     return board_spec
Esempio n. 20
0
 def action_spec(self):
     """Return a discrete spec with ten values, named 'action'."""
     num_actions = 10
     return specs.DiscreteArray(num_values=num_actions, name='action')
Esempio n. 21
0
 def testRepr(self):
     """Check that the repr of a spec mentions its `num_values`."""
     discrete = specs.DiscreteArray(num_values=5)
     self.assertIn("num_values=5", repr(discrete))
Esempio n. 22
0
 def testDtypeOverflow(self, num_values, dtype):
     """Check that this num_values/dtype pair raises the overflow ValueError."""
     expected_message = specs._DTYPE_OVERFLOW.format(
         np.dtype(dtype), num_values)
     with self.assertRaisesWithLiteralMatch(ValueError, expected_message):
         specs.DiscreteArray(num_values=num_values, dtype=dtype)
Esempio n. 23
0
 def observation_spec(self) -> specs.DiscreteArray:
     """Return a discrete spec named "observation".

     The number of values is the sum of the entries of `self.shape`.
     """
     num_observations = sum(self.shape)
     return specs.DiscreteArray(num_observations, name="observation")
Esempio n. 24
0
 def action_spec(self):
     """Return a discrete spec with `self._n_actions` values, named 'action'."""
     num_actions = self._n_actions
     return specs.DiscreteArray(num_actions, name='action')
 def action_spec(self):
     """Return a discrete spec sized by the wrapped env's `action_space.n`."""
     num_actions = self._env.action_space.n
     return specs.DiscreteArray(num_actions)
 def action_spec(self):
     """Return a discrete spec named "action" declared with zero values."""
     # NOTE(review): num_values=0 declares an empty action set. If `specs`
     # is dm_env.specs, DiscreteArray rejects non-positive num_values with a
     # ValueError - confirm the intended number of actions.
     empty_action_spec = specs.DiscreteArray(
         num_values=0, dtype=int, name="action")
     return empty_action_spec
Esempio n. 27
0
    def __init__(self,
                 episode_length,
                 canvas_width,
                 grid_width,
                 brush_type,
                 brush_sizes,
                 use_color,
                 use_pressure=True,
                 use_alpha=False,
                 background="white",
                 rewards=None,
                 discount=1.,
                 brushes_basedir=""):
        """Set up the "libmypaint" environment: specs, surface and brush.

        Args:
            episode_length: Stored as the episode length.
            canvas_width: Width and height of the painting surface.
            grid_width: Side of the location grid; the "control" and "end"
                actions each have `grid_width * grid_width` values.
            brush_type: Brush name; the brush file is loaded from
                `brushes/<brush_type>.myb` under `brushes_basedir`.
            brush_sizes: Strictly positive brush sizes; `None` selects
                `[1, 2, 3]`. Their natural logs are stored. With a single
                size the "size" action is removed.
            use_color: If falsy, the actions listed in `COLOR_ACTIONS` are
                removed and the output has one channel.
            use_pressure: If falsy, the "pressure" action is removed.
            use_alpha: With colour enabled, selects four output channels
                instead of three.
            background: "white" or "transparent" (the latter maps to the
                surface's `kBlack` background).
            rewards: Stored as-is.
            discount: Stored as the discount.
            brushes_basedir: Base directory joined in front of the brush path.

        Raises:
            ValueError: If `background` is neither "white" nor "transparent".
            AssertionError: If any brush size is not strictly positive.
        """
        self._name = "libmypaint"

        if brush_sizes is None:
            brush_sizes = [1, 2, 3]

        self._canvas_width = canvas_width
        self._grid_width = grid_width
        self._grid_size = grid_width * grid_width
        self._use_color = use_color
        self._use_alpha = use_alpha
        if self._use_color:
            self._output_channels = 4 if self._use_alpha else 3
        else:
            self._output_channels = 1
        self._use_pressure = use_pressure
        assert np.all(np.array(brush_sizes) > 0.)
        self._log_brush_sizes = [np.log(float(size)) for size in brush_sizes]
        self._rewards = rewards

        # Build action specification and action masks.
        value_counts = (
            ("control", self._grid_size),
            ("end", self._grid_size),
            ("flag", 2),
            ("pressure", len(self.P_VALUES)),
            ("size", len(self._log_brush_sizes)),
            ("red", len(self.R_VALUES)),
            ("green", len(self.G_VALUES)),
            ("blue", len(self.B_VALUES)),
        )
        self._action_spec = collections.OrderedDict(
            (key, specs.DiscreteArray(count)) for key, count in value_counts)
        # Deep copy so the removals below never touch the class-level masks.
        self._action_masks = copy.deepcopy(self.ACTION_MASKS)

        def remove_action_mask(name):
            # Drop `name` from every per-key mask table.
            for mask in self._action_masks.values():
                del mask[name]

        self._use_size = len(self._log_brush_sizes) > 1

        # Collect the disabled actions, then strip each from spec and masks.
        disabled = []
        if not self._use_pressure:
            disabled.append("pressure")
        if not self._use_size:
            disabled.append("size")
        if not self._use_color:
            disabled.extend(self.COLOR_ACTIONS)
        for action_name in disabled:
            del self._action_spec[action_name]
            remove_action_mask(action_name)

        # Setup the painting surface.
        if background == "white":
            surface_background = pylibmypaint.SurfaceWrapper.Background.kWhite
        elif background == "transparent":
            surface_background = pylibmypaint.SurfaceWrapper.Background.kBlack
        else:
            raise ValueError("Invalid background type: {}".format(background))
        self._surface = pylibmypaint.SurfaceWrapper(self._canvas_width,
                                                    self._canvas_width,
                                                    surface_background)

        # Setup the brush.
        brush_path = os.path.join(brushes_basedir,
                                  "brushes/{}.myb".format(brush_type))
        self._brush = pylibmypaint.BrushWrapper()
        self._brush.SetSurface(self._surface)
        self._brush.LoadFromFile(brush_path)

        self._episode_step = 0
        self._episode_length = episode_length
        self._prev_step_type = None
        self._discount = discount
Esempio n. 28
0
 def action_spec(self) -> specs.DiscreteArray:
     """Return a discrete spec derived from the wrapped env's first action spec.

     The number of values is that spec's `maximum + 1`, its dtype is reused
     and the result is named 'action_spec'.
     """
     wrapped_spec = self._environment.action_spec()[0]
     num_actions = wrapped_spec.maximum.item() + 1
     return specs.DiscreteArray(num_values=num_actions,
                                dtype=wrapped_spec.dtype,
                                name='action_spec')
Esempio n. 29
0
 def action_spec(self) -> specs.DiscreteArray:
     """Return a discrete spec covering the wrapped env's first action spec.

     The number of values is that spec's `maximum - minimum + 1`.
     """
     wrapped_spec = self._environment.action_spec()[0]
     upper = wrapped_spec.maximum.item()
     lower = wrapped_spec.minimum.item()
     return specs.DiscreteArray(num_values=upper - lower + 1)
Esempio n. 30
0
 def testInvalidNumActions(self, num_values):
     """Check that `num_values` is rejected with the not-positive ValueError."""
     expected_message = specs._NUM_VALUES_NOT_POSITIVE.format(num_values)
     with self.assertRaisesWithLiteralMatch(ValueError, expected_message):
         specs.DiscreteArray(num_values=num_values)