Beispiel #1
0
    def extract_previous_action_spec(self, current_action_step):
        """Return the spec of the action that precedes the given step.

        Args:
            current_action_step: Index of the action step being handled.

        Returns:
            * step ``0``: a dict holding one scalar int32 ``'discrete'`` spec
              bounded by ``len(self.act_wrapper.func_ids) - 1``.
            * step ``1``: a dict holding a scalar int32 ``'discrete'`` spec
              sized by the summed ranges of every spec returned by
              ``self.extract_action_spec(0)``, plus an unbounded float32
              ``'continuous'`` spec of shape ``(2, )``.
            * any other value, including ``-1``: ``None``.
        """
        if current_action_step == 0:
            last_function_index = len(self.act_wrapper.func_ids) - 1
            return {
                'discrete': tensor_spec.BoundedTensorSpec(
                    shape=(),
                    dtype=np.int32,
                    name='functions',
                    minimum=(0, ),
                    maximum=(last_function_index, )),
            }

        if current_action_step == 1:
            step_zero_specs = self.extract_action_spec(current_action_step - 1)
            # Every bounded spec contributes (maximum - minimum + 1) choices.
            choice_count = sum(
                bounded.maximum[0] - bounded.minimum[0] + 1
                for bounded in step_zero_specs.values())

            discrete_spec = tensor_spec.BoundedTensorSpec(
                shape=(),
                dtype=np.int32,
                name='discrete_func',
                minimum=(0, ),
                maximum=(choice_count - 1, ))
            continuous_spec = tensor_spec.BoundedTensorSpec(
                shape=(2, ),
                dtype=np.float32,
                name='continuous_func',
                minimum=(-np.inf, ),
                maximum=(np.inf, ))
            return {'discrete': discrete_spec, 'continuous': continuous_spec}

        # Step -1 and every other step yield no previous-action spec.
        return None
Beispiel #2
0
    def _transform_specs(self):
        """Rewrite the stored specs for the replay buffer and policy base class.

        Replaces ``self._time_step_spec``, ``self._policy_state_spec`` and
        ``self._info_spec`` with their first element, and rebuilds
        ``self._action_spec`` as a dict with two entries:

        * ``self._raw_action_key``: int32 spec of shape ``(num_specs, )``,
          one slot per step, each bounded by ``[0, 1e5]``.
        * ``self._transformed_action_key``: float32 spec of shape
          ``(num_specs, 3)``; float because it carries transformed
          coordinates.
        """
        # Only the first time_step_spec is needed: no previous action is used.
        self._time_step_spec = self._time_step_spec[0]

        # The discrete action taken at each of the n steps is needed to
        # compute q values, so the raw spec has one entry per step.
        upper_bounds = [int(1e5)] * self._num_specs
        assert len(upper_bounds) == self._num_specs

        raw_key = self._raw_action_key
        transformed_key = self._transformed_action_key
        transformed_width = 3

        specs = self._action_spec = {}
        specs[raw_key] = tensor_spec.BoundedTensorSpec(
            shape=(self._num_specs, ),
            dtype=tf.int32,
            name=raw_key + 'action',
            minimum=tuple([0] * self._num_specs),
            maximum=tuple(upper_bounds))
        specs[transformed_key] = tensor_spec.BoundedTensorSpec(
            shape=(self._num_specs, transformed_width),
            dtype=tf.float32,
            name=transformed_key + 'action',
            minimum=(0, ),
            maximum=(max(upper_bounds), ))

        self._policy_state_spec = self._policy_state_spec[0]
        self._info_spec = self._info_spec[0]
Beispiel #3
0
    def make_spec(self, spec):
        """Build the observation spec and store it on ``self.obs_spec``.

        Args:
            spec: Sequence whose first element maps ``'feature_screen'`` and
                ``'feature_minimap'`` to shapes; the leading dimension of
                each shape is dropped.

        The resulting structure has a ``'screen'`` and a ``'minimap'`` list
        with one bounded spec per selected feature, plus an
        ``'available_actions'`` 0/1 vector of length ``len(self.action_ids)``.
        It is passed through ``list2tuple`` before being stored.
        """
        raw_spec = spec[0]
        available_actions_shape = (len(self.action_ids), )

        # Since all screen variables are integers, shapes are
        # (screen_size, screen_size) instead of (1, screen_size, screen_size).
        screen_shape = list(raw_spec['feature_screen'][1:])
        minimap_shape = list(raw_spec['feature_minimap'][1:])

        screen_dims = get_spatial_dims(self.features['screen'],
                                       features.SCREEN_FEATURES)
        minimap_dims = get_spatial_dims(self.features['minimap'],
                                        features.MINIMAP_FEATURES)
        screen_types = get_spatial_type(self.features['screen'],
                                        features.SCREEN_FEATURES)
        minimap_types = get_spatial_type(self.features['minimap'],
                                         features.MINIMAP_FEATURES)

        def _layer_specs(prefix, names, dims, kinds, shape):
            # One bounded spec per feature layer: int32 for categorical
            # layers, float32 for everything else.
            layers = []
            for layer_name, layer_dim, layer_kind in zip(names, dims, kinds):
                layer_dtype = (np.int32
                               if layer_kind == "CATEGORICAL" else np.float32)
                layers.append(
                    tensor_spec.BoundedTensorSpec(
                        shape=shape,
                        dtype=layer_dtype,
                        name=prefix + layer_name,
                        minimum=(0, ),
                        maximum=(layer_dim - 1, )))
            return layers

        observation = {
            'screen': _layer_specs('screen_', self.features['screen'],
                                   screen_dims, screen_types, screen_shape),
            'minimap': _layer_specs('minimap_', self.features['minimap'],
                                    minimap_dims, minimap_types,
                                    minimap_shape),
        }
        observation['available_actions'] = tensor_spec.BoundedTensorSpec(
            shape=available_actions_shape,
            dtype=np.int32,
            name='available_actions',
            minimum=(0, ),
            maximum=(1, ))

        ##TODO: implement structural observation specs
        self.obs_spec = list2tuple(observation)
Beispiel #4
0
 def testReuseSpec(self):
     """A spec rebuilt from another spec's own attributes equals the original."""
     source = tensor_spec.BoundedTensorSpec(
         (1, 2), dtypes.int32, minimum=0, maximum=1)
     rebuilt = tensor_spec.BoundedTensorSpec(
         source.shape, source.dtype, source.minimum, source.maximum)
     self.assertEqual(source, rebuilt)
Beispiel #5
0
    def make_spec(self, spec, obs_spec):
        """Derive all action specs from the environment's action spec.

        Args:
            spec: Sequence whose first element exposes ``.types.<name>.sizes``
                for every name in ``self._spatial_action_types``.
            obs_spec: Sequence whose first element is stored as
                ``self._obs_spec``.

        Raises:
            ValueError: If a function takes both a ``screen`` and a
                ``minimap`` argument.

        Side effects: fills ``self._spatial_spec``, sets
        ``self._spatial_func_args``, ``self._structured_func_args`` and
        ``self._func_ids_spec``, and runs the ``build_*`` /
        ``_stack_id2func`` helpers.
        """
        spec = spec[0]
        self._obs_spec = obs_spec[0]

        # Continuous parameter space: screen, minimap, screen2.
        for t in self._spatial_action_types:
            args = getattr(spec.types, t)
            self._spatial_spec[t] = tensor_spec.BoundedTensorSpec(
                shape=args.sizes,
                dtype=np.int32,
                name=t,
                minimum=(0, ),
                maximum=(255, ))

        structured_func_args = collections.defaultdict(list)
        spatial_func_args = collections.defaultdict(list)

        for fn_id in self.func_ids:
            sc2_func = actions.FUNCTIONS[fn_id]

            # Compare names by value: `is` on a string literal only works
            # when both strings happen to be interned.
            check_screen = any(arg.name == 'screen' for arg in sc2_func.args)
            check_minimap = any(
                arg.name == 'minimap' for arg in sc2_func.args)
            if check_screen and check_minimap:
                raise ValueError(
                    "one action can't act on both screen and minimap")

            if check_screen or check_minimap:
                spatial_type = 'screen' if check_screen else 'minimap'
                # Keep the non-spatial arguments; a spatial argument is kept
                # only when it is the function's sole argument.
                single_arg = len(sc2_func.args) == 1
                current_spatial_args = [
                    arg for arg in sc2_func.args
                    if arg.name not in self._spatial_action_types
                    or single_arg
                ]
                spatial_func_args[spatial_type].append(
                    (fn_id, current_spatial_args))
            else:
                structured_func_args['structured'].append(
                    (fn_id, list(sc2_func.args)))
        self._spatial_func_args = spatial_func_args
        self._structured_func_args = structured_func_args

        self.build_spatial_action_spec()
        self.build_structured_action_spec()
        self.build_action_arg_mask()
        self._stack_id2func()
        self._func_ids_spec = tensor_spec.BoundedTensorSpec(
            shape=(1, ),
            dtype=np.int32,
            name='functions',
            minimum=0,
            maximum=len(self.func_ids) - 1)
Beispiel #6
0
 def testSerialization(self):
     """Named and unnamed specs survive a trace_type serialize/deserialize trip."""
     candidates = (
         tensor_spec.BoundedTensorSpec([1], np.float32, 0, 1),
         tensor_spec.BoundedTensorSpec(
             [1, 2, 3], np.float32, 0, 1, name="some_name"),
     )
     for candidate in candidates:
         restored = trace_type.deserialize(trace_type.serialize(candidate))
         self.assertEqual(candidate, restored)
  def testScalarBounds(self):
    """Scalar bounds are exposed as ndarrays and can be fed back in."""
    scalar_spec = tensor_spec.BoundedTensorSpec(
        (), dtypes.float32, minimum=0.0, maximum=1.0)
    lower, upper = scalar_spec.minimum, scalar_spec.maximum

    for bound in (lower, upper):
      self.assertIsInstance(bound, np.ndarray)

    # Sanity check that numpy compares correctly to a scalar for an empty shape.
    self.assertEqual(0.0, lower)
    self.assertEqual(1.0, upper)

    # Check that the spec doesn't fail its own input validation.
    tensor_spec.BoundedTensorSpec(
        scalar_spec.shape, scalar_spec.dtype, lower, upper)
Beispiel #8
0
    def testMasking(self):
        """QPolicy only returns actions whose mask entry is 1."""
        batch_size = 1000
        num_state_dims = 5
        num_actions = 8
        mask = [0, 1, 0, 1, 0, 0, 1, 0]

        observations = tf.random.uniform([batch_size, num_state_dims])
        time_step = ts.restart(observations, batch_size=batch_size)
        input_tensor_spec = tensor_spec.TensorSpec([num_state_dims],
                                                   tf.float32)
        action_spec = tensor_spec.BoundedTensorSpec(
            [1], tf.int32, 0, num_actions - 1)

        np_mask = np.array(mask)
        # The same mask row is repeated for every element of the batch.
        tf_mask = tf.constant([mask] * batch_size)

        def split_observation_and_mask(observation):
            return (observation, tf_mask)

        q_net = q_network.QNetwork(input_tensor_spec,
                                   action_spec,
                                   mask_split_fn=split_observation_and_mask)
        policy = q_policy.QPolicy(ts.time_step_spec(input_tensor_spec),
                                  action_spec, q_net)

        # Force creation of variables before global_variables_initializer.
        policy.variables()
        self.evaluate(tf.compat.v1.global_variables_initializer())

        # Sample a batch of 1000 actions from the policy and ensure that
        # invalid actions are never chosen.
        chosen = self.evaluate(policy.action(time_step).action)
        self.assertEqual(chosen.shape, (batch_size, 1))
        self.assertAllEqual(np_mask[chosen], np.ones([batch_size, 1]))
Beispiel #9
0
 def testEncodeDecodeBoundedTensorSpecNoName(self):
     """An unnamed float64 bounded spec encodes to the expected proto and back."""
     structure = [
         tensor_spec.BoundedTensorSpec((28, 28, 3), dtypes.float64, -2,
                                       (1, 1, 20))
     ]
     self.assertTrue(self._coder.can_encode(structure))
     encoded = self._coder.encode_structure(structure)

     # Hand-build the proto the coder is expected to produce.
     expected = struct_pb2.StructuredValue()
     spec_proto = expected.list_value.values.add().bounded_tensor_spec_value
     for dim_size in (28, 28, 3):
         spec_proto.shape.dim.add().size = dim_size
     spec_proto.name = ""
     spec_proto.dtype = dtypes.float64.as_datatype_enum
     lower_proto = tensor_util.make_tensor_proto(
         [-2], dtype=dtypes.float64, shape=[])
     upper_proto = tensor_util.make_tensor_proto(
         [1, 1, 20], dtype=dtypes.float64, shape=[3])
     spec_proto.minimum.CopyFrom(lower_proto)
     spec_proto.maximum.CopyFrom(upper_proto)

     self.assertEqual(expected, encoded)
     self.assertEqual(structure, self._coder.decode_proto(encoded))
Beispiel #10
0
def make_server():
    """Create a Reverb server with three identically configured tables.

    The tables are ``'dist'`` (no signature), ``'signatured'`` (a float32
    ``tf.TensorSpec`` of shape ``(None, None)``) and
    ``'bounded_spec_signatured'`` (a float32 ``BoundedTensorSpec`` of the
    same shape). The server is created with ``port=None``.
    """

    def _table(table_name, **extra):
        # All tables share sampler, remover, capacity and rate limiter.
        return reverb_server.Table(
            table_name,
            sampler=item_selectors.Prioritized(priority_exponent=1),
            remover=item_selectors.Fifo(),
            max_size=1000000,
            rate_limiter=rate_limiters.MinSize(1),
            **extra)

    plain_signature = tf.TensorSpec(dtype=tf.float32, shape=(None, None))
    # Currently only the `shape` and `dtype` of the bounded spec
    # is considered during signature check.
    # TODO(b/158033101): Check the boundaries as well.
    bounded_signature = tensor_spec.BoundedTensorSpec(dtype=tf.float32,
                                                      shape=(None, None),
                                                      minimum=(0.0, 0.0),
                                                      maximum=(10.0, 10.))

    tables = [
        _table('dist'),
        _table('signatured', signature=plain_signature),
        _table('bounded_spec_signatured', signature=bounded_signature),
    ]
    return reverb_server.Server(tables=tables, port=None)
Beispiel #11
0
 def observation_spec(self):
     """Return the wrapped observation spec rebounded to the range [0, 1].

     The new spec keeps the wrapped spec's dtype and name; its shape is a
     vector with one entry per element of ``self.upper``.
     """
     wrapped = super(NormalizeWrapper, self).observation_spec()
     upper_bound = tf.ones([len(self.upper)])
     lower_bound = tf.zeros([len(self.upper)])
     return tensor_spec.BoundedTensorSpec(
         shape=upper_bound.shape,
         dtype=wrapped.dtype,
         name=wrapped.name,
         minimum=lower_bound,
         maximum=upper_bound)
Beispiel #12
0
 def testFromBoundedTensorSpec(self):
     """TensorSpec.from_spec keeps shape, dtype and name of a bounded spec."""
     bounded = tensor_spec.BoundedTensorSpec((1, 2), dtypes.int32, 0, 1)
     plain = tensor_spec.TensorSpec.from_spec(bounded)
     for attribute in ("shape", "dtype", "name"):
         self.assertEqual(getattr(bounded, attribute),
                          getattr(plain, attribute))
 def testMinimumMaximumAttributes(self):
   """minimum/maximum are plain ndarrays holding the values passed in."""
   bounded = tensor_spec.BoundedTensorSpec(
       (1, 2, 3), dtypes.float32, 0, (5, 5, 5))
   lower, upper = bounded.minimum, bounded.maximum
   # Exact type match: a subclass of ndarray is not accepted.
   self.assertIs(type(lower), np.ndarray)
   self.assertIs(type(upper), np.ndarray)
   self.assertAllEqual(lower, np.array(0, dtype=np.float32))
   self.assertAllEqual(upper, np.array([5, 5, 5], dtype=np.float32))
Beispiel #14
0
 def testNotWriteableNP(self):
     """Writing into a spec's minimum/maximum array raises a read-only error."""
     spec = tensor_spec.BoundedTensorSpec((1, 2, 3), dtypes.float32, 0,
                                          (5, 5, 5))
     # assertRaisesRegexp is a deprecated alias that was removed in
     # Python 3.12; assertRaisesRegex is the supported name.
     with self.assertRaisesRegex(ValueError, "read-only"):
         spec.minimum[0] = -1
     with self.assertRaisesRegex(ValueError, "read-only"):
         spec.maximum[0] = 100
Beispiel #15
0
 def time_step_spec(self):
     """Return the wrapped time-step spec with a history-tiled observation.

     The observation bounds of the wrapped spec are repeated
     ``self.history_n`` times with ``np.tile``; step type, reward and
     discount specs are passed through unchanged.
     """
     wrapped = super(TFhistoryWrapper, self).time_step_spec()
     single_obs = wrapped.observation
     upper_bound = np.tile(single_obs.maximum, self.history_n)
     lower_bound = np.tile(single_obs.minimum, self.history_n)
     stacked_obs = tensor_spec.BoundedTensorSpec(
         shape=upper_bound.shape,
         dtype=single_obs.dtype,
         name=single_obs.name,
         minimum=lower_bound,
         maximum=upper_bound)
     return ts.TimeStep(wrapped.step_type,
                        wrapped.reward,
                        wrapped.discount,
                        observation=stacked_obs)
Beispiel #16
0
    def extract_action_spec(self, step):
        """Collect the action specs that apply at a given step.

        Args:
            step: Action step index.

        Returns:
            * ``step < -1``: ``None`` (a notice is printed).
            * ``step >= 0``: a dict mapping each key of the wrapper's merged
              spatial and structured specs to its ``step``-th entry, for
              keys that have at least ``step + 1`` entries.
            * otherwise: a dict with one scalar int32 ``'structured'`` spec
              bounded by ``len(self.act_wrapper.func_ids) - 1``.
        """
        merged_sources = (
            self.act_wrapper._merged_spatial_action_spec,
            self.act_wrapper._merged_structured_action_spec,
        )

        if step < -1:
            print("generate None spec for step < -1")
            return None

        if step >= 0:
            selected = {}
            for merged in merged_sources:
                selected.update({
                    key: per_step[step]
                    for key, per_step in merged.items()
                    if step < len(per_step)
                })
            return selected

        return {
            'structured': tensor_spec.BoundedTensorSpec(
                shape=(),
                dtype=np.int32,
                name="sc2_func_action_spec",
                minimum=(0, ),
                maximum=(len(self.act_wrapper.func_ids) - 1, )),
        }
Beispiel #17
0
 def testFromBoundedTensorSpec(self):
     """BoundedTensorSpec.from_spec yields a spec equal to its input."""
     source = tensor_spec.BoundedTensorSpec(
         (1, 2), dtypes.int32, minimum=0, maximum=1)
     copied = tensor_spec.BoundedTensorSpec.from_spec(source)
     self.assertEqual(source, copied)
Beispiel #18
0
 def testMultipleActionsRaiseError(self):
     """HeteroQPolicy rejects an action_spec holding more than one spec."""
     action_spec = [tensor_spec.BoundedTensorSpec([1], tf.int32, 0, 1)] * 2
     # assertRaisesRegexp is a deprecated alias that was removed in
     # Python 3.12; assertRaisesRegex is the supported name.
     with self.assertRaisesRegex(
             NotImplementedError,
             'action_spec can only contain a single BoundedTensorSpec'):
         hetero_q_policy.HeteroQPolicy(self._time_step_spec,
                                       action_spec,
                                       q_network=DummyNet())
Beispiel #19
0
    def testActionSpecsIncompatible(self):
        """QPolicy rejects a network whose action spec differs from its own."""
        network_action_spec = tensor_spec.BoundedTensorSpec([2], tf.int32, 0,
                                                            1)
        q_net = DummyNetWithActionSpec(network_action_spec)

        # assertRaisesRegexp is a deprecated alias that was removed in
        # Python 3.12; assertRaisesRegex is the supported name.
        with self.assertRaisesRegex(
                ValueError,
                'action_spec must be compatible with q_network.action_spec'):
            q_policy.QPolicy(self._time_step_spec, self._action_spec, q_net)
Beispiel #20
0
 def build_obs_spec(self, obs):
     """Return a float32 bounded spec whose bounds are ``obs``'s bounds tiled.

     Args:
         obs: Bounded spec providing ``minimum``, ``maximum``, ``name`` and,
             when ``self.isAtari`` is set, ``shape``.

     The bounds are repeated ``self.history_n`` times, or
     ``self.history_n * obs.shape[0]`` times when ``self.isAtari`` is set.
     """
     if self.isAtari:
         repeats = self.history_n * obs.shape[0]
     else:
         repeats = self.history_n
     upper_bound = np.tile(obs.maximum, repeats)
     lower_bound = np.tile(obs.minimum, repeats)
     return tensor_spec.BoundedTensorSpec(
         shape=upper_bound.shape,
         dtype=tf.float32,
         name=obs.name,
         minimum=lower_bound,
         maximum=upper_bound)
 def do_decode(self, value, decode_fn):
   """Rebuild a BoundedTensorSpec from ``value.bounded_tensor_spec_value``.

   Args:
     value: Proto carrying a ``bounded_tensor_spec_value`` field.
     decode_fn: Callable used to decode the nested shape and dtype, each
       wrapped in a ``StructuredValue``.

   Returns:
     The decoded ``BoundedTensorSpec``; an empty proto name becomes ``None``.
   """
   proto = value.bounded_tensor_spec_value
   spec_name = proto.name or None
   shape = decode_fn(
       struct_pb2.StructuredValue(tensor_shape_value=proto.shape))
   dtype = decode_fn(
       struct_pb2.StructuredValue(tensor_dtype_value=proto.dtype))
   lower_bound = tensor_util.MakeNdarray(proto.minimum)
   upper_bound = tensor_util.MakeNdarray(proto.maximum)
   return tensor_spec.BoundedTensorSpec(
       shape=shape,
       dtype=dtype,
       minimum=lower_bound,
       maximum=upper_bound,
       name=spec_name)
Beispiel #22
0
    def testEquality(self):
        """Specs compare by bound values, whether given as scalars or tuples."""

        def bounded(minimum, maximum):
            return tensor_spec.BoundedTensorSpec((1, 2, 3), dtypes.float32,
                                                 minimum, maximum)

        spec_1_1 = bounded(0, (5, 5, 5))
        spec_1_2 = bounded(0.00000001, (5, 5, 5.00000000000000001))
        spec_2_1 = bounded(1, (5, 5, 5))
        spec_2_2 = bounded((1, 1, 1), (5, 5, 5))
        spec_2_3 = bounded((1, 1, 1), 5)
        spec_3_1 = bounded((2, 1, 1), (5, 5, 5))

        same, different = self.assertEqual, self.assertNotEqual
        # Checks run in this order; each row is (assertion, left, right).
        checks = (
            (same, spec_1_1, spec_1_2),
            (same, spec_1_2, spec_1_1),
            (different, spec_1_1, spec_2_2),
            (different, spec_1_1, spec_2_1),
            (different, spec_2_2, spec_1_1),
            (different, spec_2_1, spec_1_1),
            (same, spec_2_1, spec_2_2),
            (same, spec_2_2, spec_2_1),
            (same, spec_2_2, spec_2_3),
            (different, spec_1_1, spec_3_1),
            (different, spec_2_1, spec_3_1),
            (different, spec_2_2, spec_3_1),
        )
        for check, left, right in checks:
            check(left, right)
  def test_sets_dtypes_from_bounded_spec_signature(self):
    """A dataset built from a bounded-spec signature exposes plain TensorSpecs."""
    signature = {
        'a': {
            'b': tensor_spec.BoundedTensorSpec([3, 3], tf.float32, 0, 3),
            'c': tensor_spec.BoundedTensorSpec([], tf.int64, 0, 5),
        },
    }
    queue_table = reverb_server.Table.queue('queue', 10, signature=signature)
    server = reverb_server.Server([queue_table])

    dataset = reverb_dataset.ReplayDataset.from_table_signature(
        f'localhost:{server.port}', 'queue', 100)

    # Same nesting, shapes and dtypes as the signature, without the bounds.
    expected_spec = {
        'a': {
            'b': tf.TensorSpec([3, 3], tf.float32),
            'c': tf.TensorSpec([], tf.int64),
        },
    }
    self.assertDictEqual(dataset.element_spec.data, expected_spec)
Beispiel #24
0
    def extract_func_action_spec(self, current_action_step):
        """Return the function-id action spec for a step.

        Args:
            current_action_step: Action step index.

        Returns:
            ``None`` when the step is ``-1``; otherwise a dict with one
            scalar int32 ``'discrete'`` spec bounded by
            ``len(self.act_wrapper.func_ids) - 1``.
        """
        if current_action_step == -1:
            return None

        last_function_index = len(self.act_wrapper.func_ids) - 1
        return {
            'discrete': tensor_spec.BoundedTensorSpec(
                shape=(),
                dtype=np.int32,
                name='functions',
                minimum=(0, ),
                maximum=(last_function_index, )),
        }
Beispiel #25
0
    def testActionScalarSpec(self):
        """A scalar action spec yields one in-range int32 action per batch item."""
        scalar_spec = tensor_spec.BoundedTensorSpec((), tf.int32, 0, 1)
        policy = q_policy.QPolicy(self._time_step_spec,
                                  scalar_spec,
                                  q_network=DummyNet())

        batch = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
        first_step = ts.restart(batch, batch_size=2)
        action_tensor = policy.action(first_step, seed=1).action
        self.assertEqual(action_tensor.shape.as_list(), [2])
        self.assertEqual(action_tensor.dtype, tf.int32)

        # Initialize all variables
        self.evaluate(tf.compat.v1.global_variables_initializer())
        values = self.evaluate(action_tensor)
        within_bounds = np.all(values >= 0) and np.all(values <= 1)
        self.assertTrue(within_bounds)
Beispiel #26
0
 def testActionList(self):
     """A one-element list action spec yields a one-element list of actions."""
     list_spec = [tensor_spec.BoundedTensorSpec([1], tf.int32, 0, 1)]
     policy = q_policy.QPolicy(self._time_step_spec,
                               list_spec,
                               q_network=DummyNet())
     batch = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
     first_step = ts.restart(batch, batch_size=2)
     action_step = policy.action(first_step, seed=1)
     self.assertIsInstance(action_step.action, list)

     self.evaluate(tf.compat.v1.global_variables_initializer())
     evaluated = self.evaluate(action_step.action)
     self.assertLen(evaluated, 1)
     # Extract contents from the outer list.
     values = evaluated[0]
     within_bounds = np.all(values >= 0) and np.all(values <= 1)
     self.assertTrue(within_bounds)
Beispiel #27
0
    def testActionWithinBounds(self):
        """Actions respect a spec whose bounds are both negative."""
        negative_spec = tensor_spec.BoundedTensorSpec(
            [1], tf.int32, minimum=-6, maximum=-5)
        policy = q_policy.QPolicy(self._time_step_spec,
                                  negative_spec,
                                  q_network=DummyNet())

        batch = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
        first_step = ts.restart(batch, batch_size=2)
        action_tensor = policy.action(first_step).action
        self.assertEqual(action_tensor.shape.as_list(), [2, 1])
        self.assertEqual(action_tensor.dtype, tf.int32)

        # Initialize all variables
        self.evaluate(tf.compat.v1.global_variables_initializer())
        values = self.evaluate(action_tensor)
        within_bounds = np.all(values <= -5) and np.all(values >= -6)
        self.assertTrue(within_bounds)
Beispiel #28
0
    def build_structured_action_spec(self):
        """Merge structured function arguments into one bounded spec per step.

        For every action type in ``self._structured_func_args``, the
        arguments found at the same position across all functions are
        pooled. Each pooled position becomes one int32 ``BoundedTensorSpec``
        of shape ``(1, )`` whose index range covers all pooled arguments:
        ``arg.sizes[0]`` indices per argument, or a single index for
        ``screen2``.

        Side effects: sets ``self._structured_id2func[action_type]``
        (index -> ``(arg, offset)``), ``self._structured_func2id[action_type]``
        (arg -> ``(start, end)``) and ``self._merged_structured_action_spec``.
        """
        merged_structured_action_spec = collections.defaultdict(list)
        action_shape = (1, )
        action_dtype = tf.int32

        for action_type, func_args in self._structured_func_args.items():
            max_num_args = max(len(entry[1]) for entry in func_args)

            # Pool the distinct arguments seen at each argument position.
            args_per_step = collections.defaultdict(set)
            for entry in func_args:
                for i, arg in enumerate(entry[1]):
                    args_per_step[i].add(arg)

            idx2arg, arg2idx = dict(), dict()
            for step_i in range(max_num_args):
                idx2arg[step_i], arg2idx[step_i] = dict(), dict()
                # NOTE(review): index assignment follows set iteration
                # order -- confirm callers do not rely on a fixed order.
                args_per_step[step_i] = list(args_per_step[step_i])
                idx = 0
                for arg in args_per_step[step_i]:
                    # Compare names by value: `is not` on a string literal
                    # only works when both strings happen to be interned.
                    if arg.name != 'screen2':
                        assert len(arg.sizes) == 1
                        arg2idx[step_i][arg] = (idx, idx + arg.sizes[0])
                        for j in range(arg.sizes[0]):
                            idx2arg[step_i][idx] = (arg, j)
                            idx += 1
                    else:
                        arg2idx[step_i][arg] = (idx, idx + 1)
                        idx2arg[step_i][idx] = (arg, 0)
                        idx += 1
                combined_spec = tensor_spec.BoundedTensorSpec(
                    shape=action_shape,
                    dtype=action_dtype,
                    name=action_type + '_step_' + str(step_i),
                    minimum=(0, ),
                    maximum=(idx - 1, ))
                merged_structured_action_spec[action_type].append(
                    combined_spec)
            self._structured_id2func[action_type] = idx2arg
            self._structured_func2id[action_type] = arg2idx
        self._merged_structured_action_spec = list2tuple(
            merged_structured_action_spec)
Beispiel #29
0
    def _transform_action_spec(self, time_step_spec, action_spec, spatial_names, structured_names):
        """Flatten the spatial and structured action specs into one scalar spec.

        Args:
            time_step_spec: Spec whose ``observation[name][0].shape`` gives
                the height and width for each spatial name.
            action_spec: Mapping from name to a bounded spec.
            spatial_names: Names counted as ``height * width * range``.
            structured_names: Names counted as ``range``.

        Returns:
            A scalar int32 ``BoundedTensorSpec`` named
            ``'combined_action_spec'`` with bounds ``[0, num_actions - 1]``.

        Side effects: sets ``self._total_num_actions`` to the action count
        plus one for the no-op action.
        """

        def _range_size(bounded):
            return bounded.maximum[0] - bounded.minimum[0] + 1

        action_count = 0
        for spatial_name in spatial_names:
            assert spatial_name in time_step_spec.observation
            layer = time_step_spec.observation[spatial_name][0]
            rows = layer.shape[0]
            cols = layer.shape[1]
            if spatial_name in action_spec:
                action_count += rows * cols * _range_size(
                    action_spec[spatial_name])

        for structured_name in structured_names:
            if structured_name in action_spec:
                action_count += _range_size(action_spec[structured_name])

        # include no-op action
        # NOTE(review): the stored total counts the no-op, but the returned
        # spec's maximum does not -- confirm this is intended.
        self._total_num_actions = action_count + 1

        return tensor_spec.BoundedTensorSpec(
            shape=(),
            dtype=np.int32,
            name='combined_action_spec',
            minimum=(0,),
            maximum=(action_count - 1,))
 def testEncodeDecodeBoundedTensorSpec(self):
   """A named int64 bounded spec encodes to the expected proto and back."""
   structure = [
       tensor_spec.BoundedTensorSpec([1, 2, 3], dtypes.int64, 0, 10,
                                     "hello-0-10")
   ]
   self.assertTrue(self._coder.can_encode(structure))
   encoded = self._coder.encode_structure(structure)

   # Hand-build the proto the coder is expected to produce.
   expected = struct_pb2.StructuredValue()
   spec_proto = expected.list_value.values.add().bounded_tensor_spec_value
   for dim_size in (1, 2, 3):
     spec_proto.shape.dim.add().size = dim_size
   spec_proto.name = "hello-0-10"
   spec_proto.dtype = dtypes.int64.as_datatype_enum
   lower_proto = tensor_util.make_tensor_proto(
       [0], dtype=dtypes.int64, shape=[])
   upper_proto = tensor_util.make_tensor_proto(
       [10], dtype=dtypes.int64, shape=[])
   spec_proto.minimum.CopyFrom(lower_proto)
   spec_proto.maximum.CopyFrom(upper_proto)

   self.assertEqual(expected, encoded)
   self.assertEqual(structure, self._coder.decode_proto(encoded))