def extract_previous_action_spec(self, current_action_step):
    """Return the spec of the action chosen before ``current_action_step``.

    Args:
        current_action_step: Index of the action step being built.

    Returns:
        * step 0: a dict holding one scalar int32 ``'discrete'`` spec named
          ``'functions'`` whose maximum is
          ``len(self.act_wrapper.func_ids) - 1``.
        * step 1: a dict holding a scalar int32 ``'discrete'`` spec whose
          range covers the summed sizes of the specs returned by
          ``self.extract_action_spec(0)``, plus an unbounded float32
          ``'continuous'`` spec of shape ``(2,)``.
        * every other step (including -1): ``None``.
    """
    if current_action_step == 0:
        specs = dict()
        specs['discrete'] = tensor_spec.BoundedTensorSpec(
            shape=(),
            dtype=np.int32,
            name='functions',
            minimum=(0, ),
            maximum=(len(self.act_wrapper.func_ids) - 1, ))
        return specs

    if current_action_step == 1:
        earlier_specs = self.extract_action_spec(current_action_step - 1)
        # Each bounded spec contributes (max - min + 1) discrete choices.
        discrete_total = 0
        for bounded in earlier_specs.values():
            discrete_total += bounded.maximum[0] - bounded.minimum[0] + 1

        specs = dict()
        specs['discrete'] = tensor_spec.BoundedTensorSpec(
            shape=(),
            dtype=np.int32,
            name='discrete_func',
            minimum=(0, ),
            maximum=(discrete_total - 1, ))
        specs['continuous'] = tensor_spec.BoundedTensorSpec(
            shape=(2, ),
            dtype=np.float32,
            name='continuous_func',
            minimum=(-np.inf, ),
            maximum=(np.inf, ))
        return specs

    # Step -1 and any unrecognised step have no previous action.
    return None
def _transform_specs(self):
    """Collapse the per-step specs into the form used by the replay buffer
    and the policy base class.

    Rewrites ``self._time_step_spec``, ``self._policy_state_spec`` and
    ``self._info_spec`` to their first entries and replaces
    ``self._action_spec`` with a two-entry dict keyed by
    ``self._raw_action_key`` and ``self._transformed_action_key``.
    """
    spec_count = self._num_specs

    # Only the first time_step_spec is kept, since no previous_action is
    # needed.
    self._time_step_spec = self._time_step_spec[0]

    # The discrete action taken at each of the n steps is needed to compute
    # q values, so the raw action spec has one slot per spec.
    upper_bounds = [int(1e5)] * spec_count
    assert len(upper_bounds) == self._num_specs

    raw_key = self._raw_action_key
    raw_spec = tensor_spec.BoundedTensorSpec(
        shape=(spec_count, ),
        dtype=tf.int32,
        name=raw_key + 'action',
        minimum=tuple([0] * spec_count),
        maximum=tuple(upper_bounds))
    self._action_spec = dict()
    self._action_spec[raw_key] = raw_spec

    # The transformed action is float32 because it carries transformed
    # coordinates.
    transformed_width = 3
    transformed_key = self._transformed_action_key
    self._action_spec[transformed_key] = tensor_spec.BoundedTensorSpec(
        shape=(spec_count, transformed_width),
        dtype=tf.float32,
        name=transformed_key + 'action',
        minimum=(0, ),
        maximum=(max(upper_bounds), ))

    self._policy_state_spec = self._policy_state_spec[0]
    self._info_spec = self._info_spec[0]
def make_spec(self, spec):
    """Build the observation spec and store it on ``self.obs_spec``.

    Args:
        spec: Sequence whose first element maps ``'feature_screen'`` and
            ``'feature_minimap'`` to shape-like sequences; the leading
            entry of each is dropped to obtain the per-feature shape.

    The resulting structure has three entries: ``'screen'`` and
    ``'minimap'`` (one bounded spec per selected feature) and
    ``'available_actions'`` (an int32 spec bounded by 0 and 1 with one
    slot per entry of ``self.action_ids``). It is passed through
    ``list2tuple`` before being stored.
    """
    first_spec = spec[0]
    available_shape = (len(self.action_ids), )

    # All screen variables are integers, so shapes are
    # (screen_size, screen_size) rather than (1, screen_size, screen_size).
    screen_shape = list(first_spec['feature_screen'][1:])
    minimap_shape = list(first_spec['feature_minimap'][1:])

    screen_names = self.features['screen']
    minimap_names = self.features['minimap']
    screen_dims = get_spatial_dims(screen_names, features.SCREEN_FEATURES)
    minimap_dims = get_spatial_dims(minimap_names,
                                    features.MINIMAP_FEATURES)
    screen_types = get_spatial_type(screen_names, features.SCREEN_FEATURES)
    minimap_types = get_spatial_type(minimap_names,
                                     features.MINIMAP_FEATURES)

    def _layer_specs(prefix, names, shape, dims, kinds):
        # One bounded spec per feature layer; categorical layers are int32,
        # everything else float32.
        layer_specs = []
        for feat_name, dim, kind in zip(names, dims, kinds):
            if kind == "CATEGORICAL":
                layer_dtype = np.int32
            else:
                layer_dtype = np.float32
            layer_specs.append(
                tensor_spec.BoundedTensorSpec(shape=shape,
                                              dtype=layer_dtype,
                                              name=prefix + feat_name,
                                              minimum=(0, ),
                                              maximum=(dim - 1, )))
        return layer_specs

    obs_spec = dict()
    obs_spec['screen'] = _layer_specs('screen_', screen_names, screen_shape,
                                      screen_dims, screen_types)
    obs_spec['minimap'] = _layer_specs('minimap_', minimap_names,
                                       minimap_shape, minimap_dims,
                                       minimap_types)
    obs_spec['available_actions'] = tensor_spec.BoundedTensorSpec(
        shape=available_shape,
        dtype=np.int32,
        name='available_actions',
        minimum=(0, ),
        maximum=(1, ))

    # TODO: implement structural observation specs
    self.obs_spec = list2tuple(obs_spec)
def testReuseSpec(self):
    """A spec rebuilt from another spec's own attributes equals it."""
    original = tensor_spec.BoundedTensorSpec((1, 2),
                                             dtypes.int32,
                                             minimum=0,
                                             maximum=1)
    # Feed the attributes read back from the first spec straight into the
    # constructor again.
    rebuilt = tensor_spec.BoundedTensorSpec(original.shape,
                                            original.dtype,
                                            original.minimum,
                                            original.maximum)
    self.assertEqual(original, rebuilt)
def make_spec(self, spec, obs_spec):
    """Build the action specs from the environment's action and obs specs.

    Args:
        spec: Sequence whose first element exposes ``types`` with one
            attribute per name in ``self._spatial_action_types``; each such
            attribute must provide ``sizes``.
        obs_spec: Sequence whose first element is stored as
            ``self._obs_spec``.

    Raises:
        ValueError: If a function takes both a ``screen`` and a ``minimap``
            argument.

    Side effects: fills ``self._spatial_spec``, sets
    ``self._spatial_func_args``, ``self._structured_func_args`` and
    ``self._func_ids_spec``, and calls the ``build_*`` /
    ``_stack_id2func`` helpers in sequence.
    """
    spec = spec[0]
    self._obs_spec = obs_spec[0]

    # Continuous parameter space: screen, minimap, screen2.
    for t in self._spatial_action_types:
        args = getattr(spec.types, t)
        self._spatial_spec[t] = tensor_spec.BoundedTensorSpec(
            shape=args.sizes,
            dtype=np.int32,
            name=t,
            minimum=(0, ),
            maximum=(255, ))

    structured_func_args = collections.defaultdict(list)
    spatial_func_args = collections.defaultdict(list)
    for fn_id in self.func_ids:
        sc2_func = actions.FUNCTIONS[fn_id]
        # Compare names by value: `is` against a string literal tests object
        # identity and only works when the strings happen to be interned.
        check_screen = any(arg.name == 'screen' for arg in sc2_func.args)
        check_minimap = any(arg.name == 'minimap' for arg in sc2_func.args)
        if check_screen and check_minimap:
            raise ValueError('one action can'
                             't act on both screen and minimap')

        if check_screen or check_minimap:
            spatial_type = 'screen' if check_screen else 'minimap'
            current_spatial_args = []
            for arg in sc2_func.args:
                # Non-spatial arguments are always kept; a spatial argument
                # is kept only when it is the function's sole argument.
                if arg.name not in self._spatial_action_types:
                    current_spatial_args.append(arg)
                elif len(sc2_func.args) == 1:
                    current_spatial_args.append(arg)
            spatial_func_args[spatial_type].append(
                (fn_id, current_spatial_args))
        else:
            current_structured_args = list(sc2_func.args)
            structured_func_args['structured'].append(
                (fn_id, current_structured_args))

    self._spatial_func_args = spatial_func_args
    self._structured_func_args = structured_func_args
    self.build_spatial_action_spec()
    self.build_structured_action_spec()
    self.build_action_arg_mask()
    self._stack_id2func()
    self._func_ids_spec = tensor_spec.BoundedTensorSpec(
        shape=(1, ),
        dtype=np.int32,
        name='functions',
        minimum=0,
        maximum=len(self.func_ids) - 1)
def testSerialization(self):
    """Named and unnamed specs survive a trace_type serialize round trip."""
    nameless = tensor_spec.BoundedTensorSpec([1], np.float32, 0, 1)
    named = tensor_spec.BoundedTensorSpec([1, 2, 3],
                                          np.float32,
                                          0,
                                          1,
                                          name="some_name")

    restored_nameless = trace_type.deserialize(
        trace_type.serialize(nameless))
    self.assertEqual(nameless, restored_nameless)

    restored_named = trace_type.deserialize(trace_type.serialize(named))
    self.assertEqual(named, restored_named)
def testScalarBounds(self):
    """Scalar bounds are exposed as ndarrays and can be fed back in."""
    spec = tensor_spec.BoundedTensorSpec((),
                                         dtypes.float32,
                                         minimum=0.0,
                                         maximum=1.0)

    lower, upper = spec.minimum, spec.maximum
    self.assertIsInstance(lower, np.ndarray)
    self.assertIsInstance(upper, np.ndarray)

    # Sanity check that numpy compares correctly to a scalar for an empty
    # shape.
    self.assertEqual(0.0, lower)
    self.assertEqual(1.0, upper)

    # Check that the spec doesn't fail its own input validation.
    tensor_spec.BoundedTensorSpec(spec.shape, spec.dtype, spec.minimum,
                                  spec.maximum)
def testMasking(self):
    """A QPolicy fed a fixed action mask only emits unmasked actions."""
    batch_size = 1000
    num_state_dims = 5
    num_actions = 8

    observations = tf.random.uniform([batch_size, num_state_dims])
    time_step = ts.restart(observations, batch_size=batch_size)
    input_tensor_spec = tensor_spec.TensorSpec([num_state_dims], tf.float32)
    action_spec = tensor_spec.BoundedTensorSpec([1], tf.int32, 0,
                                                num_actions - 1)

    # 1 marks an action that may be chosen, 0 one that may not.
    mask = [0, 1, 0, 1, 0, 0, 1, 0]
    np_mask = np.array(mask)
    tf_mask = tf.constant([mask] * batch_size)

    def _split_observation(observation):
        # Pair every observation with the same constant mask.
        return (observation, tf_mask)

    q_net = q_network.QNetwork(input_tensor_spec,
                               action_spec,
                               mask_split_fn=_split_observation)
    policy = q_policy.QPolicy(ts.time_step_spec(input_tensor_spec),
                              action_spec, q_net)

    # Force creation of variables before global_variables_initializer.
    policy.variables()
    self.evaluate(tf.compat.v1.global_variables_initializer())

    # Sample a batch of 1000 actions from the policy and ensure that invalid
    # actions are never chosen.
    action_step = policy.action(time_step)
    action = self.evaluate(action_step.action)
    self.assertEqual(action.shape, (batch_size, 1))
    self.assertAllEqual(np_mask[action], np.ones([batch_size, 1]))
def testEncodeDecodeBoundedTensorSpecNoName(self):
    """Round-trip an unnamed BoundedTensorSpec through the coder."""
    dims = (28, 28, 3)
    structure = [
        tensor_spec.BoundedTensorSpec(dims, dtypes.float64, -2, (1, 1, 20))
    ]
    self.assertTrue(self._coder.can_encode(structure))
    encoded = self._coder.encode_structure(structure)

    # Hand-build the proto the encoder is expected to emit.
    expected = struct_pb2.StructuredValue()
    spec_proto = expected.list_value.values.add().bounded_tensor_spec_value
    for size in dims:
        spec_proto.shape.dim.add().size = size
    spec_proto.name = ""
    spec_proto.dtype = dtypes.float64.as_datatype_enum
    lower_proto = tensor_util.make_tensor_proto([-2],
                                                dtype=dtypes.float64,
                                                shape=[])
    upper_proto = tensor_util.make_tensor_proto([1, 1, 20],
                                                dtype=dtypes.float64,
                                                shape=[3])
    spec_proto.minimum.CopyFrom(lower_proto)
    spec_proto.maximum.CopyFrom(upper_proto)
    self.assertEqual(expected, encoded)

    decoded = self._coder.decode_proto(encoded)
    self.assertEqual(structure, decoded)
def make_server():
    """Create a reverb server with three prioritized FIFO tables.

    The tables are ``'dist'`` (no signature), ``'signatured'`` (float32
    ``TensorSpec`` signature) and ``'bounded_spec_signatured'`` (float32
    ``BoundedTensorSpec`` signature). The server is created with
    ``port=None``.
    """

    def _table(name, **extra):
        # Every table gets its own selector and rate-limiter objects.
        return reverb_server.Table(
            name,
            sampler=item_selectors.Prioritized(priority_exponent=1),
            remover=item_selectors.Fifo(),
            max_size=1000000,
            rate_limiter=rate_limiters.MinSize(1),
            **extra)

    plain_signature = tf.TensorSpec(dtype=tf.float32, shape=(None, None))
    # Currently only the `shape` and `dtype` of the bounded spec
    # is considered during signature check.
    # TODO(b/158033101): Check the boundaries as well.
    bounded_signature = tensor_spec.BoundedTensorSpec(dtype=tf.float32,
                                                      shape=(None, None),
                                                      minimum=(0.0, 0.0),
                                                      maximum=(10.0, 10.))

    tables = [
        _table('dist'),
        _table('signatured', signature=plain_signature),
        _table('bounded_spec_signatured', signature=bounded_signature),
    ]
    return reverb_server.Server(tables=tables, port=None)
def observation_spec(self):
    """Return the wrapped observation spec with bounds replaced by [0, 1].

    The new spec keeps the parent spec's dtype and name; its shape is one
    dimension of length ``len(self.upper)``.
    """
    parent_spec = super(NormalizeWrapper, self).observation_spec()
    width = len(self.upper)
    upper_bound = tf.ones([width])
    lower_bound = tf.zeros([width])
    return tensor_spec.BoundedTensorSpec(shape=upper_bound.shape,
                                         dtype=parent_spec.dtype,
                                         name=parent_spec.name,
                                         minimum=lower_bound,
                                         maximum=upper_bound)
def testFromBoundedTensorSpec(self):
    """TensorSpec.from_spec keeps shape, dtype and name of a bounded spec."""
    bounded_spec = tensor_spec.BoundedTensorSpec((1, 2), dtypes.int32, 0, 1)
    spec = tensor_spec.TensorSpec.from_spec(bounded_spec)
    for attribute in ("shape", "dtype", "name"):
        self.assertEqual(getattr(bounded_spec, attribute),
                         getattr(spec, attribute))
def testMinimumMaximumAttributes(self):
    """minimum/maximum are float32 ndarrays holding the supplied bounds."""
    spec = tensor_spec.BoundedTensorSpec((1, 2, 3), dtypes.float32, 0,
                                         (5, 5, 5))

    for bound in (spec.minimum, spec.maximum):
        self.assertEqual(type(bound), np.ndarray)

    expected_minimum = np.array(0, dtype=np.float32)
    expected_maximum = np.array([5, 5, 5], dtype=np.float32)
    self.assertAllEqual(spec.minimum, expected_minimum)
    self.assertAllEqual(spec.maximum, expected_maximum)
def testNotWriteableNP(self):
    """Writing into a spec's minimum/maximum arrays raises ValueError."""
    spec = tensor_spec.BoundedTensorSpec((1, 2, 3), dtypes.float32, 0,
                                         (5, 5, 5))
    # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
    # which was removed from unittest in Python 3.12.
    with self.assertRaisesRegex(ValueError, "read-only"):
        spec.minimum[0] = -1
    with self.assertRaisesRegex(ValueError, "read-only"):
        spec.maximum[0] = 100
def time_step_spec(self):
    """Return the parent time-step spec with a history-tiled observation.

    The observation bounds of the parent spec are tiled ``self.history_n``
    times with ``np.tile``; step_type, reward and discount are passed
    through unchanged.
    """
    parent_spec = super(TFhistoryWrapper, self).time_step_spec()
    base_obs = parent_spec.observation
    tiled_max = np.tile(base_obs.maximum, self.history_n)
    tiled_min = np.tile(base_obs.minimum, self.history_n)
    tiled_obs = tensor_spec.BoundedTensorSpec(shape=tiled_max.shape,
                                              dtype=base_obs.dtype,
                                              name=base_obs.name,
                                              minimum=tiled_min,
                                              maximum=tiled_max)
    return ts.TimeStep(parent_spec.step_type,
                       parent_spec.reward,
                       parent_spec.discount,
                       observation=tiled_obs)
def extract_action_spec(self, step):
    """Collect the action specs that apply at ``step``.

    Args:
        step: Action step index.

    Returns:
        * ``step < -1``: ``None`` (a message is printed).
        * ``step == -1``: a dict with one ``'structured'`` scalar int32 spec
          whose maximum is ``len(self.act_wrapper.func_ids) - 1``.
        * ``step >= 0``: a dict mapping each key of the merged spatial and
          structured specs to its ``step``-th entry, skipping keys that have
          fewer than ``step + 1`` entries.
    """
    wrapper = self.act_wrapper
    merged_specs = (wrapper._merged_spatial_action_spec,
                    wrapper._merged_structured_action_spec)

    if step < -1:
        print("generate None spec for step < -1")
        return None

    if step < 0:
        # Only step == -1 reaches this point.
        return {
            'structured':
            tensor_spec.BoundedTensorSpec(
                shape=(),
                dtype=np.int32,
                name="sc2_func_action_spec",
                minimum=(0, ),
                maximum=(len(self.act_wrapper.func_ids) - 1, ))
        }

    selected = dict()
    for merged in merged_specs:
        for key, per_step in merged.items():
            if step < len(per_step):
                selected[key] = per_step[step]
    return selected
def testFromBoundedTensorSpec(self):
    """BoundedTensorSpec.from_spec of a bounded spec yields an equal spec."""
    source = tensor_spec.BoundedTensorSpec((1, 2),
                                           dtypes.int32,
                                           minimum=0,
                                           maximum=1)
    copied = tensor_spec.BoundedTensorSpec.from_spec(source)
    self.assertEqual(source, copied)
def testMultipleActionsRaiseError(self):
    """HeteroQPolicy rejects an action_spec holding two bounded specs."""
    action_spec = [tensor_spec.BoundedTensorSpec([1], tf.int32, 0, 1)] * 2
    # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
    # which was removed from unittest in Python 3.12.
    with self.assertRaisesRegex(
            NotImplementedError,
            'action_spec can only contain a single BoundedTensorSpec'):
        hetero_q_policy.HeteroQPolicy(self._time_step_spec,
                                      action_spec,
                                      q_network=DummyNet())
def testActionSpecsIncompatible(self):
    """QPolicy rejects a network whose action_spec mismatches the policy's."""
    network_action_spec = tensor_spec.BoundedTensorSpec([2], tf.int32, 0, 1)
    q_net = DummyNetWithActionSpec(network_action_spec)
    # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
    # which was removed from unittest in Python 3.12.
    with self.assertRaisesRegex(
            ValueError,
            'action_spec must be compatible with q_network.action_spec'):
        q_policy.QPolicy(self._time_step_spec, self._action_spec, q_net)
def build_obs_spec(self, obs):
    """Return a float32 bounded spec whose bounds are ``obs``'s, tiled.

    Args:
        obs: Bounded spec providing ``minimum``, ``maximum``, ``shape`` and
            ``name``.

    The bounds are repeated ``self.history_n`` times, or
    ``self.history_n * obs.shape[0]`` times when ``self.isAtari`` is set.
    """
    if self.isAtari:
        repeats = self.history_n * obs.shape[0]
    else:
        repeats = self.history_n

    tiled_max = np.tile(obs.maximum, repeats)
    tiled_min = np.tile(obs.minimum, repeats)
    return tensor_spec.BoundedTensorSpec(shape=tiled_max.shape,
                                         dtype=tf.float32,
                                         name=obs.name,
                                         minimum=tiled_min,
                                         maximum=tiled_max)
def do_decode(self, value, decode_fn):
    """Rebuild a BoundedTensorSpec from ``value.bounded_tensor_spec_value``.

    Args:
        value: Proto carrying a ``bounded_tensor_spec_value`` field.
        decode_fn: Callable used to decode the nested shape and dtype, each
            wrapped in its own ``StructuredValue``.

    Returns:
        The decoded ``BoundedTensorSpec``; an empty proto name becomes
        ``None``.
    """
    spec_proto = value.bounded_tensor_spec_value
    decoded_shape = decode_fn(
        struct_pb2.StructuredValue(tensor_shape_value=spec_proto.shape))
    decoded_dtype = decode_fn(
        struct_pb2.StructuredValue(tensor_dtype_value=spec_proto.dtype))
    lower = tensor_util.MakeNdarray(spec_proto.minimum)
    upper = tensor_util.MakeNdarray(spec_proto.maximum)
    return tensor_spec.BoundedTensorSpec(shape=decoded_shape,
                                         dtype=decoded_dtype,
                                         minimum=lower,
                                         maximum=upper,
                                         name=spec_proto.name or None)
def testEquality(self):
    """Specs compare equal exactly when their bounds match."""

    def _spec(lower, upper):
        # All specs in this test share shape (1, 2, 3) and dtype float32.
        return tensor_spec.BoundedTensorSpec((1, 2, 3), dtypes.float32,
                                             lower, upper)

    spec_1_1 = _spec(0, (5, 5, 5))
    spec_1_2 = _spec(0.00000001, (5, 5, 5.00000000000000001))
    spec_2_1 = _spec(1, (5, 5, 5))
    spec_2_2 = _spec((1, 1, 1), (5, 5, 5))
    spec_2_3 = _spec((1, 1, 1), 5)
    spec_3_1 = _spec((2, 1, 1), (5, 5, 5))

    # Tiny numeric differences in the bounds do not break equality.
    self.assertEqual(spec_1_1, spec_1_2)
    self.assertEqual(spec_1_2, spec_1_1)

    # Different minimum values make specs unequal, in either order.
    self.assertNotEqual(spec_1_1, spec_2_2)
    self.assertNotEqual(spec_1_1, spec_2_1)
    self.assertNotEqual(spec_2_2, spec_1_1)
    self.assertNotEqual(spec_2_1, spec_1_1)

    # Scalar and per-element spellings of the same bound are equal.
    self.assertEqual(spec_2_1, spec_2_2)
    self.assertEqual(spec_2_2, spec_2_1)
    self.assertEqual(spec_2_2, spec_2_3)

    # A minimum differing in a single element makes specs unequal.
    self.assertNotEqual(spec_1_1, spec_3_1)
    self.assertNotEqual(spec_2_1, spec_3_1)
    self.assertNotEqual(spec_2_2, spec_3_1)
def test_sets_dtypes_from_bounded_spec_signature(self):
    """A dataset built from a bounded-spec table signature exposes plain
    TensorSpecs with matching shapes and dtypes."""
    bounded_spec_signature = {
        'a': {
            'b': tensor_spec.BoundedTensorSpec([3, 3], tf.float32, 0, 3),
            'c': tensor_spec.BoundedTensorSpec([], tf.int64, 0, 5),
        },
    }
    queue_table = reverb_server.Table.queue(
        'queue', 10, signature=bounded_spec_signature)
    server = reverb_server.Server([queue_table])

    dataset = reverb_dataset.ReplayDataset.from_table_signature(
        f'localhost:{server.port}', 'queue', 100)

    expected_data_spec = {
        'a': {
            'b': tf.TensorSpec([3, 3], tf.float32),
            'c': tf.TensorSpec([], tf.int64),
        },
    }
    self.assertDictEqual(dataset.element_spec.data, expected_data_spec)
def extract_func_action_spec(self, current_action_step):
    """Return the function-id action spec for ``current_action_step``.

    Args:
        current_action_step: Action step index.

    Returns:
        ``None`` when the step is -1; otherwise a dict with one scalar
        int32 ``'discrete'`` spec named ``'functions'`` whose maximum is
        ``len(self.act_wrapper.func_ids) - 1``.
    """
    if current_action_step == -1:
        return None

    highest_func_index = len(self.act_wrapper.func_ids) - 1
    return {
        'discrete':
        tensor_spec.BoundedTensorSpec(shape=(),
                                      dtype=np.int32,
                                      name='functions',
                                      minimum=(0, ),
                                      maximum=(highest_func_index, ))
    }
def testActionScalarSpec(self):
    """A scalar action spec yields one in-range int32 action per batch row."""
    action_spec = tensor_spec.BoundedTensorSpec((), tf.int32, 0, 1)
    policy = q_policy.QPolicy(self._time_step_spec,
                              action_spec,
                              q_network=DummyNet())

    observations = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
    time_step = ts.restart(observations, batch_size=2)
    action_tensor = policy.action(time_step, seed=1).action
    self.assertEqual(action_tensor.shape.as_list(), [2])
    self.assertEqual(action_tensor.dtype, tf.int32)

    # Initialize all variables
    self.evaluate(tf.compat.v1.global_variables_initializer())
    action = self.evaluate(action_tensor)
    self.assertTrue(np.all(action >= 0) and np.all(action <= 1))
def testActionList(self):
    """A list-wrapped action spec yields a one-element list of actions."""
    action_spec = [tensor_spec.BoundedTensorSpec([1], tf.int32, 0, 1)]
    policy = q_policy.QPolicy(self._time_step_spec,
                              action_spec,
                              q_network=DummyNet())

    observations = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
    time_step = ts.restart(observations, batch_size=2)
    action_step = policy.action(time_step, seed=1)
    self.assertIsInstance(action_step.action, list)

    self.evaluate(tf.compat.v1.global_variables_initializer())
    evaluated = self.evaluate(action_step.action)
    self.assertLen(evaluated, 1)

    # Extract contents from the outer list.
    action = evaluated[0]
    self.assertTrue(np.all(action >= 0) and np.all(action <= 1))
def testActionWithinBounds(self):
    """Actions stay inside a spec whose bounds are both negative."""
    bounded_action_spec = tensor_spec.BoundedTensorSpec([1],
                                                        tf.int32,
                                                        minimum=-6,
                                                        maximum=-5)
    policy = q_policy.QPolicy(self._time_step_spec,
                              bounded_action_spec,
                              q_network=DummyNet())

    observations = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
    time_step = ts.restart(observations, batch_size=2)
    action_tensor = policy.action(time_step).action
    self.assertEqual(action_tensor.shape.as_list(), [2, 1])
    self.assertEqual(action_tensor.dtype, tf.int32)

    # Initialize all variables
    self.evaluate(tf.compat.v1.global_variables_initializer())
    action = self.evaluate(action_tensor)
    self.assertTrue(np.all(action <= -5) and np.all(action >= -6))
def build_structured_action_spec(self):
    """Merge the structured function arguments into one spec per step.

    For every action type in ``self._structured_func_args`` the distinct
    arguments seen at each argument position ("step") are laid out
    back-to-back in one flat index range. An argument other than
    ``'screen2'`` takes ``arg.sizes[0]`` consecutive indices; ``'screen2'``
    takes exactly one.

    Side effects:
        * ``self._structured_id2func[action_type][step][idx]`` is set to
          ``(arg, offset)``.
        * ``self._structured_func2id[action_type][step][arg]`` is set to the
          half-open ``(start, stop)`` index range of ``arg``.
        * ``self._merged_structured_action_spec`` is set to the per-step
          int32 specs of shape ``(1,)``, passed through ``list2tuple``.
    """
    merged_structured_action_spec = collections.defaultdict(list)
    action_shape = (1, )
    action_dtype = tf.int32

    for action_type, func_args in self._structured_func_args.items():
        max_num_args = max(len(entry[1]) for entry in func_args)

        # Distinct arguments observed at each argument position.
        # NOTE(review): these are sets, so the index layout below follows
        # set iteration order - confirm that ordering is acceptable.
        args_per_step = collections.defaultdict(set)
        for _, fn_args in func_args:
            for i, arg in enumerate(fn_args):
                args_per_step[i].add(arg)

        idx2arg, arg2idx = dict(), dict()
        for step_i in range(max_num_args):
            idx2arg[step_i], arg2idx[step_i] = dict(), dict()
            args_per_step[step_i] = list(args_per_step[step_i])
            idx = 0
            for arg in args_per_step[step_i]:
                # Compare by value: `is not` against a string literal tests
                # object identity and depends on string interning.
                if arg.name != 'screen2':
                    assert len(arg.sizes) == 1
                    arg2idx[step_i][arg] = (idx, idx + arg.sizes[0])
                    for j in range(arg.sizes[0]):
                        idx2arg[step_i][idx] = (arg, j)
                        idx += 1
                else:
                    arg2idx[step_i][arg] = (idx, idx + 1)
                    idx2arg[step_i][idx] = (arg, 0)
                    idx += 1

            combined_spec = tensor_spec.BoundedTensorSpec(
                shape=action_shape,
                dtype=action_dtype,
                name=action_type + '_step_' + str(step_i),
                minimum=(0, ),
                maximum=(idx - 1, ))
            merged_structured_action_spec[action_type].append(combined_spec)

        self._structured_id2func[action_type] = idx2arg
        self._structured_func2id[action_type] = arg2idx

    self._merged_structured_action_spec = list2tuple(
        merged_structured_action_spec)
    return
def _transform_action_spec(self, time_step_spec, action_spec, spatial_names,
                           structured_names):
    """Flatten spatial and structured action specs into one scalar spec.

    Args:
        time_step_spec: Spec whose ``observation[name][0].shape`` supplies
            height and width for each spatial name.
        action_spec: Mapping from name to a bounded spec; names that are
            absent contribute nothing.
        spatial_names: Names whose action count is multiplied by
            height * width.
        structured_names: Names contributing their plain action count.

    Returns:
        A scalar int32 ``BoundedTensorSpec`` named
        ``'combined_action_spec'`` with bounds ``[0, num_actions - 1]``.

    Side effects:
        Sets ``self._total_num_actions`` to ``num_actions + 1``.
    """

    def _choices(name):
        # Number of discrete values covered by the named bounded spec.
        bounded = action_spec[name]
        return bounded.maximum[0] - bounded.minimum[0] + 1

    num_actions = 0
    observation = time_step_spec.observation
    for name in spatial_names:
        assert name in observation
        first_layer_shape = observation[name][0].shape
        height = first_layer_shape[0]
        width = first_layer_shape[1]
        if name in action_spec:
            num_actions += height * width * _choices(name)

    for name in structured_names:
        if name in action_spec:
            num_actions += _choices(name)

    # include no-op action
    # NOTE(review): the stored total counts the no-op, but the returned
    # spec's maximum (num_actions - 1) does not - confirm this is intended.
    self._total_num_actions = num_actions + 1
    return tensor_spec.BoundedTensorSpec(shape=(),
                                         dtype=np.int32,
                                         name='combined_action_spec',
                                         minimum=(0, ),
                                         maximum=(num_actions - 1, ))
def testEncodeDecodeBoundedTensorSpec(self):
    """Round-trip a named BoundedTensorSpec through the coder."""
    structure = [
        tensor_spec.BoundedTensorSpec([1, 2, 3], dtypes.int64, 0, 10,
                                      "hello-0-10")
    ]
    self.assertTrue(self._coder.can_encode(structure))
    encoded = self._coder.encode_structure(structure)

    # Hand-build the proto the encoder is expected to emit.
    expected = struct_pb2.StructuredValue()
    spec_proto = expected.list_value.values.add().bounded_tensor_spec_value
    for size in (1, 2, 3):
        spec_proto.shape.dim.add().size = size
    spec_proto.name = "hello-0-10"
    spec_proto.dtype = dtypes.int64.as_datatype_enum
    lower_proto = tensor_util.make_tensor_proto([0],
                                                dtype=dtypes.int64,
                                                shape=[])
    upper_proto = tensor_util.make_tensor_proto([10],
                                                dtype=dtypes.int64,
                                                shape=[])
    spec_proto.minimum.CopyFrom(lower_proto)
    spec_proto.maximum.CopyFrom(upper_proto)
    self.assertEqual(expected, encoded)

    decoded = self._coder.decode_proto(encoded)
    self.assertEqual(structure, decoded)