def testNumericKerasInput(self):
    """Checks the output shape when a Keras `Input` is used as the combiner.

    A `tf.keras.Sequential` wrapping a single `tf.keras.Input` is passed as
    `preprocessing_combiner`; the encoded output must have the same shape as
    the batched input feature.
    """
    feature_name = 'feature_key'
    num_rows, num_dims = 3, 5
    batched_shape = (num_rows, num_dims)

    input_layer = tf.keras.Input(
        shape=[num_dims], dtype=tf.int32, name=feature_name)
    observations = {feature_name: tf.ones(batched_shape, tf.int32)}
    spec = {feature_name: tensor_spec.TensorSpec([num_dims], tf.int32)}

    network = encoding_network.EncodingNetwork(
        spec, preprocessing_combiner=tf.keras.Sequential([input_layer]))
    encoded, _ = network(observations)

    self.assertEqual(batched_shape, encoded.shape)
def test_empty_layers(self):
    """An EncodingNetwork built without layer params only flattens its input."""
    spec = tensor_spec.TensorSpec((2, 3), tf.float32)
    network = encoding_network.EncodingNetwork(spec)

    self.assertEqual(0, len(network.variables))

    # The only layer present is the one that flattens the input.
    layers = network.layers
    self.assertEqual(1, len(layers))
    self.assertEqual('flatten', layers[0].get_config()['name'])

    flattened, _ = network(tf.ones((1, 2, 3)))
    self.assertAllEqual(flattened, [[1, 1, 1, 1, 1, 1]])
def create_feed_forward_common_tower_network(observation_spec, global_layers,
                                             arm_layers, common_layers):
    """Builds a common tower network whose three towers are feedforward.

    Args:
      observation_spec: A nested tensor spec containing the specs for global
        as well as per-arm observations.
      global_layers: Iterable of ints. Layer sizes of the global tower.
      arm_layers: Iterable of ints. Layer sizes of the arm tower.
      common_layers: Iterable of ints. Layer sizes of the common tower.

    Returns:
      A network that takes observations adhering to `observation_spec` and
      outputs reward estimates for every action.
    """
    global_spec = observation_spec[bandit_spec_utils.GLOBAL_FEATURE_KEY]
    global_network = encoding_network.EncodingNetwork(
        input_tensor_spec=global_spec, fc_layer_params=global_layers)

    # The arm tower sees the per-arm spec with its leading dimension removed.
    per_arm_spec = observation_spec[bandit_spec_utils.PER_ARM_FEATURE_KEY]
    single_arm_spec = tensor_spec.TensorSpec(
        shape=per_arm_spec.shape[1:], dtype=tf.float32)
    arm_network = encoding_network.EncodingNetwork(
        input_tensor_spec=single_arm_spec, fc_layer_params=arm_layers)

    # The common tower consumes the last global and last arm layer together,
    # so its input width is the sum of their sizes.
    common_input_spec = tensor_spec.TensorSpec(
        shape=(global_layers[-1] + arm_layers[-1],), dtype=tf.float32)
    single_action_spec = tensor_spec.BoundedTensorSpec(
        shape=(), minimum=0, maximum=0, dtype=tf.int32)
    common_network = q_network.QNetwork(
        input_tensor_spec=common_input_spec,
        action_spec=single_action_spec,
        fc_layer_params=common_layers)

    return GlobalAndArmCommonTowerNetwork(
        observation_spec, global_network, arm_network, common_network)
def test_empty_layers(self):
    """Checks variable access before and after calling a layer-less network.

    Reading `variables` before the network has been called must raise
    `ValueError`; after one call the network reports zero variables and a
    single flatten layer.
    """
    spec = tensor_spec.TensorSpec((2, 3), tf.float32)
    network = encoding_network.EncodingNetwork(spec)

    with self.assertRaises(ValueError):
        network.variables  # pylint: disable=pointless-statement

    # The only layer present is the one that flattens the input.
    layers = network.layers
    self.assertLen(layers, 1)
    self.assertEqual('flatten', layers[0].get_config()['name'])

    flattened, _ = network(tf.ones((1, 2, 3)))
    self.assertAllEqual(flattened, [[1, 1, 1, 1, 1, 1]])
    self.assertLen(network.variables, 0)
def testNumericFeatureColumnInput(self):
    """Checks the output shape for a numeric feature column via DenseFeatures."""
    feature_name = 'feature_key'
    num_rows, num_dims = 3, 5
    batched_shape = (num_rows, num_dims)

    numeric_column = tf.feature_column.numeric_column(feature_name, [num_dims])
    observations = {feature_name: tf.ones(batched_shape, tf.int32)}
    spec = {feature_name: tensor_spec.TensorSpec([num_dims], tf.int32)}

    combiner = tf.compat.v2.keras.layers.DenseFeatures([numeric_column])
    network = encoding_network.EncodingNetwork(
        spec, preprocessing_combiner=combiner)
    encoded, _ = network(observations)

    self.assertEqual(batched_shape, encoded.shape)
def testCombinedFeatureColumnInput(self):
    """Checks the output width when several feature columns are combined.

    Indicator, (graph-mode only) embedding and numeric columns are fed
    through one `DenseFeatures` combiner; the expected last dimension is the
    sum of the widths contributed by each column.
    """
    vocabulary = [2, 3, 4]
    raw_ids = [3, 2, 2, 4, 3]

    columns = {}
    tensors = {}
    specs = {}
    expected_dim = 0

    # Indicator column: contributes one slot per vocabulary entry.
    indicator_key = 'indicator_key'
    categorical = tf.feature_column.categorical_column_with_vocabulary_list(
        indicator_key, vocabulary)
    columns[indicator_key] = tf.feature_column.indicator_column(categorical)
    tensors[indicator_key] = tf.expand_dims(raw_ids, -1)
    specs[indicator_key] = tensor_spec.TensorSpec([1], tf.int32)
    expected_dim += len(vocabulary)

    # TODO(b/134950354): Test embedding column for non-eager mode only for now.
    if not tf.executing_eagerly():
        embedding_key = 'embedding_key'
        embedding_dim = 3
        categorical = tf.feature_column.categorical_column_with_vocabulary_list(
            embedding_key, vocabulary)
        columns[embedding_key] = tf.feature_column.embedding_column(
            categorical, embedding_dim)
        tensors[embedding_key] = tf.expand_dims(raw_ids, -1)
        specs[embedding_key] = tensor_spec.TensorSpec([1], tf.int32)
        expected_dim += embedding_dim

    # Numeric column: contributes `state_dims` slots.
    numeric_key = 'numeric_key'
    batch_size, state_dims = 5, 3
    columns[numeric_key] = tf.feature_column.numeric_column(
        numeric_key, [state_dims])
    tensors[numeric_key] = tf.ones((batch_size, state_dims), tf.int32)
    specs[numeric_key] = tensor_spec.TensorSpec([state_dims], tf.int32)
    expected_dim += state_dims

    combiner = tf.compat.v2.keras.layers.DenseFeatures(columns.values())
    network = encoding_network.EncodingNetwork(
        specs, preprocessing_combiner=combiner)
    encoded, _ = network(tensors)

    self.assertEqual((batch_size, expected_dim), encoded.shape)
def testDropoutFCLayers(self, training):
    """Checks that dropout only perturbs outputs when `training` is truthy.

    The same observation is passed through the network twice. With training
    enabled the two outputs must differ; otherwise they must be equal.

    Args:
      training: Value forwarded to the network's `training` argument.
    """
    num_rows, obs_dims = 3, 5
    spec = tensor_spec.TensorSpec([obs_dims], tf.float32)
    network = encoding_network.EncodingNetwork(
        spec, fc_layer_params=[20], dropout_layer_params=[0.5])

    observation = tf.random.uniform([num_rows, obs_dims])
    first, _ = network(observation, training=training)
    second, _ = network(observation, training=training)

    self.evaluate(tf.compat.v1.global_variables_initializer())
    first, second = self.evaluate([first, second])

    if not training:
        self.assertAllEqual(first, second)
        return
    self.assertGreater(np.linalg.norm(first - second), 0)
def __init__(self,
             observation_spec,
             action_spec,
             preprocessing_layers=None,
             preprocessing_combiner=None,
             conv_layer_params=None,
             fc_layer_params=(75, 40),
             dropout_layer_params=None,
             activation_fn=tf.keras.activations.relu,
             enable_last_layer_zero_initializer=False,
             name='ActorNetwork'):
    """Builds the observation encoder and the tanh action projection layer.

    Args:
      observation_spec: Spec of the observations; forwarded to the base class
        as `input_tensor_spec` and to the `EncodingNetwork`.
      action_spec: Spec of the action; must flatten to a single float spec.
      preprocessing_layers: Forwarded to `EncodingNetwork`.
      preprocessing_combiner: Forwarded to `EncodingNetwork`.
      conv_layer_params: Forwarded to `EncodingNetwork`.
      fc_layer_params: Forwarded to `EncodingNetwork`.
      dropout_layer_params: Forwarded to `EncodingNetwork`.
      activation_fn: Activation forwarded to `EncodingNetwork`.
      enable_last_layer_zero_initializer: Accepted but not read in this
        constructor.  NOTE(review): confirm whether it is meant to be unused.
      name: Name of the network.

    Raises:
      ValueError: If `action_spec` flattens to more than one spec, or if the
        single action's dtype is neither `tf.float32` nor `tf.float64`.
    """
    super(ActorNetwork, self).__init__(
        input_tensor_spec=observation_spec, state_spec=(), name=name)

    # For simplicity only a single float action output is supported.
    self._action_spec = action_spec
    action_specs = tf.nest.flatten(action_spec)
    if len(action_specs) > 1:
        raise ValueError('Only a single action is supported by this network')
    self._single_action_spec = action_specs[0]
    if self._single_action_spec.dtype not in (tf.float32, tf.float64):
        raise ValueError('Only float actions are supported by this network.')

    encoder_initializer = tf.keras.initializers.VarianceScaling(
        scale=1. / 3., mode='fan_in', distribution='uniform')
    self._encoder = encoding_network.EncodingNetwork(
        observation_spec,
        preprocessing_layers=preprocessing_layers,
        preprocessing_combiner=preprocessing_combiner,
        conv_layer_params=conv_layer_params,
        fc_layer_params=fc_layer_params,
        dropout_layer_params=dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=encoder_initializer,
        batch_squash=False)

    # One output unit per action element, initialised in a narrow range.
    projection_initializer = tf.keras.initializers.RandomUniform(
        minval=-0.003, maxval=0.003)
    self._action_projection_layer = tf.keras.layers.Dense(
        action_specs[0].shape.num_elements(),
        activation=tf.keras.activations.tanh,
        kernel_initializer=projection_initializer,
        name='action')
def testIndicatorFeatureColumnInput(self):
    """Checks the output shape for an indicator feature column.

    The expected output has one row per input id and one column per
    vocabulary entry.
    """
    feature_name = 'feature_key'
    vocabulary = [2, 3, 4]
    raw_ids = [3, 2, 2, 4, 3]

    categorical = tf.feature_column.categorical_column_with_vocabulary_list(
        feature_name, vocabulary)
    indicator = tf.feature_column.indicator_column(categorical)

    observations = {feature_name: tf.expand_dims(raw_ids, -1)}
    spec = {feature_name: tensor_spec.TensorSpec([1], tf.int32)}

    combiner = tf.compat.v2.keras.layers.DenseFeatures([indicator])
    network = encoding_network.EncodingNetwork(
        spec, preprocessing_combiner=combiner)
    encoded, _ = network(observations)

    self.assertEqual((len(raw_ids), len(vocabulary)), encoded.shape)
def __init__(
        self,
        observation_spec,
        action_spec,
        preprocessing_layers=None,
        preprocessing_combiner=None,
        conv_layer_params=None,
        fc_layer_params=(75, 40),
        dropout_layer_params=None,
        name='ActorNetwork'):
    """Builds the observation encoder and the tanh action projection layer.

    No explicit kernel initializers are passed here, so `EncodingNetwork` and
    `tf.keras.layers.Dense` use their own defaults.

    Args:
      observation_spec: Spec of the observations; forwarded to the base class
        as `input_tensor_spec` and to the `EncodingNetwork`.
      action_spec: Spec of the action; must flatten to exactly one spec.
      preprocessing_layers: Forwarded to `EncodingNetwork`.
      preprocessing_combiner: Forwarded to `EncodingNetwork`.
      conv_layer_params: Forwarded to `EncodingNetwork`.
      fc_layer_params: Forwarded to `EncodingNetwork`.
      dropout_layer_params: Forwarded to `EncodingNetwork`.
      name: Name of the network.

    Raises:
      ValueError: If `action_spec` does not flatten to exactly one spec.
    """
    super(CustomActorNetwork, self).__init__(
        input_tensor_spec=observation_spec, state_spec=(), name=name)

    # Exactly one flattened action spec is supported.
    self._action_spec = action_spec
    flat_action_spec = tf.nest.flatten(action_spec)
    if len(flat_action_spec) != 1:
        # The message previously claimed len=2, contradicting the check above.
        raise ValueError(
            'flatten action_spec should be len=1, but get len={}'.format(
                len(flat_action_spec)))
    self._single_action_spec = flat_action_spec[0]

    # Encoder over the observations; ReLU is fixed rather than configurable.
    self._encoder = encoding_network.EncodingNetwork(
        observation_spec,
        preprocessing_layers=preprocessing_layers,
        preprocessing_combiner=preprocessing_combiner,
        conv_layer_params=conv_layer_params,
        fc_layer_params=fc_layer_params,
        dropout_layer_params=dropout_layer_params,
        activation_fn=tf.keras.activations.relu,
        batch_squash=False)

    # One tanh output unit per element of the action.
    self._action_projection_layer = tf.keras.layers.Dense(
        flat_action_spec[0].shape.num_elements(),
        activation=tf.keras.activations.tanh,
        name='action_projection_layer')
def __init__(self,
             input_tensor_spec,
             action_spec,
             preprocessing_layers=None,
             preprocessing_combiner=None,
             conv_layer_params=None,
             fc_layer_params=(75, 40),
             dropout_layer_params=None,
             activation_fn=tf.keras.activations.relu,
             kernel_initializer=None,
             batch_squash=True,
             dtype=tf.float32,
             name='Q4Network'):
    """Builds the encoder and a dense head with four outputs per action.

    Args:
      input_tensor_spec: Spec of the input observations.
      action_spec: Bounded spec of the action; `maximum - minimum + 1` is
        used as the number of actions.
      preprocessing_layers: Forwarded to `EncodingNetwork`.
      preprocessing_combiner: Forwarded to `EncodingNetwork`.
      conv_layer_params: Forwarded to `EncodingNetwork`.
      fc_layer_params: Forwarded to `EncodingNetwork`.
      dropout_layer_params: Forwarded to `EncodingNetwork`.
      activation_fn: Activation forwarded to `EncodingNetwork`.
      kernel_initializer: Kernel initializer forwarded to `EncodingNetwork`.
      batch_squash: Forwarded to `EncodingNetwork`.
      dtype: dtype used by the encoder and the output layer.
      name: Name of the network.
    """
    action_count = action_spec.maximum - action_spec.minimum + 1

    # Both sub-layers are created before the base class is initialised and
    # only attached to `self` afterwards.
    observation_encoder = encoding_network.EncodingNetwork(
        input_tensor_spec,
        preprocessing_layers=preprocessing_layers,
        preprocessing_combiner=preprocessing_combiner,
        conv_layer_params=conv_layer_params,
        fc_layer_params=fc_layer_params,
        dropout_layer_params=dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=kernel_initializer,
        batch_squash=batch_squash,
        dtype=dtype)

    output_kernel_init = tf.compat.v1.initializers.random_uniform(
        minval=-0.03, maxval=0.03)
    output_bias_init = tf.compat.v1.initializers.constant(-0.2)
    output_layer = tf.keras.layers.Dense(
        action_count * 4,
        activation=None,
        kernel_initializer=output_kernel_init,
        bias_initializer=output_bias_init,
        dtype=dtype)

    super(Q4Network, self).__init__(
        input_tensor_spec=input_tensor_spec, state_spec=(), name=name)

    self._encoder = observation_encoder
    self._q_value_layer = output_layer
def test_dict_spec_and_pre_processing(self):
    """Checks the output shape for a dict spec with per-key preprocessing.

    Each of the two image-shaped inputs is flattened and the results are
    concatenated on the last axis.
    """
    image_spec_shape = (32, 32, 3)
    spec = {
        'a': tensor_spec.TensorSpec(image_spec_shape, tf.float32),
        'b': tensor_spec.TensorSpec(image_spec_shape, tf.float32),
    }
    per_key_layers = {
        'a': tf.keras.layers.Flatten(),
        'b': tf.keras.layers.Flatten(),
    }
    network = encoding_network.EncodingNetwork(
        spec,
        preprocessing_layers=per_key_layers,
        fc_layer_params=(),
        preprocessing_combiner=tf.keras.layers.Concatenate(axis=-1),
        activation_fn=tf.keras.activations.tanh,
    )

    sample = tensor_spec.sample_spec_nest(spec)
    encoded, _ = network(sample)

    # 6144 is the shape from a concat of flat (32, 32, 3) x2.
    self.assertEqual((6144, ), encoded.shape)
def __init__(self,
             observation_spec,
             action_spec,
             preprocessing_layers=None,
             preprocessing_combiner=None,
             conv_layer_params=None,
             fc_layer_params=(64, 64),
             dropout_layer_params=None,
             activation_fn=tf.keras.activations.relu,
             enable_last_layer_zero_initializer=False,
             name='CustomActorNetwork'):
    """Builds the observation encoder and the tanh action projection layer.

    The encoder uses a glorot-uniform kernel initializer; the projection
    layer is initialised uniformly in [-0.001, 0.001].

    Args:
      observation_spec: Spec of the observations; forwarded to the base class
        as `input_tensor_spec` and to the `EncodingNetwork`.
      action_spec: Spec of the action; only its first flattened entry is
        used and no length check is performed here.
      preprocessing_layers: Forwarded to `EncodingNetwork`.
      preprocessing_combiner: Forwarded to `EncodingNetwork`.
      conv_layer_params: Forwarded to `EncodingNetwork`.
      fc_layer_params: Forwarded to `EncodingNetwork`.
      dropout_layer_params: Forwarded to `EncodingNetwork`.
      activation_fn: Activation forwarded to `EncodingNetwork`.
      enable_last_layer_zero_initializer: Accepted but not read in this
        constructor.  NOTE(review): confirm whether it is meant to be unused.
      name: Name of the network.
    """
    super().__init__(
        input_tensor_spec=observation_spec, state_spec=(), name=name)

    self._action_spec = action_spec
    action_specs = tf.nest.flatten(action_spec)
    self._single_action_spec = action_specs[0]

    # A VarianceScaling(scale=1/3, fan_in, uniform) initializer was used here
    # previously (kept only as commented-out code in the original).
    encoder_initializer = tf.compat.v1.keras.initializers.glorot_uniform()
    self._encoder = encoding_network.EncodingNetwork(
        observation_spec,
        preprocessing_layers=preprocessing_layers,
        preprocessing_combiner=preprocessing_combiner,
        conv_layer_params=conv_layer_params,
        fc_layer_params=fc_layer_params,
        dropout_layer_params=dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=encoder_initializer,
        batch_squash=False)

    projection_initializer = tf.keras.initializers.RandomUniform(-0.001, 0.001)
    self._action_projection_layer = tf.keras.layers.Dense(
        action_specs[0].shape.num_elements(),
        activation=tf.keras.activations.tanh,
        kernel_initializer=projection_initializer,
        name='action')
def test_non_preprocessing_layers_2d(self):
    """Checks the layer stack built from conv and fc params on a 2D input.

    Expects two conv layers, a flatten layer and three dense layers, with
    ten variables in total once the variables have been created.
    """
    conv_filters = (16, 15)
    dense_units = (10, 5, 2)

    spec = tensor_spec.TensorSpec((32, 32, 3), tf.float32)
    network = encoding_network.EncodingNetwork(
        spec,
        conv_layer_params=tuple((filters, 2, 1) for filters in conv_filters),
        fc_layer_params=dense_units,
        activation_fn=tf.keras.activations.tanh,
    )
    network.create_variables()

    self.assertLen(network.variables, 10)
    self.assertLen(network.layers, 6)

    # Validate the two conv layers (indices 0 and 1).
    for index, filters in enumerate(conv_filters):
        config = network.layers[index].get_config()
        self.assertEqual('tanh', config['activation'])
        self.assertEqual((2, 2), config['kernel_size'])
        self.assertEqual(filters, config['filters'])
        self.assertEqual((1, 1), config['strides'])
        self.assertTrue(config['trainable'])

    # Validate flatten layer.
    self.assertEqual('flatten', network.layers[2].get_config()['name'])

    # Validate dense layers (indices 3 to 5).
    for index, units in enumerate(dense_units, start=3):
        self.assertEqual(units, network.layers[index].get_config()['units'])
def test_layers_buildable(self):
    """Checks that `create_variables` builds nested preprocessing layers.

    One preprocessing branch contains a Dense layer, so the network must
    report a non-empty variable list after `create_variables`.
    """
    image_spec_shape = (32, 32, 3)
    spec = {
        'a': tensor_spec.TensorSpec(image_spec_shape, tf.float32),
        'b': tensor_spec.TensorSpec(image_spec_shape, tf.float32),
    }
    dense_then_flatten = tf.keras.Sequential([
        tf.keras.layers.Dense(4, activation='tanh'),
        tf.keras.layers.Flatten(),
    ])
    network = encoding_network.EncodingNetwork(
        spec,
        preprocessing_layers={
            'a': dense_then_flatten,
            'b': tf.keras.layers.Flatten(),
        },
        fc_layer_params=(),
        preprocessing_combiner=tf.keras.layers.Concatenate(axis=-1),
        activation_fn=tf.keras.activations.tanh,
    )

    network.create_variables()
    self.assertNotEmpty(network.variables)
def __init__(self,
             observation_spec,
             action_spec,
             preprocessing_layers=None,
             preprocessing_combiner=None,
             conv_layer_params=None,
             fc_layer_params=(75, 40),
             dropout_layer_params=None,
             activation_fn=tf.keras.activations.relu,
             enable_last_layer_zero_initializer=False,
             name='ActorNetwork'):
    """Builds the observation encoder and a 9-unit tanh projection layer.

    Args:
      observation_spec: Spec of the observations; forwarded to the base class
        as `input_tensor_spec` and to the `EncodingNetwork`.
      action_spec: Spec of the action; only its first flattened entry is
        stored and no length or dtype check is performed here.
      preprocessing_layers: Forwarded to `EncodingNetwork`.
      preprocessing_combiner: Forwarded to `EncodingNetwork`.
      conv_layer_params: Forwarded to `EncodingNetwork`.
      fc_layer_params: Forwarded to `EncodingNetwork`.
      dropout_layer_params: Forwarded to `EncodingNetwork`.
      activation_fn: Activation forwarded to `EncodingNetwork`.
      enable_last_layer_zero_initializer: Accepted but not read in this
        constructor.  NOTE(review): confirm whether it is meant to be unused.
      name: Name of the network.
    """
    super().__init__(
        input_tensor_spec=observation_spec, state_spec=(), name=name)

    self._action_spec = action_spec
    action_specs = tf.nest.flatten(action_spec)
    self._single_action_spec = action_specs[0]

    encoder_initializer = tf.keras.initializers.VarianceScaling(
        scale=1.0 / 3.0, mode='fan_in', distribution='uniform')
    self._encoder = encoding_network.EncodingNetwork(
        observation_spec,
        preprocessing_layers=preprocessing_layers,
        preprocessing_combiner=preprocessing_combiner,
        conv_layer_params=conv_layer_params,
        fc_layer_params=fc_layer_params,
        dropout_layer_params=dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=encoder_initializer,
        batch_squash=False)

    projection_initializer = tf.keras.initializers.RandomUniform(
        minval=-0.003, maxval=0.003)
    # NOTE(review): the output width is hard-coded to 9 rather than derived
    # from `action_spec` -- confirm this matches the intended action size.
    self._action_projection_layer = tf.keras.layers.Dense(
        9,
        activation=tf.keras.activations.tanh,
        kernel_initializer=projection_initializer,
        name='action')
def testKerasIntegerLookup(self):
    """Checks the output shape of an IntegerLookup + CategoryEncoding combiner.

    Skipped when not executing eagerly.
    """
    if not tf.executing_eagerly():
        self.skipTest('This test is TF2 only.')

    feature_name = 'feature_key'
    vocabulary = [2, 3, 4]
    raw_ids = [3, 2, 2, 4, 3]

    input_layer = tf.keras.Input(
        shape=(1,), name=feature_name, dtype=tf.dtypes.int32)
    lookup_layer = keras_preprocessing.IntegerLookup(vocabulary=vocabulary)
    encoding_layer = keras_preprocessing.CategoryEncoding(
        max_tokens=len(vocabulary))

    observations = {feature_name: tf.expand_dims(raw_ids, -1)}
    spec = {feature_name: tensor_spec.TensorSpec([1], tf.int32)}

    combiner = tf.keras.Sequential([input_layer, lookup_layer, encoding_layer])
    network = encoding_network.EncodingNetwork(
        spec, preprocessing_combiner=combiner)
    encoded, _ = network(observations)

    self.assertEqual((len(raw_ids), len(vocabulary)), encoded.shape)
def create_feed_forward_common_tower_network(
    observation_spec: types.NestedTensorSpec,
    global_layers: Sequence[int],
    arm_layers: Sequence[int],
    common_layers: Sequence[int],
    output_dim: int = 1,
    global_preprocessing_combiner: Optional[Callable[..., types.Tensor]] = None,
    arm_preprocessing_combiner: Optional[Callable[..., types.Tensor]] = None,
    activation_fn: Callable[[types.Tensor],
                            types.Tensor] = tf.keras.activations.relu
) -> types.Network:
    """Builds a common tower network whose three towers are feedforward.

    The resulting network can be used either in
    `GreedyRewardPredictionPolicy` or in `NeuralLinUCBPolicy`. For the former
    the network must have `output_dim=1`; the common tower is then a
    `QNetwork` used as a reward prediction network. For the latter the common
    tower is an encoding network whose output is consumed by a reward layer or
    a LinUCB method, and `output_dim` is the encoding dimension.

    Args:
      observation_spec: A nested tensor spec containing the specs for global
        as well as per-arm observations.
      global_layers: Iterable of ints. Layer sizes of the global tower.
      arm_layers: Iterable of ints. Layer sizes of the arm tower.
      common_layers: Iterable of ints. Layer sizes of the common tower.
      output_dim: The output dimension of the network. If 1, the common tower
        is a `QNetwork`. Otherwise it is an encoding network with
        `output_dim` appended as its final layer size.
      global_preprocessing_combiner: Preprocessing combiner for global
        features.
      arm_preprocessing_combiner: Preprocessing combiner for the arm features.
      activation_fn: A keras activation used in all layers. Defaults to relu.

    Returns:
      A network that takes observations adhering to `observation_spec` and
      outputs reward estimates for every action.
    """
    spec = _remove_num_actions_dim_from_spec(observation_spec)

    def _build_tower(feature_key, layer_sizes, combiner):
        # Global and arm towers differ only in spec entry, sizes and combiner.
        return encoding_network.EncodingNetwork(
            input_tensor_spec=spec[feature_key],
            fc_layer_params=layer_sizes,
            activation_fn=activation_fn,
            preprocessing_combiner=combiner)

    global_network = _build_tower(bandit_spec_utils.GLOBAL_FEATURE_KEY,
                                  global_layers,
                                  global_preprocessing_combiner)
    arm_network = _build_tower(bandit_spec_utils.PER_ARM_FEATURE_KEY,
                               arm_layers, arm_preprocessing_combiner)

    # The common tower consumes the last global and last arm layer together,
    # so its input width is the sum of their sizes.
    common_input_spec = tensor_spec.TensorSpec(
        shape=(global_layers[-1] + arm_layers[-1],), dtype=tf.float32)

    if output_dim != 1:
        common_network = encoding_network.EncodingNetwork(
            input_tensor_spec=common_input_spec,
            fc_layer_params=list(common_layers) + [output_dim],
            activation_fn=activation_fn)
    else:
        common_network = q_network.QNetwork(
            input_tensor_spec=common_input_spec,
            action_spec=tensor_spec.BoundedTensorSpec(
                shape=(), minimum=0, maximum=0, dtype=tf.int32),
            fc_layer_params=common_layers,
            activation_fn=activation_fn)

    return GlobalAndArmCommonTowerNetwork(spec, global_network, arm_network,
                                          common_network)
def __init__(
    self,
    input_tensor_spec,
    preprocessing_layers=None,
    preprocessing_combiner=None,
    conv_layer_params=None,
    input_fc_layer_params=(75, 40),
    lstm_size=(40,),
    output_fc_layer_params=(75, 40),
    activation_fn=tf.keras.activations.relu,
    dtype=tf.float32,
    name='LSTMEncodingNetwork',
):
    """Creates an instance of `LSTMEncodingNetwork`.

    Input preprocessing is possible via `preprocessing_layers` and
    `preprocessing_combiner` Layers.  If the `preprocessing_layers` nest is
    shallower than `input_tensor_spec`, then the layers will get the subnests.
    For example, if:

    ```python
    input_tensor_spec = ([TensorSpec(3)] * 2, [TensorSpec(3)] * 5)
    preprocessing_layers = (Layer1(), Layer2())
    ```

    then preprocessing will call:

    ```python
    preprocessed = [preprocessing_layers[0](observations[0]),
                    preprocessing_layers[1](observations[1])]
    ```

    However if

    ```python
    preprocessing_layers = ([Layer1() for _ in range(2)],
                            [Layer2() for _ in range(5)])
    ```

    then preprocessing will call:

    ```python
    preprocessed = [
      layer(obs) for layer, obs in zip(flatten(preprocessing_layers),
                                       flatten(observations))
    ]
    ```

    Args:
      input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing the
        observations.
      preprocessing_layers: (Optional.) A nest of `tf.keras.layers.Layer`
        representing preprocessing for the different observations. All of
        these layers must not be already built.
      preprocessing_combiner: (Optional.) A keras layer that takes a flat list
        of tensors and combines them.  Good options include
        `tf.keras.layers.Add` and `tf.keras.layers.Concatenate(axis=-1)`. This
        layer must not be already built.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride).
      input_fc_layer_params: Optional list of fully connected parameters,
        where each item is the number of units in the layer. These feed into
        the recurrent layer.
      lstm_size: An iterable of ints specifying the LSTM cell sizes to use.
        A single entry yields one `LSTMCell`; any other length yields
        `StackedRNNCells`.
      output_fc_layer_params: Optional list of fully connected parameters,
        where each item is the number of units in the layer. These are applied
        on top of the recurrent layer.
      activation_fn: Activation function, e.g. tf.keras.activations.relu.
      dtype: The dtype to use by the convolution, LSTM, and fully connected
        layers.
      name: A string representing name of the network.

    Raises:
      ValueError: If any of `preprocessing_layers` is already built.
      ValueError: If `preprocessing_combiner` is already built.
    """
    # One initializer instance is shared by the input encoder and the output
    # dense layers.
    kernel_initializer = tf.compat.v1.variance_scaling_initializer(
        scale=2.0, mode='fan_in', distribution='truncated_normal')

    input_encoder = encoding_network.EncodingNetwork(
        input_tensor_spec,
        preprocessing_layers=preprocessing_layers,
        preprocessing_combiner=preprocessing_combiner,
        conv_layer_params=conv_layer_params,
        fc_layer_params=input_fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=kernel_initializer,
        dtype=dtype)

    def _make_lstm_cell(units):
        return tf.keras.layers.LSTMCell(
            units,
            dtype=dtype,
            implementation=KERAS_LSTM_FUSED_IMPLEMENTATION)

    # Create RNN cell
    if len(lstm_size) == 1:
        cell = _make_lstm_cell(lstm_size[0])
    else:
        cell = tf.keras.layers.StackedRNNCells(
            [_make_lstm_cell(size) for size in lstm_size])

    # Dense layers applied after the recurrent layer; empty when no output
    # layer sizes were given.
    dense_name = '/'.join([name, 'dense'])
    output_encoder = [
        tf.keras.layers.Dense(
            num_units,
            activation=activation_fn,
            kernel_initializer=kernel_initializer,
            dtype=dtype,
            name=dense_name)
        for num_units in (output_fc_layer_params or ())
    ]

    # Running index used to give every state spec a unique name; a list is
    # used so the nested function can update it.
    state_index = [-1]

    def _state_spec_for(size):
        state_index[0] += 1
        return tensor_spec.TensorSpec(
            size, dtype=dtype, name='network_state_%d' % state_index[0])

    state_spec = tf.nest.map_structure(_state_spec_for, cell.state_size)

    super(LSTMEncodingNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec,
        state_spec=state_spec,
        name=name)

    self._conv_layer_params = conv_layer_params
    self._input_encoder = input_encoder
    self._dynamic_unroll = dynamic_unroll_layer.DynamicUnroll(cell)
    self._output_encoder = output_encoder
def __init__(
        self,
        input_tensor_spec,
        preprocessing_combiner=None,
        joint_fc_layer_params=None,
        joint_dropout_layer_params=None,
        kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(
            scale=1. / 3., mode='fan_in', distribution='uniform'),
        activation_fn=tf.nn.relu,
        name='CriticNetwork'):
    """Creates an instance of `CriticNetwork`.

    Args:
      input_tensor_spec: A tuple of (observation, action) each a nest of
        `tensor_spec.TensorSpec` representing the inputs.
      preprocessing_combiner: Combiner layer for observation and action
        inputs.
      joint_fc_layer_params: Optional list of fully connected parameters after
        merging observations and actions, where each item is the number of
        units in the layer.
      joint_dropout_layer_params: Optional list of dropout layer parameters,
        each item is the fraction of input units to drop or a dictionary of
        parameters according to the keras.Dropout documentation. The
        additional parameter `permanent`, if set to True, allows to apply
        dropout at inference for approximated Bayesian inference. The dropout
        layers are interleaved with the fully connected layers; there is a
        dropout layer after each fully connected layer, except if the entry
        in the list is None. This list must have the same length as
        `joint_fc_layer_params`, or be None.
      kernel_initializer: Initializer to use for the kernels of the encoder
        layers. Defaults to a uniform fan-in VarianceScaling initializer with
        scale 1/3.
      activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
      name: A string representing name of the network.

    Raises:
      ValueError: If the observation spec contains more than one observation
        and no `preprocessing_combiner` is given, or if the action spec
        contains more than one action.
    """
    observation_spec, action_spec = input_tensor_spec

    multiple_observations = len(tf.nest.flatten(observation_spec)) > 1
    if multiple_observations and preprocessing_combiner is None:
        raise ValueError(
            'Only a single observation is supported by this network')

    action_specs = tf.nest.flatten(action_spec)
    if len(action_specs) > 1:
        raise ValueError('Only a single action is supported by this network')
    self._single_action_spec = action_specs[0]

    # combiner assumes a single batch dimension, without time
    super(CriticNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec, state_spec=(), name=name)

    # No per-input preprocessing layers: inputs go straight to the combiner.
    self._encoder = encoding_network.EncodingNetwork(
        input_tensor_spec,
        preprocessing_layers=None,
        preprocessing_combiner=preprocessing_combiner,
        fc_layer_params=joint_fc_layer_params,
        dropout_layer_params=joint_dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=kernel_initializer,
        batch_squash=False)

    value_initializer = tf.keras.initializers.RandomUniform(
        minval=-0.003, maxval=0.003)
    self._value_layer = tf.keras.layers.Dense(
        1,
        activation=None,
        kernel_initializer=value_initializer,
        name='value')
def testCombinedKerasPreprocessingLayers(self):
    """Checks the output width of a functional Keras preprocessing model.

    Multi-hot, embedding and numeric features are concatenated by a
    `tf.keras.Model` used as the combiner; the expected last dimension is the
    sum of the widths contributed by each feature. Skipped when not executing
    eagerly.
    """
    if not tf.executing_eagerly():
        self.skipTest('This test is TF2 only.')

    vocabulary = [2, 3, 4]
    raw_ids = [3, 2, 2, 4, 3]

    inputs = {}
    features = {}
    tensors = {}
    specs = {}
    expected_dim = 0

    # Multi-hot feature: contributes one slot per vocabulary entry.
    indicator_key = 'indicator_key'
    inputs[indicator_key] = tf.keras.Input(
        shape=(1,), dtype=tf.dtypes.int32, name=indicator_key)
    multi_hot_lookup = keras_preprocessing.IntegerLookup(
        vocabulary=vocabulary, num_oov_indices=0, output_mode='multi_hot')
    features[indicator_key] = multi_hot_lookup(inputs[indicator_key])
    tensors[indicator_key] = tf.expand_dims(raw_ids, -1)
    specs[indicator_key] = tensor_spec.TensorSpec([1], tf.int32)
    expected_dim += len(vocabulary)

    # Embedding feature: contributes `embedding_dim` slots.
    embedding_key = 'embedding_key'
    embedding_dim = 3
    inputs[embedding_key] = tf.keras.Input(
        shape=(1,), dtype=tf.dtypes.int32, name=embedding_key)
    id_lookup = keras_preprocessing.IntegerLookup(
        vocabulary=vocabulary, num_oov_indices=0)
    looked_up_ids = id_lookup(inputs[embedding_key])
    embedded = tf.keras.layers.Embedding(
        input_dim=len(vocabulary), output_dim=embedding_dim)(looked_up_ids)
    features[embedding_key] = tf.reduce_sum(embedded, axis=-2)
    tensors[embedding_key] = tf.expand_dims(raw_ids, -1)
    specs[embedding_key] = tensor_spec.TensorSpec([1], tf.int32)
    expected_dim += embedding_dim

    # Numeric feature: passed through unchanged, contributes `state_dims`.
    numeric_key = 'numeric_key'
    batch_size, state_dims = 5, 3
    inputs[numeric_key] = tf.keras.Input(
        shape=[state_dims], dtype=tf.float32, name=numeric_key)
    features[numeric_key] = inputs[numeric_key]
    tensors[numeric_key] = tf.ones((batch_size, state_dims), tf.float32)
    specs[numeric_key] = tensor_spec.TensorSpec([state_dims], tf.float32)
    expected_dim += state_dims

    combined = tf.keras.layers.concatenate(features.values(), axis=-1)

    # TODO(b/170645185): Replace Model with FunctionalPreprocessingStage.
    combiner = tf.keras.Model(inputs=inputs, outputs=combined)
    network = encoding_network.EncodingNetwork(
        specs, preprocessing_combiner=combiner)
    encoded, _ = network(tensors)

    self.assertEqual((batch_size, expected_dim), encoded.shape)
def __init__(self,
             input_tensor_spec,
             action_spec,
             preprocessing_layers=None,
             preprocessing_combiner=None,
             conv_layer_params=None,
             fc_layer_params=None,
             dropout_layer_params=None,
             a_fc_layer_params=None,
             a_weight_decay_params=None,
             a_dropout_layer_params=None,
             v_fc_layer_params=None,
             v_weight_decay_params=None,
             v_dropout_layer_params=None,
             activation_fn=tf.keras.activations.relu,
             av_combine_fn=None,
             kernel_initializer=None,
             batch_squash=True,
             dtype=tf.float32,
             name='DuelQNetwork'):
    """Creates an instance of `DuelQNetwork`.

    The network consists of a shared encoder followed by two branches: an
    advantage branch projecting to one value per action, and a state branch
    projecting to a single scalar. All sub-layers are constructed before the
    base class is initialised and attached to `self` afterwards.

    Args:
      input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing the
        input observations.
      action_spec: A nest of `tensor_spec.BoundedTensorSpec` representing the
        actions. Only the first flattened entry is used; the number of
        actions is `maximum - minimum + 1`.
      preprocessing_layers: (Optional.) A nest of `tf.keras.layers.Layer`
        representing preprocessing for the different observations. All of
        these layers must not be already built. For more details see the
        documentation of `networks.EncodingNetwork`.
      preprocessing_combiner: (Optional.) A keras layer that takes a flat list
        of tensors and combines them. Good options include
        `tf.keras.layers.Add` and `tf.keras.layers.Concatenate(axis=-1)`.
        This layer must not be already built. For more details see the
        documentation of `networks.EncodingNetwork`.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride), used in the shared encoder.
      fc_layer_params: Optional list of fully_connected parameters, where each
        item is the number of units in the layer, used in the shared encoder.
      dropout_layer_params: Forwarded to the shared encoder.
      a_fc_layer_params: Optional list of fully_connected parameters, where
        each item is the number of units in the layer, for the advantage
        branch.
      a_weight_decay_params: Optional list of L2 weight decay params for the
        advantage branch, where each item is the L2-regularization strength
        applied to the corresponding fully_connected layer. Same length as
        `a_fc_layer_params`, or None.
      a_dropout_layer_params: Optional list of dropout layer parameters for
        the advantage branch, where each item is the fraction of input units
        to drop. There is a dropout layer after each fully connected layer,
        except if the entry in the list is None. Same length as
        `a_fc_layer_params`.
      v_fc_layer_params: Same as `a_fc_layer_params`, for the state branch.
      v_weight_decay_params: Same as `a_weight_decay_params`, for the state
        branch. Same length as `v_fc_layer_params`, or None.
      v_dropout_layer_params: Same as `a_dropout_layer_params`, for the state
        branch. Same length as `v_fc_layer_params`.
      activation_fn: Activation function, e.g. tf.keras.activations.relu.
      av_combine_fn: Function to produce the q-value from the advantage and
        the state value. When falsy, a fallback attribute of `self` is used.
      kernel_initializer: Initializer to use for the kernels of the conv and
        dense layers. If none is provided a default
        variance_scaling_initializer is used (per the original documentation;
        the default itself is chosen outside this constructor).
      batch_squash: If True the outer_ranks of the observation are squashed
        into the batch dimension. This allows encoding networks to be used
        with observations with shape [BxTx...].
      dtype: The dtype to use by the convolution and fully connected layers.
      name: A string representing the name of the network.

    Raises:
      ValueError: If `input_tensor_spec` contains more than one observation,
        or if `action_spec` contains more than one action (presumably raised
        by `q_network.validate_specs` -- confirm against that helper).
    """
    q_network.validate_specs(action_spec, input_tensor_spec)
    # From here on `action_spec` refers to the single flattened action spec.
    action_spec = tf.nest.flatten(action_spec)[0]
    num_actions = action_spec.maximum - action_spec.minimum + 1
    encoder_input_tensor_spec = input_tensor_spec

    # Shared encoder to convert observation to shared state tensor
    # which is fed to advantage branch and state branch
    encoder = encoding_network.EncodingNetwork(
        encoder_input_tensor_spec,
        preprocessing_layers=preprocessing_layers,
        preprocessing_combiner=preprocessing_combiner,
        conv_layer_params=conv_layer_params,
        fc_layer_params=fc_layer_params,
        dropout_layer_params=dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=kernel_initializer,
        batch_squash=batch_squash,
        dtype=dtype,
        name='shared_encoder')

    # Advantage branch
    # Advantage intermediate fully connected layers
    # NOTE: `create_branch_layers` is invoked on `self` before the base class
    # has been initialised below.
    a_encode_layers = self.create_branch_layers(a_fc_layer_params,
                                                a_dropout_layer_params,
                                                a_weight_decay_params,
                                                activation_fn,
                                                kernel_initializer,
                                                dtype,
                                                name='a_branch_layer')

    # Advantage dense layer to project to action space
    a_value_layer = tf.keras.layers.Dense(
        num_actions,
        activation=None,
        kernel_initializer=tf.compat.v1.initializers.random_uniform(
            minval=-0.03, maxval=0.03),
        bias_initializer=tf.compat.v1.initializers.constant(-0.2),
        dtype=dtype,
        name='a_value_layer')

    # State branch
    # State intermediate fully connected layers
    v_encoder_layers = self.create_branch_layers(v_fc_layer_params,
                                                 v_dropout_layer_params,
                                                 v_weight_decay_params,
                                                 activation_fn,
                                                 kernel_initializer,
                                                 dtype,
                                                 name='v_branch_layer')

    # State dense layer to project to a single scalar state value
    v_value_layer = tf.keras.layers.Dense(
        1,
        activation=None,
        kernel_initializer=tf.compat.v1.initializers.random_uniform(
            minval=-0.03, maxval=0.03),
        bias_initializer=tf.compat.v1.initializers.constant(-0.2),
        dtype=dtype,
        name='v_value_layer')

    super().__init__(input_tensor_spec=input_tensor_spec,
                     state_spec=(),
                     name=name)

    self._encoder = encoder
    self._a_encode_layers = a_encode_layers
    self._a_value_layer = a_value_layer
    self._v_encode_layers = v_encoder_layers
    self._v_value_layer = v_value_layer
    # NOTE(review): the fallback attribute name `av_combine_f` looks truncated
    # in this view -- confirm the intended method name on the class.
    self._av_combine_fn = av_combine_fn or self.av_combine_f
def __init__(self,
             input_tensor_spec,
             output_tensor_spec,
             gnn,
             fc_layer_params=None,
             dropout_layer_params=None,
             conv_layer_params=None,
             activation_fn=tf.nn.relu,
             name='ActorNetwork',
             params=None):
    """Creates an instance of `GNNActorNetwork`.

    Args:
      input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing the
        inputs. Must flatten to a single spec.
      output_tensor_spec: A nest of `tensor_spec.BoundedTensorSpec`
        representing the outputs. Must flatten to a single float spec.
      gnn: Callable invoked as `gnn(name=name, params=params)` to build the
        graph neural network. The returned object must expose
        `_embedding_size`, which fixes the encoder's input width.
      fc_layer_params: Optional list of fully_connected parameters, where each
        item is the number of units in the layer.
      dropout_layer_params: Optional list of dropout layer parameters, each
        item is the fraction of input units to drop or a dictionary of
        parameters according to the keras.Dropout documentation. The
        additional parameter `permanent`, if set to True, allows to apply
        dropout at inference for approximated Bayesian inference. The dropout
        layers are interleaved with the fully connected layers; there is a
        dropout layer after each fully connected layer, except if the entry in
        the list is None. This list must have the same length of
        fc_layer_params, or be None.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride).
      activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
      name: A string representing name of the network. Also passed to `gnn`.
      params: Parameter object forwarded to `gnn`. When `None`, a fresh
        `ParameterServer()` is created for this instance.

    Raises:
      ValueError: If `input_tensor_spec` or `output_tensor_spec` contains more
        than one item, if the action data type is not `float`, or if `gnn` is
        `None`.
    """
    super(GNNActorNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec, state_spec=(), name=name)

    if len(tf.nest.flatten(input_tensor_spec)) > 1:
        raise ValueError(
            'Only a single observation is supported by this network')

    flat_action_spec = tf.nest.flatten(output_tensor_spec)
    if len(flat_action_spec) > 1:
        raise ValueError('Only a single action is supported by this network')

    if flat_action_spec[0].dtype not in [tf.float32, tf.float64]:
        raise ValueError('Only float actions are supported by this network.')

    if gnn is None:
        raise ValueError('`gnn` must not be `None`.')

    # A default of `ParameterServer()` in the signature would be evaluated
    # once at definition time and shared by every network built without
    # explicit params; build a per-instance object instead.
    if params is None:
        params = ParameterServer()

    self._gnn = gnn(name=name, params=params)
    self._latent_trace = None

    # The encoder consumes the GNN embeddings, so its input width is the GNN's
    # embedding size rather than the raw observation shape.
    self._encoder = encoding_network.EncodingNetwork(
        input_tensor_spec=tf.TensorSpec([None, self._gnn._embedding_size]),
        preprocessing_layers=None,
        preprocessing_combiner=None,
        conv_layer_params=conv_layer_params,
        fc_layer_params=fc_layer_params,
        dropout_layer_params=dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=tf.compat.v1.keras.initializers.glorot_uniform(),
        batch_squash=False,
        dtype=tf.float32)

    # `projection_net` is not a parameter of this method; it is resolved from
    # the enclosing module scope.
    self._projection_nets = tf.nest.map_structure(projection_net,
                                                  output_tensor_spec)
    self._output_tensor_spec = tf.nest.map_structure(
        lambda proj_net: proj_net.output_spec, self._projection_nets)
def __init__(
        self,
        observation_spec,
        conv_layer_params=None,
        input_fc_layer_params=(75, 40),
        lstm_size=(40,),
        output_fc_layer_params=(75, 40),
        activation_fn=tf.keras.activations.relu,
        name='LSTMEncodingNetwork',
):
    """Creates an instance of `LSTMEncodingNetwork`.

    Args:
      observation_spec: A nest of `tensor_spec.TensorSpec` representing the
        observations.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride).
      input_fc_layer_params: Optional list of fully connected parameters, where
        each item is the number of units in the layer. These feed into the
        recurrent layer.
      lstm_size: An iterable of ints specifying the LSTM cell sizes to use. A
        single entry yields one `LSTMCell`; otherwise the cells are wrapped in
        `StackedRNNCells`.
      output_fc_layer_params: Optional list of fully connected parameters,
        where each item is the number of units in the layer. These are applied
        on top of the recurrent layer. `None` means no output layers.
      activation_fn: Activation function, e.g. tf.keras.activations.relu.
      name: A string representing name of the network.
    """
    kernel_initializer = tf.variance_scaling_initializer(
        scale=2.0, mode='fan_in', distribution='truncated_normal')

    input_encoder = encoding_network.EncodingNetwork(
        observation_spec,
        conv_layer_params=conv_layer_params,
        fc_layer_params=input_fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=kernel_initializer)

    # Create RNN cell
    if len(lstm_size) == 1:
        cell = tf.keras.layers.LSTMCell(lstm_size[0])
    else:
        cell = tf.keras.layers.StackedRNNCells(
            [tf.keras.layers.LSTMCell(size) for size in lstm_size])

    # The argument is documented as optional, so treat None as "no output
    # layers" instead of failing when iterating over it.
    if output_fc_layer_params is None:
        output_fc_layer_params = ()

    output_encoder = [
        tf.keras.layers.Dense(
            num_units,
            activation=activation_fn,
            kernel_initializer=kernel_initializer,
            name='/'.join([name, 'dense']))
        for num_units in output_fc_layer_params
    ]

    # One float32 state spec per entry of the cell's state_size structure.
    state_spec = nest.map_structure(
        functools.partial(
            tensor_spec.TensorSpec,
            dtype=tf.float32,
            name='network_state_spec'),
        cell.state_size)

    super(LSTMEncodingNetwork, self).__init__(
        observation_spec=observation_spec,
        action_spec=None,
        state_spec=state_spec,
        name=name)

    self._conv_layer_params = conv_layer_params
    self._input_encoder = input_encoder
    self._cell = cell
    self._output_encoder = output_encoder
def __init__(
        self,
        input_tensor_spec,
        preprocessing_layers,
        preprocessing_combiner,
        joint_fc_layer_params=None,
        joint_dropout_layer_params=None,
        joint_activation_fn=tf.nn.relu,
        output_activation_fn=None,
        kernel_initializer=None,
        last_kernel_initializer=None,
        name='CriticNetwork'):
    """Creates an instance of `MultiObservationCriticNetwork`.

    Args:
      input_tensor_spec: A tuple of (observation, action) each a nest of
        `tensor_spec.TensorSpec` representing the inputs.
      preprocessing_layers: Forwarded to `EncodingNetwork` as its
        `preprocessing_layers` argument for the observation encoder.
      preprocessing_combiner: Forwarded to `EncodingNetwork` as its
        `preprocessing_combiner` argument for the observation encoder.
      joint_fc_layer_params: Optional list of fully connected parameters after
        merging observations and actions, where each item is the number of
        units in the layer.
      joint_dropout_layer_params: Optional list of dropout layer parameters,
        each item is the fraction of input units to drop or a dictionary of
        parameters according to the keras.Dropout documentation. The
        additional parameter `permanent`, if set to True, allows to apply
        dropout at inference for approximated Bayesian inference. The dropout
        layers are interleaved with the fully connected layers; there is a
        dropout layer after each fully connected layer, except if the entry in
        the list is None. This list must have the same length of
        joint_fc_layer_params, or be None.
      joint_activation_fn: Activation function for the joint layers, e.g.
        tf.nn.relu, slim.leaky_relu, ...
      output_activation_fn: Activation function for the last layer. This can be
        used to restrict the range of the output. For example, one can pass
        tf.keras.activations.sigmoid here to restrict the output to be bounded
        between 0 and 1.
      kernel_initializer: kernel initializer for all layers except for the
        value regression layer. If None, a VarianceScaling initializer will be
        used.
      last_kernel_initializer: kernel initializer for the value regression
        layer. If None, a RandomUniform initializer will be used.
      name: A string representing name of the network.
    """
    super(MultiObservationCriticNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec, state_spec=(), name=name)

    observation_spec, action_spec = input_tensor_spec
    # Only the first flattened action spec is retained.
    self._single_action_spec = tf.nest.flatten(action_spec)[0]

    # Resolve the initializers, constructing the defaults only when needed.
    hidden_initializer = (
        tf.compat.v1.keras.initializers.VarianceScaling(
            scale=1. / 3., mode='fan_in', distribution='uniform')
        if kernel_initializer is None else kernel_initializer)
    value_initializer = (
        tf.keras.initializers.RandomUniform(minval=-0.003, maxval=0.003)
        if last_kernel_initializer is None else last_kernel_initializer)

    # Observation encoder: preprocessing only, with no conv / fc / dropout
    # layers of its own.
    self._encoder = encoding_network.EncodingNetwork(
        observation_spec,
        preprocessing_layers=preprocessing_layers,
        preprocessing_combiner=preprocessing_combiner,
        conv_layer_params=None,
        fc_layer_params=None,
        dropout_layer_params=None,
        activation_fn=tf.keras.activations.relu,
        kernel_initializer=hidden_initializer,
        batch_squash=False)

    # TODO(kbanoop): Replace mlp_layers with encoding networks.
    # Separate observation and action encoding stacks are currently disabled;
    # only the joint stack below is built.
    joint_stack = utils.mlp_layers(
        None,
        joint_fc_layer_params,
        joint_dropout_layer_params,
        activation_fn=joint_activation_fn,
        kernel_initializer=hidden_initializer,
        name='joint_mlp')
    joint_stack.append(
        tf.keras.layers.Dense(
            1,
            activation=output_activation_fn,
            kernel_initializer=value_initializer,
            name='value'))
    self._joint_layers = joint_stack
def __init__(self,
             input_tensor_spec,
             output_tensor_spec,
             preprocessing_layers=None,
             preprocessing_combiner=None,
             batch_squash=True,
             fc_layer_params=None,
             dropout_layer_params=None,
             conv_layer_params=None,
             activation_fn=tf.keras.activations.relu,
             kernel_initializer=None,
             last_kernel_initializer=None,
             name='ActorNetwork'):
    """Creates an instance of `PaintingActorNetwork`.

    Args:
      input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing the
        inputs.
      output_tensor_spec: A nest of `tensor_spec.BoundedTensorSpec`
        representing the outputs. Must flatten to a single float spec.
      preprocessing_layers: (Optional.) A nest of `tf.keras.layers.Layer`
        representing preprocessing for the different observations. All of
        these layers must not be already built. For more details see the
        documentation of `networks.EncodingNetwork`.
      preprocessing_combiner: (Optional.) A keras layer that takes a flat list
        of tensors and combines them. Good options include
        `tf.keras.layers.Add` and `tf.keras.layers.Concatenate(axis=-1)`. This
        layer must not be already built. For more details see the
        documentation of `networks.EncodingNetwork`.
      batch_squash: If True the outer_ranks of the observation are squashed
        into the batch dimension. This allow encoding networks to be used with
        observations with shape [BxTx...].
      fc_layer_params: Optional list of fully_connected parameters, where each
        item is the number of units in the layer.
      dropout_layer_params: Optional list of dropout layer parameters, each
        item is the fraction of input units to drop or a dictionary of
        parameters according to the keras.Dropout documentation. The
        additional parameter `permanent`, if set to True, allows to apply
        dropout at inference for approximated Bayesian inference. The dropout
        layers are interleaved with the fully connected layers; there is a
        dropout layer after each fully connected layer, except if the entry in
        the list is None. This list must have the same length of
        fc_layer_params, or be None.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride).
      activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
      kernel_initializer: kernel initializer for the encoder layers. If None, a
        VarianceScaling initializer will be used.
      last_kernel_initializer: kernel initializer for the final action layer.
        If None, a RandomUniform initializer will be used.
      name: A string representing name of the network.

    Raises:
      ValueError: If `output_tensor_spec` contains more than one item, or if
        the action data type is not `float`.
    """
    super(PaintingActorNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec, state_spec=(), name=name)

    # Multiple observations are allowed here; only the action side is
    # restricted to a single float spec.
    action_specs = tf.nest.flatten(output_tensor_spec)
    if len(action_specs) > 1:
        raise ValueError('Only a single action is supported by this network')

    single_action_spec = action_specs[0]
    self._single_action_spec = single_action_spec
    if single_action_spec.dtype not in (tf.float32, tf.float64):
        raise ValueError('Only float actions are supported by this network.')

    # Resolve the initializers, constructing the defaults only when needed.
    encoder_initializer = (
        tf.compat.v1.keras.initializers.VarianceScaling(
            scale=1. / 3., mode='fan_in', distribution='uniform')
        if kernel_initializer is None else kernel_initializer)
    action_initializer = (
        tf.keras.initializers.RandomUniform(minval=-0.003, maxval=0.003)
        if last_kernel_initializer is None else last_kernel_initializer)

    self._encoder = encoding_network.EncodingNetwork(
        input_tensor_spec,
        preprocessing_layers=preprocessing_layers,
        preprocessing_combiner=preprocessing_combiner,
        conv_layer_params=conv_layer_params,
        fc_layer_params=fc_layer_params,
        dropout_layer_params=dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=encoder_initializer,
        batch_squash=batch_squash,
        name='input_encoding')

    # One tanh-activated output unit per element of the action spec.
    self._action_layer = tf.keras.layers.Dense(
        single_action_spec.shape.num_elements(),
        activation=tf.keras.activations.tanh,
        kernel_initializer=action_initializer,
        name='action')

    self._output_tensor_spec = output_tensor_spec
def __init__(self,
             input_tensor_spec,
             output_tensor_spec,
             gnn,
             preprocessing_layers=None,
             preprocessing_combiner=None,
             conv_layer_params=None,
             fc_layer_params=(200, 100),
             dropout_layer_params=None,
             activation_fn=tf.keras.activations.relu,
             kernel_initializer=None,
             batch_squash=False,
             dtype=tf.float32,
             discrete_projection_net=_categorical_projection_net,
             continuous_projection_net=_normal_projection_net,
             name='ActorDistributionNetwork',
             params=None):
    """Creates an instance of `GNNActorDistributionNetwork`.

    Args:
      input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing the
        input.
      output_tensor_spec: A nest of `tensor_spec.BoundedTensorSpec`
        representing the output.
      gnn: Callable invoked as `gnn(name=name + "_GNN", params=params)` to
        build the graph neural network. The returned object must expose
        `_embedding_size`, which fixes the encoder's input width.
      preprocessing_layers: (Optional.) A nest of `tf.keras.layers.Layer`
        representing preprocessing for the different observations. All of
        these layers must not be already built. For more details see the
        documentation of `networks.EncodingNetwork`.
      preprocessing_combiner: (Optional.) A keras layer that takes a flat list
        of tensors and combines them. Good options include
        `tf.keras.layers.Add` and `tf.keras.layers.Concatenate(axis=-1)`. This
        layer must not be already built. For more details see the
        documentation of `networks.EncodingNetwork`.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride).
      fc_layer_params: Optional list of fully_connected parameters, where each
        item is the number of units in the layer.
      dropout_layer_params: Optional list of dropout layer parameters, each
        item is the fraction of input units to drop or a dictionary of
        parameters according to the keras.Dropout documentation. The
        additional parameter `permanent`, if set to True, allows to apply
        dropout at inference for approximated Bayesian inference. The dropout
        layers are interleaved with the fully connected layers; there is a
        dropout layer after each fully connected layer, except if the entry in
        the list is None. This list must have the same length of
        fc_layer_params, or be None.
      activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
      kernel_initializer: Initializer to use for the kernels of the conv and
        dense layers. If none is provided a default glorot_uniform is used.
      batch_squash: If True the outer_ranks of the observation are squashed
        into the batch dimension. This allow encoding networks to be used with
        observations with shape [BxTx...].
      dtype: The dtype to use by the convolution and fully connected layers.
      discrete_projection_net: Callable that generates a discrete projection
        network to be called with some hidden state and the outer_rank of the
        state.
      continuous_projection_net: Callable that generates a continuous
        projection network to be called with some hidden state and the
        outer_rank of the state.
      name: A string representing name of the network.
      params: Parameter object forwarded to `gnn`. When `None`, a fresh
        `ParameterServer()` is created for this instance.

    Raises:
      ValueError: If `gnn` is `None`.
    """
    if not kernel_initializer:
        kernel_initializer = tf.compat.v1.keras.initializers.glorot_uniform()

    if gnn is None:
        raise ValueError('`gnn` must not be `None`.')

    # A default of `ParameterServer()` in the signature would be evaluated
    # once at definition time and shared by every network built without
    # explicit params; build a per-instance object instead.
    if params is None:
        params = ParameterServer()

    # NOTE(review): `self._gnn` is assigned before the base-class initializer
    # runs. The original ordering is kept; confirm the base class tolerates
    # attribute assignment ahead of `__init__`.
    self._gnn = gnn(name=name + "_GNN", params=params)

    # The encoder consumes the GNN embeddings, so its input width is the GNN's
    # embedding size rather than the raw observation shape.
    encoder = encoding_network.EncodingNetwork(
        input_tensor_spec=tf.TensorSpec([None, self._gnn._embedding_size]),
        preprocessing_layers=preprocessing_layers,
        preprocessing_combiner=preprocessing_combiner,
        conv_layer_params=conv_layer_params,
        fc_layer_params=fc_layer_params,
        dropout_layer_params=dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=kernel_initializer,
        batch_squash=batch_squash,
        dtype=dtype)

    def map_proj(spec):
        # Pick the projection head by whether the action spec is discrete.
        if tensor_spec.is_discrete(spec):
            return discrete_projection_net(spec)
        else:
            return continuous_projection_net(spec)

    projection_networks = tf.nest.map_structure(map_proj, output_tensor_spec)
    output_spec = tf.nest.map_structure(
        lambda proj_net: proj_net.output_spec, projection_networks)

    super(GNNActorDistributionNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec,
        state_spec=(),
        output_spec=output_spec,
        name=name)

    self._encoder = encoder
    self._projection_networks = projection_networks
    self._output_tensor_spec = output_tensor_spec
def __init__(self,
             input_tensor_spec,
             preprocessing_layers=None,
             preprocessing_combiner=None,
             conv_layer_params=None,
             fc_layer_params=(75, 40),
             dropout_layer_params=None,
             activation_fn=tf.keras.activations.relu,
             kernel_initializer=None,
             batch_squash=True,
             dtype=tf.float32,
             name='ValueNetwork'):
    """Creates an instance of `ValueNetwork`.

    Network supports calls with shape outer_rank + observation_spec.shape. Note
    outer_rank must be at least 1.

    Args:
      input_tensor_spec: A `tensor_spec.TensorSpec` or a tuple of specs
        representing the input observations.
      preprocessing_layers: (Optional.) A nest of `tf.keras.layers.Layer`
        representing preprocessing for the different observations. All of
        these layers must not be already built. For more details see the
        documentation of `networks.EncodingNetwork`.
      preprocessing_combiner: (Optional.) A keras layer that takes a flat list
        of tensors and combines them. Good options include
        `tf.keras.layers.Add` and `tf.keras.layers.Concatenate(axis=-1)`. This
        layer must not be already built. For more details see the
        documentation of `networks.EncodingNetwork`.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride).
      fc_layer_params: Optional list of fully_connected parameters, where each
        item is the number of units in the layer.
      dropout_layer_params: Optional list of dropout layer parameters, each
        item is the fraction of input units to drop or a dictionary of
        parameters according to the keras.Dropout documentation. The
        additional parameter `permanent`, if set to True, allows to apply
        dropout at inference for approximated Bayesian inference. The dropout
        layers are interleaved with the fully connected layers; there is a
        dropout layer after each fully connected layer, except if the entry in
        the list is None. This list must have the same length of
        fc_layer_params, or be None.
      activation_fn: Activation function, e.g. tf.keras.activations.relu.
      kernel_initializer: Initializer to use for the kernels of the conv and
        dense layers of the encoder. If none is provided a glorot_uniform
        initializer is used.
      batch_squash: If True the outer_ranks of the observation are squashed
        into the batch dimension. This allow encoding networks to be used with
        observations with shape [BxTx...].
      dtype: The dtype passed to the encoder for its convolution and fully
        connected layers.
      name: A string representing name of the network.
    """
    super(ValueNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec, state_spec=(), name=name)

    # Any falsy initializer falls back to glorot_uniform.
    encoder_initializer = (
        kernel_initializer
        or tf.compat.v1.keras.initializers.glorot_uniform())

    encoder_kwargs = dict(
        preprocessing_layers=preprocessing_layers,
        preprocessing_combiner=preprocessing_combiner,
        conv_layer_params=conv_layer_params,
        fc_layer_params=fc_layer_params,
        dropout_layer_params=dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=encoder_initializer,
        batch_squash=batch_squash,
        dtype=dtype)
    self._encoder = encoding_network.EncodingNetwork(
        input_tensor_spec, **encoder_kwargs)

    # Single-unit linear head producing the scalar value estimate.
    # NOTE(review): unlike the encoder, this layer is not given `dtype`.
    value_head_initializer = tf.random_uniform_initializer(
        minval=-0.03, maxval=0.03)
    self._postprocessing_layers = tf.keras.layers.Dense(
        1, activation=None, kernel_initializer=value_head_initializer)
def __init__(self,
             input_tensor_spec,
             action_spec,
             preprocessing_layers=None,
             preprocessing_combiner=None,
             conv_layer_params=None,
             fc_layer_params=(75, 40),
             dropout_layer_params=None,
             activation_fn=tf.keras.activations.relu,
             kernel_initializer=None,
             batch_squash=True,
             dtype=tf.float32,
             name='QNetwork'):
    """Creates an instance of `QNetwork`.

    Args:
      input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing the
        input observations.
      action_spec: A nest of `tensor_spec.BoundedTensorSpec` representing the
        actions. The number of Q-values is `maximum - minimum + 1` of the
        first flattened spec.
      preprocessing_layers: (Optional.) A nest of `tf.keras.layers.Layer`
        representing preprocessing for the different observations. All of
        these layers must not be already built. For more details see the
        documentation of `networks.EncodingNetwork`.
      preprocessing_combiner: (Optional.) A keras layer that takes a flat list
        of tensors and combines them. Good options include
        `tf.keras.layers.Add` and `tf.keras.layers.Concatenate(axis=-1)`. This
        layer must not be already built. For more details see the
        documentation of `networks.EncodingNetwork`.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride).
      fc_layer_params: Optional list of fully_connected parameters, where each
        item is the number of units in the layer.
      dropout_layer_params: Optional list of dropout layer parameters, where
        each item is the fraction of input units to drop. The dropout layers
        are interleaved with the fully connected layers; there is a dropout
        layer after each fully connected layer, except if the entry in the
        list is None. This list must have the same length of fc_layer_params,
        or be None.
      activation_fn: Activation function, e.g. tf.keras.activations.relu.
      kernel_initializer: Initializer to use for the kernels of the conv and
        dense layers. Forwarded unchanged to `EncodingNetwork`; if none is
        provided, presumably a default variance_scaling_initializer is chosen
        there (confirm in `EncodingNetwork`).
      batch_squash: If True the outer_ranks of the observation are squashed
        into the batch dimension. This allow encoding networks to be used with
        observations with shape [BxTx...].
      dtype: The dtype to use by the convolution and fully connected layers.
      name: A string representing the name of the network.

    Raises:
      ValueError: If `input_tensor_spec` contains more than one observation.
        Or if `action_spec` contains more than one action. (Presumably raised
        by `validate_specs`; confirm there.)
    """
    validate_specs(action_spec, input_tensor_spec)

    single_action_spec = tf.nest.flatten(action_spec)[0]
    # Size of the inclusive integer action range [minimum, maximum].
    q_value_count = single_action_spec.maximum - single_action_spec.minimum + 1

    # Both sub-layers are built before the base initializer runs and are
    # attached to `self` afterwards.
    observation_encoder = encoding_network.EncodingNetwork(
        input_tensor_spec,
        preprocessing_layers=preprocessing_layers,
        preprocessing_combiner=preprocessing_combiner,
        conv_layer_params=conv_layer_params,
        fc_layer_params=fc_layer_params,
        dropout_layer_params=dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=kernel_initializer,
        batch_squash=batch_squash,
        dtype=dtype)

    # Linear head with one output per discrete action.
    q_head = tf.keras.layers.Dense(
        q_value_count,
        activation=None,
        kernel_initializer=tf.random_uniform_initializer(
            minval=-0.03, maxval=0.03),
        bias_initializer=tf.constant_initializer(-0.2),
        dtype=dtype)

    super(QNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec, state_spec=(), name=name)

    self._encoder = observation_encoder
    self._q_value_layer = q_head
def __init__(self,
             input_tensor_spec,
             preprocessing_layers=None,
             preprocessing_combiner=None,
             batch_squash=True,
             observation_conv_layer_params=None,
             observation_fc_layer_params=None,
             observation_dropout_layer_params=None,
             action_fc_layer_params=None,
             action_dropout_layer_params=None,
             joint_fc_layer_params=None,
             joint_dropout_layer_params=None,
             activation_fn=tf.nn.relu,
             output_activation_fn=None,
             kernel_initializer=None,
             last_kernel_initializer=None,
             name='CriticNetwork'):
    """Creates an instance of `PaintingCriticNetwork`.

    Args:
      input_tensor_spec: A tuple of (observation, action) each a nest of
        `tensor_spec.TensorSpec` representing the inputs.
      preprocessing_layers: (Optional.) A nest of `tf.keras.layers.Layer`
        representing preprocessing for the different observations. All of
        these layers must not be already built. For more details see the
        documentation of `networks.EncodingNetwork`.
      preprocessing_combiner: (Optional.) A keras layer that takes a flat list
        of tensors and combines them. Good options include
        `tf.keras.layers.Add` and `tf.keras.layers.Concatenate(axis=-1)`. This
        layer must not be already built. For more details see the
        documentation of `networks.EncodingNetwork`.
      batch_squash: If True the outer_ranks of the observation are squashed
        into the batch dimension. This allow encoding networks to be used with
        observations with shape [BxTx...].
      observation_conv_layer_params: Optional list of convolution layer
        parameters for observations, where each item is a length-three tuple
        indicating (num_units, kernel_size, stride).
      observation_fc_layer_params: Optional list of fully connected parameters
        for observations, where each item is the number of units in the layer.
      observation_dropout_layer_params: Optional list of dropout layer
        parameters, each item is the fraction of input units to drop or a
        dictionary of parameters according to the keras.Dropout documentation.
        The additional parameter `permanent`, if set to True, allows to apply
        dropout at inference for approximated Bayesian inference. The dropout
        layers are interleaved with the fully connected layers; there is a
        dropout layer after each fully connected layer, except if the entry in
        the list is None. This list must have the same length of
        observation_fc_layer_params, or be None.
      action_fc_layer_params: Optional list of fully connected parameters for
        actions, where each item is the number of units in the layer.
      action_dropout_layer_params: Optional list of dropout layer parameters
        for the action layers, with the same format and constraints as
        observation_dropout_layer_params. This list must have the same length
        of action_fc_layer_params, or be None.
      joint_fc_layer_params: Optional list of fully connected parameters after
        merging observations and actions, where each item is the number of
        units in the layer.
      joint_dropout_layer_params: Optional list of dropout layer parameters
        for the joint layers, with the same format and constraints as
        observation_dropout_layer_params. This list must have the same length
        of joint_fc_layer_params, or be None.
      activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
      output_activation_fn: Activation function for the last layer. This can be
        used to restrict the range of the output. For example, one can pass
        tf.keras.activations.sigmoid here to restrict the output to be bounded
        between 0 and 1.
      kernel_initializer: kernel initializer for all layers except for the
        value regression layer. If None, a VarianceScaling initializer will be
        used.
      last_kernel_initializer: kernel initializer for the value regression
        layer. If None, a RandomUniform initializer will be used.
      name: A string representing name of the network.

    Raises:
      ValueError: If `action_spec` contains more than one action.
    """
    super(PaintingCriticNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec, state_spec=(), name=name)

    observation_spec, action_spec = input_tensor_spec

    # Multiple observations are allowed here; only the action side is
    # restricted to a single spec.
    action_specs = tf.nest.flatten(action_spec)
    if len(action_specs) > 1:
        raise ValueError('Only a single action is supported by this network')
    self._single_action_spec = action_specs[0]

    # Resolve the initializers, constructing the defaults only when needed.
    hidden_initializer = (
        tf.compat.v1.keras.initializers.VarianceScaling(
            scale=1. / 3., mode='fan_in', distribution='uniform')
        if kernel_initializer is None else kernel_initializer)
    value_initializer = (
        tf.keras.initializers.RandomUniform(minval=-0.003, maxval=0.003)
        if last_kernel_initializer is None else last_kernel_initializer)

    self._encoder = encoding_network.EncodingNetwork(
        observation_spec,
        preprocessing_layers=preprocessing_layers,
        preprocessing_combiner=preprocessing_combiner,
        conv_layer_params=observation_conv_layer_params,
        fc_layer_params=observation_fc_layer_params,
        dropout_layer_params=observation_dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=hidden_initializer,
        batch_squash=batch_squash,
        name='observation_encoding')

    # Keyword arguments common to the action and joint MLP stacks.
    mlp_kwargs = dict(
        activation_fn=activation_fn, kernel_initializer=hidden_initializer)

    self._action_layers = utils.mlp_layers(
        None,
        action_fc_layer_params,
        action_dropout_layer_params,
        name='action_encoding',
        **mlp_kwargs)

    self._joint_layers = utils.mlp_layers(
        None,
        joint_fc_layer_params,
        joint_dropout_layer_params,
        name='joint_mlp',
        **mlp_kwargs)

    # Final single-unit layer that regresses the critic value.
    value_layer = tf.keras.layers.Dense(
        1,
        activation=output_activation_fn,
        kernel_initializer=value_initializer,
        name='value')
    self._joint_layers.append(value_layer)