def testCreateAndCall(self):
    """Builds a NestMap-based Sequential net; checks state spec, output spec and shapes."""
    net = sequential.Sequential([
        nest_map.NestMap(
            {'inp1': tf.keras.layers.Dense(8),
             'inp2': sequential.Sequential([
                 tf.keras.layers.Conv2D(2, 3),
                 # Convert 3 inner dimensions to [8] for RNN.
                 inner_reshape.InnerReshape([None] * 3, [8]),
             ]),
             'inp3': tf.keras.layers.LSTM(
                 8, return_state=True, return_sequences=True)}),
        nest_map.NestFlatten(),
        tf.keras.layers.Add()])
    # Only the LSTM branch carries state: two tensors (hidden and cell), each [8].
    self.assertEqual(
        net.state_spec,
        ({
            'inp1': (),
            'inp2': (),
            'inp3': (2 * [tf.TensorSpec(shape=(8,), dtype=tf.float32)],),
        },))
    output_spec = net.create_variables(
        {
            'inp1': tf.TensorSpec(shape=(3,), dtype=tf.float32),
            'inp2': tf.TensorSpec(shape=(4, 4, 2,), dtype=tf.float32),
            'inp3': tf.TensorSpec(shape=(3,), dtype=tf.float32),
        })
    self.assertEqual(output_spec,
                     tf.TensorSpec(shape=(8,), dtype=tf.float32))
    # Batch of 8 with a time dimension of 10.
    inputs = {
        'inp1': tf.ones((8, 10, 3), dtype=tf.float32),
        'inp2': tf.ones((8, 10, 4, 4, 2), dtype=tf.float32),
        'inp3': tf.ones((8, 10, 3), dtype=tf.float32)
    }
    output, next_state = net(inputs)
    self.assertEqual(output.shape, tf.TensorShape([8, 10, 8]))
    self.assertEqual(
        tf.nest.map_structure(lambda t: t.shape, next_state),
        ({
            'inp1': (),
            'inp2': (),
            'inp3': (2 * [tf.TensorShape([8, 8])],),
        },))
    # Test passing in a state.
    output, next_state = net(inputs, next_state)
    self.assertEqual(output.shape, tf.TensorShape([8, 10, 8]))
    self.assertEqual(
        tf.nest.map_structure(lambda t: t.shape, next_state),
        ({
            'inp1': (),
            'inp2': (),
            'inp3': (2 * [tf.TensorShape([8, 8])],),
        },))
def create_sequential_critic_network(obs_fc_layer_units, action_fc_layer_units,
                                     joint_fc_layer_units):
    """Create a sequential critic network."""

    def split_inputs(inputs):
        # Route the (observation, action) tuple into a named dict for NestMap.
        return {'observation': inputs[0], 'action': inputs[1]}

    def fc_or_identity(layer_units):
        # An empty/None layer spec degrades to a pass-through layer.
        return (create_fc_network(layer_units) if layer_units
                else create_identity_layer())

    obs_network = fc_or_identity(obs_fc_layer_units)
    action_network = fc_or_identity(action_fc_layer_units)
    joint_network = fc_or_identity(joint_fc_layer_units)

    # Final scalar value head.
    value_layer = tf.keras.layers.Dense(1, kernel_initializer='glorot_uniform')

    return sequential.Sequential([
        tf.keras.layers.Lambda(split_inputs),
        nest_map.NestMap({
            'observation': obs_network,
            'action': action_network
        }),
        nest_map.NestFlatten(),
        tf.keras.layers.Concatenate(),
        joint_network,
        value_layer,
        inner_reshape.InnerReshape(current_shape=[1], new_shape=[])
    ], name='sequential_critic')
def create_sequential_actor_net(self, fc_layer_units, action_tensor_spec,
                                seed=None):
    """Helper method for creating the actor network."""
    # The seed stream is consumed in construction order below; each _get_seed()
    # call advances it, so statement order determines layer initialization.
    self._seed_stream = self.seed_stream_class(
        seed=seed, salt='tf_agents_sequential_layers')

    def _get_seed():
        # Returns the next seed from the stream, folded into sys.maxsize.
        seed = self._seed_stream()
        if seed is not None:
            seed = seed % sys.maxsize
        return seed

    def create_dist(loc_and_scale):
        # Squash the mean into the action spec's range and soften the scale.
        loc = loc_and_scale['loc']
        loc = tanh_and_scale_to_spec(loc, action_tensor_spec)
        scale = loc_and_scale['scale']
        scale = tf.math.softplus(scale)
        return tfp.distributions.MultivariateNormalDiag(
            loc=loc, scale_diag=scale, validate_args=True)

    def means_layers():
        # TODO(b/179510447): align these parameters with Schulman 17.
        return tf.keras.layers.Dense(
            action_tensor_spec.shape.num_elements(),
            kernel_initializer=tf.keras.initializers.VarianceScaling(
                scale=0.1, seed=_get_seed()),
            name='means_projection_layer')

    def std_layers():
        # TODO(b/179510447): align these parameters with Schulman 17.
        # softplus(std_bias_initializer_value) == 0.35, the initial stddev.
        std_bias_initializer_value = np.log(np.exp(0.35) - 1)
        return bias_layer.BiasLayer(
            bias_initializer=tf.constant_initializer(
                value=std_bias_initializer_value))

    def no_op_layers():
        return tf.keras.layers.Lambda(lambda x: x)

    dense = functools.partial(
        tf.keras.layers.Dense,
        activation=tf.nn.tanh,
        kernel_initializer=tf.keras.initializers.Orthogonal(
            seed=_get_seed()))

    return sequential.Sequential(
        [dense(num_units) for num_units in fc_layer_units] +
        [means_layers()] +
        # The scale branch starts at zeros; std_layers adds a learned bias.
        [tf.keras.layers.Lambda(
            lambda x: {'loc': x, 'scale': tf.zeros_like(x)})] +
        [nest_map.NestMap({
            'loc': no_op_layers(),
            'scale': std_layers(),
        })] +
        # Create the output distribution from the mean and standard deviation.
        [tf.keras.layers.Lambda(create_dist)])
def create_q_network(num_actions):
    """Create a Q network following the architecture from Mnih 15 (Nature DQN)."""
    kernel_initializer = tf.compat.v1.variance_scaling_initializer(scale=2.0)
    conv2d = functools.partial(
        tf.keras.layers.Conv2D,
        activation=tf.keras.activations.relu,
        kernel_initializer=kernel_initializer)
    dense = functools.partial(
        tf.keras.layers.Dense,
        activation=tf.keras.activations.relu,
        kernel_initializer=kernel_initializer)
    # Final layer: raw Q-values, no activation.
    logits = functools.partial(
        tf.keras.layers.Dense,
        activation=None,
        kernel_initializer=kernel_initializer)
    return sequential.Sequential(
        # We divide the grayscale pixel values by 255 here rather than storing
        # normalized values because uint8s are 4x cheaper to store than float32s.
        [
            tf.keras.layers.Lambda(lambda x: x / 255),
            conv2d(32, (8, 8), 4),
            conv2d(64, (4, 4), 2),
            conv2d(64, (3, 3), 1),
            tf.keras.layers.Flatten(),
            dense(512),
            logits(num_actions)
        ])
def testSequentialNetwork(self):
    """Checks a Sequential built from the projection layer matches the network.

    Builds the same squashed MultivariateNormalDiag distribution two ways --
    via TanhNormalProjectionNetwork and via an equivalent Sequential reusing
    the same projection layer -- and verifies both agree.
    """
    output_spec = tensor_spec.BoundedTensorSpec([2], tf.float32, 0, 1)
    network = tanh_normal_projection_network.TanhNormalProjectionNetwork(
        output_spec)
    inputs = tf.random.stateless_uniform(shape=[3, 5], seed=[0, 0])
    output, _ = network(inputs, outer_rank=1)

    # Create a squashed distribution.
    def create_dist(loc_and_scale):
        # First ndims entries are the mean; the rest are log-scales.
        ndims = output_spec.shape.num_elements()
        loc = loc_and_scale[..., :ndims]
        scale = tf.exp(loc_and_scale[..., ndims:])
        distribution = tfp.distributions.MultivariateNormalDiag(
            loc=loc,
            scale_diag=scale,
            validate_args=True,
        )
        return distribution_utils.scale_distribution_to_spec(
            distribution, output_spec)

    # Create a sequential network that shares the projection layer's weights.
    sequential_network = sequential.Sequential(
        [network._projection_layer] + [tf.keras.layers.Lambda(create_dist)])
    sequential_output, _ = sequential_network(inputs)

    # Check that mode and standard deviation are the same.
    self.evaluate(tf.compat.v1.global_variables_initializer())
    self.assertAllClose(self.evaluate(output.mode()),
                        self.evaluate(sequential_output.mode()))
    # Bug fix: the original compared output.stddev() against itself, making
    # this assertion vacuous; compare against the sequential network instead.
    self.assertAllClose(self.evaluate(output.stddev()),
                        self.evaluate(sequential_output.stddev()))
def testLearnerRaiseExceptionOnMismatchingBatchSetup(self):
    """Learner slot-variable init raises RuntimeError on a mismatched outer rank."""
    obs_spec = tensor_spec.TensorSpec([2], tf.float32)
    time_step_spec = ts.time_step_spec(obs_spec)
    action_spec = tensor_spec.BoundedTensorSpec([], tf.int32, 0, 1)
    flat_action_spec = tf.nest.flatten(action_spec)[0]
    # Number of discrete actions implied by the bounded spec.
    num_actions = flat_action_spec.maximum - flat_action_spec.minimum + 1

    network = sequential.Sequential([
        tf.keras.layers.Dense(num_actions, dtype=tf.float32),
        inner_reshape.InnerReshape([None], [num_actions])
    ])

    agent = behavioral_cloning_agent.BehavioralCloningAgent(
        time_step_spec, action_spec, cloning_network=network, optimizer=None)

    # The Learner's optimizer-variable initialization is expected to fail with
    # this exact message for an agent whose experience rank does not match.
    with self.assertRaisesRegex(
        RuntimeError,
        (r'The slot variable initialization failed. The learner assumes all '
         r'experience tensors required an `outer_rank = \(None, '
         r'agent.train_sequence_length\)`\. If that\'s not the case for your '
         r'agent try setting `run_optimizer_variable_init=False`\.')):
      learner.Learner(
          root_dir=os.path.join(self.create_tempdir().full_path, 'learner'),
          train_step=train_utils.create_train_step(),
          agent=agent)
def create_sequential_critic_net():
    """Builds a critic that sums per-modality value branches with an action branch."""
    # Per-modality value branches; the pre-layers come from the enclosing scope.
    value_layer_dict = {
        "patch": patch_pre_layer,
        "color": color_pre_layer,
        "motion": motion_pre_layer
    }
    action_layer = tf.keras.layers.Dense(81)

    def sum_value_and_action_out(value_and_action_out):
        # NestMap emits a (dict-of-value-tensors, action-tensor) pair.
        value_out_dict, action_out = value_and_action_out
        value_out = tf.concat(tf.nest.flatten(value_out_dict), axis=-1)
        return tf.reshape(value_out + action_out, [1, -1])

    return sequential.Sequential([
        nest_map.NestMap((value_layer_dict, action_layer)),
        tf.keras.layers.Lambda(sum_value_and_action_out),
        tf.keras.layers.Dense(1)
    ])
def testLearnerRaiseExceptionOnMismatchingBatchSetup(self):
    """Learner construction raises ValueError for tensors lacking one outer dim."""
    obs_spec = tensor_spec.TensorSpec([2], tf.float32)
    time_step_spec = ts.time_step_spec(obs_spec)
    action_spec = tensor_spec.BoundedTensorSpec([], tf.int32, 0, 1)
    flat_action_spec = tf.nest.flatten(action_spec)[0]
    # Number of discrete actions implied by the bounded spec.
    num_actions = flat_action_spec.maximum - flat_action_spec.minimum + 1

    network = sequential.Sequential([
        tf.keras.layers.Dense(num_actions, dtype=tf.float32),
        inner_reshape.InnerReshape([None], [num_actions])
    ])

    agent = behavioral_cloning_agent.BehavioralCloningAgent(
        time_step_spec, action_spec, cloning_network=network, optimizer=None)

    with self.assertRaisesRegex(
        ValueError,
        'All of the Tensors in `value` must have one outer dimension.'
    ):
      learner.Learner(root_dir=os.path.join(
          self.create_tempdir().full_path, 'learner'),
                      train_step=train_utils.create_train_step(),
                      agent=agent)
def create_critic_network(obs_fc_layer_units, action_fc_layer_units,
                          joint_fc_layer_units):
    """Create a critic network for DDPG."""

    def split_inputs(inputs):
        # NestMap below expects a dict keyed by branch name.
        return {'observation': inputs[0], 'action': inputs[1]}

    def branch(layer_units):
        # Empty/None layer specs degrade to an identity pass-through.
        if layer_units:
            return create_fc_network(layer_units)
        return create_identity_layer()

    obs_network = branch(obs_fc_layer_units)
    action_network = branch(action_fc_layer_units)
    joint_network = branch(joint_fc_layer_units)

    # Scalar value head.
    value_fc_layer = tf.keras.layers.Dense(
        1,
        activation=None,
        kernel_initializer=tf.keras.initializers.RandomUniform(
            minval=-0.003, maxval=0.003))

    return sequential.Sequential([
        tf.keras.layers.Lambda(split_inputs),
        nest_map.NestMap({
            'observation': obs_network,
            'action': action_network
        }),
        nest_map.NestFlatten(),
        tf.keras.layers.Concatenate(),
        joint_network,
        value_fc_layer,
        inner_reshape.InnerReshape([1], [])
    ])
def testMixOfNonRecurrentAndRecurrent(self):
    """Stacks feed-forward, RNN and unroll layers; checks specs and output shape."""
    sequential = sequential_lib.Sequential(
        [
            tf.keras.layers.Dense(2),
            tf.keras.layers.LSTM(
                2, return_state=True, return_sequences=True),
            tf.keras.layers.RNN(
                tf.keras.layers.StackedRNNCells([
                    tf.keras.layers.LSTMCell(1),
                    tf.keras.layers.LSTMCell(32),
                ],),
                return_state=True,
                return_sequences=True,
            ),
            # Convert inner dimension to [4, 4, 2] for convolution.
            inner_reshape.InnerReshape([32], [4, 4, 2]),
            tf.keras.layers.Conv2D(2, 3),
            # Convert 3 inner dimensions to [?] for RNN.
            inner_reshape.InnerReshape([None] * 3, [-1]),
            tf.keras.layers.GRU(
                2, return_state=True, return_sequences=True),
            dynamic_unroll_layer.DynamicUnroll(
                tf.keras.layers.LSTMCell(2)),
        ],
        input_spec=tf.TensorSpec((3,), tf.float32))
    self.assertEqual(sequential.input_tensor_spec,
                     tf.TensorSpec((3,), tf.float32))

    output_spec = sequential.create_variables()
    self.assertEqual(output_spec, tf.TensorSpec((2,), dtype=tf.float32))

    # State spec mirrors the stateful layers in order; note the stacked RNN
    # state is a tuple while the GRU state is a bare tensor here.
    tf.nest.map_structure(
        self.assertEqual,
        sequential.state_spec,
        (
            [  # LSTM
                tf.TensorSpec((2,), tf.float32),
                tf.TensorSpec((2,), tf.float32),
            ],
            (  # RNN(StackedRNNCells)
                [
                    tf.TensorSpec((1,), tf.float32),
                    tf.TensorSpec((1,), tf.float32),
                ],
                [
                    tf.TensorSpec((32,), tf.float32),
                    tf.TensorSpec((32,), tf.float32),
                ],
            ),
            # GRU
            tf.TensorSpec((2,), tf.float32),
            [  # DynamicUnroll
                tf.TensorSpec((2,), tf.float32),
                tf.TensorSpec((2,), tf.float32),
            ]))

    inputs = tf.ones((8, 10, 3), dtype=tf.float32)
    outputs, _ = sequential(inputs)
    self.assertEqual(outputs.shape, tf.TensorShape([8, 10, 2]))
def testAllZeroLengthStateSpecsShowAsEmptyState(self):
    """A Sequential of only stateless layers exposes an empty state spec."""
    stateless_map = nest_map.NestMap({
        'a': tf.keras.layers.Dense(2),
        'b': tf.keras.layers.Dense(3),
    })
    net = sequential_lib.Sequential([stateless_map])
    self.assertEqual(net.state_spec, ())
def _dense_net(self, structure):
    """Dense-layered sequential network."""
    num_actions = self._nb_actions()
    # `structure` is a sequence of (units, kwargs) pairs for the hidden layers.
    hidden = [tf.keras.layers.Dense(size, **keys) for size, keys in structure]
    output_layer = tf.keras.layers.Dense(num_actions, activation=None)
    return sequential.Sequential(hidden + [output_layer])
def testTrainableVariablesNestedNetwork(self):
    """Nested Sequential exposes inner variables and honors the trainable flag."""
    inner_net = sequential_lib.Sequential(
        [tf.keras.layers.Dense(3), tf.keras.layers.Dense(4)])
    outer_net = sequential_lib.Sequential(
        [tf.keras.layers.Dense(3), inner_net])
    outer_net.create_variables(tf.TensorSpec(shape=(3, 2)))
    self.evaluate(tf.compat.v1.global_variables_initializer())

    # Three Dense layers in total, each contributing a kernel and a bias.
    trainable = self.evaluate(outer_net.trainable_variables)
    self.assertLen(trainable, 6)
    self.assertLen(outer_net.variables, 6)
    self.assertLen(inner_net.variables, 4)
    self.assertTrue(outer_net.trainable)

    # Freezing hides variables from trainable_variables but not variables.
    outer_net.trainable = False
    self.assertFalse(outer_net.trainable)
    self.assertEmpty(outer_net.trainable_variables)
    self.assertLen(outer_net.variables, 6)
def create_recurrent_network(input_fc_layer_units, lstm_size,
                             output_fc_layer_units, num_actions):
    """Builds FC -> stacked-LSTM unroll -> FC -> logits as a Sequential."""
    stacked_cells = tf.keras.layers.StackedRNNCells(
        [fused_lstm_cell(s) for s in lstm_size])
    layers = [dense(num_units) for num_units in input_fc_layer_units]
    layers.append(dynamic_unroll_layer.DynamicUnroll(stacked_cells))
    layers.extend(dense(num_units) for num_units in output_fc_layer_units)
    layers.append(logits(num_actions))
    return sequential.Sequential(layers)
def testMixOfNonRecurrentAndRecurrent(self):
    """Stacks feed-forward and recurrent layers; checks specs and output shape."""
    sequential = sequential_lib.Sequential([
        tf.keras.layers.Dense(2),
        tf.keras.layers.LSTM(2, return_state=True, return_sequences=True),
        tf.keras.layers.RNN(
            tf.keras.layers.StackedRNNCells([
                tf.keras.layers.LSTMCell(1),
                tf.keras.layers.LSTMCell(32),
            ],),
            return_state=True,
            return_sequences=True,
        ),
        # Reshape the 32-wide inner dimension to a [4, 4, 2] image for conv.
        tf.keras.layers.Reshape((-1, 4, 4, 2)),
        tf.keras.layers.Conv2D(2, 3),
        tf.keras.layers.TimeDistributed(tf.keras.layers.Flatten()),
        tf.keras.layers.GRU(2, return_state=True, return_sequences=True),
        dynamic_unroll_layer.DynamicUnroll(tf.keras.layers.LSTMCell(2)),
    ],
                                           input_spec=tf.TensorSpec(
                                               (3,), tf.float32))
    self.assertEqual(sequential.input_tensor_spec,
                     tf.TensorSpec((3,), tf.float32))

    output_spec = sequential.create_variables()
    self.assertEqual(output_spec, tf.TensorSpec((2,), dtype=tf.float32))

    # State spec mirrors the stateful layers in order.
    tf.nest.map_structure(
        self.assertEqual,
        sequential.state_spec,
        (
            [  # LSTM
                tf.TensorSpec((2,), tf.float32),
                tf.TensorSpec((2,), tf.float32),
            ],
            [  # RNN(StackedRNNCells)
                [
                    tf.TensorSpec((1,), tf.float32),
                    tf.TensorSpec((1,), tf.float32),
                ],
                [
                    tf.TensorSpec((32,), tf.float32),
                    tf.TensorSpec((32,), tf.float32),
                ],
            ],
            [  # GRU
                tf.TensorSpec((2,), tf.float32),
            ],
            [  # DynamicUnroll
                tf.TensorSpec((2,), tf.float32),
                tf.TensorSpec((2,), tf.float32),
            ]))

    inputs = tf.ones((8, 10, 3), dtype=tf.float32)
    outputs, _ = sequential(inputs)
    self.assertEqual(outputs.shape, tf.TensorShape([8, 10, 2]))
def testCopy(self):
    """Cloning via get_config/from_config preserves layer configuration."""
    original = sequential_lib.Sequential([
        tf.keras.layers.Dense(3),
        tf.keras.layers.Dense(4, use_bias=False)
    ])
    clone = type(original).from_config(original.get_config())
    self.assertLen(clone.layers, 2)
    for src_layer, cloned_layer in zip(original.layers, clone.layers):
        self.assertEqual(src_layer.dtype, cloned_layer.dtype)
        self.assertEqual(src_layer.units, cloned_layer.units)
        self.assertEqual(src_layer.use_bias, cloned_layer.use_bias)
def q_lstm_network(num_actions):
    """Create the RNN based on layer parameters."""
    recurrent_layer = tf.keras.layers.LSTM(
        20,
        implementation=KERAS_LSTM_FUSED,
        return_state=True,
        return_sequences=True)
    return sequential.Sequential(
        [dense(50), recurrent_layer, dense(20), logits(num_actions)])
def get_dummy_net(action_spec):
    """Builds a one-layer Sequential with fixed weights sized to the action spec."""
    flat_spec = tf.nest.flatten(action_spec)[0]
    num_actions = flat_spec.maximum - flat_spec.minimum + 1
    # Constant initializers make the network's output deterministic for tests.
    fixed_dense = tf.keras.layers.Dense(
        num_actions,
        kernel_initializer=tf.compat.v1.initializers.constant([[2, 1],
                                                               [1, 1]]),
        bias_initializer=tf.compat.v1.initializers.constant([[1], [1]]),
        dtype=tf.float32)
    return sequential.Sequential([fixed_dense])
def testBuild(self):
    """Network output matches a hand-computed Dense + ReLU forward pass."""
    net = sequential_lib.Sequential(
        [tf.keras.layers.Dense(4, use_bias=False), tf.keras.layers.ReLU()])
    inputs = np.ones((2, 3))
    outputs, _ = net(inputs)
    self.evaluate(tf.compat.v1.global_variables_initializer())
    outputs = self.evaluate(outputs)
    kernel = self.evaluate(net.layers[0].weights[0])
    # ReLU(inputs @ kernel), computed with numpy.
    expected = np.dot(inputs, kernel)
    expected[expected < 0] = 0
    self.assertAllClose(expected, outputs)
def __init__(self, input_tensor_spec, output_tensor_spec):
    """Wraps a small Dense MLP in a Sequential and registers its state spec."""
    num_actions = output_tensor_spec.shape.num_elements()
    mlp_layers = [
        tf.keras.layers.Dense(50),
        tf.keras.layers.Dense(10),
        tf.keras.layers.Dense(num_actions),
    ]
    self._sequential = sequential_lib.Sequential(
        mlp_layers, input_spec=input_tensor_spec)  # pytype: disable=wrong-arg-types
    super(ActorNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec,
        state_spec=self._sequential.state_spec,
        name='TestActorNetwork')
def create_sequential_actor_network(actor_fc_layers, action_tensor_spec):
    """Create a sequential actor network."""

    def tile_as_nest(non_nested_output):
        # Broadcast the single tensor across the action spec's nest structure.
        return tf.nest.map_structure(lambda _: non_nested_output,
                                     action_tensor_spec)

    projection_nest = tf.nest.map_structure(_TanhNormalProjectionNetworkWrapper,
                                            action_tensor_spec)
    layers = [dense(num_units) for num_units in actor_fc_layers]
    layers.append(tf.keras.layers.Lambda(tile_as_nest))
    layers.append(nest_map.NestMap(projection_nest))
    return sequential.Sequential(layers)
def testLossRNNSmokeTest(self, agent_class):
    """Smoke-tests the RNN agent loss against a pinned constant."""
    # Constant initializers make the LSTM's output (and hence the loss)
    # deterministic.
    q_net = sequential.Sequential([
        tf.keras.layers.LSTM(
            2,
            return_state=True,
            return_sequences=True,
            kernel_initializer=tf.constant_initializer(0.5),
            recurrent_initializer=tf.constant_initializer(0.5)),
    ])
    agent = agent_class(
        self._time_step_spec,
        self._action_spec,
        q_network=q_net,
        gamma=0.95,
        optimizer=None)

    observations = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
    time_steps = ts.restart(observations, batch_size=2)

    rewards = tf.constant([10, 20], dtype=tf.float32)
    discounts = tf.constant([0.7, 0.8], dtype=tf.float32)
    next_observations = tf.constant([[5, 6], [7, 8]], dtype=tf.float32)
    next_time_steps = ts.transition(next_observations, rewards, discounts)
    third_observations = tf.constant([[9, 10], [11, 12]], dtype=tf.float32)
    third_time_steps = ts.transition(third_observations, rewards, discounts)

    actions = tf.constant([0, 1], dtype=tf.int32)
    action_steps = policy_step.PolicyStep(actions)

    experience1 = trajectory.from_transition(time_steps, action_steps,
                                             next_time_steps)
    experience2 = trajectory.from_transition(next_time_steps, action_steps,
                                             third_time_steps)
    experience3 = trajectory.from_transition(third_time_steps, action_steps,
                                             third_time_steps)

    # Stack the three transitions into a trajectory with time dimension 3.
    experience = tf.nest.map_structure(
        lambda x, y, z: tf.stack([x, y, z], axis=1), experience1, experience2,
        experience3)

    loss, _ = agent._loss(experience)

    self.evaluate(tf.compat.v1.global_variables_initializer())
    # Smoke test, here to make sure the calculation does not change as we
    # modify preprocessing or other internals.
    expected_loss = 28.722265
    self.assertAllClose(self.evaluate(loss), expected_loss)
def testCall(self):
    """Calling with an input_spec returns an empty state and correct output."""
    net = sequential_lib.Sequential(
        [tf.keras.layers.Dense(4, use_bias=False), tf.keras.layers.ReLU()],
        input_spec=tf.TensorSpec((3,), tf.float32))  # pytype: disable=wrong-arg-types
    inputs = np.ones((2, 3))
    outputs, state = net(inputs)
    # Stateless layers yield an empty network state.
    self.assertEqual(state, ())
    self.evaluate(tf.compat.v1.global_variables_initializer())
    outputs = self.evaluate(outputs)
    kernel = self.evaluate(net.layers[0].weights[0])
    # ReLU(inputs @ kernel), computed with numpy.
    expected = np.dot(inputs, kernel)
    expected[expected < 0] = 0
    self.assertAllClose(expected, outputs)
def create_sequential_actor_net(fc_layer_units, action_tensor_spec):
    """Helper function for creating the actor network."""

    def create_dist(loc_and_scale):
        # First ndims entries are the mean; the rest become the per-dimension
        # standard deviation via softplus.
        ndims = action_tensor_spec.shape.num_elements()
        return tfp.distributions.MultivariateNormalDiag(
            loc=loc_and_scale[..., :ndims],
            scale_diag=tf.math.softplus(loc_and_scale[..., ndims:]),
            validate_args=True)

    def means_layers():
        # TODO(b/179510447): align these parameters with Schulman 17.
        return tf.keras.layers.Dense(
            action_tensor_spec.shape.num_elements(),
            kernel_initializer=tf.keras.initializers.VarianceScaling(
                scale=0.1),
            name='means_projection_layer')

    def std_layers():
        # TODO(b/179510447): align these parameters with Schulman 17.
        # softplus(std_bias_initializer_value) == 0.35, the initial stddev.
        std_kernel_initializer_scale = 0.1
        std_bias_initializer_value = np.log(np.exp(0.35) - 1)
        return tf.keras.layers.Dense(
            action_tensor_spec.shape.num_elements(),
            kernel_initializer=tf.keras.initializers.VarianceScaling(
                scale=std_kernel_initializer_scale),
            bias_initializer=tf.keras.initializers.Constant(
                value=std_bias_initializer_value))

    dense = functools.partial(
        tf.keras.layers.Dense,
        activation=tf.nn.tanh,
        kernel_initializer=tf.keras.initializers.Orthogonal())

    return sequential.Sequential(
        [dense(num_units) for num_units in fc_layer_units] +
        # Duplicate the final hidden activations into both branches; the
        # NestMap below applies a separate head to each.
        [tf.keras.layers.Lambda(lambda x: {
            'loc': x,
            'scale': x
        })] +
        [nest_map.NestMap({
            'loc': means_layers(),
            'scale': std_layers()
        })] +
        [nest_map.NestFlatten()] +
        # Concatenate the mean and standard deviation output to feed into the
        # distribution layer.
        [tf.keras.layers.Concatenate(axis=-1)] +
        # Create the output distribution from the mean and standard deviation.
        [tf.keras.layers.Lambda(create_dist)])
def create_sequential_critic_net(l2_regularization_weight=0.0,
                                 shared_layer=None):
    """Critic that sums a value branch and an action branch via a NestMap."""
    # Action branch: single unit with fixed constant initialization.
    action_layer = tf.keras.layers.Dense(
        1,
        kernel_regularizer=tf.keras.regularizers.l2(l2_regularization_weight),
        kernel_initializer=tf.initializers.constant([[1]]),
        bias_initializer=tf.initializers.constant([[0]]))

    # Value branch, optionally chained with an externally shared layer.
    value_layer = tf.keras.layers.Dense(
        1,
        kernel_regularizer=tf.keras.regularizers.l2(l2_regularization_weight),
        kernel_initializer=tf.initializers.constant([[0], [1]]),
        bias_initializer=tf.initializers.constant([[0]]))
    if shared_layer:
        value_layer = sequential.Sequential([value_layer, shared_layer])

    def sum_value_and_action_out(value_and_action_out):
        # The NestMap above emits a (value, action) pair of tensors.
        value_out, action_out = value_and_action_out
        return tf.reshape(value_out + action_out, [-1])

    return sequential.Sequential([
        nest_map.NestMap((value_layer, action_layer)),
        tf.keras.layers.Lambda(sum_value_and_action_out)
    ])
def testTrainableVariablesWithNonTrainableLayer(self):
    """Variables of a frozen layer are excluded from trainable_variables."""
    frozen_layer = tf.keras.layers.Dense(4)
    frozen_layer.trainable = False
    net = sequential_lib.Sequential(
        [tf.keras.layers.Dense(3), frozen_layer])
    net.create_variables(tf.TensorSpec(shape=(3, 2)))
    self.evaluate(tf.compat.v1.global_variables_initializer())

    # Only the first Dense layer's kernel and bias are trainable.
    trainable = self.evaluate(net.trainable_variables)
    self.assertLen(trainable, 2)
    self.assertLen(net.variables, 4)
    self.assertTrue(net.trainable)

    # Freezing the whole network empties trainable_variables entirely.
    net.trainable = False
    self.assertFalse(net.trainable)
    self.assertEmpty(net.trainable_variables)
    self.assertLen(net.variables, 4)
def _conv_net(self, structure):
    """Conv2D sequential network.

    NOTE(review): `structure` is accepted for interface parity but is not
    consulted; the conv stack below is fixed.
    """
    num_actions = self._nb_actions()
    return sequential.Sequential([
        # Normalize uint8 pixels into [0, 1].
        tf.keras.layers.Lambda(lambda x: x / 255),
        tf.keras.layers.Conv2D(32, (8, 8), strides=(4, 4), activation="relu"),
        tf.keras.layers.Conv2D(64, (4, 4), strides=(2, 2), activation="relu"),
        tf.keras.layers.Conv2D(64, (3, 3), activation="relu"),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(256, activation="relu"),
        tf.keras.layers.Dense(num_actions, activation="linear"),
    ])
def build_dummy_sequential_net(fc_layer_params, action_spec):
    """Build a dummy sequential network."""
    num_actions = action_spec.maximum - action_spec.minimum + 1

    # Final layer: raw action logits with a small uniform init.
    logits = functools.partial(
        tf.keras.layers.Dense,
        activation=None,
        kernel_initializer=tf.compat.v1.initializers.random_uniform(
            minval=-0.03, maxval=0.03),
        bias_initializer=tf.compat.v1.initializers.constant(-0.2))

    # Hidden layers: ReLU with fan-in variance scaling.
    dense = functools.partial(
        tf.keras.layers.Dense,
        activation=tf.keras.activations.relu,
        kernel_initializer=tf.compat.v1.variance_scaling_initializer(
            scale=2.0, mode='fan_in', distribution='truncated_normal'))

    hidden = [dense(num_units) for num_units in fc_layer_params]
    return sequential.Sequential(hidden + [logits(num_actions)])
def create_sequential_actor_net():
    """Actor emitting a dict with one Tanh-squashed Normal action distribution."""

    def create_dist(loc_and_scale):
        # Bring my_action into [2.0, 3.0]:
        # (-inf, inf) -> (-1, 1) -> (-0.5, 0.5) -> (2, 3)
        base = tfd.Normal(
            loc=loc_and_scale[..., 0],
            scale=tf.math.softplus(loc_and_scale[..., 1]),
            validate_args=True)
        squash = tfp.bijectors.Chain([
            tfp.bijectors.Shift(2.5),
            tfp.bijectors.Scale(0.5),
            tfp.bijectors.Tanh()
        ])
        return {'my_action': squash(base)}

    return sequential.Sequential([
        tf.keras.layers.Dense(4),
        tf.keras.layers.Dense(2),
        tf.keras.layers.Lambda(create_dist)
    ])
def create_actor_network(fc_layer_units, action_spec):
    """Create an actor network for DDPG."""
    flat_specs = tf.nest.flatten(action_spec)
    if len(flat_specs) > 1:
        raise ValueError(
            'Only a single action tensor is supported by this network')
    single_spec = flat_specs[0]

    hidden_layers = [dense(num_units) for num_units in fc_layer_units]
    num_actions = single_spec.shape.num_elements()
    # Tanh projection with a small uniform init, then rescale to the spec.
    projection_layer = tf.keras.layers.Dense(
        num_actions,
        activation=tf.keras.activations.tanh,
        kernel_initializer=tf.keras.initializers.RandomUniform(
            minval=-0.003, maxval=0.003))
    scaling_layer = tf.keras.layers.Lambda(
        lambda x: common.scale_to_spec(x, single_spec))

    return sequential.Sequential(hidden_layers +
                                 [projection_layer, scaling_layer])