Example #1
  def testCreateAndCall(self):
    net = sequential.Sequential([
        nest_map.NestMap(
            {'inp1': tf.keras.layers.Dense(8),
             'inp2': sequential.Sequential([
                 tf.keras.layers.Conv2D(2, 3),
                 # Convert 3 inner dimensions to [8] for RNN.
                 inner_reshape.InnerReshape([None] * 3, [8]),
             ]),
             'inp3': tf.keras.layers.LSTM(
                 8, return_state=True, return_sequences=True)}),
        nest_map.NestFlatten(),
        tf.keras.layers.Add()])
    self.assertEqual(
        net.state_spec,
        ({
            'inp1': (),
            'inp2': (),
            'inp3': (2 * [tf.TensorSpec(shape=(8,), dtype=tf.float32)],),
        },))
    output_spec = net.create_variables(
        {
            'inp1': tf.TensorSpec(shape=(3,), dtype=tf.float32),
            'inp2': tf.TensorSpec(shape=(4, 4, 2,), dtype=tf.float32),
            'inp3': tf.TensorSpec(shape=(3,), dtype=tf.float32),
        })
    self.assertEqual(output_spec, tf.TensorSpec(shape=(8,), dtype=tf.float32))

    inputs = {
        'inp1': tf.ones((8, 10, 3), dtype=tf.float32),
        'inp2': tf.ones((8, 10, 4, 4, 2), dtype=tf.float32),
        'inp3': tf.ones((8, 10, 3), dtype=tf.float32)
    }
    output, next_state = net(inputs)
    self.assertEqual(output.shape, tf.TensorShape([8, 10, 8]))
    self.assertEqual(
        tf.nest.map_structure(lambda t: t.shape, next_state),
        ({
            'inp1': (),
            'inp2': (),
            'inp3': (2 * [tf.TensorShape([8, 8])],),
        },))

    # Test passing in a state.
    output, next_state = net(inputs, next_state)
    self.assertEqual(output.shape, tf.TensorShape([8, 10, 8]))
    self.assertEqual(
        tf.nest.map_structure(lambda t: t.shape, next_state),
        ({
            'inp1': (),
            'inp2': (),
            'inp3': (2 * [tf.TensorShape([8, 8])],),
        },))
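The snippets in this collection assume TF-Agents' Keras layer wrappers. Below is a minimal standalone sketch of the same NestMap pattern (per-key layers over a dict input, flattened and summed), with the imports these examples rely on; the layer and batch sizes are purely illustrative:

import tensorflow as tf
from tf_agents.keras_layers import nest_map
from tf_agents.networks import sequential

# Map each dict entry through its own layer, flatten the dict into a
# list of tensors, then sum the two (batch, 8) outputs element-wise.
net = sequential.Sequential([
    nest_map.NestMap({'inp1': tf.keras.layers.Dense(8),
                      'inp2': tf.keras.layers.Dense(8)}),
    nest_map.NestFlatten(),
    tf.keras.layers.Add(),
])
output, _ = net({'inp1': tf.ones((2, 3)), 'inp2': tf.ones((2, 5))})
print(output.shape)  # (2, 8)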
Example #2
def create_sequential_critic_network(obs_fc_layer_units, action_fc_layer_units,
                                     joint_fc_layer_units):
    """Create a sequential critic network."""

    # Split the inputs into observations and actions.
    def split_inputs(inputs):
        return {'observation': inputs[0], 'action': inputs[1]}

    # Create an observation network.
    obs_network = (create_fc_network(obs_fc_layer_units)
                   if obs_fc_layer_units else create_identity_layer())

    # Create an action network.
    action_network = (create_fc_network(action_fc_layer_units)
                      if action_fc_layer_units else create_identity_layer())

    # Create a joint network.
    joint_network = (create_fc_network(joint_fc_layer_units)
                     if joint_fc_layer_units else create_identity_layer())

    # Final layer.
    value_layer = tf.keras.layers.Dense(1, kernel_initializer='glorot_uniform')

    return sequential.Sequential(
        [
            tf.keras.layers.Lambda(split_inputs),
            nest_map.NestMap({
                'observation': obs_network,
                'action': action_network
            }),
            nest_map.NestFlatten(),
            tf.keras.layers.Concatenate(),
            joint_network,
            value_layer,
            inner_reshape.InnerReshape(current_shape=[1], new_shape=[]),
        ],
        name='sequential_critic')
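The helpers create_fc_network and create_identity_layer are not shown in this snippet, so the stand-ins below are assumptions that only mirror their names; the sketch also assumes the module's TF-Agents imports. The critic is called with an (observation, action) tuple, which split_inputs turns into the dict NestMap expects:

# Hypothetical stand-ins, defined here only so the sketch runs on its own.
def create_fc_network(layer_units):
    return sequential.Sequential(
        [tf.keras.layers.Dense(u, activation='relu') for u in layer_units])

def create_identity_layer():
    return tf.keras.layers.Lambda(lambda x: x)

critic = create_sequential_critic_network(
    obs_fc_layer_units=(16,), action_fc_layer_units=(16,),
    joint_fc_layer_units=(16,))
q_values, _ = critic((tf.ones((4, 5)), tf.ones((4, 2))))
print(q_values.shape)  # (4,); InnerReshape strips the trailing [1]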
Example #3
  def create_sequential_actor_net(self,
                                  fc_layer_units,
                                  action_tensor_spec,
                                  seed=None):
    """Helper method for creating the actor network."""

    self._seed_stream = self.seed_stream_class(
        seed=seed, salt='tf_agents_sequential_layers')

    def _get_seed():
      seed = self._seed_stream()
      if seed is not None:
        seed = seed % sys.maxsize
      return seed

    def create_dist(loc_and_scale):
      loc = loc_and_scale['loc']
      loc = tanh_and_scale_to_spec(loc, action_tensor_spec)

      scale = loc_and_scale['scale']
      scale = tf.math.softplus(scale)

      return tfp.distributions.MultivariateNormalDiag(
          loc=loc, scale_diag=scale, validate_args=True)

    def means_layers():
      # TODO(b/179510447): align these parameters with Schulman 17.
      return tf.keras.layers.Dense(
          action_tensor_spec.shape.num_elements(),
          kernel_initializer=tf.keras.initializers.VarianceScaling(
              scale=0.1, seed=_get_seed()),
          name='means_projection_layer')

    def std_layers():
      # TODO(b/179510447): align these parameters with Schulman 17.
      std_bias_initializer_value = np.log(np.exp(0.35) - 1)
      return bias_layer.BiasLayer(
          bias_initializer=tf.constant_initializer(
              value=std_bias_initializer_value))

    def no_op_layers():
      return tf.keras.layers.Lambda(lambda x: x)

    dense = functools.partial(
        tf.keras.layers.Dense,
        activation=tf.nn.tanh,
        kernel_initializer=tf.keras.initializers.Orthogonal(
            seed=_get_seed()))

    return sequential.Sequential(
        [dense(num_units) for num_units in fc_layer_units] +
        [means_layers()] +
        [tf.keras.layers.Lambda(
            lambda x: {'loc': x, 'scale': tf.zeros_like(x)})] +
        [nest_map.NestMap({
            'loc': no_op_layers(),
            'scale': std_layers(),
        })] +
        # Create the output distribution from the mean and standard deviation.
        [tf.keras.layers.Lambda(create_dist)])
Example #4
def create_q_network(num_actions):
  """Create a Q network following the architecture from Minh 15."""

  kernel_initializer = tf.compat.v1.variance_scaling_initializer(scale=2.0)
  conv2d = functools.partial(
      tf.keras.layers.Conv2D,
      activation=tf.keras.activations.relu,
      kernel_initializer=kernel_initializer)
  dense = functools.partial(
      tf.keras.layers.Dense,
      activation=tf.keras.activations.relu,
      kernel_initializer=kernel_initializer)
  logits = functools.partial(
      tf.keras.layers.Dense,
      activation=None,
      kernel_initializer=kernel_initializer)

  return sequential.Sequential(
      # We divide the grayscale pixel values by 255 here rather than storing
      # normalized values because uint8s are 4x cheaper to store than float32s.
      [
          tf.keras.layers.Lambda(lambda x: x / 255),
          conv2d(32, (8, 8), 4),
          conv2d(64, (4, 4), 2),
          conv2d(64, (3, 3), 1),
          tf.keras.layers.Flatten(),
          dense(512),
          logits(num_actions)
      ])
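A hedged usage sketch; the 84x84x4 uint8 frame stack below is an assumption in the spirit of the Atari setup, not something fixed by the function itself:

q_net = create_q_network(num_actions=4)
# uint8 frames are fine here: the leading Lambda's division casts to float.
q_values, _ = q_net(tf.ones((1, 84, 84, 4), dtype=tf.uint8))
print(q_values.shape)  # (1, 4)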
Example #5
    def testSequentialNetwork(self):
        output_spec = tensor_spec.BoundedTensorSpec([2], tf.float32, 0, 1)
        network = tanh_normal_projection_network.TanhNormalProjectionNetwork(
            output_spec)

        inputs = tf.random.stateless_uniform(shape=[3, 5], seed=[0, 0])
        output, _ = network(inputs, outer_rank=1)

        # Create a squashed distribution.
        def create_dist(loc_and_scale):
            ndims = output_spec.shape.num_elements()
            loc = loc_and_scale[..., :ndims]
            scale = tf.exp(loc_and_scale[..., ndims:])

            distribution = tfp.distributions.MultivariateNormalDiag(
                loc=loc,
                scale_diag=scale,
                validate_args=True,
            )
            return distribution_utils.scale_distribution_to_spec(
                distribution, output_spec)

        # Create a sequential network.
        sequential_network = sequential.Sequential(
            [network._projection_layer] +
            [tf.keras.layers.Lambda(create_dist)])
        sequential_output, _ = sequential_network(inputs)

        # Check that mode and standard deviation are the same.
        self.evaluate(tf.compat.v1.global_variables_initializer())
        self.assertAllClose(self.evaluate(output.mode()),
                            self.evaluate(sequential_output.mode()))
        self.assertAllClose(self.evaluate(output.stddev()),
                            self.evaluate(sequential_output.stddev()))
Example #6
  def testLearnerRaiseExceptionOnMismatchingBatchSetup(self):
    obs_spec = tensor_spec.TensorSpec([2], tf.float32)
    time_step_spec = ts.time_step_spec(obs_spec)
    action_spec = tensor_spec.BoundedTensorSpec([], tf.int32, 0, 1)
    flat_action_spec = tf.nest.flatten(action_spec)[0]
    num_actions = flat_action_spec.maximum - flat_action_spec.minimum + 1

    network = sequential.Sequential([
        tf.keras.layers.Dense(num_actions, dtype=tf.float32),
        inner_reshape.InnerReshape([None], [num_actions])
    ])

    agent = behavioral_cloning_agent.BehavioralCloningAgent(
        time_step_spec, action_spec, cloning_network=network, optimizer=None)

    with self.assertRaisesRegex(
        RuntimeError,
        (r'The slot variable initialization failed. The learner assumes all '
         r'experience tensors required an `outer_rank = \(None, '
         r'agent.train_sequence_length\)`\. If that\'s not the case for your '
         r'agent try setting `run_optimizer_variable_init=False`\.')):
      learner.Learner(
          root_dir=os.path.join(self.create_tempdir().full_path, 'learner'),
          train_step=train_utils.create_train_step(),
          agent=agent)
Example #7
def create_sequential_critic_net():
    value_layer_dict = {
        "patch": patch_pre_layer,
        "color": color_pre_layer,
        "motion": motion_pre_layer
    }

    action_layer = tf.keras.layers.Dense(81)

    def sum_value_and_action_out(value_and_action_out):
        value_out_dict, action_out = value_and_action_out
        value_out = tf.concat(tf.nest.flatten(value_out_dict), axis=-1)
        return tf.reshape(value_out + action_out, [1, -1])

    return sequential.Sequential([
        nest_map.NestMap((value_layer_dict, action_layer)),
        tf.keras.layers.Lambda(sum_value_and_action_out),
        tf.keras.layers.Dense(1)
    ])
Example #8
    def testLearnerRaiseExceptionOnMismatchingBatchSetup(self):
        obs_spec = tensor_spec.TensorSpec([2], tf.float32)
        time_step_spec = ts.time_step_spec(obs_spec)
        action_spec = tensor_spec.BoundedTensorSpec([], tf.int32, 0, 1)
        flat_action_spec = tf.nest.flatten(action_spec)[0]
        num_actions = flat_action_spec.maximum - flat_action_spec.minimum + 1

        network = sequential.Sequential([
            tf.keras.layers.Dense(num_actions, dtype=tf.float32),
            inner_reshape.InnerReshape([None], [num_actions])
        ])

        agent = behavioral_cloning_agent.BehavioralCloningAgent(
            time_step_spec,
            action_spec,
            cloning_network=network,
            optimizer=None)

        with self.assertRaisesRegex(
                ValueError,
                'All of the Tensors in `value` must have one outer dimension.'
        ):
            learner.Learner(
                root_dir=os.path.join(self.create_tempdir().full_path,
                                      'learner'),
                train_step=train_utils.create_train_step(),
                agent=agent)
Example #9
def create_critic_network(obs_fc_layer_units, action_fc_layer_units,
                          joint_fc_layer_units):
    """Create a critic network for DDPG."""
    def split_inputs(inputs):
        return {'observation': inputs[0], 'action': inputs[1]}

    obs_network = (create_fc_network(obs_fc_layer_units)
                   if obs_fc_layer_units else create_identity_layer())
    action_network = (create_fc_network(action_fc_layer_units)
                      if action_fc_layer_units else create_identity_layer())
    joint_network = (create_fc_network(joint_fc_layer_units)
                     if joint_fc_layer_units else create_identity_layer())
    value_fc_layer = tf.keras.layers.Dense(
        1,
        activation=None,
        kernel_initializer=tf.keras.initializers.RandomUniform(minval=-0.003,
                                                               maxval=0.003))

    return sequential.Sequential([
        tf.keras.layers.Lambda(split_inputs),
        nest_map.NestMap({
            'observation': obs_network,
            'action': action_network
        }),
        nest_map.NestFlatten(),
        tf.keras.layers.Concatenate(),
        joint_network,
        value_fc_layer,
        inner_reshape.InnerReshape([1], [])
    ])
Example #10
    def testMixOfNonRecurrentAndRecurrent(self):
        sequential = sequential_lib.Sequential(
            [
                tf.keras.layers.Dense(2),
                tf.keras.layers.LSTM(
                    2, return_state=True, return_sequences=True),
                tf.keras.layers.RNN(
                    tf.keras.layers.StackedRNNCells([
                        tf.keras.layers.LSTMCell(1),
                        tf.keras.layers.LSTMCell(32),
                    ], ),
                    return_state=True,
                    return_sequences=True,
                ),
                # Convert inner dimension to [4, 4, 2] for convolution.
                inner_reshape.InnerReshape([32], [4, 4, 2]),
                tf.keras.layers.Conv2D(2, 3),
                # Convert 3 inner dimensions to [?] for RNN.
                inner_reshape.InnerReshape([None] * 3, [-1]),
                tf.keras.layers.GRU(
                    2, return_state=True, return_sequences=True),
                dynamic_unroll_layer.DynamicUnroll(
                    tf.keras.layers.LSTMCell(2)),
            ],
            input_spec=tf.TensorSpec((3, ), tf.float32))
        self.assertEqual(sequential.input_tensor_spec,
                         tf.TensorSpec((3, ), tf.float32))

        output_spec = sequential.create_variables()
        self.assertEqual(output_spec, tf.TensorSpec((2, ), dtype=tf.float32))

        tf.nest.map_structure(
            self.assertEqual,
            sequential.state_spec,
            (
                [  # LSTM
                    tf.TensorSpec((2, ), tf.float32),
                    tf.TensorSpec((2, ), tf.float32),
                ],
                (  # RNN(StackedRNNCells)
                    [
                        tf.TensorSpec((1, ), tf.float32),
                        tf.TensorSpec((1, ), tf.float32),
                    ],
                    [
                        tf.TensorSpec((32, ), tf.float32),
                        tf.TensorSpec((32, ), tf.float32),
                    ],
                ),
                # GRU
                tf.TensorSpec((2, ), tf.float32),
                [  # DynamicUnroll
                    tf.TensorSpec((2, ), tf.float32),
                    tf.TensorSpec((2, ), tf.float32),
                ]))

        inputs = tf.ones((8, 10, 3), dtype=tf.float32)
        outputs, _ = sequential(inputs)
        self.assertEqual(outputs.shape, tf.TensorShape([8, 10, 2]))
Example #11
 def testAllZeroLengthStateSpecsShowAsEmptyState(self):
     sequential = sequential_lib.Sequential([
         nest_map.NestMap({
             'a': tf.keras.layers.Dense(2),
             'b': tf.keras.layers.Dense(3),
         })
     ])
     self.assertEqual(sequential.state_spec, ())
Example #12
 def _dense_net(self, structure):
     """Dense-layered sequential network"""
     nb_actions = self._nb_actions()
     layers = [
         tf.keras.layers.Dense(size, **keys) for size, keys in structure
     ]
     layers.append(tf.keras.layers.Dense(nb_actions, activation=None))
     return sequential.Sequential(layers)
Example #13
    def testTrainableVariablesNestedNetwork(self):
        sequential_inner = sequential_lib.Sequential(
            [tf.keras.layers.Dense(3),
             tf.keras.layers.Dense(4)])
        sequential = sequential_lib.Sequential(
            [tf.keras.layers.Dense(3), sequential_inner])
        sequential.create_variables(tf.TensorSpec(shape=(3, 2)))
        self.evaluate(tf.compat.v1.global_variables_initializer())
        variables = self.evaluate(sequential.trainable_variables)

        self.assertLen(variables, 6)
        self.assertLen(sequential.variables, 6)
        self.assertLen(sequential_inner.variables, 4)
        self.assertTrue(sequential.trainable)
        sequential.trainable = False
        self.assertFalse(sequential.trainable)
        self.assertEmpty(sequential.trainable_variables)
        self.assertLen(sequential.variables, 6)
Example #14
def create_recurrent_network(input_fc_layer_units, lstm_size,
                             output_fc_layer_units, num_actions):
    rnn_cell = tf.keras.layers.StackedRNNCells(
        [fused_lstm_cell(s) for s in lstm_size])
    return sequential.Sequential(
        [dense(num_units) for num_units in input_fc_layer_units] +
        [dynamic_unroll_layer.DynamicUnroll(rnn_cell)] +
        [dense(num_units) for num_units in output_fc_layer_units] +
        [logits(num_actions)])
Example #15
    def testMixOfNonRecurrentAndRecurrent(self):
        sequential = sequential_lib.Sequential([
            tf.keras.layers.Dense(2),
            tf.keras.layers.LSTM(2, return_state=True, return_sequences=True),
            tf.keras.layers.RNN(
                tf.keras.layers.StackedRNNCells([
                    tf.keras.layers.LSTMCell(1),
                    tf.keras.layers.LSTMCell(32),
                ], ),
                return_state=True,
                return_sequences=True,
            ),
            tf.keras.layers.Reshape((-1, 4, 4, 2)),
            tf.keras.layers.Conv2D(2, 3),
            tf.keras.layers.TimeDistributed(tf.keras.layers.Flatten()),
            tf.keras.layers.GRU(2, return_state=True, return_sequences=True),
            dynamic_unroll_layer.DynamicUnroll(tf.keras.layers.LSTMCell(2)),
        ], input_spec=tf.TensorSpec((3,), tf.float32))
        self.assertEqual(sequential.input_tensor_spec,
                         tf.TensorSpec((3, ), tf.float32))

        output_spec = sequential.create_variables()
        self.assertEqual(output_spec, tf.TensorSpec((2, ), dtype=tf.float32))

        tf.nest.map_structure(
            self.assertEqual,
            sequential.state_spec,
            (
                [  # LSTM
                    tf.TensorSpec((2, ), tf.float32),
                    tf.TensorSpec((2, ), tf.float32),
                ],
                [  # RNN(StackedRNNCells)
                    [
                        tf.TensorSpec((1, ), tf.float32),
                        tf.TensorSpec((1, ), tf.float32),
                    ],
                    [
                        tf.TensorSpec((32, ), tf.float32),
                        tf.TensorSpec((32, ), tf.float32),
                    ],
                ],
                [  # GRU
                    tf.TensorSpec((2, ), tf.float32),
                ],
                [  # DynamicUnroll
                    tf.TensorSpec((2, ), tf.float32),
                    tf.TensorSpec((2, ), tf.float32),
                ]))

        inputs = tf.ones((8, 10, 3), dtype=tf.float32)
        outputs, _ = sequential(inputs)
        self.assertEqual(outputs.shape, tf.TensorShape([8, 10, 2]))
Example #16
 def testCopy(self):
     sequential = sequential_lib.Sequential([
         tf.keras.layers.Dense(3),
         tf.keras.layers.Dense(4, use_bias=False)
     ])
     clone = type(sequential).from_config(sequential.get_config())
     self.assertLen(clone.layers, 2)
     for l1, l2 in zip(sequential.layers, clone.layers):
         self.assertEqual(l1.dtype, l2.dtype)
         self.assertEqual(l1.units, l2.units)
         self.assertEqual(l1.use_bias, l2.use_bias)
Example #17
def q_lstm_network(num_actions):
    """Create the RNN based on layer parameters."""

    lstm_cell = tf.keras.layers.LSTM(
        20,
        implementation=KERAS_LSTM_FUSED,
        return_state=True,
        return_sequences=True)
    return sequential.Sequential(
        [dense(50), lstm_cell, dense(20), logits(num_actions)])
Example #18
def get_dummy_net(action_spec):
    flat_action_spec = tf.nest.flatten(action_spec)[0]
    num_actions = flat_action_spec.maximum - flat_action_spec.minimum + 1

    return sequential.Sequential([
        tf.keras.layers.Dense(
            num_actions,
            kernel_initializer=tf.compat.v1.initializers.constant([[2, 1],
                                                                   [1, 1]]),
            bias_initializer=tf.compat.v1.initializers.constant([[1], [1]]),
            dtype=tf.float32)
    ])
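The constant kernel initializer above implies two actions over a 2-dimensional observation; a hypothetical smoke call consistent with those shapes (the spec and batch size are assumptions):

from tf_agents.specs import tensor_spec

net = get_dummy_net(tensor_spec.BoundedTensorSpec([], tf.int32, 0, 1))
q_values, _ = net(tf.ones((3, 2), dtype=tf.float32))
print(q_values.shape)  # (3, 2)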
Example #19
 def testBuild(self):
     sequential = sequential_lib.Sequential(
         [tf.keras.layers.Dense(4, use_bias=False),
          tf.keras.layers.ReLU()])
     inputs = np.ones((2, 3))
     out, _ = sequential(inputs)
     self.evaluate(tf.compat.v1.global_variables_initializer())
     out = self.evaluate(out)
     weights = self.evaluate(sequential.layers[0].weights[0])
     expected = np.dot(inputs, weights)
     expected[expected < 0] = 0
     self.assertAllClose(expected, out)
Example #20
 def __init__(self, input_tensor_spec, output_tensor_spec):
     num_actions = output_tensor_spec.shape.num_elements()
     self._sequential = sequential_lib.Sequential(
         [
             tf.keras.layers.Dense(50),
             tf.keras.layers.Dense(10),
             tf.keras.layers.Dense(num_actions)
         ],
         input_spec=input_tensor_spec)  # pytype: disable=wrong-arg-types
      super(ActorNetwork, self).__init__(
          input_tensor_spec=input_tensor_spec,
          state_spec=self._sequential.state_spec,
          name='TestActorNetwork')
Example #21
def create_sequential_actor_network(actor_fc_layers, action_tensor_spec):
    """Create a sequential actor network."""
    def tile_as_nest(non_nested_output):
        return tf.nest.map_structure(lambda _: non_nested_output,
                                     action_tensor_spec)

    return sequential.Sequential(
        [dense(num_units) for num_units in actor_fc_layers] +
        [tf.keras.layers.Lambda(tile_as_nest)] + [
            nest_map.NestMap(
                tf.nest.map_structure(_TanhNormalProjectionNetworkWrapper,
                                      action_tensor_spec))
        ])
Example #22
    def testLossRNNSmokeTest(self, agent_class):
        q_net = sequential.Sequential([
            tf.keras.layers.LSTM(
                2,
                return_state=True,
                return_sequences=True,
                kernel_initializer=tf.constant_initializer(0.5),
                recurrent_initializer=tf.constant_initializer(0.5)),
        ])

        agent = agent_class(self._time_step_spec,
                            self._action_spec,
                            q_network=q_net,
                            gamma=0.95,
                            optimizer=None)

        observations = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
        time_steps = ts.restart(observations, batch_size=2)

        rewards = tf.constant([10, 20], dtype=tf.float32)
        discounts = tf.constant([0.7, 0.8], dtype=tf.float32)

        next_observations = tf.constant([[5, 6], [7, 8]], dtype=tf.float32)
        next_time_steps = ts.transition(next_observations, rewards, discounts)
        third_observations = tf.constant([[9, 10], [11, 12]], dtype=tf.float32)
        third_time_steps = ts.transition(third_observations, rewards,
                                         discounts)

        actions = tf.constant([0, 1], dtype=tf.int32)
        action_steps = policy_step.PolicyStep(actions)

        experience1 = trajectory.from_transition(time_steps, action_steps,
                                                 next_time_steps)
        experience2 = trajectory.from_transition(next_time_steps, action_steps,
                                                 third_time_steps)
        experience3 = trajectory.from_transition(third_time_steps,
                                                 action_steps,
                                                 third_time_steps)

        experience = tf.nest.map_structure(
            lambda x, y, z: tf.stack([x, y, z], axis=1), experience1,
            experience2, experience3)

        loss, _ = agent._loss(experience)

        self.evaluate(tf.compat.v1.global_variables_initializer())

        # Smoke test, here to make sure the calculation does not change as we
        # modify preprocessing or other internals.
        expected_loss = 28.722265
        self.assertAllClose(self.evaluate(loss), expected_loss)
Example #23
 def testCall(self):
     sequential = sequential_lib.Sequential(
         [tf.keras.layers.Dense(4, use_bias=False),
          tf.keras.layers.ReLU()],
         input_spec=tf.TensorSpec((3, ), tf.float32))  # pytype: disable=wrong-arg-types
     inputs = np.ones((2, 3))
     out, state = sequential(inputs)
     self.assertEqual(state, ())
     self.evaluate(tf.compat.v1.global_variables_initializer())
     out = self.evaluate(out)
     weights = self.evaluate(sequential.layers[0].weights[0])
     expected = np.dot(inputs, weights)
     expected[expected < 0] = 0
     self.assertAllClose(expected, out)
Example #24
def create_sequential_actor_net(fc_layer_units, action_tensor_spec):
    """Helper function for creating the actor network."""
    def create_dist(loc_and_scale):

        ndims = action_tensor_spec.shape.num_elements()
        return tfp.distributions.MultivariateNormalDiag(
            loc=loc_and_scale[..., :ndims],
            scale_diag=tf.math.softplus(loc_and_scale[..., ndims:]),
            validate_args=True)

    def means_layers():
        # TODO(b/179510447): align these parameters with Schulman 17.
        return tf.keras.layers.Dense(
            action_tensor_spec.shape.num_elements(),
            kernel_initializer=tf.keras.initializers.VarianceScaling(
                scale=0.1),
            name='means_projection_layer')

    def std_layers():
        # TODO(b/179510447): align these parameters with Schulman 17.
        std_kernel_initializer_scale = 0.1
        std_bias_initializer_value = np.log(np.exp(0.35) - 1)
        return tf.keras.layers.Dense(
            action_tensor_spec.shape.num_elements(),
            kernel_initializer=tf.keras.initializers.VarianceScaling(
                scale=std_kernel_initializer_scale),
            bias_initializer=tf.keras.initializers.Constant(
                value=std_bias_initializer_value))

    dense = functools.partial(
        tf.keras.layers.Dense,
        activation=tf.nn.tanh,
        kernel_initializer=tf.keras.initializers.Orthogonal())

    return sequential.Sequential(
        [dense(num_units) for num_units in fc_layer_units] +
        [tf.keras.layers.Lambda(lambda x: {
            'loc': x,
            'scale': x
        })] +
        [nest_map.NestMap({
            'loc': means_layers(),
            'scale': std_layers()
        })] + [nest_map.NestFlatten()] +
        # Concatenate the mean and standard deviation outputs to feed into the
        # distribution layer.
        [tf.keras.layers.Concatenate(axis=-1)] +
        # Create the output distribution from the mean and standard deviation.
        [tf.keras.layers.Lambda(create_dist)])
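A hedged call sketch, assuming the module-level imports (np, tfp, functools) and an illustrative 10-dimensional input. Note that this create_dist only reads the spec's width; it does not squash samples to the spec's bounds:

from tf_agents.specs import tensor_spec

action_spec = tensor_spec.BoundedTensorSpec([2], tf.float32, -1.0, 1.0)
actor = create_sequential_actor_net(fc_layer_units=(64, 64),
                                    action_tensor_spec=action_spec)
dist, _ = actor(tf.ones((4, 10)))
actions = dist.sample()  # shape: (4, 2)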
Example #25
def create_sequential_critic_net(l2_regularization_weight=0.0,
                                 shared_layer=None):
  value_layer = tf.keras.layers.Dense(
      1,
      kernel_regularizer=tf.keras.regularizers.l2(l2_regularization_weight),
      kernel_initializer=tf.initializers.constant([[0], [1]]),
      bias_initializer=tf.initializers.constant([[0]]))
  if shared_layer:
    value_layer = sequential.Sequential([value_layer, shared_layer])

  action_layer = tf.keras.layers.Dense(
      1,
      kernel_regularizer=tf.keras.regularizers.l2(l2_regularization_weight),
      kernel_initializer=tf.initializers.constant([[1]]),
      bias_initializer=tf.initializers.constant([[0]]))

  def sum_value_and_action_out(value_and_action_out):
    value_out, action_out = value_and_action_out
    return tf.reshape(value_out + action_out, [-1])

  return sequential.Sequential([
      nest_map.NestMap((value_layer, action_layer)),
      tf.keras.layers.Lambda(sum_value_and_action_out)
  ])
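Because NestMap is given a tuple here, the critic takes a (value_input, action_input) tuple; the constant initializers imply feature widths of 2 and 1, and the batch size below is illustrative:

critic = create_sequential_critic_net()
q, _ = critic((tf.ones((4, 2)), tf.ones((4, 1))))
print(q.shape)  # (4,); flattened by sum_value_and_action_out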
Example #26
    def testTrainableVariablesWithNonTrainableLayer(self):
        non_trainable_layer = tf.keras.layers.Dense(4)
        non_trainable_layer.trainable = False

        sequential = sequential_lib.Sequential(
            [tf.keras.layers.Dense(3), non_trainable_layer])
        sequential.create_variables(tf.TensorSpec(shape=(3, 2)))
        self.evaluate(tf.compat.v1.global_variables_initializer())
        variables = self.evaluate(sequential.trainable_variables)
        self.assertLen(variables, 2)
        self.assertLen(sequential.variables, 4)
        self.assertTrue(sequential.trainable)
        sequential.trainable = False
        self.assertFalse(sequential.trainable)
        self.assertEmpty(sequential.trainable_variables)
        self.assertLen(sequential.variables, 4)
Example #27
 def _conv_net(self, structure):
     """Conv2D sequential network"""
     nb_actions = self._nb_actions()
     layers = [
         tf.keras.layers.Lambda(lambda x: x / 255),
         tf.keras.layers.Conv2D(32, (8, 8),
                                strides=(4, 4),
                                activation="relu"),
         tf.keras.layers.Conv2D(64, (4, 4),
                                strides=(2, 2),
                                activation="relu"),
         tf.keras.layers.Conv2D(64, (3, 3), activation="relu"),
         tf.keras.layers.Flatten(),
         tf.keras.layers.Dense(256, activation="relu")
     ]
     layers.append(tf.keras.layers.Dense(nb_actions, activation="linear"))
     return sequential.Sequential(layers)
Example #28
def build_dummy_sequential_net(fc_layer_params, action_spec):
    """Build a dummy sequential network."""
    num_actions = action_spec.maximum - action_spec.minimum + 1

    logits = functools.partial(
        tf.keras.layers.Dense,
        activation=None,
        kernel_initializer=tf.compat.v1.initializers.random_uniform(
            minval=-0.03, maxval=0.03),
        bias_initializer=tf.compat.v1.initializers.constant(-0.2))

    dense = functools.partial(
        tf.keras.layers.Dense,
        activation=tf.keras.activations.relu,
        kernel_initializer=tf.compat.v1.variance_scaling_initializer(
            scale=2.0, mode='fan_in', distribution='truncated_normal'))

    return sequential.Sequential(
        [dense(num_units)
         for num_units in fc_layer_params] + [logits(num_actions)])
Example #29
def create_sequential_actor_net():
    def create_dist(loc_and_scale):
        # Bring my_action into [2.0, 3.0]:
        #  (-inf, inf) -> (-1, 1) -> (-0.5, 0.5) -> (2, 3)
        my_action = tfp.bijectors.Chain([
            tfp.bijectors.Shift(2.5),
            tfp.bijectors.Scale(0.5),
            tfp.bijectors.Tanh()
        ])(tfd.Normal(loc=loc_and_scale[..., 0],
                      scale=tf.math.softplus(loc_and_scale[..., 1]),
                      validate_args=True))
        return {
            'my_action': my_action,
        }

    return sequential.Sequential([
        tf.keras.layers.Dense(4),
        tf.keras.layers.Dense(2),
        tf.keras.layers.Lambda(create_dist)
    ])
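A sampling sketch; the 3-dimensional input is an assumption, and tfd is taken to be tfp.distributions as the snippet implies:

actor = create_sequential_actor_net()
dists, _ = actor(tf.ones((5, 3)))
actions = dists['my_action'].sample()  # values fall in (2.0, 3.0)
print(actions.shape)  # (5,)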
Example #30
def create_actor_network(fc_layer_units, action_spec):
    """Create an actor network for DDPG."""
    flat_action_spec = tf.nest.flatten(action_spec)
    if len(flat_action_spec) > 1:
        raise ValueError(
            'Only a single action tensor is supported by this network')
    flat_action_spec = flat_action_spec[0]

    fc_layers = [dense(num_units) for num_units in fc_layer_units]

    num_actions = flat_action_spec.shape.num_elements()
    action_fc_layer = tf.keras.layers.Dense(
        num_actions,
        activation=tf.keras.activations.tanh,
        kernel_initializer=tf.keras.initializers.RandomUniform(minval=-0.003,
                                                               maxval=0.003))

    scaling_layer = tf.keras.layers.Lambda(
        lambda x: common.scale_to_spec(x, flat_action_spec))
    return sequential.Sequential(fc_layers + [action_fc_layer, scaling_layer])