Example #1
    def test_mlp_layers(self):
        layers = utils.mlp_layers(conv_layer_params=[(3, 4, 5), (4, 6, 8)],
                                  fc_layer_params=[10, 20],
                                  activation_fn=tf.keras.activations.tanh,
                                  name='testnet')
        self.assertEqual(5, len(layers))

        self.assertAllEqual([
            tf.keras.layers.Conv2D, tf.keras.layers.Conv2D,
            tf.keras.layers.Flatten, tf.keras.layers.Dense,
            tf.keras.layers.Dense
        ], [type(layer) for layer in layers])

        layers = utils.mlp_layers(conv_layer_params=[(3, 4, 5), (4, 6, 8)],
                                  fc_layer_params=[10, 20],
                                  activation_fn=tf.keras.activations.tanh,
                                  dropout_layer_params=[0.5, 0.3],
                                  name='testnet')
        self.assertEqual(7, len(layers))

        self.assertAllEqual([
            tf.keras.layers.Conv2D, tf.keras.layers.Conv2D,
            tf.keras.layers.Flatten, tf.keras.layers.Dense,
            permanent_variable_rate_dropout.PermanentVariableRateDropout,
            tf.keras.layers.Dense,
            permanent_variable_rate_dropout.PermanentVariableRateDropout
        ], [type(layer) for layer in layers])

        layers = utils.mlp_layers(conv_layer_params=[(3, 4, 5), (4, 6, 8)],
                                  fc_layer_params=[10, 20],
                                  activation_fn=tf.keras.activations.tanh,
                                  dropout_layer_params=[None, 0.3],
                                  name='testnet')
        self.assertEqual(6, len(layers))

        self.assertAllEqual([
            tf.keras.layers.Conv2D, tf.keras.layers.Conv2D,
            tf.keras.layers.Flatten, tf.keras.layers.Dense,
            tf.keras.layers.Dense,
            permanent_variable_rate_dropout.PermanentVariableRateDropout
        ], [type(layer) for layer in layers])

        layers = utils.mlp_layers(
            conv_layer_params=[(3, 4, 5), (4, 6, 8)],
            fc_layer_params=[10, 20],
            activation_fn=tf.keras.activations.tanh,
            dropout_layer_params=[dict(rate=0.5, permanent=True), None],
            name='testnet')
        self.assertEqual(6, len(layers))

        self.assertAllEqual([
            tf.keras.layers.Conv2D, tf.keras.layers.Conv2D,
            tf.keras.layers.Flatten, tf.keras.layers.Dense,
            permanent_variable_rate_dropout.PermanentVariableRateDropout,
            tf.keras.layers.Dense
        ], [type(layer) for layer in layers])
Example #2
  def __init__(self,
               observation_spec,
               action_spec,
               fc_layer_params=(200, 100),
               conv_layer_params=None,
               activation_fn=tf.keras.activations.relu,
               categorical_projection_net=_categorical_projection_net,
               normal_projection_net=_normal_projection_net,
               name='ActorDistributionNetwork'):
    """Creates an instance of `ActorDistributionNetwork`.

    Args:
      observation_spec: A nest of `tensor_spec.TensorSpec` representing the
        observations.
      action_spec: A nest of `tensor_spec.BoundedTensorSpec` representing the
        actions.
      fc_layer_params: Optional list of fully_connected parameters, where each
        item is the number of units in the layer.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride).
      activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
      categorical_projection_net: Callable that generates a categorical
        projection network to be called with some hidden state and the
        outer_rank of the state.
      normal_projection_net: Callable that generates a normal projection network
        to be called with some hidden state and the outer_rank of the state.
      name: A string representing name of the network.

    Raises:
      ValueError: If `observation_spec` contains more than one observation.
    """
    super(ActorDistributionNetwork, self).__init__(
        observation_spec=observation_spec,
        action_spec=action_spec,
        state_spec=(),
        name=name)

    if len(nest.flatten(observation_spec)) > 1:
      raise ValueError('Only a single observation is supported by this network')

    self._mlp_layers = utils.mlp_layers(
        conv_layer_params,
        fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=tf.keras.initializers.glorot_uniform(),
        name='input_mlp')

    self._projection_networks = []
    for single_output_spec in nest.flatten(action_spec):
      if single_output_spec.is_discrete():
        self._projection_networks.append(
            categorical_projection_net(single_output_spec))
      else:
        self._projection_networks.append(
            normal_projection_net(single_output_spec))
Example #3
  def __init__(self,
               input_tensor_spec,
               output_tensor_spec,
               fc_layer_params=(256, 256),
               conv_layer_params=None,
               activation_fn=tf.keras.activations.relu,
               name='ActorDistributionNetwork'):
    """Creates an instance of `ActorDistributionNetwork`.

    Args:
      input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing the
        input.
      output_tensor_spec: A nest of `tensor_spec.BoundedTensorSpec` representing
        the output.
      fc_layer_params: Optional list of fully_connected parameters, where each
        item is the number of units in the layer.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride).
      activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
      name: A string representing name of the network.

    Raises:
      ValueError: If `input_tensor_spec` or `output_tensor_spec` contains more
        than one spec.
    """
    super(ActorDistributionNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec,
        state_spec=(),
        output_spec=output_tensor_spec,
        name=name)

    if len(tf.nest.flatten(input_tensor_spec)) > 1:
      raise ValueError('Only a single observation is supported by this network')

    flat_action_spec = tf.nest.flatten(output_tensor_spec)
    if len(flat_action_spec) > 1:
      raise ValueError('Only a single action is supported by this network')
    self._single_action_spec = flat_action_spec[0]

    # TODO(kbanoop): Replace mlp_layers with encoding networks.
    self._mlp_layers = utils.mlp_layers(
        conv_layer_params,
        fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=tf.compat.v1.keras.initializers.glorot_uniform(),
        name='input_mlp')

    self._mlp_layers.append(
        tf.keras.layers.Dense(
            2 * self._single_action_spec.shape.num_elements(),
            activation=None,
            kernel_initializer=tf.compat.v1.keras.initializers.glorot_uniform(),
            name='normal_projection_layer'))
Example #4
  def __init__(self,
               input_tensor_spec,
               fc_layer_params=(75, 40),
               conv_layer_params=None,
               activation_fn=tf.keras.activations.relu,
               name='ValueNetwork'):
    """Creates an instance of `ValueNetwork`.

    Network supports calls with shape outer_rank + observation_spec.shape. Note
    outer_rank must be at least 1.

    Args:
      input_tensor_spec: A `tensor_spec.TensorSpec` or a tuple of specs
        representing the input observations.
      fc_layer_params: Optional list of fully_connected parameters, where each
        item is the number of units in the layer.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride).
      activation_fn: Activation function, e.g. tf.keras.activations.relu.
      name: A string representing name of the network.

    Raises:
      ValueError: If `input_tensor_spec` is not an instance of `network.InputSpec`.
      ValueError: If `input_tensor_spec.observations` contains more than one
        observation.
    """
    super(ValueNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec,
        state_spec=(),
        name=name)

    if len(tf.nest.flatten(input_tensor_spec)) > 1:
      raise ValueError(
          'Network only supports observation specs with a single observation.')

    self._postprocessing_layers = utils.mlp_layers(
        conv_layer_params,
        fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=tf.compat.v1.keras.initializers.glorot_uniform(),
        name='input_mlp')

    self._postprocessing_layers.append(
        tf.keras.layers.Dense(
            1,
            activation=None,
            kernel_initializer=tf.compat.v1.initializers.random_uniform(
                minval=-0.03, maxval=0.03),
        ))
Example #5
    def __init__(self,
                 input_tensor_spec,
                 observation_conv_layer_params=None,
                 observation_fc_layer_params=(256, ),
                 action_fc_layer_params=None,
                 joint_fc_layer_params=(256, ),
                 activation_fn=tf.nn.relu,
                 name='CriticNetwork',
                 output_dim=None):
        """Creates an instance of `CriticNetwork`.
		Args:
			input_tensor_spec: A tuple of (observation, action) each a nest of
				`tensor_spec.TensorSpec` representing the inputs.
			observation_conv_layer_params: Optional list of convolution layer
				parameters for observations, where each item is a length-three tuple
				indicating (num_units, kernel_size, stride).
			observation_fc_layer_params: Optional list of fully connected parameters
				for observations, where each item is the number of units in the layer.
			action_fc_layer_params: Optional list of fully connected parameters for
				actions, where each item is the number of units in the layer.
			joint_fc_layer_params: Optional list of fully connected parameters after
				merging observations and actions, where each item is the number of units
				in the layer.
			activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
			name: A string representing name of the network.
			output_dim: An integer specifying the number of outputs. If None, output
				will be flattened.
		"""
        self._output_dim = output_dim
        (_, action_spec) = input_tensor_spec
        modified_obs_spec = None
        modified_tensor_spec = (modified_obs_spec, action_spec)

        super(critic_network.CriticNetwork,
              self).__init__(input_tensor_spec=modified_tensor_spec,
                             state_spec=(),
                             name=name)
        self._input_tensor_spec = input_tensor_spec

        flat_action_spec = tf.nest.flatten(action_spec)
        if len(flat_action_spec) > 1:
            raise ValueError(
                'Only a single action is supported by this network')
        self._single_action_spec = flat_action_spec[0]

        self._observation_layers = utils.mlp_layers(
            observation_conv_layer_params,
            observation_fc_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=tf.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform'),
            name='observation_encoding')

        self._action_layers = utils.mlp_layers(
            None,
            action_fc_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=tf.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform'),
            name='action_encoding')

        self._joint_layers = utils.mlp_layers(
            None,
            joint_fc_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=tf.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform'),
            name='joint_mlp')

        self._joint_layers.append(
            tf.keras.layers.Dense(
                self._output_dim if self._output_dim is not None else 1,
                activation=None,
                kernel_initializer=tf.keras.initializers.RandomUniform(
                    minval=-0.003, maxval=0.003),
                name='value'))
Example #6
    def __init__(
            self,
            root_dir,
            conv_1d_layer_params=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
            conv_2d_layer_params=[(32, (8, 8), 4), (64, (4, 4), 2),
                                  (64, (3, 3), 2)],
            encoder_fc_layers=[256],
            actor_fc_layers=[256],
            critic_obs_fc_layers=[256],
            critic_action_fc_layers=[256],
            critic_joint_fc_layers=[256],
            # Params for target update
            target_update_tau=0.005,
            target_update_period=1,
            # Params for train
            actor_learning_rate=3e-4,
            critic_learning_rate=3e-4,
            alpha_learning_rate=3e-4,
            td_errors_loss_fn=tf.compat.v1.losses.mean_squared_error,
            gamma=0.99,
            reward_scale_factor=1.0,
            gradient_clipping=None,
            # Params for eval
            eval_deterministic=False,
            # Params for summaries and logging
            debug_summaries=False,
            summarize_grads_and_vars=False):
        '''Builds SAC networks and restores a saved policy for evaluation.'''
        tf.compat.v1.enable_resource_variables()

        root_dir = os.path.expanduser(root_dir)
        policy_dir = os.path.join(root_dir, 'train', 'policy')

        time_step_spec = TimeStep(
            TensorSpec(shape=(), dtype=tf.int32, name='step_type'),
            TensorSpec(shape=(), dtype=tf.float32, name='reward'),
            BoundedTensorSpec(shape=(),
                              dtype=tf.float32,
                              name='discount',
                              minimum=np.array(0., dtype=np.float32),
                              maximum=np.array(1., dtype=np.float32)),
            collections.OrderedDict({
                'task_obs':
                BoundedTensorSpec(shape=(TASK_OBS_DIM, ),
                                  dtype=tf.float32,
                                  name=None,
                                  minimum=np.array(-3.4028235e+38,
                                                   dtype=np.float32),
                                  maximum=np.array(3.4028235e+38,
                                                   dtype=np.float32)),
                'depth':
                BoundedTensorSpec(shape=(IMG_HEIGHT, IMG_WIDTH, 1),
                                  dtype=tf.float32,
                                  name=None,
                                  minimum=np.array(-1.0, dtype=np.float32),
                                  maximum=np.array(1.0, dtype=np.float32)),
                'rgb':
                BoundedTensorSpec(shape=(IMG_HEIGHT, IMG_WIDTH, 3),
                                  dtype=tf.float32,
                                  name=None,
                                  minimum=np.array(-1.0, dtype=np.float32),
                                  maximum=np.array(1.0, dtype=np.float32)),
            }))
        observation_spec = time_step_spec.observation
        action_spec = BoundedTensorSpec(shape=(2, ),
                                        dtype=tf.float32,
                                        name=None,
                                        minimum=np.array(-1.0,
                                                         dtype=np.float32),
                                        maximum=np.array(1.0,
                                                         dtype=np.float32))

        glorot_uniform_initializer = tf.compat.v1.keras.initializers.glorot_uniform(
        )
        preprocessing_layers = {}
        if 'rgb' in observation_spec:
            preprocessing_layers['rgb'] = tf.keras.Sequential(
                mlp_layers(
                    conv_1d_layer_params=None,
                    conv_2d_layer_params=conv_2d_layer_params,
                    fc_layer_params=encoder_fc_layers,
                    kernel_initializer=glorot_uniform_initializer,
                ))

        if 'depth' in observation_spec:
            preprocessing_layers['depth'] = tf.keras.Sequential(
                mlp_layers(
                    conv_1d_layer_params=None,
                    conv_2d_layer_params=conv_2d_layer_params,
                    fc_layer_params=encoder_fc_layers,
                    kernel_initializer=glorot_uniform_initializer,
                ))

        if 'task_obs' in observation_spec:
            preprocessing_layers['task_obs'] = tf.keras.Sequential(
                mlp_layers(
                    conv_1d_layer_params=None,
                    conv_2d_layer_params=None,
                    fc_layer_params=encoder_fc_layers,
                    kernel_initializer=glorot_uniform_initializer,
                ))

        if len(preprocessing_layers) <= 1:
            preprocessing_combiner = None
        else:
            preprocessing_combiner = tf.keras.layers.Concatenate(axis=-1)

        actor_net = actor_distribution_network.ActorDistributionNetwork(
            observation_spec,
            action_spec,
            preprocessing_layers=preprocessing_layers,
            preprocessing_combiner=preprocessing_combiner,
            fc_layer_params=actor_fc_layers,
            continuous_projection_net=normal_projection_net,
            kernel_initializer=glorot_uniform_initializer,
        )

        critic_net = critic_network.CriticNetwork(
            (observation_spec, action_spec),
            preprocessing_layers=preprocessing_layers,
            preprocessing_combiner=preprocessing_combiner,
            observation_fc_layer_params=critic_obs_fc_layers,
            action_fc_layer_params=critic_action_fc_layers,
            joint_fc_layer_params=critic_joint_fc_layers,
            kernel_initializer=glorot_uniform_initializer,
        )

        global_step = tf.compat.v1.train.get_or_create_global_step()
        tf_agent = sac_agent.SacAgent(
            time_step_spec,
            action_spec,
            actor_network=actor_net,
            critic_network=critic_net,
            actor_optimizer=tf.compat.v1.train.AdamOptimizer(
                learning_rate=actor_learning_rate),
            critic_optimizer=tf.compat.v1.train.AdamOptimizer(
                learning_rate=critic_learning_rate),
            alpha_optimizer=tf.compat.v1.train.AdamOptimizer(
                learning_rate=alpha_learning_rate),
            target_update_tau=target_update_tau,
            target_update_period=target_update_period,
            td_errors_loss_fn=td_errors_loss_fn,
            gamma=gamma,
            reward_scale_factor=reward_scale_factor,
            gradient_clipping=gradient_clipping,
            debug_summaries=debug_summaries,
            summarize_grads_and_vars=summarize_grads_and_vars,
            train_step_counter=global_step)

        config = tf.compat.v1.ConfigProto()
        config.gpu_options.allow_growth = True
        self.sess = tf.compat.v1.Session(config=config)

        if eval_deterministic:
            self.eval_py_policy = py_tf_policy.PyTFPolicy(
                greedy_policy.GreedyPolicy(tf_agent.policy))
        else:
            self.eval_py_policy = py_tf_policy.PyTFPolicy(tf_agent.policy)

        policy_checkpointer = common.Checkpointer(ckpt_dir=policy_dir,
                                                  policy=tf_agent.policy,
                                                  global_step=global_step)

        with self.sess.as_default():
            # Initialize graph.
            policy_checkpointer.initialize_or_restore(self.sess)

        # Run one dummy observation through the policy to build the TF graph.
        obs = {
            'depth': np.ones((IMG_HEIGHT, IMG_WIDTH, 1)),
            'rgb': np.ones((IMG_HEIGHT, IMG_WIDTH, 3)),
            'task_obs': np.ones((TASK_OBS_DIM, ))
        }
        action = self.act(obs)
        print('activate TF session')
        print('action', action)
Example #7
    def __init__(self,
                 input_tensor_spec,
                 observation_conv_layer_params=None,
                 observation_fc_layer_params=None,
                 observation_dropout_layer_params=None,
                 action_fc_layer_params=None,
                 action_dropout_layer_params=None,
                 joint_fc_layer_params=None,
                 joint_dropout_layer_params=None,
                 activation_fn=tf.nn.relu,
                 name='CriticNetwork'):
        """Creates an instance of `CriticNetwork`.

    Args:
      input_tensor_spec: A tuple of (observation, action) each a nest of
        `tensor_spec.TensorSpec` representing the inputs.
      observation_conv_layer_params: Optional list of convolution layer
        parameters for observations, where each item is a length-three tuple
        indicating (num_units, kernel_size, stride).
      observation_fc_layer_params: Optional list of fully connected parameters
        for observations, where each item is the number of units in the layer.
      observation_dropout_layer_params: Optional list of dropout layer
        parameters, where each item is either the fraction of input units to
        drop or a dictionary of parameters according to the keras.Dropout
        documentation. The additional parameter `permanent`, if set to True,
        allows dropout to be applied at inference time as well, for approximate
        Bayesian inference. The dropout layers are interleaved with the fully
        connected layers; there is a dropout layer after each fully connected
        layer, except where the entry in the list is None. This list must have
        the same length as observation_fc_layer_params, or be None.
      action_fc_layer_params: Optional list of fully connected parameters for
        actions, where each item is the number of units in the layer.
      action_dropout_layer_params: Optional list of dropout layer parameters,
        where each item is either the fraction of input units to drop or a
        dictionary of parameters according to the keras.Dropout documentation.
        The additional parameter `permanent`, if set to True, allows dropout to
        be applied at inference time as well, for approximate Bayesian
        inference. The dropout layers are interleaved with the fully connected
        layers; there is a dropout layer after each fully connected layer,
        except where the entry in the list is None. This list must have the
        same length as action_fc_layer_params, or be None.
      joint_fc_layer_params: Optional list of fully connected parameters after
        merging observations and actions, where each item is the number of units
        in the layer.
      joint_dropout_layer_params: Optional list of dropout layer parameters,
        where each item is either the fraction of input units to drop or a
        dictionary of parameters according to the keras.Dropout documentation.
        The additional parameter `permanent`, if set to True, allows dropout to
        be applied at inference time as well, for approximate Bayesian
        inference. The dropout layers are interleaved with the fully connected
        layers; there is a dropout layer after each fully connected layer,
        except where the entry in the list is None. This list must have the
        same length as joint_fc_layer_params, or be None.
      activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
      name: A string representing name of the network.

    Raises:
      ValueError: If `observation_spec` or `action_spec` contains more than one
        spec.
    """
        super(CriticNetwork,
              self).__init__(input_tensor_spec=input_tensor_spec,
                             state_spec=(),
                             name=name)

        observation_spec, action_spec = input_tensor_spec

        if len(tf.nest.flatten(observation_spec)) > 1:
            raise ValueError(
                'Only a single observation is supported by this network')

        flat_action_spec = tf.nest.flatten(action_spec)
        if len(flat_action_spec) > 1:
            raise ValueError(
                'Only a single action is supported by this network')
        self._single_action_spec = flat_action_spec[0]

        # TODO(kbanoop): Replace mlp_layers with encoding networks.
        self._observation_layers = utils.mlp_layers(
            observation_conv_layer_params,
            observation_fc_layer_params,
            observation_dropout_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform'),
            name='observation_encoding')

        self._action_layers = utils.mlp_layers(
            None,
            action_fc_layer_params,
            action_dropout_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform'),
            name='action_encoding')

        self._joint_layers = utils.mlp_layers(
            None,
            joint_fc_layer_params,
            joint_dropout_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform'),
            name='joint_mlp')

        self._joint_layers.append(
            tf.keras.layers.Dense(
                1,
                activation=None,
                kernel_initializer=tf.keras.initializers.RandomUniform(
                    minval=-0.003, maxval=0.003),
                name='value'))
Example #8
    def __init__(self,
                 input_tensor_spec,
                 observation_preprocessing_layers=None,
                 observation_preprocessing_combiner=None,
                 observation_conv_layer_params=None,
                 observation_fc_layer_params=None,
                 observation_dropout_layer_params=None,
                 action_fc_layer_params=None,
                 action_dropout_layer_params=None,
                 joint_fc_layer_params=None,
                 joint_dropout_layer_params=None,
                 activation_fn=tf.nn.relu,
                 kernel_initializer=None,
                 name='CriticNetwork'):
        """Creates an instance of `CriticNetwork`.

        This CriticNetwork supports complex observations via preprocessing_layers
        and a preprocessing_combiner.

        Args:
            input_tensor_spec: A tuple of (observation, action) each a nest of
                `tensor_spec.TensorSpec` representing the inputs.
            observation_preprocessing_layers: (Optional.) A nest of `tf.keras.layers.Layer`
                representing preprocessing for the different observations.
                All of these layers must not be already built. For more details see
                the documentation of `networks.EncodingNetwork`.
            observation_preprocessing_combiner: (Optional.) A keras layer that takes a flat list
                of tensors and combines them. Good options include
                `tf.keras.layers.Add` and `tf.keras.layers.Concatenate(axis=-1)`.
                This layer must not be already built. For more details see
                the documentation of `networks.EncodingNetwork`.
            observation_conv_layer_params: Optional list of convolution layer
                parameters for observations, where each item is a length-three tuple
                indicating (num_units, kernel_size, stride).
            observation_fc_layer_params: Optional list of fully connected parameters
                for observations, where each item is the number of units in the layer.
            observation_dropout_layer_params: Optional list of dropout layer
                parameters, where each item is either the fraction of input units
                to drop or a dictionary of parameters according to the
                keras.Dropout documentation. The additional parameter `permanent`,
                if set to True, allows dropout to be applied at inference time as
                well, for approximate Bayesian inference. The dropout layers are
                interleaved with the fully connected layers; there is a dropout
                layer after each fully connected layer, except where the entry in
                the list is None. This list must have the same length as
                observation_fc_layer_params, or be None.
            action_fc_layer_params: Optional list of fully connected parameters for
                actions, where each item is the number of units in the layer.
            action_dropout_layer_params: Optional list of dropout layer
                parameters, where each item is either the fraction of input units
                to drop or a dictionary of parameters according to the
                keras.Dropout documentation. The additional parameter `permanent`,
                if set to True, allows dropout to be applied at inference time as
                well, for approximate Bayesian inference. The dropout layers are
                interleaved with the fully connected layers; there is a dropout
                layer after each fully connected layer, except where the entry in
                the list is None. This list must have the same length as
                action_fc_layer_params, or be None.
            joint_fc_layer_params: Optional list of fully connected parameters after
                merging observations and actions, where each item is the number of units
                in the layer.
            joint_dropout_layer_params: Optional list of dropout layer
                parameters, where each item is either the fraction of input units
                to drop or a dictionary of parameters according to the
                keras.Dropout documentation. The additional parameter `permanent`,
                if set to True, allows dropout to be applied at inference time as
                well, for approximate Bayesian inference. The dropout layers are
                interleaved with the fully connected layers; there is a dropout
                layer after each fully connected layer, except where the entry in
                the list is None. This list must have the same length as
                joint_fc_layer_params, or be None.
            activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
            kernel_initializer: Initializer to use for the kernels of the conv
                and dense layers. If none is provided, a default glorot_uniform
                initializer is used.
        Raises:
            ValueError: If `action_spec` contains more than one action.
        """
        super(CriticNetwork,
              self).__init__(input_tensor_spec=input_tensor_spec,
                             state_spec=(),
                             name=name)

        observation_spec, action_spec = input_tensor_spec

        flat_action_spec = tf.nest.flatten(action_spec)
        if len(flat_action_spec) > 1:
            raise ValueError(
                'Only a single action is supported by this network')

        if not kernel_initializer:
            kernel_initializer = tf.compat.v1.keras.initializers.glorot_uniform(
            )

        self._encoder = encoding_network.EncodingNetwork(
            observation_spec,
            preprocessing_layers=observation_preprocessing_layers,
            preprocessing_combiner=observation_preprocessing_combiner,
            conv_layer_params=observation_conv_layer_params,
            fc_layer_params=observation_fc_layer_params,
            dropout_layer_params=observation_dropout_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=kernel_initializer)

        self._single_action_spec = flat_action_spec[0]

        self._action_layers = utils.mlp_layers(
            fc_layer_params=action_fc_layer_params,
            dropout_layer_params=action_dropout_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform'),
            name='action_encoding')

        self._joint_layers = utils.mlp_layers(
            fc_layer_params=joint_fc_layer_params,
            dropout_layer_params=joint_dropout_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform'),
            name='joint_mlp')

        self._joint_layers.append(
            tf.keras.layers.Dense(
                1,
                activation=None,
                kernel_initializer=tf.keras.initializers.RandomUniform(
                    minval=-0.003, maxval=0.003),
                name='value'))
Example #9
  def __init__(self,
               input_tensor_spec,
               preprocessing_layers=None,
               preprocessing_combiner=None,
               batch_squash=True,
               observation_conv_layer_params=None,
               observation_fc_layer_params=None,
               observation_dropout_layer_params=None,
               action_fc_layer_params=None,
               action_dropout_layer_params=None,
               joint_fc_layer_params=None,
               joint_dropout_layer_params=None,
               activation_fn=tf.nn.relu,
               output_activation_fn=None,
               kernel_initializer=None,
               last_kernel_initializer=None,
               name='CriticNetwork'):
    """Creates an instance of `CriticNetwork`.
    Args:
      input_tensor_spec: A tuple of (observation, action) each a nest of
        `tensor_spec.TensorSpec` representing the inputs.
      preprocessing_layers: (Optional.) A nest of `tf.keras.layers.Layer`
        representing preprocessing for the different observations.
        All of these layers must not be already built. For more details see
        the documentation of `networks.EncodingNetwork`.
      preprocessing_combiner: (Optional.) A keras layer that takes a flat list
        of tensors and combines them. Good options include
        `tf.keras.layers.Add` and `tf.keras.layers.Concatenate(axis=-1)`.
        This layer must not be already built. For more details see
        the documentation of `networks.EncodingNetwork`.
      batch_squash: If True, the outer ranks of the observation are squashed
        into the batch dimension. This allows encoding networks to be used with
        observations of shape [BxTx...].
      observation_conv_layer_params: Optional list of convolution layer
        parameters for observations, where each item is a length-three tuple
        indicating (num_units, kernel_size, stride).
      observation_fc_layer_params: Optional list of fully connected parameters
        for observations, where each item is the number of units in the layer.
      observation_dropout_layer_params: Optional list of dropout layer
        parameters, where each item is either the fraction of input units to
        drop or a dictionary of parameters according to the keras.Dropout
        documentation. The additional parameter `permanent`, if set to True,
        allows dropout to be applied at inference time as well, for approximate
        Bayesian inference. The dropout layers are interleaved with the fully
        connected layers; there is a dropout layer after each fully connected
        layer, except where the entry in the list is None. This list must have
        the same length as observation_fc_layer_params, or be None.
      action_fc_layer_params: Optional list of fully connected parameters for
        actions, where each item is the number of units in the layer.
      action_dropout_layer_params: Optional list of dropout layer parameters,
        where each item is either the fraction of input units to drop or a
        dictionary of parameters according to the keras.Dropout documentation.
        The additional parameter `permanent`, if set to True, allows dropout to
        be applied at inference time as well, for approximate Bayesian
        inference. The dropout layers are interleaved with the fully connected
        layers; there is a dropout layer after each fully connected layer,
        except where the entry in the list is None. This list must have the
        same length as action_fc_layer_params, or be None.
      joint_fc_layer_params: Optional list of fully connected parameters after
        merging observations and actions, where each item is the number of units
        in the layer.
      joint_dropout_layer_params: Optional list of dropout layer parameters,
        where each item is either the fraction of input units to drop or a
        dictionary of parameters according to the keras.Dropout documentation.
        The additional parameter `permanent`, if set to True, allows dropout to
        be applied at inference time as well, for approximate Bayesian
        inference. The dropout layers are interleaved with the fully connected
        layers; there is a dropout layer after each fully connected layer,
        except where the entry in the list is None. This list must have the
        same length as joint_fc_layer_params, or be None.
      activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
      output_activation_fn: Activation function for the last layer. This can be
        used to restrict the range of the output. For example, one can pass
        tf.keras.activations.sigmoid here to restrict the output to be bounded
        between 0 and 1.
      kernel_initializer: kernel initializer for all layers except for the value
        regression layer. If None, a VarianceScaling initializer will be used.
      last_kernel_initializer: kernel initializer for the value regression
         layer. If None, a RandomUniform initializer will be used.
      name: A string representing name of the network.
    Raises:
      ValueError: If `observation_spec` or `action_spec` contains more than one
        spec.
    """
    super(PaintingCriticNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec,
        state_spec=(),
        name=name)

    observation_spec, action_spec = input_tensor_spec

    # if len(tf.nest.flatten(observation_spec)) > 1:
    #   raise ValueError('Only a single observation is supported by this network')

    flat_action_spec = tf.nest.flatten(action_spec)
    if len(flat_action_spec) > 1:
      raise ValueError('Only a single action is supported by this network')
    self._single_action_spec = flat_action_spec[0]

    if kernel_initializer is None:
      kernel_initializer = tf.compat.v1.keras.initializers.VarianceScaling(
          scale=1. / 3., mode='fan_in', distribution='uniform')
    if last_kernel_initializer is None:
      last_kernel_initializer = tf.keras.initializers.RandomUniform(
          minval=-0.003, maxval=0.003)

    encoder = encoding_network.EncodingNetwork(
        observation_spec,
        preprocessing_layers=preprocessing_layers,
        preprocessing_combiner=preprocessing_combiner,
        conv_layer_params=observation_conv_layer_params,
        fc_layer_params=observation_fc_layer_params,
        dropout_layer_params=observation_dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=kernel_initializer,
        batch_squash=batch_squash,
        name='observation_encoding') 
    self._encoder = encoder

    self._action_layers = utils.mlp_layers(
        None,
        action_fc_layer_params,
        action_dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=kernel_initializer,
        name='action_encoding')

    self._joint_layers = utils.mlp_layers(
        None,
        joint_fc_layer_params,
        joint_dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=kernel_initializer,
        name='joint_mlp')

    self._joint_layers.append(
        tf.keras.layers.Dense(
            1,
            activation=output_activation_fn,
            kernel_initializer=last_kernel_initializer,
            name='value'))
Example #10
    def __init__(self,
                 input_tensor_spec,
                 observation_conv_layer_params=None,
                 observation_fc_layer_params=None,
                 action_fc_layer_params=None,
                 joint_fc_layer_params=(256, 256),
                 activation_fn=tf.nn.relu,
                 name='CriticNetwork'):
        """Creates an instance of `CriticNetwork`.

    Args:
      input_tensor_spec: A tuple of (observation, action) each a nest of
        `tensor_spec.TensorSpec` representing the inputs.
      observation_conv_layer_params: Optional list of convolution layer
        parameters for observations, where each item is a length-three tuple
        indicating (num_units, kernel_size, stride).
      observation_fc_layer_params: Optional list of fully connected parameters
        for observations, where each item is the number of units in the layer.
      action_fc_layer_params: Optional list of fully connected parameters for
        actions, where each item is the number of units in the layer.
      joint_fc_layer_params: Optional list of fully connected parameters after
        merging observations and actions, where each item is the number of units
        in the layer.
      activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
      name: A string representing name of the network.

    Raises:
      ValueError: If `observation_spec` or `action_spec` contains more than one
        spec.
    """
        super(CriticNetwork,
              self).__init__(input_tensor_spec=input_tensor_spec,
                             state_spec=(),
                             name=name)

        observation_spec, action_spec = input_tensor_spec

        if len(tf.nest.flatten(observation_spec)) > 1:
            raise ValueError(
                'Only a single observation is supported by this network')

        flat_action_spec = tf.nest.flatten(action_spec)
        if len(flat_action_spec) > 1:
            raise ValueError(
                'Only a single action is supported by this network')
        self._single_action_spec = flat_action_spec[0]

        # TODO(kbanoop): Replace mlp_layers with encoding networks.
        self._observation_layers = utils.mlp_layers(
            observation_conv_layer_params,
            observation_fc_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=tf.compat.v1.keras.initializers.glorot_uniform(
            ),
            name='observation_encoding')

        self._action_layers = utils.mlp_layers(
            None,
            action_fc_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=tf.compat.v1.keras.initializers.glorot_uniform(
            ),
            name='action_encoding')

        self._joint_layers = utils.mlp_layers(
            None,
            joint_fc_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=tf.compat.v1.keras.initializers.glorot_uniform(
            ),
            name='joint_mlp')

        self._joint_layers.append(
            tf.keras.layers.Dense(1,
                                  activation=None,
                                  kernel_initializer=tf.compat.v1.keras.
                                  initializers.glorot_uniform(),
                                  name='value'))
Example #11
  def __init__(self,
               input_tensor_spec,
               output_tensor_spec,
               fc_layer_params=(200, 100),
               dropout_layer_params=None,
               conv_layer_params=None,
               activation_fn=tf.keras.activations.relu,
               discrete_projection_net=_categorical_projection_net,
               continuous_projection_net=_normal_projection_net,
               name='ActorDistributionNetwork'):
    """Creates an instance of `ActorDistributionNetwork`.

    Args:
      input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing the
        input.
      output_tensor_spec: A nest of `tensor_spec.BoundedTensorSpec` representing
        the output.
      fc_layer_params: Optional list of fully_connected parameters, where each
        item is the number of units in the layer.
      dropout_layer_params: Optional list of dropout layer parameters, where
        each item is either the fraction of input units to drop or a dictionary
        of parameters according to the keras.Dropout documentation. The
        additional parameter `permanent`, if set to True, allows dropout to be
        applied at inference time as well, for approximate Bayesian inference.
        The dropout layers are interleaved with the fully connected layers;
        there is a dropout layer after each fully connected layer, except where
        the entry in the list is None. This list must have the same length as
        fc_layer_params, or be None.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride).
      activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
      discrete_projection_net: Callable that generates a discrete projection
        network to be called with some hidden state and the outer_rank of the
        state.
      continuous_projection_net: Callable that generates a continuous projection
        network to be called with some hidden state and the outer_rank of the
        state.
      name: A string representing name of the network.

    Raises:
      ValueError: If `input_tensor_spec` contains more than one observation.
    """

    if len(tf.nest.flatten(input_tensor_spec)) > 1:
      raise ValueError('Only a single observation is supported by this network')

    mlp_layers = utils.mlp_layers(
        conv_layer_params,
        fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=tf.compat.v1.keras.initializers.glorot_uniform(),
        dropout_layer_params=dropout_layer_params,
        name='input_mlp')

    def map_proj(spec):
      if tensor_spec.is_discrete(spec):
        return discrete_projection_net(spec)
      else:
        return continuous_projection_net(spec)

    projection_networks = tf.nest.map_structure(map_proj, output_tensor_spec)
    output_spec = tf.nest.map_structure(lambda proj_net: proj_net.output_spec,
                                        projection_networks)

    super(ActorDistributionNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec,
        state_spec=(),
        output_spec=output_spec,
        name=name)

    self._mlp_layers = mlp_layers
    self._projection_networks = projection_networks
    self._output_tensor_spec = output_tensor_spec
Example #12
  def __init__(self,
               input_tensor_spec,
               conv_layer_params=None,
               input_fc_layer_params=(75, 40),
               input_dropout_layer_params=None,
               lstm_size=(40,),
               output_fc_layer_params=(75, 40),
               activation_fn=tf.keras.activations.relu,
               name='ValueRnnNetwork'):
    """Creates an instance of `ValueRnnNetwork`.

    Network supports calls with shape outer_rank + input_tensor_spec.shape.
    Note outer_rank must be at least 1.

    Args:
      input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing the
        input observations.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride).
      input_fc_layer_params: Optional list of fully_connected parameters, where
        each item is the number of units in the layer. This is applied before
        the LSTM cell.
      input_dropout_layer_params: Optional list of dropout layer parameters,
        where each item is the fraction of input units to drop. The dropout
        layers are interleaved with the fully connected layers; there is a
        dropout layer after each fully connected layer, except where the entry
        in the list is None. This list must have the same length as
        input_fc_layer_params, or be None.
      lstm_size: An iterable of ints specifying the LSTM cell sizes to use.
      output_fc_layer_params: Optional list of fully_connected parameters, where
        each item is the number of units in the layer. This is applied after the
        LSTM cell.
      activation_fn: Activation function, e.g. tf.keras.activations.relu.
      name: A string representing name of the network.

    Raises:
      ValueError: If `input_tensor_spec` contains more than one observation.
    """
    if len(tf.nest.flatten(input_tensor_spec)) > 1:
      raise ValueError(
          'Network only supports observation_specs with a single observation.')

    input_layers = utils.mlp_layers(
        conv_layer_params,
        input_fc_layer_params,
        input_dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=tf.compat.v1.keras.initializers.glorot_uniform(),
        name='input_mlp')

    # Create RNN cell
    if len(lstm_size) == 1:
      cell = tf.keras.layers.LSTMCell(lstm_size[0])
    else:
      cell = tf.keras.layers.StackedRNNCells(
          [tf.keras.layers.LSTMCell(size) for size in lstm_size])

    output_layers = []
    if output_fc_layer_params:
      output_layers = [
          tf.keras.layers.Dense(
              num_units,
              activation=activation_fn,
              kernel_initializer=tf.compat.v1.variance_scaling_initializer(
                  scale=2.0, mode='fan_in', distribution='truncated_normal'),
              name='output/dense') for num_units in output_fc_layer_params
      ]

    value_projection_layer = tf.keras.layers.Dense(
        1,
        activation=None,
        kernel_initializer=tf.compat.v1.initializers.random_uniform(
            minval=-0.03, maxval=0.03),
    )

    state_spec = tf.nest.map_structure(
        functools.partial(
            tensor_spec.TensorSpec, dtype=tf.float32,
            name='network_state_spec'), list(cell.state_size))

    super(ValueRnnNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec,
        state_spec=state_spec,
        name=name)

    self._conv_layer_params = conv_layer_params
    self._input_layers = input_layers
    self._dynamic_unroll = dynamic_unroll_layer.DynamicUnroll(cell)
    self._output_layers = output_layers
    self._value_projection_layer = value_projection_layer
Example #13
  def __init__(self,
               input_tensor_spec,
               output_tensor_spec,
               input_fc_layer_params=(200, 100),
               output_fc_layer_params=(200, 100),
               conv_layer_params=None,
               lstm_size=(40,),
               activation_fn=tf.keras.activations.relu,
               categorical_projection_net=_categorical_projection_net,
               normal_projection_net=_normal_projection_net,
               name='ActorDistributionRnnNetwork'):
    """Creates an instance of `ActorDistributionRnnNetwork`.

    Args:
      input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing the
        input.
      output_tensor_spec: A nest of `tensor_spec.BoundedTensorSpec` representing
        the output.
      input_fc_layer_params: Optional list of fully_connected parameters, where
        each item is the number of units in the layer. This is applied before
        the LSTM cell.
      output_fc_layer_params: Optional list of fully_connected parameters, where
        each item is the number of units in the layer. This is applied after the
        LSTM cell.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride).
      lstm_size: An iterable of ints specifying the LSTM cell sizes to use.
      activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
      categorical_projection_net: Callable that generates a categorical
        projection network to be called with some hidden state and the
        outer_rank of the state.
      normal_projection_net: Callable that generates a normal projection network
        to be called with some hidden state and the outer_rank of the state.
      name: A string representing name of the network.

    Raises:
      ValueError: If `input_tensor_spec` contains more than one observation.
    """
    if len(tf.nest.flatten(input_tensor_spec)) > 1:
      raise ValueError('Only a single observation is supported by this network')

    input_layers = utils.mlp_layers(
        conv_layer_params,
        input_fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=tf.compat.v1.keras.initializers.glorot_uniform(),
        name='input_mlp')

    # Create RNN cell
    if len(lstm_size) == 1:
      cell = tf.keras.layers.LSTMCell(lstm_size[0])
    else:
      cell = tf.keras.layers.StackedRNNCells(
          [tf.keras.layers.LSTMCell(size) for size in lstm_size])

    state_spec = tf.nest.map_structure(
        functools.partial(
            tensor_spec.TensorSpec, dtype=tf.float32,
            name='network_state_spec'), cell.state_size)

    output_layers = utils.mlp_layers(
        fc_layer_params=output_fc_layer_params, name='output')

    projection_networks = []
    for single_output_spec in tf.nest.flatten(output_tensor_spec):
      if tensor_spec.is_discrete(single_output_spec):
        projection_networks.append(
            categorical_projection_net(single_output_spec))
      else:
        projection_networks.append(normal_projection_net(single_output_spec))

    projection_distribution_specs = [
        proj_net.output_spec for proj_net in projection_networks
    ]
    output_spec = tf.nest.pack_sequence_as(output_tensor_spec,
                                           projection_distribution_specs)

    super(ActorDistributionRnnNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec,
        state_spec=state_spec,
        output_spec=output_spec,
        name=name)

    self._conv_layer_params = conv_layer_params
    self._input_layers = input_layers
    self._dynamic_unroll = dynamic_unroll_layer.DynamicUnroll(cell)
    self._output_layers = output_layers
    self._projection_networks = projection_networks
    self._output_tensor_spec = output_tensor_spec
Example #14
def train_eval(
        root_dir,
        gpu='1',
        env_load_fn=None,
        model_ids=None,
        eval_env_mode='headless',
        conv_layer_params=None,
        encoder_fc_layers=[256],
        actor_fc_layers=[256, 256],
        value_fc_layers=[256, 256],
        use_rnns=False,
        # Params for collect
        num_environment_steps=10000000,
        collect_episodes_per_iteration=30,
        num_parallel_environments=30,
        replay_buffer_capacity=1001,  # Per-environment
        # Params for train
        num_epochs=25,
        learning_rate=1e-4,
        # Params for eval
        num_eval_episodes=30,
        eval_interval=500,
        eval_only=False,
        eval_deterministic=False,
        num_parallel_environments_eval=1,
        model_ids_eval=None,
        # Params for summaries and logging
        train_checkpoint_interval=500,
        policy_checkpoint_interval=500,
        rb_checkpoint_interval=500,
        log_interval=10,
        summary_interval=50,
        summaries_flush_secs=1,
        debug_summaries=False,
        summarize_grads_and_vars=False,
        eval_metrics_callback=None):
    """A simple train and eval for PPO."""
    if root_dir is None:
        raise AttributeError('train_eval requires a root_dir.')

    root_dir = os.path.expanduser(root_dir)
    train_dir = os.path.join(root_dir, 'train')
    eval_dir = os.path.join(root_dir, 'eval')

    train_summary_writer = tf.compat.v2.summary.create_file_writer(
        train_dir, flush_millis=summaries_flush_secs * 1000)
    train_summary_writer.set_as_default()

    eval_summary_writer = tf.compat.v2.summary.create_file_writer(
        eval_dir, flush_millis=summaries_flush_secs * 1000)
    eval_metrics = [
        batched_py_metric.BatchedPyMetric(
            py_metrics.AverageReturnMetric,
            metric_args={'buffer_size': num_eval_episodes},
            batch_size=num_parallel_environments_eval),
        batched_py_metric.BatchedPyMetric(
            py_metrics.AverageEpisodeLengthMetric,
            metric_args={'buffer_size': num_eval_episodes},
            batch_size=num_parallel_environments_eval),
    ]
    eval_summary_writer_flush_op = eval_summary_writer.flush()
    global_step = tf.compat.v1.train.get_or_create_global_step()
    with tf.compat.v2.summary.record_if(
            lambda: tf.math.equal(global_step % summary_interval, 0)):
        if model_ids is None:
            model_ids = [None] * num_parallel_environments
        else:
            assert len(model_ids) == num_parallel_environments,\
                'model ids provided, but length not equal to num_parallel_environments'

        if model_ids_eval is None:
            model_ids_eval = [None] * num_parallel_environments_eval
        else:
            assert len(model_ids_eval) == num_parallel_environments_eval,\
                'model ids eval provided, but length not equal to num_parallel_environments_eval'

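        # Editorial note: binding model_id=model_ids[i] as a default argument
        # freezes the loop variable per lambda; a plain closure over i would
        # make every constructor load the final model_id.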
        tf_py_env = [lambda model_id=model_ids[i]: env_load_fn(model_id, 'headless', gpu)
                     for i in range(num_parallel_environments)]
        tf_env = tf_py_environment.TFPyEnvironment(parallel_py_environment.ParallelPyEnvironment(tf_py_env))

        if eval_env_mode == 'gui':
            assert num_parallel_environments_eval == 1, 'only one GUI env is allowed'
        eval_py_env = [lambda model_id=model_ids_eval[i]: env_load_fn(model_id, eval_env_mode, gpu)
                       for i in range(num_parallel_environments_eval)]
        eval_py_env = parallel_py_environment.ParallelPyEnvironment(eval_py_env)

        optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate)

        time_step_spec = tf_env.time_step_spec()
        observation_spec = tf_env.observation_spec()
        action_spec = tf_env.action_spec()
        print('observation_spec', observation_spec)
        print('action_spec', action_spec)

        glorot_uniform_initializer = tf.compat.v1.keras.initializers.glorot_uniform()
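        # Editorial note: the dict keys below ('depth_seg', 'sensor') must
        # match the keys of the environment's observation spec; each entry
        # encodes one observation stream before the Concatenate combiner.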
        preprocessing_layers = {
            'depth_seg': tf.keras.Sequential(mlp_layers(
                conv_layer_params=conv_layer_params,
                fc_layer_params=encoder_fc_layers,
                kernel_initializer=glorot_uniform_initializer,
            )),
            'sensor': tf.keras.Sequential(mlp_layers(
                conv_layer_params=None,
                fc_layer_params=encoder_fc_layers,
                kernel_initializer=glorot_uniform_initializer,
            )),
        }
        preprocessing_combiner = tf.keras.layers.Concatenate(axis=-1)

        if use_rnns:
            actor_net = actor_distribution_rnn_network.ActorDistributionRnnNetwork(
                observation_spec,
                action_spec,
                preprocessing_layers=preprocessing_layers,
                preprocessing_combiner=preprocessing_combiner,
                input_fc_layer_params=actor_fc_layers,
                output_fc_layer_params=None)
            value_net = value_rnn_network.ValueRnnNetwork(
                observation_spec,
                preprocessing_layers=preprocessing_layers,
                preprocessing_combiner=preprocessing_combiner,
                input_fc_layer_params=value_fc_layers,
                output_fc_layer_params=None)
        else:
            actor_net = actor_distribution_network.ActorDistributionNetwork(
                observation_spec,
                action_spec,
                preprocessing_layers=preprocessing_layers,
                preprocessing_combiner=preprocessing_combiner,
                fc_layer_params=actor_fc_layers,
                kernel_initializer=glorot_uniform_initializer
            )
            value_net = value_network.ValueNetwork(
                observation_spec,
                preprocessing_layers=preprocessing_layers,
                preprocessing_combiner=preprocessing_combiner,
                fc_layer_params=value_fc_layers,
                kernel_initializer=glorot_uniform_initializer
            )

        tf_agent = ppo_agent.PPOAgent(
            time_step_spec,
            action_spec,
            optimizer,
            actor_net=actor_net,
            value_net=value_net,
            num_epochs=num_epochs,
            debug_summaries=debug_summaries,
            summarize_grads_and_vars=summarize_grads_and_vars,
            train_step_counter=global_step)

        config = tf.compat.v1.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.compat.v1.Session(config=config)

        replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(
            tf_agent.collect_data_spec,
            batch_size=num_parallel_environments,
            max_length=replay_buffer_capacity)

        if eval_deterministic:
            eval_py_policy = py_tf_policy.PyTFPolicy(tf_agent.policy)
        else:
            eval_py_policy = py_tf_policy.PyTFPolicy(tf_agent.collect_policy)

        environment_steps_metric = tf_metrics.EnvironmentSteps()
        environment_steps_count = environment_steps_metric.result()
        step_metrics = [
            tf_metrics.NumberOfEpisodes(),
            environment_steps_metric,
        ]
        train_metrics = step_metrics + [
            tf_metrics.AverageReturnMetric(
                buffer_size=100,
                batch_size=num_parallel_environments),
            tf_metrics.AverageEpisodeLengthMetric(
                buffer_size=100,
                batch_size=num_parallel_environments),
        ]

        # Add to replay buffer and other agent specific observers.
        replay_buffer_observer = [replay_buffer.add_batch]

        collect_policy = tf_agent.collect_policy

        collect_op = dynamic_episode_driver.DynamicEpisodeDriver(
            tf_env,
            collect_policy,
            observers=replay_buffer_observer + train_metrics,
            num_episodes=collect_episodes_per_iteration * num_parallel_environments).run()
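        # Editorial note: this is TF1 graph mode -- collect_op and train_op are
        # constructed once here and executed repeatedly via sess.run() in the
        # training loop below.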

        trajectories = replay_buffer.gather_all()

        train_op, _ = tf_agent.train(experience=trajectories)

        with tf.control_dependencies([train_op]):
            clear_replay_op = replay_buffer.clear()

        with tf.control_dependencies([clear_replay_op]):
            train_op = tf.identity(train_op)
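        # Editorial note: the control-dependency chain above enforces the
        # ordering train -> clear replay buffer, so each iteration trains only
        # on freshly collected episodes.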

        train_checkpointer = common.Checkpointer(
            ckpt_dir=train_dir,
            agent=tf_agent,
            global_step=global_step,
            metrics=metric_utils.MetricsGroup(train_metrics, 'train_metrics'))
        policy_checkpointer = common.Checkpointer(
            ckpt_dir=os.path.join(train_dir, 'policy'),
            policy=tf_agent.policy,
            global_step=global_step)
        rb_checkpointer = common.Checkpointer(
            ckpt_dir=os.path.join(train_dir, 'replay_buffer'),
            max_to_keep=1,
            replay_buffer=replay_buffer)

        summary_ops = []
        for train_metric in train_metrics:
            summary_ops.append(train_metric.tf_summaries(
                train_step=global_step, step_metrics=step_metrics))

        with eval_summary_writer.as_default(), tf.compat.v2.summary.record_if(True):
            for eval_metric in eval_metrics:
                eval_metric.tf_summaries(
                    train_step=global_step, step_metrics=step_metrics)

        init_agent_op = tf_agent.initialize()

        with sess.as_default():
            # Initialize graph.
            train_checkpointer.initialize_or_restore(sess)
            rb_checkpointer.initialize_or_restore(sess)

            if eval_only:
                metric_utils.compute_summaries(
                    eval_metrics,
                    eval_py_env,
                    eval_py_policy,
                    num_episodes=num_eval_episodes,
                    global_step=0,
                    callback=eval_metrics_callback,
                    tf_summaries=False,
                    log=True,
                )
                episodes = eval_py_env.get_stored_episodes()
                episodes = [episode for sublist in episodes for episode in sublist][:num_eval_episodes]
                metrics = episode_utils.get_metrics(episodes)
                for key in sorted(metrics.keys()):
                    print(key, ':', metrics[key])

                save_path = os.path.join(eval_dir, 'episodes_eval.pkl')
                episode_utils.save(episodes, save_path)
                print('EVAL DONE')
                return

            common.initialize_uninitialized_variables(sess)
            sess.run(init_agent_op)
            sess.run(train_summary_writer.init())
            sess.run(eval_summary_writer.init())

            collect_time = 0
            train_time = 0
            timed_at_step = sess.run(global_step)
            steps_per_second_ph = tf.compat.v1.placeholder(
                tf.float32, shape=(), name='steps_per_sec_ph')
            steps_per_second_summary = tf.compat.v2.summary.scalar(
                name='global_steps_per_sec', data=steps_per_second_ph,
                step=global_step)

            global_step_val = sess.run(global_step)
            while sess.run(environment_steps_count) < num_environment_steps:
                global_step_val = sess.run(global_step)
                if global_step_val % eval_interval == 0:
                    metric_utils.compute_summaries(
                        eval_metrics,
                        eval_py_env,
                        eval_py_policy,
                        num_episodes=num_eval_episodes,
                        global_step=global_step_val,
                        callback=eval_metrics_callback,
                        log=True,
                    )
                    with eval_summary_writer.as_default(), tf.compat.v2.summary.record_if(True):
                        with tf.name_scope('Metrics/'):
                            episodes = eval_py_env.get_stored_episodes()
                            episodes = [episode for sublist in episodes for episode in sublist][:num_eval_episodes]
                            metrics = episode_utils.get_metrics(episodes)
                            for key in sorted(metrics.keys()):
                                print(key, ':', metrics[key])
                                metric_op = tf.compat.v2.summary.scalar(name=key,
                                                                        data=metrics[key],
                                                                        step=global_step_val)
                                sess.run(metric_op)
                    sess.run(eval_summary_writer_flush_op)

                start_time = time.time()
                sess.run(collect_op)
                collect_time += time.time() - start_time
                start_time = time.time()
                total_loss, _ = sess.run([train_op, summary_ops])
                train_time += time.time() - start_time

                global_step_val = sess.run(global_step)
                if global_step_val % log_interval == 0:
                    logging.info('step = %d, loss = %f', global_step_val, total_loss)
                    steps_per_sec = (
                            (global_step_val - timed_at_step) / (collect_time + train_time))
                    logging.info('%.3f steps/sec', steps_per_sec)
                    sess.run(
                        steps_per_second_summary,
                        feed_dict={steps_per_second_ph: steps_per_sec})
                    logging.info('%s', 'collect_time = {}, train_time = {}'.format(
                        collect_time, train_time))
                    timed_at_step = global_step_val
                    collect_time = 0
                    train_time = 0

                if global_step_val % train_checkpoint_interval == 0:
                    train_checkpointer.save(global_step=global_step_val)

                if global_step_val % policy_checkpoint_interval == 0:
                    policy_checkpointer.save(global_step=global_step_val)

                if global_step_val % rb_checkpoint_interval == 0:
                    rb_checkpointer.save(global_step=global_step_val)

            # One final eval before exiting.
            metric_utils.compute_summaries(
                eval_metrics,
                eval_py_env,
                eval_py_policy,
                num_episodes=num_eval_episodes,
                global_step=global_step_val,
                callback=eval_metrics_callback,
                log=True,
            )
            sess.run(eval_summary_writer_flush_op)

        sess.close()
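# Hedged usage sketch (editorial; the loader and path are hypothetical, not
# from the source). env_load_fn must accept (model_id, mode, gpu), matching
# the calls above:
#
#   train_eval(root_dir='~/ppo_logs',
#              env_load_fn=my_env_load_fn,  # hypothetical environment factory
#              model_ids=None,
#              num_parallel_environments=30)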
Example #15
    def __init__(self,
                 input_tensor_spec,
                 preprocessing_layers=None,
                 preprocessing_combiner=None,
                 action_fc_layer_params=(200, ),
                 joint_fc_layer_params=(100, ),
                 lstm_size=(40, ),
                 output_fc_layer_params=(200, 100),
                 activation_fn=tf.keras.activations.relu,
                 name='MultiInputsCriticRnnNetwork'):
        """Creates an instance of `MultiInputsCriticRnnNetwork`.
    Args:
      input_tensor_spec: A tuple of (observation, action) each of type
        `tensor_spec.TensorSpec` representing the inputs.
      preprocessing_layers: (Optional.) A nest of `tf.keras.layers.Layer`
        representing preprocessing for the different observations.
        All of these layers must not be already built. For more details see
        the documentation of `networks.EncodingNetwork`.
      preprocessing_combiner: (Optional.) A keras layer that takes a flat list
        of tensors and combines them. Good options include
        `tf.keras.layers.Add` and `tf.keras.layers.Concatenate(axis=-1)`.
        This layer must not be already built. For more details see
        the documentation of `networks.EncodingNetwork`.
      action_fc_layer_params: Optional list of parameters for a fully_connected
        layer to apply to the actions, where each item is the number of units
        in the layer.
      joint_fc_layer_params: Optional list of parameters for a fully_connected
        layer to apply after merging observations and actions, where each item
        is the number of units in the layer.
      lstm_size: An iterable of ints specifying the LSTM cell sizes to use.
      output_fc_layer_params: Optional list of fully_connected parameters, where
        each item is the number of units in the layer. This is applied after the
        LSTM cell.
      activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
      name: A string representing name of the network.
    Returns:
      A tf.float32 Tensor of q-values.
    Raises:
      ValueError: If `observation_spec` or `action_spec` contains more than one
        item.
    """
        observation_spec, action_spec = input_tensor_spec

        if len(tf.nest.flatten(action_spec)) > 1:
            raise ValueError(
                'Only a single action is supported by this network.')

        if preprocessing_layers is None:
            flat_preprocessing_layers = None
        else:
            flat_preprocessing_layers = [
                _copy_layer(layer)
                for layer in tf.nest.flatten(preprocessing_layers)
            ]
            # Assert shallow structure is the same. This verifies preprocessing
            # layers can be applied on expected input nests.
            observation_nest = observation_spec
            # Given the flatten on preprocessing_layers above we need to make sure
            # input_tensor_spec is a sequence for the shallow_structure check below
            # to work.
            if not nest.is_sequence(observation_spec):
                observation_nest = [observation_spec]
            nest.assert_shallow_structure(preprocessing_layers,
                                          observation_nest,
                                          check_types=False)

        if (len(tf.nest.flatten(observation_spec)) > 1
                and preprocessing_combiner is None):
            raise ValueError(
                'preprocessing_combiner layer is required when more than 1 '
                'observation_spec is provided.')

        if preprocessing_combiner is not None:
            preprocessing_combiner = _copy_layer(preprocessing_combiner)

        action_layers = utils.mlp_layers(
            None,
            action_fc_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform'),
            name='action_encoding')

        joint_layers = utils.mlp_layers(
            None,
            joint_fc_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform'),
            name='joint_mlp')

        # Create RNN cell
        if len(lstm_size) == 1:
            cell = tf.keras.layers.LSTMCell(lstm_size[0])
        else:
            cell = tf.keras.layers.StackedRNNCells(
                [tf.keras.layers.LSTMCell(size) for size in lstm_size])

        counter = [-1]

        def create_spec(size):
            counter[0] += 1
            return tensor_spec.TensorSpec(size,
                                          dtype=tf.float32,
                                          name='network_state_%d' % counter[0])

        state_spec = tf.nest.map_structure(create_spec, cell.state_size)

        output_layers = utils.mlp_layers(
            fc_layer_params=output_fc_layer_params, name='output')

        output_layers.append(
            tf.keras.layers.Dense(
                1,
                activation=None,
                kernel_initializer=tf.keras.initializers.RandomUniform(
                    minval=-0.003, maxval=0.003),
                name='value'))

        super(MultiInputsCriticRnnNetwork,
              self).__init__(input_tensor_spec=input_tensor_spec,
                             state_spec=state_spec,
                             name=name)

        self._action_layers = action_layers
        self._joint_layers = joint_layers
        self._dynamic_unroll = dynamic_unroll_layer.DynamicUnroll(cell)
        self._output_layers = output_layers

        self._preprocessing_nest = tf.nest.map_structure(
            lambda l: None, preprocessing_layers)
        self._flat_preprocessing_layers = flat_preprocessing_layers
        self._preprocessing_combiner = preprocessing_combiner
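# Editorial sketch (not from the source): with more than one entry in
# lstm_size the branch above builds StackedRNNCells, whose state_size nests
# one [hidden, cell] size pair per layer, so create_spec() above is mapped
# over every entry.
import tensorflow as tf

stacked = tf.keras.layers.StackedRNNCells(
    [tf.keras.layers.LSTMCell(size) for size in (40, 40)])
print(stacked.state_size)  # e.g. ([40, 40], [40, 40]) -- one pair per cell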
Example #16
  def __init__(self,
               input_tensor_spec,
               output_tensor_spec,
               preprocessing_layers=None,
               preprocessing_combiner=None,
               conv_layer_params=None,
               input_fc_layer_params=(200, 100),
               lstm_size=(40,),
               output_fc_layer_params=(200, 100),
               activation_fn=tf.keras.activations.relu,
               name='MultiInputsActorRnnNetwork'):
    """Creates an instance of `MultiInputsActorRnnNetwork`.
    Args:
      input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing the
        input observations.
      output_tensor_spec: A nest of `tensor_spec.BoundedTensorSpec` representing
        the actions.
      preprocessing_layers: (Optional.) A nest of `tf.keras.layers.Layer`
        representing preprocessing for the different observations.
        All of these layers must not be already built. For more details see
        the documentation of `networks.EncodingNetwork`.
      preprocessing_combiner: (Optional.) A keras layer that takes a flat list
        of tensors and combines them. Good options include
        `tf.keras.layers.Add` and `tf.keras.layers.Concatenate(axis=-1)`.
        This layer must not be already built. For more details see
        the documentation of `networks.EncodingNetwork`.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride).
      input_fc_layer_params: Optional list of fully_connected parameters, where
        each item is the number of units in the layer. This is applied before
        the LSTM cell.
      lstm_size: An iterable of ints specifying the LSTM cell sizes to use.
      output_fc_layer_params: Optional list of fully_connected parameters, where
        each item is the number of units in the layer. This is applied after the
        LSTM cell.
      activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
      name: A string representing name of the network.
    Returns:
      A nest of action tensors matching the action_spec.
    Raises:
      ValueError: If `input_tensor_spec` contains more than one observation.
    """
    observation_spec = input_tensor_spec
    if preprocessing_layers is None:
      flat_preprocessing_layers = None
    else:
      flat_preprocessing_layers = [
          _copy_layer(layer) for layer in tf.nest.flatten(preprocessing_layers)
      ]
      # Assert shallow structure is the same. This verifies preprocessing
      # layers can be applied on expected input nests.
      observation_nest = observation_spec
      # Given the flatten on preprocessing_layers above we need to make sure
      # input_tensor_spec is a sequence for the shallow_structure check below
      # to work.
      if not nest.is_sequence(observation_spec):
        observation_nest = [observation_spec]
      nest.assert_shallow_structure(
          preprocessing_layers, observation_nest, check_types=False)

    if (len(tf.nest.flatten(observation_spec)) > 1 and
        preprocessing_combiner is None):
      raise ValueError(
          'preprocessing_combiner layer is required when more than 1 '
          'observation_spec is provided.')

    if preprocessing_combiner is not None:
      preprocessing_combiner = _copy_layer(preprocessing_combiner)

    input_layers = utils.mlp_layers(
        conv_layer_params,
        input_fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=tf.compat.v1.keras.initializers.glorot_uniform(),
        name='input_mlp')

    # Create RNN cell
    if len(lstm_size) == 1:
      cell = tf.keras.layers.LSTMCell(lstm_size[0])
    else:
      cell = tf.keras.layers.StackedRNNCells(
          [tf.keras.layers.LSTMCell(size) for size in lstm_size])

    state_spec = tf.nest.map_structure(
        functools.partial(
            tensor_spec.TensorSpec, dtype=tf.float32,
            name='network_state_spec'), list(cell.state_size))

    output_layers = utils.mlp_layers(fc_layer_params=output_fc_layer_params,
                                     name='output')

    flat_action_spec = tf.nest.flatten(output_tensor_spec)
    action_layers = [
        tf.keras.layers.Dense(
            single_action_spec.shape.num_elements(),
            activation=tf.keras.activations.tanh,
            kernel_initializer=tf.keras.initializers.RandomUniform(
                minval=-0.003, maxval=0.003),
            name='action') for single_action_spec in flat_action_spec
    ]

    super(MultiInputsActorRnnNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec,
        state_spec=state_spec,
        name=name)

    self._output_tensor_spec = output_tensor_spec
    self._flat_action_spec = flat_action_spec
    self._conv_layer_params = conv_layer_params
    self._input_layers = input_layers
    self._dynamic_unroll = dynamic_unroll_layer.DynamicUnroll(cell)
    self._output_layers = output_layers
    self._action_layers = action_layers

    self._preprocessing_nest = tf.nest.map_structure(lambda l: None,
                                                     preprocessing_layers)
    self._flat_preprocessing_layers = flat_preprocessing_layers
    self._preprocessing_combiner = preprocessing_combiner
Example #17
    def __init__(self,
                 input_tensor_spec,
                 output_tensor_spec,
                 fc_layer_params=(200, 100),
                 activation_fn=tf.nn.relu,
                 output_activation_fn=None,
                 kernel_initializer=None,
                 last_kernel_initializer=None,
                 discrete_projection_net=_categorical_projection_net,
                 continuous_projection_net=_normal_projection_net,
                 name='PolicyNetwork'):
        """Creates an instance of `ValueNetwork`.

    Args:
      input_tensor_spec: A possibly nested container of
        `tensor_spec.TensorSpec` representing the inputs.
      output_tensor_spec: A possibly nested container of
        `tensor_spec.TensorSpec` representing the outputs.
      fc_layer_params: Optional list of fully connected parameters after
        merging all inputs, where each item is the number of units
        in the layer.
      activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
      output_activation_fn: Activation function for the last layer. This can be
        used to restrict the range of the output. For example, one can pass
        tf.keras.activations.sigmoid here to restrict the output to be bounded
        between 0 and 1.
      kernel_initializer: kernel initializer for all layers except for the value
        regression layer. If None, a VarianceScaling initializer will be used.
      last_kernel_initializer: kernel initializer for the value regression
         layer. If None, a RandomUniform initializer will be used.
      discrete_projection_net: projection layer for discrete actions.
      continuous_projection_net: projection layer for continuous actions.
      name: A string representing name of the network.
    """
        def map_proj(spec):
            if tensor_spec.is_discrete(spec):
                return discrete_projection_net(spec)
            else:
                return continuous_projection_net(spec)

        projection_networks = tf.nest.map_structure(map_proj,
                                                    output_tensor_spec)
        output_spec = tf.nest.map_structure(
            lambda proj_net: proj_net.output_spec, projection_networks)
        if tensor_spec.is_discrete(output_tensor_spec):
            action_dim = np.unique(output_tensor_spec.maximum -
                                   output_tensor_spec.minimum + 1)
        else:
            action_dim = output_tensor_spec.shape.num_elements()
        super(PolicyNetwork,
              self).__init__(input_tensor_spec=input_tensor_spec,
                             state_spec=(),
                             output_spec=output_spec,
                             name=name)

        self._flat_specs = tf.nest.flatten(input_tensor_spec)

        if kernel_initializer is None:
            kernel_initializer = tf.compat.v1.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform')
        if last_kernel_initializer is None:
            last_kernel_initializer = tf.keras.initializers.RandomUniform(
                minval=-0.003, maxval=0.003)

        self._fc_layers = utils.mlp_layers(
            None,
            fc_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=kernel_initializer,
            name='mlp')
        self._fc_layers.append(
            tf.keras.layers.Dense(action_dim,
                                  activation=output_activation_fn,
                                  kernel_initializer=last_kernel_initializer,
                                  name='value'))

        self._projection_networks = projection_networks
        self._output_tensor_spec = output_tensor_spec
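# Editorial sketch: the discrete branch above derives action_dim from the
# spec's bounds. For a scalar action in [0, 4]:
import numpy as np
import tensorflow as tf
from tf_agents.specs import tensor_spec

spec = tensor_spec.BoundedTensorSpec((), tf.int32, minimum=0, maximum=4)
print(np.unique(spec.maximum - spec.minimum + 1))  # [5] -> 5 discrete actions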
Example #18
    def __init__(self,
                 input_tensor_spec,
                 observation_preprocessing_layers=None,
                 observation_preprocessing_combiner=None,
                 observation_conv_layer_params=None,
                 observation_fc_layer_params=(200, ),
                 action_fc_layer_params=(200, ),
                 joint_fc_layer_params=(100, ),
                 lstm_size=(40, ),
                 output_fc_layer_params=(200, 100),
                 activation_fn=tf.keras.activations.relu,
                 dtype=tf.float32,
                 name='CriticRnnNetwork'):
        """Creates an instance of `CriticRnnNetwork`.

        This CriticRnnNetwork supports handling complex observations with preprocessing_layer
        and preprocessing_combiner.

        Args:
            input_tensor_spec: A tuple of (observation, action) each of type
                `tensor_spec.TensorSpec` representing the inputs.
            observation_preprocessing_layers: (Optional.) A nest of `tf.keras.layers.Layer`
                representing preprocessing for the different observations.
                All of these layers must not be already built. For more details see
                the documentation of `networks.EncodingNetwork`.
            observation_preprocessing_combiner: (Optional.) A keras layer that takes a flat list
                of tensors and combines them. Good options include
                `tf.keras.layers.Add` and `tf.keras.layers.Concatenate(axis=-1)`.
                This layer must not be already built. For more details see
                the documentation of `networks.EncodingNetwork`.
            observation_conv_layer_params: Optional list of convolution layers
                parameters to apply to the observations, where each item is a
                length-three tuple indicating (filters, kernel_size, stride).
            observation_fc_layer_params: Optional list of fully_connected parameters,
                where each item is the number of units in the layer. This is applied
                after the observation convolutional layer.
            action_fc_layer_params: Optional list of parameters for a fully_connected
                layer to apply to the actions, where each item is the number of units
                in the layer.
            joint_fc_layer_params: Optional list of parameters for a fully_connected
                layer to apply after merging observations and actions, where each item
                is the number of units in the layer.
            lstm_size: An iterable of ints specifying the LSTM cell sizes to use.
            output_fc_layer_params: Optional list of fully_connected parameters, where
                each item is the number of units in the layer. This is applied after the
                LSTM cell.
            activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
            name: A string representing name of the network.
        Raises:
            ValueError: If `action_spec` contains more than one item.
        """
        observation_spec, action_spec = input_tensor_spec

        if len(tf.nest.flatten(action_spec)) > 1:
            raise ValueError(
                'Only a single action is supported by this network.')

        kernel_initializer = tf.compat.v1.variance_scaling_initializer(
            scale=2.0, mode='fan_in', distribution='truncated_normal')

        obs_encoder = encoding_network.EncodingNetwork(
            observation_spec,
            preprocessing_layers=observation_preprocessing_layers,
            preprocessing_combiner=observation_preprocessing_combiner,
            conv_layer_params=observation_conv_layer_params,
            fc_layer_params=observation_fc_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=kernel_initializer,
            dtype=dtype,
            name='obs_encoding')

        action_layers = sequential_layer.SequentialLayer(
            utils.mlp_layers(
                fc_layer_params=action_fc_layer_params,
                activation_fn=activation_fn,
                kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(
                    scale=1. / 3., mode='fan_in', distribution='uniform'),
                name='action_encoding'))

        obs_encoding_spec = tf.TensorSpec(
            shape=(observation_fc_layer_params[-1], ), dtype=tf.float32)
        lstm_encoder = lstm_encoding_network.LSTMEncodingNetwork(
            input_tensor_spec=(obs_encoding_spec, action_spec),
            preprocessing_layers=(tf.keras.layers.Flatten(), action_layers),
            preprocessing_combiner=tf.keras.layers.Concatenate(axis=-1),
            input_fc_layer_params=joint_fc_layer_params,
            lstm_size=lstm_size,
            output_fc_layer_params=output_fc_layer_params,
            activation_fn=activation_fn,
            dtype=dtype,
            name='lstm')

        output_layers = [
            tf.keras.layers.Dense(
                1,
                activation=None,
                kernel_initializer=tf.keras.initializers.RandomUniform(
                    minval=-0.003, maxval=0.003),
                name='value')
        ]

        super(CriticRnnNetwork,
              self).__init__(input_tensor_spec=input_tensor_spec,
                             state_spec=lstm_encoder.state_spec,
                             name=name)

        self._obs_encoder = obs_encoder
        self._lstm_encoder = lstm_encoder
        self._output_layers = output_layers
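# Editorial flow sketch for the network above:
#   observation -> obs_encoder -> (observation encoding, action)
#   -> lstm_encoder (joint fc -> LSTM -> output fc) -> Dense(1) Q-value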
Example #19
    def __init__(self,
                 input_tensor_spec,
                 output_tensor_spec,
                 fc_layer_params=None,
                 dropout_layer_params=None,
                 conv_layer_params=None,
                 activation_fn=tf.keras.activations.relu,
                 kernel_initializer=None,
                 last_kernel_initializer=None,
                 name='ActorNetwork'):
        """Creates an instance of `ActorNetwork`.

    Args:
      input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing the
        inputs.
      output_tensor_spec: A nest of `tensor_spec.BoundedTensorSpec` representing
        the outputs.
      fc_layer_params: Optional list of fully_connected parameters, where each
        item is the number of units in the layer.
      dropout_layer_params: Optional list of dropout layer parameters, each item
        is the fraction of input units to drop or a dictionary of parameters
        according to the keras.Dropout documentation. The additional parameter
        `permanent', if set to True, allows dropout to be applied at inference
        for approximate Bayesian inference. The dropout layers are interleaved
        with the fully connected layers; there is a dropout layer after each
        fully connected layer, except if the entry in the list is None. This
        list must have the same length as fc_layer_params, or be None.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride).
      activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
      kernel_initializer: kernel initializer for all layers except for the value
        regression layer. If None, a VarianceScaling initializer will be used.
      last_kernel_initializer: kernel initializer for the value regression
         layer. If None, a RandomUniform initializer will be used.
      name: A string representing name of the network.

    Raises:
      ValueError: If `input_tensor_spec` or `action_spec` contains more than one
        item, or if the action data type is not `float`.
    """

        super(ActorNetwork, self).__init__(input_tensor_spec=input_tensor_spec,
                                           state_spec=(),
                                           name=name)

        if len(tf.nest.flatten(input_tensor_spec)) > 1:
            raise ValueError(
                'Only a single observation is supported by this network')

        flat_action_spec = tf.nest.flatten(output_tensor_spec)
        if len(flat_action_spec) > 1:
            raise ValueError(
                'Only a single action is supported by this network')
        self._single_action_spec = flat_action_spec[0]

        if self._single_action_spec.dtype not in [tf.float32, tf.float64]:
            raise ValueError(
                'Only float actions are supported by this network.')

        if kernel_initializer is None:
            kernel_initializer = tf.compat.v1.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform')
        if last_kernel_initializer is None:
            last_kernel_initializer = tf.keras.initializers.RandomUniform(
                minval=-0.003, maxval=0.003)

        # TODO(kbanoop): Replace mlp_layers with encoding networks.
        self._mlp_layers = utils.mlp_layers(
            conv_layer_params,
            fc_layer_params,
            dropout_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=kernel_initializer,
            name='input_mlp')

        self._mlp_layers.append(
            tf.keras.layers.Dense(flat_action_spec[0].shape.num_elements(),
                                  activation=tf.keras.activations.tanh,
                                  kernel_initializer=last_kernel_initializer,
                                  name='action'))

        self._output_tensor_spec = output_tensor_spec
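# Hedged usage sketch (editorial; the spec shapes are illustrative and assume
# the ActorNetwork above is importable). A dict(rate=..., permanent=True)
# entry keeps dropout active at inference, enabling MC-dropout-style
# uncertainty estimates.
import tensorflow as tf
from tf_agents.specs import tensor_spec

actor = ActorNetwork(
    input_tensor_spec=tensor_spec.TensorSpec((8,), tf.float32),
    output_tensor_spec=tensor_spec.BoundedTensorSpec(
        (2,), tf.float32, minimum=-1.0, maximum=1.0),
    fc_layer_params=(64, 64),
    dropout_layer_params=[0.1, dict(rate=0.1, permanent=True)])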
Example #20
  def __init__(self,
               input_tensor_spec,
               gnn,
               observation_fc_layer_params=None,
               observation_dropout_layer_params=None,
               observation_conv_layer_params=None,
               observation_activation_fn=tf.nn.relu,
               action_fc_layer_params=None,
               action_dropout_layer_params=None,
               action_conv_layer_params=None,
               action_activation_fn=tf.nn.relu,
               joint_fc_layer_params=None,
               joint_dropout_layer_params=None,
               joint_activation_fn=tf.nn.relu,
               output_activation_fn=None,
               name='CriticNetwork'):
    """Creates an instance of `GNNCriticNetwork`.

    Args:
      input_tensor_spec: A tuple of (observation, action) each a nest of
        `tensor_spec.TensorSpec` representing the inputs.
      gnn: The function that initializes a graph neural network that 
        accepts the input observations and computes node embeddings.
      observation_fc_layer_params: Optional list of fully connected parameters
        for observations, where each item is the number of units in the layer.
      observation_dropout_layer_params: Optional list of dropout layer
        parameters, each item is the fraction of input units to drop or a
        dictionary of parameters according to the keras.Dropout documentation.
        The additional parameter `permanent', if set to True, allows dropout
        to be applied at inference for approximate Bayesian inference. The
        dropout layers are interleaved with the fully connected layers; there
        is a dropout layer after each fully connected layer, except if the
        entry in the list is None. This list must have the same length as
        observation_fc_layer_params, or be None.
      observation_conv_layer_params: Optional list of convolution layer
        parameters for observations, where each item is a length-three tuple
        indicating (num_units, kernel_size, stride).
      observation_activation_fn: Activation function applied to the observation 
        layers, e.g. tf.nn.relu, slim.leaky_relu, ...
      action_fc_layer_params: Optional list of fully connected parameters for
        actions, where each item is the number of units in the layer.
      action_dropout_layer_params: Optional list of dropout layer parameters,
        each item is the fraction of input units to drop or a dictionary of
        parameters according to the keras.Dropout documentation. The additional
        parameter `permanent', if set to True, allows dropout to be applied at
        inference for approximate Bayesian inference. The dropout layers are
        interleaved with the fully connected layers; there is a dropout layer
        after each fully connected layer, except if the entry in the list is
        None. This list must have the same length as action_fc_layer_params,
        or be None.
      action_conv_layer_params: Optional list of convolution layer
        parameters for actions, where each item is a length-three tuple
        indicating (num_units, kernel_size, stride).
      action_activation_fn: Activation function applied to the action layers,
        e.g. tf.nn.relu, slim.leaky_relu, ...
      joint_fc_layer_params: Optional list of fully connected parameters after
        merging observations and actions, where each item is the number of units
        in the layer.
      joint_dropout_layer_params: Optional list of dropout layer parameters,
        each item is the fraction of input units to drop or a dictionary of
        parameters according to the keras.Dropout documentation. The additional
        parameter `permanent', if set to True, allows dropout to be applied at
        inference for approximate Bayesian inference. The dropout layers are
        interleaved with the fully connected layers; there is a dropout layer
        after each fully connected layer, except if the entry in the list is
        None. This list must have the same length as joint_fc_layer_params,
        or be None.
      joint_activation_fn: Activation function applied to the joint layers,
        e.g. tf.nn.relu, slim.leaky_relu, ...
      output_activation_fn: Activation function for the last layer. This can be
        used to restrict the range of the output. For example, one can pass
        tf.keras.activations.sigmoid here to restrict the output to be bounded
        between 0 and 1.
      name: A string representing name of the network.

    Raises:
      ValueError: If `observation_spec` or `action_spec` contains more than one
        observation.
    """
    super(GNNCriticNetwork, self).__init__(
      input_tensor_spec=input_tensor_spec,
      state_spec=(),
      name=name)

    observation_spec, action_spec = input_tensor_spec

    if len(tf.nest.flatten(observation_spec)) > 1:
      raise ValueError('Only a single observation is supported by this network')

    if len(tf.nest.flatten(action_spec)) > 1:
      raise ValueError('Only a single action is supported by this network')
    
    if gnn is None:
      raise ValueError('`gnn` must not be `None`.')

    self._gnn = gnn(name=name + "_GNN")

    self._observation_layers = utils.mlp_layers(
      observation_conv_layer_params,
      observation_fc_layer_params,
      observation_dropout_layer_params,
      activation_fn=observation_activation_fn,
      kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(
        scale=1./3., mode='fan_in', distribution='uniform'),
      name='observation_encoding')

    self._action_layers = utils.mlp_layers(
      action_conv_layer_params,
      action_fc_layer_params,
      action_dropout_layer_params,
      activation_fn=action_activation_fn,
      kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(
        scale=1./3., mode='fan_in', distribution='uniform'),
      name='action_encoding')

    self._joint_layers = utils.mlp_layers(
        None,
        joint_fc_layer_params,
        joint_dropout_layer_params,
        activation_fn=joint_activation_fn,
        kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(
            scale=1./3., mode='fan_in', distribution='uniform'),
        name='joint_mlp')

    self._joint_layers.append(
      tf.keras.layers.Dense(
        units=1,
        activation=output_activation_fn,
        kernel_initializer=tf.keras.initializers.RandomUniform(
          minval=-0.003, maxval=0.003),
        name='value'))
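# Editorial note: `gnn` is a factory, invoked above as gnn(name=...). Any
# callable that returns a layer mapping observations to node embeddings fits,
# e.g. (hypothetical): gnn=lambda name: MyGraphNet(name=name)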
Example #21
    def __init__(self,
                 observation_spec,
                 action_spec,
                 observation_conv_layer_params=None,
                 observation_fc_layer_params=None,
                 action_fc_layer_params=None,
                 joint_fc_layer_params=None,
                 activation_fn=tf.nn.relu,
                 name='CriticNetwork'):
        """Creates an instance of `CriticNetwork`.

    Args:
      observation_spec: A nest of `tensor_spec.TensorSpec` representing the
        observations.
      action_spec: A nest of `tensor_spec.BoundedTensorSpec` representing the
        actions.
      observation_conv_layer_params: Optional list of convolution layer
        parameters for observations, where each item is a length-three tuple
        indicating (num_units, kernel_size, stride).
      observation_fc_layer_params: Optional list of fully connected parameters
        for observations, where each item is the number of units in the layer.
      action_fc_layer_params: Optional list of fully connected parameters for
        actions, where each item is the number of units in the layer.
      joint_fc_layer_params: Optional list of fully connected parameters after
        merging observations and actions, where each item is the number of units
        in the layer.
      activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
      name: A string representing name of the network.

    Raises:
      ValueError: If `observation_spec` or `action_spec` contains more than one
        observation.
    """
        super(CriticNetwork, self).__init__(observation_spec=observation_spec,
                                            action_spec=action_spec,
                                            state_spec=(),
                                            name=name)

        if len(nest.flatten(observation_spec)) > 1:
            raise ValueError(
                'Only a single observation is supported by this network')

        flat_action_spec = nest.flatten(action_spec)
        if len(flat_action_spec) > 1:
            raise ValueError(
                'Only a single action is supported by this network')
        self._single_action_spec = flat_action_spec[0]

        # TODO(kbanoop): Replace mlp_layers with encoding networks.
        self._observation_layers = utils.mlp_layers(
            observation_conv_layer_params,
            observation_fc_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=tf.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform'),
            name='observation_encoding')

        self._action_layers = utils.mlp_layers(
            None,
            action_fc_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=tf.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform'),
            name='action_encoding')

        self._joint_layers = utils.mlp_layers(
            None,
            joint_fc_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=tf.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform'),
            name='joint_mlp')

        self._joint_layers.append(
            tf.keras.layers.Dense(
                1,
                activation=None,
                kernel_initializer=tf.keras.initializers.RandomUniform(
                    minval=-0.003, maxval=0.003),
                name='value'))
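# Hedged usage sketch (editorial; the specs are illustrative and assume the
# CriticNetwork above is importable). The critic encodes observation and
# action separately, then merges them in the joint MLP to regress one Q-value.
import tensorflow as tf
from tf_agents.specs import tensor_spec

critic = CriticNetwork(
    observation_spec=tensor_spec.TensorSpec((8,), tf.float32),
    action_spec=tensor_spec.BoundedTensorSpec(
        (2,), tf.float32, minimum=-1.0, maximum=1.0),
    observation_fc_layer_params=(200,),
    action_fc_layer_params=(200,),
    joint_fc_layer_params=(100,))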
Example #22
    def __init__(self,
                 input_tensor_spec,
                 fc_layer_params=(75, 40),
                 dropout_layer_params=None,
                 conv_layer_params=None,
                 activation_fn=tf.keras.activations.relu,
                 name='ValueNetwork'):
        """Creates an instance of `ValueNetwork`.

    Network supports calls with shape outer_rank + observation_spec.shape. Note
    outer_rank must be at least 1.

    Args:
      input_tensor_spec: A `tensor_spec.TensorSpec` or a tuple of specs
        representing the input observations.
      fc_layer_params: Optional list of fully_connected parameters, where each
        item is the number of units in the layer.
      dropout_layer_params: Optional list of dropout layer parameters, each item
        is the fraction of input units to drop or a dictionary of parameters
        according to the keras.Dropout documentation. The additional parameter
        `permanent', if set to True, allows dropout to be applied at inference
        for approximate Bayesian inference. The dropout layers are interleaved
        with the fully connected layers; there is a dropout layer after each
        fully connected layer, except if the entry in the list is None. This
        list must have the same length as fc_layer_params, or be None.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride).
      activation_fn: Activation function, e.g. tf.keras.activations.relu.
      name: A string representing name of the network.

    Raises:
      ValueError: If input_tensor_spec is not an instance of network.InputSpec.
      ValueError: If `input_tensor_spec.observations` contains more than one
      observation.
    """
        super(ValueNetwork, self).__init__(input_tensor_spec=input_tensor_spec,
                                           state_spec=(),
                                           name=name)

        if len(tf.nest.flatten(input_tensor_spec)) > 1:
            raise ValueError(
                'Network only supports observation specs with a single observation.'
            )

        self._postprocessing_layers = utils.mlp_layers(
            conv_layer_params,
            fc_layer_params,
            dropout_layer_params=dropout_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=tf.compat.v1.keras.initializers.glorot_uniform(),
            name='input_mlp')

        self._postprocessing_layers.append(
            tf.keras.layers.Dense(
                1,
                activation=None,
                kernel_initializer=tf.compat.v1.initializers.random_uniform(
                    minval=-0.03, maxval=0.03),
            ))
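# Hedged usage sketch (editorial; assumes the ValueNetwork above is importable
# and that its call method, not shown here, follows the usual
# (output, network_state) convention).
import tensorflow as tf
from tf_agents.specs import tensor_spec

value_net = ValueNetwork(tensor_spec.TensorSpec((4,), tf.float32))
values, _ = value_net(tf.zeros((32, 4)))  # batched call: outer_rank of 1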
Example #23
    def __init__(self,
                 input_tensor_spec,
                 output_tensor_spec,
                 fc_layer_params=None,
                 conv_layer_params=None,
                 activation_fn=tf.keras.activations.relu,
                 name='ActorNetwork'):
        """Creates an instance of `ActorNetwork`.

    Args:
      input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing the
        inputs.
      output_tensor_spec: A nest of `tensor_spec.BoundedTensorSpec` representing
        the outputs.
      fc_layer_params: Optional list of fully_connected parameters, where each
        item is the number of units in the layer.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride).
      activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
      name: A string representing name of the network.

    Raises:
      ValueError: If `input_tensor_spec` or `action_spec` contains more than one
        item, or if the action data type is not `float`.
    """

        super(ActorNetwork, self).__init__(input_tensor_spec=input_tensor_spec,
                                           state_spec=(),
                                           name=name)

        if len(tf.nest.flatten(input_tensor_spec)) > 1:
            raise ValueError(
                'Only a single observation is supported by this network')

        flat_action_spec = tf.nest.flatten(output_tensor_spec)
        if len(flat_action_spec) > 1:
            raise ValueError(
                'Only a single action is supported by this network')
        self._single_action_spec = flat_action_spec[0]

        if self._single_action_spec.dtype not in [tf.float32, tf.float64]:
            raise ValueError(
                'Only float actions are supported by this network.')

        # TODO(kbanoop): Replace mlp_layers with encoding networks.
        self._mlp_layers = utils.mlp_layers(
            conv_layer_params,
            fc_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform'),
            name='input_mlp')

        self._mlp_layers.append(
            tf.keras.layers.Dense(
                flat_action_spec[0].shape.num_elements(),
                activation=tf.keras.activations.tanh,
                kernel_initializer=tf.keras.initializers.RandomUniform(
                    minval=-0.003, maxval=0.003),
                name='action'))

        self._output_tensor_spec = output_tensor_spec
Example #24
    def __init__(
            self,
            input_tensor_spec,
            # observation_conv_layer_params=None,
            # observation_fc_layer_params=None,
            # observation_dropout_layer_params=None,
            # action_fc_layer_params=None,
            # action_dropout_layer_params=None,
            preprocessing_layers,
            preprocessing_combiner,
            joint_fc_layer_params=None,
            joint_dropout_layer_params=None,
            joint_activation_fn=tf.nn.relu,
            output_activation_fn=None,
            kernel_initializer=None,
            last_kernel_initializer=None,
            name='CriticNetwork'):
        """Creates an instance of `CriticNetwork`.
    Args:
      input_tensor_spec: A tuple of (observation, action) each a nest of
        `tensor_spec.TensorSpec` representing the inputs.
      preprocessing_layers: A nest of `tf.keras.layers.Layer` representing
        preprocessing for the different observations.
      preprocessing_combiner: A keras layer that takes a flat list of tensors
        and combines them, e.g. `tf.keras.layers.Concatenate(axis=-1)`.
      observation_conv_layer_params: Optional list of convolution layer
        parameters for observations, where each item is a length-three tuple
        indicating (num_units, kernel_size, stride).
      observation_fc_layer_params: Optional list of fully connected parameters
        for observations, where each item is the number of units in the layer.
      observation_dropout_layer_params: Optional list of dropout layer
        parameters, each item is the fraction of input units to drop or a
        dictionary of parameters according to the keras.Dropout documentation.
        The additional parameter `permanent', if set to True, allows dropout
        to be applied at inference for approximate Bayesian inference. The
        dropout layers are interleaved with the fully connected layers; there
        is a dropout layer after each fully connected layer, except if the
        entry in the list is None. This list must have the same length as
        observation_fc_layer_params, or be None.
      action_fc_layer_params: Optional list of fully connected parameters for
        actions, where each item is the number of units in the layer.
      action_dropout_layer_params: Optional list of dropout layer parameters,
        each item is the fraction of input units to drop or a dictionary of
        parameters according to the keras.Dropout documentation. The additional
        parameter `permanent', if set to True, allows dropout to be applied at
        inference for approximate Bayesian inference. The dropout layers are
        interleaved with the fully connected layers; there is a dropout layer
        after each fully connected layer, except if the entry in the list is
        None. This list must have the same length as action_fc_layer_params,
        or be None.
      joint_fc_layer_params: Optional list of fully connected parameters after
        merging observations and actions, where each item is the number of units
        in the layer.
      joint_dropout_layer_params: Optional list of dropout layer parameters,
        each item is the fraction of input units to drop or a dictionary of
        parameters according to the keras.Dropout documentation. The additional
        parameter `permanent', if set to True, allows dropout to be applied at
        inference for approximate Bayesian inference. The dropout layers are
        interleaved with the fully connected layers; there is a dropout layer
        after each fully connected layer, except if the entry in the list is
        None. This list must have the same length as joint_fc_layer_params,
        or be None.
      joint_activation_fn: Activation function applied to the joint layers,
        e.g. tf.nn.relu, slim.leaky_relu, ...
      output_activation_fn: Activation function for the last layer. This can be
        used to restrict the range of the output. For example, one can pass
        tf.keras.activations.sigmoid here to restrict the output to be bounded
        between 0 and 1.
      kernel_initializer: kernel initializer for all layers except for the value
        regression layer. If None, a VarianceScaling initializer will be used.
      last_kernel_initializer: kernel initializer for the value regression
        layer. If None, a RandomUniform initializer will be used.
      name: A string representing name of the network.
    Raises:
      ValueError: If `action_spec` contains more than one action.
    """
        super(MultiObservationCriticNetwork,
              self).__init__(input_tensor_spec=input_tensor_spec,
                             state_spec=(),
                             name=name)

        observation_spec, action_spec = input_tensor_spec

        flat_action_spec = tf.nest.flatten(action_spec)
        self._single_action_spec = flat_action_spec[0]
        # set up kernel_initializer
        if kernel_initializer is None:
            kernel_initializer = tf.compat.v1.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform')
        if last_kernel_initializer is None:
            last_kernel_initializer = tf.keras.initializers.RandomUniform(
                minval=-0.003, maxval=0.003)
        # set up encoder_network
        self._encoder = encoding_network.EncodingNetwork(
            observation_spec,
            preprocessing_layers=preprocessing_layers,
            preprocessing_combiner=preprocessing_combiner,
            conv_layer_params=None,
            fc_layer_params=None,
            dropout_layer_params=None,
            activation_fn=tf.keras.activations.relu,
            kernel_initializer=kernel_initializer,
            batch_squash=False)

        # TODO(kbanoop): Replace mlp_layers with encoding networks.
        # self._observation_layers = utils.mlp_layers(
        #     observation_conv_layer_params,
        #     observation_fc_layer_params,
        #     observation_dropout_layer_params,
        #     activation_fn=activation_fn,
        #     kernel_initializer=kernel_initializer,
        #     name='observation_encoding')

        # self._action_layers = utils.mlp_layers(
        #     None,
        #     action_fc_layer_params,
        #     action_dropout_layer_params,
        #     activation_fn=activation_fn,
        #     kernel_initializer=kernel_initializer,
        #     name='action_encoding')

        self._joint_layers = utils.mlp_layers(
            None,
            joint_fc_layer_params,
            joint_dropout_layer_params,
            activation_fn=joint_activation_fn,
            kernel_initializer=kernel_initializer,
            name='joint_mlp')

        self._joint_layers.append(
            tf.keras.layers.Dense(1,
                                  activation=output_activation_fn,
                                  kernel_initializer=last_kernel_initializer,
                                  name='value'))
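
A minimal usage sketch of the network above (the specs, layer sizes, and dropout rates are illustrative assumptions, not part of the original example):

```
import tensorflow as tf
from tf_agents.specs import tensor_spec

# Hypothetical specs and preprocessing layers, for illustration only.
observation_spec = {
    'camera': tensor_spec.TensorSpec([128], tf.float32, 'camera'),
    'sensor': tensor_spec.TensorSpec([10], tf.float32, 'sensor'),
}
action_spec = tensor_spec.BoundedTensorSpec([2], tf.float32, -1.0, 1.0,
                                            'action')

critic_net = MultiObservationCriticNetwork(
    (observation_spec, action_spec),
    preprocessing_layers={
        'camera': tf.keras.layers.Dense(64),
        'sensor': tf.keras.layers.Dense(64),
    },
    preprocessing_combiner=tf.keras.layers.Concatenate(axis=-1),
    joint_fc_layer_params=(256, 256),
    # The dict form keeps dropout active at inference time (approximate
    # Bayesian inference), as described in the docstring.
    joint_dropout_layer_params=[dict(rate=0.1, permanent=True), None])
```
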
    def __init__(self,
                 observation_spec,
                 action_spec,
                 observation_conv_layer_params=None,
                 observation_fc_layer_params=(200, ),
                 action_fc_layer_params=(200, ),
                 joint_fc_layer_params=(100, ),
                 lstm_size=(40, ),
                 output_fc_layer_params=(200, 100),
                 activation_fn=tf.keras.activations.relu,
                 name='CriticRnnNetwork'):
        """Creates an instance of `CriticRnnNetwork`.

    Args:
      observation_spec: A nest of `tensor_spec.TensorSpec` representing the
        observations.
      action_spec: A nest of `tensor_spec.BoundedTensorSpec` representing the
        actions.
      observation_conv_layer_params: Optional list of convolution layers
        parameters to apply to the observations, where each item is a
        length-three tuple indicating (filters, kernel_size, stride).
      observation_fc_layer_params: Optional list of fully_connected parameters,
        where each item is the number of units in the layer. This is applied
        after the observation convolutional layers.
      action_fc_layer_params: Optional list of parameters for a fully_connected
        layer to apply to the actions, where each item is the number of units
        in the layer.
      joint_fc_layer_params: Optional list of parameters for a fully_connected
        layer to apply after merging observations and actions, where each item
        is the number of units in the layer.
      lstm_size: An iterable of ints specifying the LSTM cell sizes to use.
      output_fc_layer_params: Optional list of fully_connected parameters, where
        each item is the number of units in the layer. This is applied after the
        LSTM cell.
      activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
      name: A string representing name of the network.

    Raises:
      ValueError: If `observation_spec` or `action_spec` contains more than one
        item.
    """
        if len(nest.flatten(observation_spec)) > 1:
            raise ValueError(
                'Only a single observation is supported by this network.')

        if len(nest.flatten(action_spec)) > 1:
            raise ValueError(
                'Only a single action is supported by this network.')

        observation_layers = utils.mlp_layers(
            observation_conv_layer_params,
            observation_fc_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=tf.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform'),
            name='observation_encoding')

        action_layers = utils.mlp_layers(
            None,
            action_fc_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=tf.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform'),
            name='action_encoding')

        joint_layers = utils.mlp_layers(
            None,
            joint_fc_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=tf.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform'),
            name='joint_mlp')

        # Create RNN cell
        if len(lstm_size) == 1:
            cell = tf.keras.layers.LSTMCell(lstm_size[0])
        else:
            cell = tf.keras.layers.StackedRNNCells(
                [tf.keras.layers.LSTMCell(size) for size in lstm_size])

        state_spec = nest.map_structure(
            functools.partial(tensor_spec.TensorSpec,
                              dtype=tf.float32,
                              name='network_state_spec'),
            list(cell.state_size))

        output_layers = utils.mlp_layers(
            fc_layer_params=output_fc_layer_params, name='output')

        output_layers.append(
            tf.keras.layers.Dense(
                1,
                activation=None,
                kernel_initializer=tf.keras.initializers.RandomUniform(
                    minval=-0.003, maxval=0.003),
                name='value'))

        super(CriticRnnNetwork,
              self).__init__(observation_spec=observation_spec,
                             action_spec=action_spec,
                             state_spec=state_spec,
                             name=name)

        self._observation_layers = observation_layers
        self._action_layers = action_layers
        self._joint_layers = joint_layers
        self._dynamic_unroll = dynamic_unroll_layer.DynamicUnroll(cell)
        self._output_layers = output_layers
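
A side note on the cell construction above: `state_size` differs between a single `LSTMCell` and `StackedRNNCells`, which is why the state spec is built with `map_structure`. A small sketch (sizes are illustrative):

```
import tensorflow as tf

single = tf.keras.layers.LSTMCell(40)
stacked = tf.keras.layers.StackedRNNCells(
    [tf.keras.layers.LSTMCell(size) for size in (40, 32)])

# A single LSTMCell exposes [units, units] (hidden state h, cell state c);
# StackedRNNCells nests one such pair per layer.
print(single.state_size)   # [40, 40]
print(stacked.state_size)  # expected: ([40, 40], [32, 32])
```
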
    def __init__(self,
                 input_tensor_spec,
                 preprocessing_combiner=None,
                 observation_conv_layer_params=None,
                 observation_fc_layer_params=None,
                 observation_dropout_layer_params=None,
                 action_fc_layer_params=None,
                 action_dropout_layer_params=None,
                 joint_fc_layer_params=None,
                 joint_dropout_layer_params=None,
                 activation_fn=tf.nn.relu,
                 output_activation_fn=None,
                 mask_xy=False,
                 name='CriticNetwork'):

        super(CriticNetwork,
              self).__init__(input_tensor_spec=input_tensor_spec,
                             state_spec=(),
                             name=name)

        self._mask_xy = mask_xy

        observation_spec, action_spec = input_tensor_spec
        flat_action_spec = tf.nest.flatten(action_spec)
        if len(flat_action_spec) > 1:
            raise ValueError(
                'Only a single action is supported by this network')
        self._single_action_spec = flat_action_spec[0]

        self._observation_layers = utils.mlp_layers(
            observation_conv_layer_params,
            observation_fc_layer_params,
            observation_dropout_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform'),
            name='observation_encoding')

        self._action_layers = utils.mlp_layers(
            None,
            action_fc_layer_params,
            action_dropout_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform'),
            name='action_encoding')

        self._joint_layers = utils.mlp_layers(
            None,
            joint_fc_layer_params,
            joint_dropout_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform'),
            name='joint_mlp')

        self._joint_layers.append(
            tf.keras.layers.Dense(
                1,
                activation=output_activation_fn,
                kernel_initializer=tf.keras.initializers.RandomUniform(
                    minval=-0.003, maxval=0.003),
                name='value'))

        self._preprocessing_combiner = preprocessing_combiner
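
The `VarianceScaling(scale=1./3., mode='fan_in', distribution='uniform')` initializer that recurs in these examples reproduces the classic DDPG hidden-layer initialization, uniform on [-1/sqrt(fan_in), 1/sqrt(fan_in)]. A quick numeric check (the shape is arbitrary):

```
import numpy as np
import tensorflow as tf

fan_in = 300
init = tf.keras.initializers.VarianceScaling(
    scale=1. / 3., mode='fan_in', distribution='uniform')
w = init((fan_in, 100)).numpy()

# VarianceScaling draws from uniform(-limit, limit) with
# limit = sqrt(3 * scale / fan_in) = sqrt(1 / fan_in) here.
print(np.abs(w).max(), 1.0 / np.sqrt(fan_in))
```
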
Example #27
    def __init__(self,
                 observation_spec,
                 action_spec,
                 conv_layer_params=None,
                 input_fc_layer_params=(200, 100),
                 lstm_size=(40, ),
                 output_fc_layer_params=(200, 100),
                 activation_fn=tf.keras.activations.relu,
                 name='ActorRnnNetwork'):
        """Creates an instance of `ActorRnnNetwork`.

    Args:
      observation_spec: A nest of `tensor_spec.TensorSpec` representing the
        observations.
      action_spec: A nest of `tensor_spec.BoundedTensorSpec` representing the
        actions.
      conv_layer_params: Optional list of convolution layers parameters, where
        each item is a length-three tuple indicating (filters, kernel_size,
        stride).
      input_fc_layer_params: Optional list of fully_connected parameters, where
        each item is the number of units in the layer. This is applied before
        the LSTM cell.
      lstm_size: An iterable of ints specifying the LSTM cell sizes to use.
      output_fc_layer_params: Optional list of fully_connected parameters, where
        each item is the number of units in the layer. This is applied after the
        LSTM cell.
      activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
      name: A string representing name of the network.

    Raises:
      ValueError: If `observation_spec` contains more than one observation.
    """
        if len(nest.flatten(observation_spec)) > 1:
            raise ValueError(
                'Only a single observation is supported by this network')

        input_layers = utils.mlp_layers(
            conv_layer_params,
            input_fc_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=tf.keras.initializers.glorot_uniform(),
            name='input_mlp')

        # Create RNN cell
        if len(lstm_size) == 1:
            cell = tf.keras.layers.LSTMCell(lstm_size[0])
        else:
            cell = tf.keras.layers.StackedRNNCells(
                [tf.keras.layers.LSTMCell(size) for size in lstm_size])

        state_spec = nest.map_structure(
            functools.partial(tensor_spec.TensorSpec,
                              dtype=tf.float32,
                              name='network_state_spec'),
            list(cell.state_size))

        output_layers = utils.mlp_layers(
            fc_layer_params=output_fc_layer_params, name='output')

        flat_action_spec = nest.flatten(action_spec)
        action_layers = [
            tf.keras.layers.Dense(
                single_action_spec.shape.num_elements(),
                activation=tf.keras.activations.tanh,
                kernel_initializer=tf.keras.initializers.RandomUniform(
                    minval=-0.003, maxval=0.003),
                name='action') for single_action_spec in flat_action_spec
        ]

        super(ActorRnnNetwork,
              self).__init__(observation_spec=observation_spec,
                             action_spec=action_spec,
                             state_spec=state_spec,
                             name=name)

        self._flat_action_spec = flat_action_spec
        self._conv_layer_params = conv_layer_params
        self._input_layers = input_layers
        self._cell = cell
        self._output_layers = output_layers
        self._action_layers = action_layers
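
The action layers above emit tanh outputs in [-1, 1]; mapping them onto the bounds of the action spec is typically done with a helper such as `tf_agents.utils.common.scale_to_spec`. A hedged re-implementation sketch:

```
import tensorflow as tf

def scale_to_spec(tanh_output, spec):
    # Affine map from [-1, 1] to [spec.minimum, spec.maximum].
    means = (spec.maximum + spec.minimum) / 2.0
    magnitudes = (spec.maximum - spec.minimum) / 2.0
    return (tf.cast(means, tanh_output.dtype) +
            tf.cast(magnitudes, tanh_output.dtype) * tanh_output)
```
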
Example #28
def train_eval(
        root_dir,
        gpu=0,
        env_load_fn=None,
        model_ids=None,
        eval_env_mode='headless',
        num_iterations=1000000,
        conv_layer_params=None,
        encoder_fc_layers=[256],
        actor_fc_layers=[400, 300],
        critic_obs_fc_layers=[400],
        critic_action_fc_layers=None,
        critic_joint_fc_layers=[300],
        # Params for collect
        initial_collect_steps=1000,
        collect_steps_per_iteration=1,
        num_parallel_environments=1,
        replay_buffer_capacity=100000,
        ou_stddev=0.2,
        ou_damping=0.15,
        # Params for target update
        target_update_tau=0.05,
        target_update_period=5,
        # Params for train
        train_steps_per_iteration=1,
        batch_size=64,
        actor_learning_rate=1e-4,
        critic_learning_rate=1e-3,
        dqda_clipping=None,
        td_errors_loss_fn=tf.compat.v1.losses.huber_loss,
        gamma=0.995,
        reward_scale_factor=1.0,
        gradient_clipping=None,
        # Params for eval
        num_eval_episodes=10,
        eval_interval=10000,
        eval_only=False,
        eval_deterministic=False,
        num_parallel_environments_eval=1,
        model_ids_eval=None,
        # Params for checkpoints, summaries, and logging
        train_checkpoint_interval=10000,
        policy_checkpoint_interval=10000,
        rb_checkpoint_interval=50000,
        log_interval=100,
        summary_interval=1000,
        summaries_flush_secs=10,
        debug_summaries=False,
        summarize_grads_and_vars=False,
        eval_metrics_callback=None):
    """A simple train and eval for DDPG."""
    root_dir = os.path.expanduser(root_dir)
    train_dir = os.path.join(root_dir, 'train')
    eval_dir = os.path.join(root_dir, 'eval')

    train_summary_writer = tf.compat.v2.summary.create_file_writer(
        train_dir, flush_millis=summaries_flush_secs * 1000)
    train_summary_writer.set_as_default()

    eval_summary_writer = tf.compat.v2.summary.create_file_writer(
        eval_dir, flush_millis=summaries_flush_secs * 1000)
    eval_metrics = [
        batched_py_metric.BatchedPyMetric(
            py_metrics.AverageReturnMetric,
            metric_args={'buffer_size': num_eval_episodes},
            batch_size=num_parallel_environments_eval),
        batched_py_metric.BatchedPyMetric(
            py_metrics.AverageEpisodeLengthMetric,
            metric_args={'buffer_size': num_eval_episodes},
            batch_size=num_parallel_environments_eval),
    ]
    eval_summary_flush_op = eval_summary_writer.flush()

    global_step = tf.compat.v1.train.get_or_create_global_step()
    with tf.compat.v2.summary.record_if(
            lambda: tf.math.equal(global_step % summary_interval, 0)):
        if model_ids is None:
            model_ids = [None] * num_parallel_environments
        else:
            assert len(model_ids) == num_parallel_environments, \
                'model ids provided, but length not equal to num_parallel_environments'

        if model_ids_eval is None:
            model_ids_eval = [None] * num_parallel_environments_eval
        else:
            assert len(model_ids_eval) == num_parallel_environments_eval,\
                'model ids eval provided, but length not equal to num_parallel_environments_eval'

        tf_py_env = [
            lambda model_id=model_ids[i]: env_load_fn(model_id, 'headless', gpu)
            for i in range(num_parallel_environments)
        ]
        tf_env = tf_py_environment.TFPyEnvironment(
            parallel_py_environment.ParallelPyEnvironment(tf_py_env))

        if eval_env_mode == 'gui':
            assert num_parallel_environments_eval == 1, 'only one GUI env is allowed'
        eval_py_env = [
            lambda model_id=model_ids_eval[i]: env_load_fn(
                model_id, eval_env_mode, gpu)
            for i in range(num_parallel_environments_eval)
        ]
        eval_py_env = parallel_py_environment.ParallelPyEnvironment(
            eval_py_env)

        # Get the data specs from the environment
        time_step_spec = tf_env.time_step_spec()
        observation_spec = time_step_spec.observation
        action_spec = tf_env.action_spec()
        print('observation_spec', observation_spec)
        print('action_spec', action_spec)

        glorot_uniform_initializer = (
            tf.compat.v1.keras.initializers.glorot_uniform())
        preprocessing_layers = {
            'depth_seg':
            tf.keras.Sequential(
                mlp_layers(
                    conv_layer_params=conv_layer_params,
                    fc_layer_params=encoder_fc_layers,
                    kernel_initializer=glorot_uniform_initializer,
                )),
            'sensor':
            tf.keras.Sequential(
                mlp_layers(
                    conv_layer_params=None,
                    fc_layer_params=encoder_fc_layers,
                    kernel_initializer=glorot_uniform_initializer,
                )),
        }
        preprocessing_combiner = tf.keras.layers.Concatenate(axis=-1)

        actor_net = actor_network.ActorNetwork(
            observation_spec,
            action_spec,
            preprocessing_layers=preprocessing_layers,
            preprocessing_combiner=preprocessing_combiner,
            fc_layer_params=actor_fc_layers,
            kernel_initializer=glorot_uniform_initializer,
        )

        critic_net = critic_network.CriticNetwork(
            (observation_spec, action_spec),
            preprocessing_layers=preprocessing_layers,
            preprocessing_combiner=preprocessing_combiner,
            observation_fc_layer_params=critic_obs_fc_layers,
            action_fc_layer_params=critic_action_fc_layers,
            joint_fc_layer_params=critic_joint_fc_layers,
            kernel_initializer=glorot_uniform_initializer,
        )

        tf_agent = ddpg_agent.DdpgAgent(
            tf_env.time_step_spec(),
            tf_env.action_spec(),
            actor_network=actor_net,
            critic_network=critic_net,
            actor_optimizer=tf.compat.v1.train.AdamOptimizer(
                learning_rate=actor_learning_rate),
            critic_optimizer=tf.compat.v1.train.AdamOptimizer(
                learning_rate=critic_learning_rate),
            ou_stddev=ou_stddev,
            ou_damping=ou_damping,
            target_update_tau=target_update_tau,
            target_update_period=target_update_period,
            dqda_clipping=dqda_clipping,
            td_errors_loss_fn=td_errors_loss_fn,
            gamma=gamma,
            reward_scale_factor=reward_scale_factor,
            gradient_clipping=gradient_clipping,
            debug_summaries=debug_summaries,
            summarize_grads_and_vars=summarize_grads_and_vars,
            train_step_counter=global_step)

        config = tf.compat.v1.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.compat.v1.Session(config=config)

        replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(
            data_spec=tf_agent.collect_data_spec,
            batch_size=tf_env.batch_size,
            max_length=replay_buffer_capacity)
        replay_observer = [replay_buffer.add_batch]

        if eval_deterministic:
            eval_py_policy = py_tf_policy.PyTFPolicy(
                greedy_policy.GreedyPolicy(tf_agent.policy))
        else:
            eval_py_policy = py_tf_policy.PyTFPolicy(tf_agent.policy)

        step_metrics = [
            tf_metrics.NumberOfEpisodes(),
            tf_metrics.EnvironmentSteps(),
        ]
        train_metrics = step_metrics + [
            tf_metrics.AverageReturnMetric(
                buffer_size=100, batch_size=num_parallel_environments),
            tf_metrics.AverageEpisodeLengthMetric(
                buffer_size=100, batch_size=num_parallel_environments),
        ]

        collect_policy = tf_agent.collect_policy
        initial_collect_policy = random_tf_policy.RandomTFPolicy(
            time_step_spec, action_spec)

        initial_collect_op = dynamic_step_driver.DynamicStepDriver(
            tf_env,
            initial_collect_policy,
            observers=replay_observer + train_metrics,
            num_steps=initial_collect_steps * num_parallel_environments).run()

        collect_op = dynamic_step_driver.DynamicStepDriver(
            tf_env,
            collect_policy,
            observers=replay_observer + train_metrics,
            num_steps=collect_steps_per_iteration *
            num_parallel_environments).run()

        # Prepare replay buffer as dataset with invalid transitions filtered.
        def _filter_invalid_transition(trajectories, unused_arg1):
            return ~trajectories.is_boundary()[0]

        # Dataset generates trajectories with shape [Bx2x...]
        dataset = replay_buffer.as_dataset(
            num_parallel_calls=5,
            sample_batch_size=5 * batch_size,
            num_steps=2).apply(tf.data.experimental.unbatch()).filter(
                _filter_invalid_transition).batch(batch_size).prefetch(5)
        dataset_iterator = tf.compat.v1.data.make_initializable_iterator(
            dataset)
        trajectories, unused_info = dataset_iterator.get_next()
        train_op = tf_agent.train(trajectories)

        summary_ops = []
        for train_metric in train_metrics:
            summary_ops.append(
                train_metric.tf_summaries(train_step=global_step,
                                          step_metrics=step_metrics))

        with eval_summary_writer.as_default(), tf.compat.v2.summary.record_if(
                True):
            for eval_metric in eval_metrics:
                eval_metric.tf_summaries(train_step=global_step,
                                         step_metrics=step_metrics)

        train_checkpointer = common.Checkpointer(
            ckpt_dir=train_dir,
            agent=tf_agent,
            global_step=global_step,
            metrics=metric_utils.MetricsGroup(train_metrics, 'train_metrics'))
        policy_checkpointer = common.Checkpointer(ckpt_dir=os.path.join(
            train_dir, 'policy'),
                                                  policy=tf_agent.policy,
                                                  global_step=global_step)
        rb_checkpointer = common.Checkpointer(ckpt_dir=os.path.join(
            train_dir, 'replay_buffer'),
                                              max_to_keep=1,
                                              replay_buffer=replay_buffer)

        init_agent_op = tf_agent.initialize()
        with sess.as_default():
            # Initialize the graph.
            train_checkpointer.initialize_or_restore(sess)

            if eval_only:
                metric_utils.compute_summaries(
                    eval_metrics,
                    eval_py_env,
                    eval_py_policy,
                    num_episodes=num_eval_episodes,
                    global_step=0,
                    callback=eval_metrics_callback,
                    tf_summaries=False,
                    log=True,
                )
                episodes = eval_py_env.get_stored_episodes()
                episodes = [
                    episode for sublist in episodes for episode in sublist
                ][:num_eval_episodes]
                metrics = episode_utils.get_metrics(episodes)
                for key in sorted(metrics.keys()):
                    print(key, ':', metrics[key])

                save_path = os.path.join(eval_dir, 'episodes_vis.pkl')
                episode_utils.save(episodes, save_path)
                print('EVAL DONE')
                return

            # Initialize training.
            rb_checkpointer.initialize_or_restore(sess)
            sess.run(dataset_iterator.initializer)
            common.initialize_uninitialized_variables(sess)
            sess.run(init_agent_op)
            sess.run(train_summary_writer.init())
            sess.run(eval_summary_writer.init())

            global_step_val = sess.run(global_step)
            if global_step_val == 0:
                # Initial eval of randomly initialized policy
                metric_utils.compute_summaries(
                    eval_metrics,
                    eval_py_env,
                    eval_py_policy,
                    num_episodes=num_eval_episodes,
                    global_step=0,
                    callback=eval_metrics_callback,
                    tf_summaries=True,
                    log=True,
                )
                # Run initial collect.
                logging.info('Global step %d: Running initial collect op.',
                             global_step_val)
                sess.run(initial_collect_op)

                # Checkpoint the initial replay buffer contents.
                rb_checkpointer.save(global_step=global_step_val)

                logging.info('Finished initial collect.')
            else:
                logging.info('Global step %d: Skipping initial collect op.',
                             global_step_val)

            collect_call = sess.make_callable(collect_op)
            train_step_call = sess.make_callable([train_op, summary_ops])
            global_step_call = sess.make_callable(global_step)

            timed_at_step = sess.run(global_step)
            time_acc = 0
            steps_per_second_ph = tf.compat.v1.placeholder(
                tf.float32, shape=(), name='steps_per_sec_ph')
            steps_per_second_summary = tf.compat.v2.summary.scalar(
                name='global_steps_per_sec',
                data=steps_per_second_ph,
                step=global_step)

            for _ in range(num_iterations):
                start_time = time.time()
                collect_call()
                # print('collect:', time.time() - start_time)

                # train_start_time = time.time()
                for _ in range(train_steps_per_iteration):
                    loss_info_value, _ = train_step_call()
                # print('train:', time.time() - train_start_time)

                time_acc += time.time() - start_time
                global_step_val = global_step_call()
                if global_step_val % log_interval == 0:
                    logging.info('step = %d, loss = %f', global_step_val,
                                 loss_info_value.loss)
                    steps_per_sec = (global_step_val -
                                     timed_at_step) / time_acc
                    logging.info('%.3f steps/sec', steps_per_sec)
                    sess.run(steps_per_second_summary,
                             feed_dict={steps_per_second_ph: steps_per_sec})
                    timed_at_step = global_step_val
                    time_acc = 0

                if global_step_val % train_checkpoint_interval == 0:
                    train_checkpointer.save(global_step=global_step_val)

                if global_step_val % policy_checkpoint_interval == 0:
                    policy_checkpointer.save(global_step=global_step_val)

                if global_step_val % rb_checkpoint_interval == 0:
                    rb_checkpointer.save(global_step=global_step_val)

                if global_step_val % eval_interval == 0:
                    metric_utils.compute_summaries(
                        eval_metrics,
                        eval_py_env,
                        eval_py_policy,
                        num_episodes=num_eval_episodes,
                        global_step=0,
                        callback=eval_metrics_callback,
                        tf_summaries=True,
                        log=True,
                    )
                    with eval_summary_writer.as_default(), \
                            tf.compat.v2.summary.record_if(True):
                        with tf.name_scope('Metrics/'):
                            episodes = eval_py_env.get_stored_episodes()
                            episodes = [
                                episode for sublist in episodes
                                for episode in sublist
                            ][:num_eval_episodes]
                            metrics = episode_utils.get_metrics(episodes)
                            for key in sorted(metrics.keys()):
                                print(key, ':', metrics[key])
                                metric_op = tf.compat.v2.summary.scalar(
                                    name=key,
                                    data=metrics[key],
                                    step=global_step_val)
                                sess.run(metric_op)
                    sess.run(eval_summary_flush_op)

        sess.close()
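
The replay dataset in `train_eval` samples oversized batches, unbatches, filters out boundary transitions, and re-batches. A self-contained sketch of the same pattern on toy data (field names are illustrative):

```
import tensorflow as tf

batch_size = 4
toy = tf.data.Dataset.from_tensor_slices({
    'value': tf.range(20),
    'is_boundary': tf.equal(tf.range(20) % 5, 0),
})

dataset = (toy
           .batch(5 * batch_size)                 # oversample
           .unbatch()                             # back to single items
           .filter(lambda t: ~t['is_boundary'])   # drop boundaries
           .batch(batch_size)
           .prefetch(5))

for batch in dataset:
    print(batch['value'].numpy())
```
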
Example #29
    def __init__(self,
                 input_tensor_spec,
                 observation_conv_layer_params=None,
                 observation_fc_layer_params=(200, ),
                 action_fc_layer_params=(200, ),
                 joint_fc_layer_params=(100, ),
                 lstm_size=None,
                 output_fc_layer_params=(200, 100),
                 activation_fn=tf.keras.activations.relu,
                 kernel_initializer=None,
                 last_kernel_initializer=None,
                 rnn_construction_fn=None,
                 rnn_construction_kwargs=None,
                 name='CriticRnnNetwork'):
        """Creates an instance of `CriticRnnNetwork`.

    Args:
      input_tensor_spec: A tuple of (observation, action) each of type
        `tensor_spec.TensorSpec` representing the inputs.
      observation_conv_layer_params: Optional list of convolution layers
        parameters to apply to the observations, where each item is a
        length-three tuple indicating (filters, kernel_size, stride).
      observation_fc_layer_params: Optional list of fully_connected parameters,
        where each item is the number of units in the layer. This is applied
        after the observation convolutional layers.
      action_fc_layer_params: Optional list of parameters for a fully_connected
        layer to apply to the actions, where each item is the number of units
        in the layer.
      joint_fc_layer_params: Optional list of parameters for a fully_connected
        layer to apply after merging observations and actions, where each item
        is the number of units in the layer.
      lstm_size: An iterable of ints specifying the LSTM cell sizes to use.
      output_fc_layer_params: Optional list of fully_connected parameters, where
        each item is the number of units in the layer. This is applied after the
        LSTM cell.
      activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
      kernel_initializer: kernel initializer for all layers except for the value
        regression layer. If None, a VarianceScaling initializer will be used.
      last_kernel_initializer: kernel initializer for the value regression
        layer. If None, a RandomUniform initializer will be used.
      rnn_construction_fn: (Optional.) Alternate RNN construction function, e.g.
        tf.keras.layers.LSTM, tf.keras.layers.CuDNNLSTM. It is invalid to
        provide both rnn_construction_fn and lstm_size.
      rnn_construction_kwargs: (Optional.) Dictionary of arguments to pass to
        rnn_construction_fn.

        The RNN will be constructed via:

        ```
        rnn_layer = rnn_construction_fn(**rnn_construction_kwargs)
        ```
      name: A string representing name of the network.

    Raises:
      ValueError: If `observation_spec` or `action_spec` contains more than one
        item.
      ValueError: If neither `lstm_size` nor `rnn_construction_fn` are provided.
      ValueError: If both `lstm_size` and `rnn_construction_fn` are provided.
    """
        if lstm_size is None and rnn_construction_fn is None:
            raise ValueError(
                'Need to provide either custom rnn_construction_fn or '
                'lstm_size.')
        if lstm_size and rnn_construction_fn:
            raise ValueError(
                'Cannot provide both custom rnn_construction_fn and '
                'lstm_size.')

        observation_spec, action_spec = input_tensor_spec

        if len(tf.nest.flatten(observation_spec)) > 1:
            raise ValueError(
                'Only a single observation is supported by this network.')

        if len(tf.nest.flatten(action_spec)) > 1:
            raise ValueError(
                'Only a single action is supported by this network.')

        if kernel_initializer is None:
            kernel_initializer = tf.compat.v1.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform')
        if last_kernel_initializer is None:
            last_kernel_initializer = tf.keras.initializers.RandomUniform(
                minval=-0.003, maxval=0.003)

        observation_layers = utils.mlp_layers(
            observation_conv_layer_params,
            observation_fc_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=kernel_initializer,
            name='observation_encoding')

        action_layers = utils.mlp_layers(None,
                                         action_fc_layer_params,
                                         activation_fn=activation_fn,
                                         kernel_initializer=kernel_initializer,
                                         name='action_encoding')

        joint_layers = utils.mlp_layers(None,
                                        joint_fc_layer_params,
                                        activation_fn=activation_fn,
                                        kernel_initializer=kernel_initializer,
                                        name='joint_mlp')

        # Create RNN cell
        if rnn_construction_fn:
            rnn_construction_kwargs = rnn_construction_kwargs or {}
            lstm_network = rnn_construction_fn(**rnn_construction_kwargs)
        else:
            if len(lstm_size) == 1:
                cell = tf.keras.layers.LSTMCell(lstm_size[0])
            else:
                cell = tf.keras.layers.StackedRNNCells(
                    [tf.keras.layers.LSTMCell(size) for size in lstm_size])
            lstm_network = dynamic_unroll_layer.DynamicUnroll(cell)

        counter = [-1]

        def create_spec(size):
            counter[0] += 1
            return tensor_spec.TensorSpec(size,
                                          dtype=tf.float32,
                                          name='network_state_%d' % counter[0])

        state_spec = tf.nest.map_structure(create_spec,
                                           lstm_network.cell.state_size)

        output_layers = utils.mlp_layers(
            fc_layer_params=output_fc_layer_params, name='output')

        output_layers.append(
            tf.keras.layers.Dense(1,
                                  activation=None,
                                  kernel_initializer=last_kernel_initializer,
                                  name='value'))

        super(CriticRnnNetwork,
              self).__init__(input_tensor_spec=input_tensor_spec,
                             state_spec=state_spec,
                             name=name)

        self._observation_layers = observation_layers
        self._action_layers = action_layers
        self._joint_layers = joint_layers
        self._lstm_network = lstm_network
        self._output_layers = output_layers
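
A hedged sketch of the `rnn_construction_fn` path described in the docstring above; the specs and kwargs below are illustrative assumptions, not part of the original example:

```
import tensorflow as tf
from tf_agents.specs import tensor_spec

observation_spec = tensor_spec.TensorSpec([10], tf.float32, 'observation')
action_spec = tensor_spec.BoundedTensorSpec([2], tf.float32, -1.0, 1.0,
                                            'action')

# Build the recurrent part from a Keras LSTM layer instead of lstm_size.
critic_net = CriticRnnNetwork(
    (observation_spec, action_spec),
    lstm_size=None,
    rnn_construction_fn=tf.keras.layers.LSTM,
    rnn_construction_kwargs=dict(
        units=64, return_state=True, return_sequences=True))
```
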
Example #30
    def __init__(
        self,
        input_tensor_spec,
        observation_preprocessing_layers=None,
        observation_preprocessing_combiner=None,
        observation_conv_layer_params=None,
        observation_fc_layer_params=(200, 200),
        observation_dropout_layer_params=None,
        action_fc_layer_params=None,
        action_dropout_layer_params=None,
        joint_fc_layer_params=None,
        joint_dropout_layer_params=None,
        activation_fn=tf.keras.activations.relu,
        kernel_initializer=None,
        batch_squash=True,
        dtype=tf.float32,
        name="CriticNetwork",
    ):

        super(CriticNetwork, self).__init__(
            input_tensor_spec=input_tensor_spec,
            state_spec=(),
            name=name
        )

        encoder_input_tensor_spec, _ = input_tensor_spec

        self._encoder = encoding_network.EncodingNetwork(
            encoder_input_tensor_spec,
            preprocessing_layers=observation_preprocessing_layers,
            preprocessing_combiner=observation_preprocessing_combiner,
            conv_layer_params=observation_conv_layer_params,
            fc_layer_params=observation_fc_layer_params,
            dropout_layer_params=observation_dropout_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=kernel_initializer,
            batch_squash=batch_squash,
            dtype=dtype,
            name="observation_encoding"
        )

        self._action_layers = utils.mlp_layers(
            conv_layer_params=None,
            fc_layer_params=action_fc_layer_params,
            dropout_layer_params=action_dropout_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(
                scale=1.0 / 3.0, mode="fan_in", distribution="uniform"
            ),
            name="action_encoding",
        )

        self._joint_layers = utils.mlp_layers(
            conv_layer_params=None,
            fc_layer_params=joint_fc_layer_params,
            dropout_layer_params=joint_dropout_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=tf.compat.v1.keras.initializers.VarianceScaling(
                scale=1.0 / 3.0, mode="fan_in", distribution="uniform"
            ),
            name="joint_mlp",
        )

        self._joint_layers.append(
            tf.keras.layers.Dense(
                1,
                activation=None,
                kernel_initializer=tf.keras.initializers.RandomUniform(
                    minval=-0.003, maxval=0.003
                ),
                name="value",
            )
        )
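
This example ends with the constructor; for context, a hedged sketch of how a `call` method for such a network typically combines the encoder, action layers, and joint layers (illustrative, not the original example's code):

```
import tensorflow as tf

# Illustrative call(), not part of the original example.
def call(self, inputs, step_type=(), network_state=(), training=False):
    observations, actions = inputs
    # Encode observations (preprocessing, conv, and fc layers).
    state, network_state = self._encoder(
        observations, step_type=step_type, network_state=network_state,
        training=training)
    # Encode actions.
    for layer in self._action_layers:
        actions = layer(actions, training=training)
    # Merge and regress to a scalar value per batch element.
    joint = tf.concat([state, actions], axis=-1)
    for layer in self._joint_layers:
        joint = layer(joint, training=training)
    return tf.reshape(joint, [-1]), network_state
```
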