def test_mlp_layers(self):
    """Checks the number and types of layers built by `utils.mlp_layers`.

    Four configurations sharing the same conv and fully connected parameters
    are exercised: no dropout, dropout after both dense layers, dropout only
    after the second dense layer (first entry `None`), and dropout given as a
    dict only after the first dense layer (second entry `None`).
    """
    conv = tf.keras.layers.Conv2D
    flatten = tf.keras.layers.Flatten
    dense = tf.keras.layers.Dense

    def build_layers(**dropout_kwargs):
        # Every case uses the same conv/fc/activation/name arguments.
        return utils.mlp_layers(
            conv_layer_params=[(3, 4, 5), (4, 6, 8)],
            fc_layer_params=[10, 20],
            activation_fn=tf.keras.activations.tanh,
            name='testnet',
            **dropout_kwargs)

    def check(layers, expected_types):
        self.assertEqual(len(expected_types), len(layers))
        self.assertAllEqual(expected_types,
                            [type(layer) for layer in layers])

    # No dropout requested.
    check(build_layers(), [conv, conv, flatten, dense, dense])

    dropout = permanent_variable_rate_dropout.PermanentVariableRateDropout

    # A dropout rate for each dense layer.
    check(
        build_layers(dropout_layer_params=[0.5, 0.3]),
        [conv, conv, flatten, dense, dropout, dense, dropout])

    # `None` skips the dropout layer after the first dense layer.
    check(
        build_layers(dropout_layer_params=[None, 0.3]),
        [conv, conv, flatten, dense, dense, dropout])

    # Dropout described by a dict; none after the second dense layer.
    check(
        build_layers(
            dropout_layer_params=[dict(rate=0.5, permanent=True), None]),
        [conv, conv, flatten, dense, dropout, dense])
def __init__(self,
             observation_spec,
             action_spec,
             fc_layer_params=(200, 100),
             conv_layer_params=None,
             activation_fn=tf.keras.activations.relu,
             categorical_projection_net=_categorical_projection_net,
             normal_projection_net=_normal_projection_net,
             name='ActorDistributionNetwork'):
    """Builds the input MLP and one projection network per action spec.

    Args:
      observation_spec: A nest of `tensor_spec.TensorSpec` representing the
        observations. It must flatten to at most one spec.
      action_spec: A nest of `tensor_spec.BoundedTensorSpec` representing the
        actions.
      fc_layer_params: Optional list of fully connected parameters, where
        each item is the number of units in the layer.
      conv_layer_params: Optional list of convolution layer parameters, where
        each item is a length-three tuple (filters, kernel_size, stride).
      activation_fn: Activation function, e.g. tf.nn.relu.
      categorical_projection_net: Callable invoked with each action spec for
        which `is_discrete()` is true; its result is stored as that action's
        projection network.
      normal_projection_net: Callable invoked with every other action spec;
        its result is stored as that action's projection network.
      name: A string representing the name of the network.

    Raises:
      ValueError: If `observation_spec` contains more than one observation.
    """
    super(ActorDistributionNetwork, self).__init__(
        observation_spec=observation_spec,
        action_spec=action_spec,
        state_spec=(),
        name=name)

    flat_observation_spec = nest.flatten(observation_spec)
    if len(flat_observation_spec) > 1:
        raise ValueError('Only a single observation is supported by this network')

    self._mlp_layers = utils.mlp_layers(
        conv_layer_params,
        fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=tf.keras.initializers.glorot_uniform(),
        name='input_mlp')

    # One projection network per flattened action spec, in flatten order.
    self._projection_networks = []
    for spec in nest.flatten(action_spec):
        make_projection = (
            categorical_projection_net
            if spec.is_discrete() else normal_projection_net)
        self._projection_networks.append(make_projection(spec))
def __init__(self,
             input_tensor_spec,
             output_tensor_spec,
             fc_layer_params=(256, 256),
             conv_layer_params=None,
             activation_fn=tf.keras.activations.relu,
             name='ActorDistributionNetwork'):
    """Builds the MLP stack followed by a linear projection layer.

    The last layer is a `Dense` layer named 'normal_projection_layer' with
    no activation and twice as many units as the single action spec has
    elements.

    Args:
      input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing the
        input. It must flatten to at most one spec.
      output_tensor_spec: A nest of `tensor_spec.BoundedTensorSpec`
        representing the output. It must flatten to at most one spec.
      fc_layer_params: Optional list of fully connected parameters, where
        each item is the number of units in the layer.
      conv_layer_params: Optional list of convolution layer parameters, where
        each item is a length-three tuple (filters, kernel_size, stride).
      activation_fn: Activation function, e.g. tf.nn.relu.
      name: A string representing the name of the network.

    Raises:
      ValueError: If `input_tensor_spec` or `output_tensor_spec` contains
        more than one spec.
    """
    super(ActorDistributionNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec,
        state_spec=(),
        output_spec=output_tensor_spec,
        name=name)

    flat_input_spec = tf.nest.flatten(input_tensor_spec)
    if len(flat_input_spec) > 1:
        raise ValueError('Only a single observation is supported by this network')

    flat_action_spec = tf.nest.flatten(output_tensor_spec)
    if len(flat_action_spec) > 1:
        raise ValueError('Only a single action is supported by this network')
    self._single_action_spec = flat_action_spec[0]

    def glorot_uniform():
        # A fresh initializer instance for each consumer.
        return tf.compat.v1.keras.initializers.glorot_uniform()

    # TODO(kbanoop): Replace mlp_layers with encoding networks.
    self._mlp_layers = utils.mlp_layers(
        conv_layer_params,
        fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=glorot_uniform(),
        name='input_mlp')

    projection_units = 2 * self._single_action_spec.shape.num_elements()
    projection_layer = tf.keras.layers.Dense(
        projection_units,
        activation=None,
        kernel_initializer=glorot_uniform(),
        name='normal_projection_layer')
    self._mlp_layers.append(projection_layer)
def __init__(self,
             input_tensor_spec,
             fc_layer_params=(75, 40),
             conv_layer_params=None,
             activation_fn=tf.keras.activations.relu,
             name='ValueNetwork'):
    """Builds the MLP stack followed by a single-unit linear value layer.

    Network supports calls with shape outer_rank + observation_spec.shape.
    Note outer_rank must be at least 1.

    Args:
      input_tensor_spec: A `tensor_spec.TensorSpec` or a tuple of specs
        representing the input observations. It must flatten to at most one
        spec.
      fc_layer_params: Optional list of fully connected parameters, where
        each item is the number of units in the layer.
      conv_layer_params: Optional list of convolution layer parameters, where
        each item is a length-three tuple (filters, kernel_size, stride).
      activation_fn: Activation function, e.g. tf.keras.activations.relu.
      name: A string representing the name of the network.

    Raises:
      ValueError: If `input_tensor_spec` contains more than one observation.
    """
    super(ValueNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec,
        state_spec=(),
        name=name)

    flat_input_spec = tf.nest.flatten(input_tensor_spec)
    if len(flat_input_spec) > 1:
        raise ValueError(
            'Network only supports observation specs with a single observation.')

    self._postprocessing_layers = utils.mlp_layers(
        conv_layer_params,
        fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=tf.compat.v1.keras.initializers.glorot_uniform(),
        name='input_mlp')

    # Final layer: one linear unit, initialised uniformly in [-0.03, 0.03].
    value_layer = tf.keras.layers.Dense(
        1,
        activation=None,
        kernel_initializer=tf.compat.v1.initializers.random_uniform(
            minval=-0.03, maxval=0.03),
    )
    self._postprocessing_layers.append(value_layer)
def __init__(self,
             input_tensor_spec,
             observation_conv_layer_params=None,
             observation_fc_layer_params=(256, ),
             action_fc_layer_params=None,
             joint_fc_layer_params=(256, ),
             activation_fn=tf.nn.relu,
             name='CriticNetwork',
             output_dim=None):
    """Builds observation, action and joint layer stacks for the critic.

    The base initializer is given `(None, action_spec)` as its input spec,
    while the unmodified `input_tensor_spec` is stored on
    `self._input_tensor_spec` afterwards.

    Args:
      input_tensor_spec: A tuple of (observation, action) each a nest of
        `tensor_spec.TensorSpec` representing the inputs.
      observation_conv_layer_params: Optional list of convolution layer
        parameters for observations, where each item is a length-three tuple
        indicating (num_units, kernel_size, stride).
      observation_fc_layer_params: Optional list of fully connected
        parameters for observations, where each item is the number of units
        in the layer.
      action_fc_layer_params: Optional list of fully connected parameters for
        actions, where each item is the number of units in the layer.
      joint_fc_layer_params: Optional list of fully connected parameters
        after merging observations and actions, where each item is the number
        of units in the layer.
      activation_fn: Activation function, e.g. tf.nn.relu.
      name: A string representing the name of the network.
      output_dim: An integer specifying the number of units of the final
        'value' layer. If None, that layer has a single unit. (The original
        note says the output is then flattened; that would happen outside
        this constructor.)

    Raises:
      ValueError: If the action spec contains more than one action.
    """
    self._output_dim = output_dim

    unused_observation_spec, action_spec = input_tensor_spec
    modified_obs_spec = None
    modified_tensor_spec = (modified_obs_spec, action_spec)

    # Starting the MRO lookup after critic_network.CriticNetwork means that
    # class's own __init__ is not run; the next initializer in line is.
    super(critic_network.CriticNetwork, self).__init__(
        input_tensor_spec=modified_tensor_spec,
        state_spec=(),
        name=name)

    self._input_tensor_spec = input_tensor_spec

    flat_action_spec = tf.nest.flatten(action_spec)
    if len(flat_action_spec) > 1:
        raise ValueError(
            'Only a single action is supported by this network')
    self._single_action_spec = flat_action_spec[0]

    def variance_scaling():
        # A fresh initializer instance for each layer stack.
        return tf.keras.initializers.VarianceScaling(
            scale=1. / 3., mode='fan_in', distribution='uniform')

    self._observation_layers = utils.mlp_layers(
        observation_conv_layer_params,
        observation_fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=variance_scaling(),
        name='observation_encoding')

    self._action_layers = utils.mlp_layers(
        None,
        action_fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=variance_scaling(),
        name='action_encoding')

    self._joint_layers = utils.mlp_layers(
        None,
        joint_fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=variance_scaling(),
        name='joint_mlp')

    value_units = 1 if self._output_dim is None else self._output_dim
    value_layer = tf.keras.layers.Dense(
        value_units,
        activation=None,
        kernel_initializer=tf.keras.initializers.RandomUniform(
            minval=-0.003, maxval=0.003),
        name='value')
    self._joint_layers.append(value_layer)
def __init__(
        self,
        root_dir,
        conv_1d_layer_params=[(32, 8, 4), (64, 4, 2), (64, 3, 1)],
        conv_2d_layer_params=[(32, (8, 8), 4), (64, (4, 4), 2), (64, (3, 3), 2)],
        encoder_fc_layers=[256],
        actor_fc_layers=[256],
        critic_obs_fc_layers=[256],
        critic_action_fc_layers=[256],
        critic_joint_fc_layers=[256],
        # Params for target update
        target_update_tau=0.005,
        target_update_period=1,
        # Params for train
        actor_learning_rate=3e-4,
        critic_learning_rate=3e-4,
        alpha_learning_rate=3e-4,
        td_errors_loss_fn=tf.compat.v1.losses.mean_squared_error,
        gamma=0.99,
        reward_scale_factor=1.0,
        gradient_clipping=None,
        # Params for eval
        eval_deterministic=False,
        # Params for summaries and logging
        debug_summaries=False,
        summarize_grads_and_vars=False):
    """Builds a SAC agent and prepares its policy for evaluation.

    The constructor declares fixed time-step and action specs, builds the
    actor and critic networks and a `SacAgent`, opens a TF1 session, wraps
    the agent's policy in a `PyTFPolicy`, calls
    `initialize_or_restore` on a checkpointer rooted at
    `<root_dir>/train/policy`, and finally runs `self.act` once on an
    all-ones observation and prints the result.

    Sets `self.sess` and `self.eval_py_policy`.

    Args:
      root_dir: Directory containing the `train/policy` checkpoint
        directory; `~` is expanded.
      conv_1d_layer_params: Accepted but not used here; every `mlp_layers`
        call below passes `conv_1d_layer_params=None`.
      conv_2d_layer_params: Convolution parameters passed to `mlp_layers`
        for the 'rgb' and 'depth' encoders.
      encoder_fc_layers: Fully connected parameters passed to `mlp_layers`
        for every observation encoder.
      actor_fc_layers: `fc_layer_params` of the actor network.
      critic_obs_fc_layers: `observation_fc_layer_params` of the critic.
      critic_action_fc_layers: `action_fc_layer_params` of the critic.
      critic_joint_fc_layers: `joint_fc_layer_params` of the critic.
      target_update_tau: Forwarded to `SacAgent`.
      target_update_period: Forwarded to `SacAgent`.
      actor_learning_rate: Learning rate of the actor's Adam optimizer.
      critic_learning_rate: Learning rate of the critic's Adam optimizer.
      alpha_learning_rate: Learning rate of the alpha Adam optimizer.
      td_errors_loss_fn: Forwarded to `SacAgent`.
      gamma: Forwarded to `SacAgent`.
      reward_scale_factor: Forwarded to `SacAgent`.
      gradient_clipping: Forwarded to `SacAgent`.
      eval_deterministic: If true, the agent's policy is wrapped in a
        `GreedyPolicy` before being wrapped in `PyTFPolicy`.
      debug_summaries: Forwarded to `SacAgent`.
      summarize_grads_and_vars: Forwarded to `SacAgent`.
    """
    tf.compat.v1.enable_resource_variables()

    root_dir = os.path.expanduser(root_dir)
    policy_dir = os.path.join(root_dir, 'train', 'policy')

    def bounded_float_spec(shape, low, high, name=None):
        # float32 spec whose bounds are float32 numpy scalars.
        return BoundedTensorSpec(
            shape=shape,
            dtype=tf.float32,
            name=name,
            minimum=np.array(low, dtype=np.float32),
            maximum=np.array(high, dtype=np.float32))

    time_step_spec = TimeStep(
        TensorSpec(shape=(), dtype=tf.int32, name='step_type'),
        TensorSpec(shape=(), dtype=tf.float32, name='reward'),
        bounded_float_spec((), 0., 1., name='discount'),
        collections.OrderedDict({
            'task_obs': bounded_float_spec(
                (TASK_OBS_DIM, ), -3.4028235e+38, 3.4028235e+38),
            'depth': bounded_float_spec(
                (IMG_HEIGHT, IMG_WIDTH, 1), -1.0, 1.0),
            'rgb': bounded_float_spec(
                (IMG_HEIGHT, IMG_WIDTH, 3), -1.0, 1.0),
        }))
    observation_spec = time_step_spec.observation
    action_spec = bounded_float_spec((2, ), -1.0, 1.0)

    glorot_uniform_initializer = (
        tf.compat.v1.keras.initializers.glorot_uniform())

    # One encoder per observation key; the image inputs get the 2D conv
    # stack, the task observation only gets the fully connected layers.
    encoder_conv_params = (
        ('rgb', conv_2d_layer_params),
        ('depth', conv_2d_layer_params),
        ('task_obs', None),
    )
    preprocessing_layers = {}
    for obs_key, conv_2d_params in encoder_conv_params:
        if obs_key in observation_spec:
            preprocessing_layers[obs_key] = tf.keras.Sequential(
                mlp_layers(
                    conv_1d_layer_params=None,
                    conv_2d_layer_params=conv_2d_params,
                    fc_layer_params=encoder_fc_layers,
                    kernel_initializer=glorot_uniform_initializer,
                ))

    # A combiner is only needed when there are several encoders.
    preprocessing_combiner = (
        tf.keras.layers.Concatenate(axis=-1)
        if len(preprocessing_layers) > 1 else None)

    actor_net = actor_distribution_network.ActorDistributionNetwork(
        observation_spec,
        action_spec,
        preprocessing_layers=preprocessing_layers,
        preprocessing_combiner=preprocessing_combiner,
        fc_layer_params=actor_fc_layers,
        continuous_projection_net=normal_projection_net,
        kernel_initializer=glorot_uniform_initializer,
    )
    critic_net = critic_network.CriticNetwork(
        (observation_spec, action_spec),
        preprocessing_layers=preprocessing_layers,
        preprocessing_combiner=preprocessing_combiner,
        observation_fc_layer_params=critic_obs_fc_layers,
        action_fc_layer_params=critic_action_fc_layers,
        joint_fc_layer_params=critic_joint_fc_layers,
        kernel_initializer=glorot_uniform_initializer,
    )

    global_step = tf.compat.v1.train.get_or_create_global_step()

    actor_optimizer = tf.compat.v1.train.AdamOptimizer(
        learning_rate=actor_learning_rate)
    critic_optimizer = tf.compat.v1.train.AdamOptimizer(
        learning_rate=critic_learning_rate)
    alpha_optimizer = tf.compat.v1.train.AdamOptimizer(
        learning_rate=alpha_learning_rate)
    tf_agent = sac_agent.SacAgent(
        time_step_spec,
        action_spec,
        actor_network=actor_net,
        critic_network=critic_net,
        actor_optimizer=actor_optimizer,
        critic_optimizer=critic_optimizer,
        alpha_optimizer=alpha_optimizer,
        target_update_tau=target_update_tau,
        target_update_period=target_update_period,
        td_errors_loss_fn=td_errors_loss_fn,
        gamma=gamma,
        reward_scale_factor=reward_scale_factor,
        gradient_clipping=gradient_clipping,
        debug_summaries=debug_summaries,
        summarize_grads_and_vars=summarize_grads_and_vars,
        train_step_counter=global_step)

    session_config = tf.compat.v1.ConfigProto()
    session_config.gpu_options.allow_growth = True
    self.sess = tf.compat.v1.Session(config=session_config)

    eval_tf_policy = (
        greedy_policy.GreedyPolicy(tf_agent.policy)
        if eval_deterministic else tf_agent.policy)
    self.eval_py_policy = py_tf_policy.PyTFPolicy(eval_tf_policy)

    policy_checkpointer = common.Checkpointer(
        ckpt_dir=policy_dir,
        policy=tf_agent.policy,
        global_step=global_step)

    with self.sess.as_default():
        # Initialize graph.
        policy_checkpointer.initialize_or_restore(self.sess)

        # activate the session
        dummy_obs = {
            'depth': np.ones((IMG_HEIGHT, IMG_WIDTH, 1)),
            'rgb': np.ones((IMG_HEIGHT, IMG_WIDTH, 3)),
            'task_obs': np.ones((TASK_OBS_DIM, )),
        }
        action = self.act(dummy_obs)
        print('activate TF session')
        print('action', action)
def __init__(self,
             input_tensor_spec,
             observation_conv_layer_params=None,
             observation_fc_layer_params=None,
             observation_dropout_layer_params=None,
             action_fc_layer_params=None,
             action_dropout_layer_params=None,
             joint_fc_layer_params=None,
             joint_dropout_layer_params=None,
             activation_fn=tf.nn.relu,
             name='CriticNetwork'):
    """Builds observation, action and joint layer stacks with dropout.

    Each `*_dropout_layer_params` argument is an optional list with one
    entry per fully connected layer of the matching stack (so it must have
    the same length as the corresponding `*_fc_layer_params`, or be None).
    An entry is the fraction of input units to drop, a dict of parameters
    according to the keras.Dropout documentation, or None to skip dropout
    after that layer. The additional dict parameter `permanent`, if set to
    True, applies dropout at inference too, for approximated Bayesian
    inference.

    Args:
      input_tensor_spec: A tuple of (observation, action) each a nest of
        `tensor_spec.TensorSpec` representing the inputs.
      observation_conv_layer_params: Optional list of convolution layer
        parameters for observations, where each item is a length-three tuple
        indicating (num_units, kernel_size, stride).
      observation_fc_layer_params: Optional list of fully connected
        parameters for observations, where each item is the number of units
        in the layer.
      observation_dropout_layer_params: Dropout parameters for the
        observation stack (see above).
      action_fc_layer_params: Optional list of fully connected parameters for
        actions, where each item is the number of units in the layer.
      action_dropout_layer_params: Dropout parameters for the action stack
        (see above).
      joint_fc_layer_params: Optional list of fully connected parameters
        after merging observations and actions, where each item is the number
        of units in the layer.
      joint_dropout_layer_params: Dropout parameters for the joint stack
        (see above).
      activation_fn: Activation function, e.g. tf.nn.relu.
      name: A string representing the name of the network.

    Raises:
      ValueError: If the observation spec or the action spec contains more
        than one spec.
    """
    super(CriticNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec,
        state_spec=(),
        name=name)

    observation_spec, action_spec = input_tensor_spec

    flat_observation_spec = tf.nest.flatten(observation_spec)
    if len(flat_observation_spec) > 1:
        raise ValueError(
            'Only a single observation is supported by this network')

    flat_action_spec = tf.nest.flatten(action_spec)
    if len(flat_action_spec) > 1:
        raise ValueError(
            'Only a single action is supported by this network')
    self._single_action_spec = flat_action_spec[0]

    def variance_scaling():
        # A fresh initializer instance for each layer stack.
        return tf.compat.v1.keras.initializers.VarianceScaling(
            scale=1. / 3., mode='fan_in', distribution='uniform')

    # TODO(kbanoop): Replace mlp_layers with encoding networks.
    self._observation_layers = utils.mlp_layers(
        observation_conv_layer_params,
        observation_fc_layer_params,
        observation_dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=variance_scaling(),
        name='observation_encoding')

    self._action_layers = utils.mlp_layers(
        None,
        action_fc_layer_params,
        action_dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=variance_scaling(),
        name='action_encoding')

    self._joint_layers = utils.mlp_layers(
        None,
        joint_fc_layer_params,
        joint_dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=variance_scaling(),
        name='joint_mlp')

    # Final layer: one linear unit, initialised uniformly in
    # [-0.003, 0.003].
    value_layer = tf.keras.layers.Dense(
        1,
        activation=None,
        kernel_initializer=tf.keras.initializers.RandomUniform(
            minval=-0.003, maxval=0.003),
        name='value')
    self._joint_layers.append(value_layer)
def __init__(self,
             input_tensor_spec,
             observation_preprocessing_layers=None,
             observation_preprocessing_combiner=None,
             observation_conv_layer_params=None,
             observation_fc_layer_params=None,
             observation_dropout_layer_params=None,
             action_fc_layer_params=None,
             action_dropout_layer_params=None,
             joint_fc_layer_params=None,
             joint_dropout_layer_params=None,
             activation_fn=tf.nn.relu,
             kernel_initializer=None,
             name='CriticNetwork'):
    """Builds an observation encoder plus action and joint layer stacks.

    Observations are handled by an `EncodingNetwork`, so complex
    observations can be supported through preprocessing layers and a
    preprocessing combiner.

    Each `*_dropout_layer_params` argument is an optional list with one
    entry per fully connected layer of the matching stack (so it must have
    the same length as the corresponding `*_fc_layer_params`, or be None).
    An entry is the fraction of input units to drop, a dict of parameters
    according to the keras.Dropout documentation, or None to skip dropout
    after that layer. The additional dict parameter `permanent`, if set to
    True, applies dropout at inference too, for approximated Bayesian
    inference.

    Args:
      input_tensor_spec: A tuple of (observation, action) each a nest of
        `tensor_spec.TensorSpec` representing the inputs.
      observation_preprocessing_layers: (Optional.) A nest of
        `tf.keras.layers.Layer` representing preprocessing for the different
        observations. All of these layers must not be already built. For
        more details see the documentation of `networks.EncodingNetwork`.
      observation_preprocessing_combiner: (Optional.) A keras layer that
        takes a flat list of tensors and combines them. Good options include
        `tf.keras.layers.Add` and `tf.keras.layers.Concatenate(axis=-1)`.
        This layer must not be already built. For more details see the
        documentation of `networks.EncodingNetwork`.
      observation_conv_layer_params: Optional list of convolution layer
        parameters for observations, where each item is a length-three tuple
        indicating (num_units, kernel_size, stride).
      observation_fc_layer_params: Optional list of fully connected
        parameters for observations, where each item is the number of units
        in the layer.
      observation_dropout_layer_params: Dropout parameters for the
        observation encoder (see above).
      action_fc_layer_params: Optional list of fully connected parameters for
        actions, where each item is the number of units in the layer.
      action_dropout_layer_params: Dropout parameters for the action stack
        (see above).
      joint_fc_layer_params: Optional list of fully connected parameters
        after merging observations and actions, where each item is the number
        of units in the layer.
      joint_dropout_layer_params: Dropout parameters for the joint stack
        (see above).
      activation_fn: Activation function, e.g. tf.nn.relu.
      kernel_initializer: Initializer for the kernels of the observation
        encoder. If falsy, a glorot_uniform initializer is used. The action
        and joint stacks always use a VarianceScaling initializer and the
        final 'value' layer a RandomUniform one.
      name: A string representing the name of the network.

    Raises:
      ValueError: If the action spec contains more than one action.
    """
    super(CriticNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec,
        state_spec=(),
        name=name)

    observation_spec, action_spec = input_tensor_spec

    flat_action_spec = tf.nest.flatten(action_spec)
    if len(flat_action_spec) > 1:
        raise ValueError(
            'Only a single action is supported by this network')

    if not kernel_initializer:
        kernel_initializer = (
            tf.compat.v1.keras.initializers.glorot_uniform())

    self._encoder = encoding_network.EncodingNetwork(
        observation_spec,
        preprocessing_layers=observation_preprocessing_layers,
        preprocessing_combiner=observation_preprocessing_combiner,
        conv_layer_params=observation_conv_layer_params,
        fc_layer_params=observation_fc_layer_params,
        dropout_layer_params=observation_dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=kernel_initializer)

    self._single_action_spec = flat_action_spec[0]

    def variance_scaling():
        # A fresh initializer instance for each layer stack.
        return tf.compat.v1.keras.initializers.VarianceScaling(
            scale=1. / 3., mode='fan_in', distribution='uniform')

    self._action_layers = utils.mlp_layers(
        fc_layer_params=action_fc_layer_params,
        dropout_layer_params=action_dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=variance_scaling(),
        name='action_encoding')

    self._joint_layers = utils.mlp_layers(
        fc_layer_params=joint_fc_layer_params,
        dropout_layer_params=joint_dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=variance_scaling(),
        name='joint_mlp')

    # Final layer: one linear unit, initialised uniformly in
    # [-0.003, 0.003].
    value_layer = tf.keras.layers.Dense(
        1,
        activation=None,
        kernel_initializer=tf.keras.initializers.RandomUniform(
            minval=-0.003, maxval=0.003),
        name='value')
    self._joint_layers.append(value_layer)
def __init__(self,
             input_tensor_spec,
             preprocessing_layers=None,
             preprocessing_combiner=None,
             batch_squash=True,
             observation_conv_layer_params=None,
             observation_fc_layer_params=None,
             observation_dropout_layer_params=None,
             action_fc_layer_params=None,
             action_dropout_layer_params=None,
             joint_fc_layer_params=None,
             joint_dropout_layer_params=None,
             activation_fn=tf.nn.relu,
             output_activation_fn=None,
             kernel_initializer=None,
             last_kernel_initializer=None,
             name='CriticNetwork'):
    """Builds an observation encoder plus action and joint layer stacks.

    Each `*_dropout_layer_params` argument is an optional list with one
    entry per fully connected layer of the matching stack (so it must have
    the same length as the corresponding `*_fc_layer_params`, or be None).
    An entry is the fraction of input units to drop, a dict of parameters
    according to the keras.Dropout documentation, or None to skip dropout
    after that layer. The additional dict parameter `permanent`, if set to
    True, applies dropout at inference too, for approximated Bayesian
    inference.

    Args:
      input_tensor_spec: A tuple of (observation, action) each a nest of
        `tensor_spec.TensorSpec` representing the inputs.
      preprocessing_layers: (Optional.) A nest of `tf.keras.layers.Layer`
        representing preprocessing for the different observations. All of
        these layers must not be already built. For more details see the
        documentation of `networks.EncodingNetwork`.
      preprocessing_combiner: (Optional.) A keras layer that takes a flat
        list of tensors and combines them. Good options include
        `tf.keras.layers.Add` and `tf.keras.layers.Concatenate(axis=-1)`.
        This layer must not be already built. For more details see the
        documentation of `networks.EncodingNetwork`.
      batch_squash: If True the outer_ranks of the observation are squashed
        into the batch dimension. This allows encoding networks to be used
        with observations with shape [BxTx...].
      observation_conv_layer_params: Optional list of convolution layer
        parameters for observations, where each item is a length-three tuple
        indicating (num_units, kernel_size, stride).
      observation_fc_layer_params: Optional list of fully connected
        parameters for observations, where each item is the number of units
        in the layer.
      observation_dropout_layer_params: Dropout parameters for the
        observation encoder (see above).
      action_fc_layer_params: Optional list of fully connected parameters for
        actions, where each item is the number of units in the layer.
      action_dropout_layer_params: Dropout parameters for the action stack
        (see above).
      joint_fc_layer_params: Optional list of fully connected parameters
        after merging observations and actions, where each item is the number
        of units in the layer.
      joint_dropout_layer_params: Dropout parameters for the joint stack
        (see above).
      activation_fn: Activation function, e.g. tf.nn.relu.
      output_activation_fn: Activation function for the last layer. This can
        be used to restrict the range of the output. For example, one can
        pass tf.keras.activations.sigmoid here to restrict the output to be
        bounded between 0 and 1.
      kernel_initializer: Kernel initializer for all layers except for the
        value regression layer. If None, a VarianceScaling initializer will
        be used.
      last_kernel_initializer: Kernel initializer for the value regression
        layer. If None, a RandomUniform initializer will be used.
      name: A string representing the name of the network.

    Raises:
      ValueError: If the action spec contains more than one action.
    """
    super(PaintingCriticNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec,
        state_spec=(),
        name=name)

    observation_spec, action_spec = input_tensor_spec

    # NOTE: the observation spec is deliberately not restricted to a single
    # entry here; it is passed as-is to the EncodingNetwork below.

    flat_action_spec = tf.nest.flatten(action_spec)
    if len(flat_action_spec) > 1:
        raise ValueError('Only a single action is supported by this network')
    self._single_action_spec = flat_action_spec[0]

    if kernel_initializer is None:
        kernel_initializer = tf.compat.v1.keras.initializers.VarianceScaling(
            scale=1. / 3., mode='fan_in', distribution='uniform')
    if last_kernel_initializer is None:
        last_kernel_initializer = tf.keras.initializers.RandomUniform(
            minval=-0.003, maxval=0.003)

    self._encoder = encoding_network.EncodingNetwork(
        observation_spec,
        preprocessing_layers=preprocessing_layers,
        preprocessing_combiner=preprocessing_combiner,
        conv_layer_params=observation_conv_layer_params,
        fc_layer_params=observation_fc_layer_params,
        dropout_layer_params=observation_dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=kernel_initializer,
        batch_squash=batch_squash,
        name='observation_encoding')

    self._action_layers = utils.mlp_layers(
        None,
        action_fc_layer_params,
        action_dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=kernel_initializer,
        name='action_encoding')

    self._joint_layers = utils.mlp_layers(
        None,
        joint_fc_layer_params,
        joint_dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=kernel_initializer,
        name='joint_mlp')

    # Value regression layer: a single unit with the optional output
    # activation.
    value_layer = tf.keras.layers.Dense(
        1,
        activation=output_activation_fn,
        kernel_initializer=last_kernel_initializer,
        name='value')
    self._joint_layers.append(value_layer)
def __init__(self,
             input_tensor_spec,
             observation_conv_layer_params=None,
             observation_fc_layer_params=None,
             action_fc_layer_params=None,
             joint_fc_layer_params=(256, 256),
             activation_fn=tf.nn.relu,
             name='CriticNetwork'):
  """Creates an instance of `CriticNetwork`.

  Three layer stacks are built with `utils.mlp_layers`: one for the
  observation, one for the action, and a joint stack that is terminated by a
  single-unit `Dense` layer (no activation) named 'value'.

  Args:
    input_tensor_spec: A tuple of (observation, action) each a nest of
      `tensor_spec.TensorSpec` representing the inputs.
    observation_conv_layer_params: Optional list of convolution layer
      parameters for observations, where each item is a length-three tuple
      indicating (num_units, kernel_size, stride).
    observation_fc_layer_params: Optional list of fully connected parameters
      for observations, where each item is the number of units in the layer.
    action_fc_layer_params: Optional list of fully connected parameters for
      actions, where each item is the number of units in the layer.
    joint_fc_layer_params: Optional list of fully connected parameters after
      merging observations and actions, where each item is the number of
      units in the layer.
    activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
    name: A string representing name of the network.

  Raises:
    ValueError: If the observation spec or the action spec flattens to more
      than one entry.
  """
  super(CriticNetwork, self).__init__(
      input_tensor_spec=input_tensor_spec, state_spec=(), name=name)

  observation_spec, action_spec = input_tensor_spec

  num_observations = len(tf.nest.flatten(observation_spec))
  if num_observations > 1:
    raise ValueError('Only a single observation is supported by this network')

  flat_action_spec = tf.nest.flatten(action_spec)
  if len(flat_action_spec) > 1:
    raise ValueError('Only a single action is supported by this network')
  self._single_action_spec = flat_action_spec[0]

  # Alias for the initializer class; every use below still constructs its own
  # fresh instance.
  new_glorot_uniform = tf.compat.v1.keras.initializers.glorot_uniform

  # TODO(kbanoop): Replace mlp_layers with encoding networks.
  self._observation_layers = utils.mlp_layers(
      observation_conv_layer_params,
      observation_fc_layer_params,
      activation_fn=activation_fn,
      kernel_initializer=new_glorot_uniform(),
      name='observation_encoding')

  self._action_layers = utils.mlp_layers(
      None,
      action_fc_layer_params,
      activation_fn=activation_fn,
      kernel_initializer=new_glorot_uniform(),
      name='action_encoding')

  joint_layers = utils.mlp_layers(
      None,
      joint_fc_layer_params,
      activation_fn=activation_fn,
      kernel_initializer=new_glorot_uniform(),
      name='joint_mlp')
  # Final linear projection down to a single scalar output.
  value_layer = tf.keras.layers.Dense(
      1,
      activation=None,
      kernel_initializer=new_glorot_uniform(),
      name='value')
  joint_layers.append(value_layer)
  self._joint_layers = joint_layers
def __init__(self,
             input_tensor_spec,
             output_tensor_spec,
             fc_layer_params=(200, 100),
             dropout_layer_params=None,
             conv_layer_params=None,
             activation_fn=tf.keras.activations.relu,
             discrete_projection_net=_categorical_projection_net,
             continuous_projection_net=_normal_projection_net,
             name='ActorDistributionNetwork'):
  """Creates an instance of `ActorDistributionNetwork`.

  Builds the input MLP with `utils.mlp_layers` and one projection network per
  entry of `output_tensor_spec`; the network's output spec is the nest of the
  projection networks' `output_spec`s.

  Args:
    input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing the
      input.
    output_tensor_spec: A nest of `tensor_spec.BoundedTensorSpec` representing
      the output.
    fc_layer_params: Optional list of fully_connected parameters, where each
      item is the number of units in the layer.
    dropout_layer_params: Optional list of dropout layer parameters, each item
      is the fraction of input units to drop or a dictionary of parameters
      according to the keras.Dropout documentation. The additional parameter
      `permanent`, if set to True, allows to apply dropout at inference for
      approximated Bayesian inference. The dropout layers are interleaved with
      the fully connected layers; there is a dropout layer after each fully
      connected layer, except if the entry in the list is None. This list must
      have the same length of fc_layer_params, or be None.
    conv_layer_params: Optional list of convolution layers parameters, where
      each item is a length-three tuple indicating (filters, kernel_size,
      stride).
    activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
    discrete_projection_net: Callable that generates a discrete projection
      network to be called with some hidden state and the outer_rank of the
      state.
    continuous_projection_net: Callable that generates a continuous projection
      network to be called with some hidden state and the outer_rank of the
      state.
    name: A string representing name of the network.

  Raises:
    ValueError: If `input_tensor_spec` contains more than one observation.
  """
  flat_input_spec = tf.nest.flatten(input_tensor_spec)
  if len(flat_input_spec) > 1:
    raise ValueError('Only a single observation is supported by this network')

  input_mlp = utils.mlp_layers(
      conv_layer_params,
      fc_layer_params,
      activation_fn=activation_fn,
      kernel_initializer=tf.compat.v1.keras.initializers.glorot_uniform(),
      dropout_layer_params=dropout_layer_params,
      name='input_mlp')

  def _build_projection(spec):
    # Discrete specs get the discrete projection, all others the continuous.
    if tensor_spec.is_discrete(spec):
      make_net = discrete_projection_net
    else:
      make_net = continuous_projection_net
    return make_net(spec)

  def _projection_output_spec(proj_net):
    return proj_net.output_spec

  proj_nets = tf.nest.map_structure(_build_projection, output_tensor_spec)
  network_output_spec = tf.nest.map_structure(_projection_output_spec,
                                              proj_nets)

  # The parent constructor runs before any attribute is set on self.
  super(ActorDistributionNetwork, self).__init__(
      input_tensor_spec=input_tensor_spec,
      state_spec=(),
      output_spec=network_output_spec,
      name=name)

  self._mlp_layers = input_mlp
  self._projection_networks = proj_nets
  self._output_tensor_spec = output_tensor_spec
def __init__(self,
             input_tensor_spec,
             conv_layer_params=None,
             input_fc_layer_params=(75, 40),
             input_dropout_layer_params=None,
             lstm_size=(40,),
             output_fc_layer_params=(75, 40),
             activation_fn=tf.keras.activations.relu,
             name='ValueRnnNetwork'):
  """Creates an instance of `ValueRnnNetwork`.

  Network supports calls with shape outer_rank + input_tensor_shape.shape.
  Note outer_rank must be at least 1.

  Args:
    input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing the
      input observations.
    conv_layer_params: Optional list of convolution layers parameters, where
      each item is a length-three tuple indicating (filters, kernel_size,
      stride).
    input_fc_layer_params: Optional list of fully_connected parameters, where
      each item is the number of units in the layer. This is applied before
      the LSTM cell.
    input_dropout_layer_params: Optional list of dropout layer parameters,
      where each item is the fraction of input units to drop. The dropout
      layers are interleaved with the fully connected layers; there is a
      dropout layer after each fully connected layer, except if the entry in
      the list is None. This list must have the same length of
      input_fc_layer_params, or be None.
    lstm_size: An iterable of ints specifying the LSTM cell sizes to use. A
      single size yields one `LSTMCell`; several sizes yield
      `StackedRNNCells`.
    output_fc_layer_params: Optional list of fully_connected parameters, where
      each item is the number of units in the layer. This is applied after the
      LSTM cell.
    activation_fn: Activation function, e.g. tf.keras.activations.relu.
    name: A string representing name of the network.

  Raises:
    ValueError: If `input_tensor_spec` contains more than one observation.
  """
  if len(tf.nest.flatten(input_tensor_spec)) > 1:
    raise ValueError(
        'Network only supports observation_specs with a single observation.')

  input_layers = utils.mlp_layers(
      conv_layer_params,
      input_fc_layer_params,
      input_dropout_layer_params,
      activation_fn=activation_fn,
      kernel_initializer=tf.compat.v1.keras.initializers.glorot_uniform(),
      name='input_mlp')

  # Create RNN cell
  if len(lstm_size) == 1:
    cell = tf.keras.layers.LSTMCell(lstm_size[0])
  else:
    cell = tf.keras.layers.StackedRNNCells(
        [tf.keras.layers.LSTMCell(size) for size in lstm_size])

  output_layers = []
  if output_fc_layer_params:
    output_layers = [
        tf.keras.layers.Dense(
            num_units,
            activation=activation_fn,
            kernel_initializer=tf.compat.v1.variance_scaling_initializer(
                scale=2.0, mode='fan_in', distribution='truncated_normal'),
            name='output/dense') for num_units in output_fc_layer_params
    ]

  value_projection_layer = tf.keras.layers.Dense(
      1,
      activation=None,
      kernel_initializer=tf.compat.v1.initializers.random_uniform(
          minval=-0.03, maxval=0.03),
  )

  # The state spec is derived once, from the list form of the cell's state
  # size. An earlier duplicate computation on the raw `cell.state_size` was
  # always overwritten before being read and has been removed.
  state_spec = tf.nest.map_structure(
      functools.partial(
          tensor_spec.TensorSpec, dtype=tf.float32,
          name='network_state_spec'), list(cell.state_size))

  super(ValueRnnNetwork, self).__init__(
      input_tensor_spec=input_tensor_spec, state_spec=state_spec, name=name)

  self._conv_layer_params = conv_layer_params
  self._input_layers = input_layers
  self._dynamic_unroll = dynamic_unroll_layer.DynamicUnroll(cell)
  self._output_layers = output_layers
  self._value_projection_layer = value_projection_layer
def __init__(self,
             input_tensor_spec,
             output_tensor_spec,
             input_fc_layer_params=(200, 100),
             output_fc_layer_params=(200, 100),
             conv_layer_params=None,
             lstm_size=(40,),
             activation_fn=tf.keras.activations.relu,
             categorical_projection_net=_categorical_projection_net,
             normal_projection_net=_normal_projection_net,
             name='ActorDistributionRnnNetwork'):
  """Creates an instance of `ActorDistributionRnnNetwork`.

  Builds an input MLP, an LSTM cell (stacked when several sizes are given),
  an output MLP, and one projection network per flattened entry of
  `output_tensor_spec`.

  Args:
    input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing the
      input.
    output_tensor_spec: A nest of `tensor_spec.BoundedTensorSpec` representing
      the output.
    input_fc_layer_params: Optional list of fully_connected parameters, where
      each item is the number of units in the layer. This is applied before
      the LSTM cell.
    output_fc_layer_params: Optional list of fully_connected parameters, where
      each item is the number of units in the layer. This is applied after the
      LSTM cell.
    conv_layer_params: Optional list of convolution layers parameters, where
      each item is a length-three tuple indicating (filters, kernel_size,
      stride).
    lstm_size: An iterable of ints specifying the LSTM cell sizes to use.
    activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
    categorical_projection_net: Callable that generates a categorical
      projection network to be called with some hidden state and the
      outer_rank of the state.
    normal_projection_net: Callable that generates a normal projection network
      to be called with some hidden state and the outer_rank of the state.
    name: A string representing name of the network.

  Raises:
    ValueError: If `input_tensor_spec` contains more than one observation.
  """
  flat_input_spec = tf.nest.flatten(input_tensor_spec)
  if len(flat_input_spec) > 1:
    raise ValueError('Only a single observation is supported by this network')

  pre_lstm_layers = utils.mlp_layers(
      conv_layer_params,
      input_fc_layer_params,
      activation_fn=activation_fn,
      kernel_initializer=tf.compat.v1.keras.initializers.glorot_uniform(),
      name='input_mlp')

  # Create RNN cell
  if len(lstm_size) == 1:
    rnn_cell = tf.keras.layers.LSTMCell(lstm_size[0])
  else:
    lstm_cells = [tf.keras.layers.LSTMCell(size) for size in lstm_size]
    rnn_cell = tf.keras.layers.StackedRNNCells(lstm_cells)

  make_state_spec = functools.partial(
      tensor_spec.TensorSpec, dtype=tf.float32, name='network_state_spec')
  rnn_state_spec = tf.nest.map_structure(make_state_spec, rnn_cell.state_size)

  # NOTE(review): `activation_fn` is not forwarded here, so the output stack
  # uses whatever default `utils.mlp_layers` applies -- confirm this is
  # intended.
  post_lstm_layers = utils.mlp_layers(
      fc_layer_params=output_fc_layer_params, name='output')

  def _build_projection(spec):
    if tensor_spec.is_discrete(spec):
      return categorical_projection_net(spec)
    return normal_projection_net(spec)

  # One projection network per flattened output spec, in flatten order.
  proj_nets = [
      _build_projection(spec) for spec in tf.nest.flatten(output_tensor_spec)
  ]
  flat_distribution_specs = [net.output_spec for net in proj_nets]
  network_output_spec = tf.nest.pack_sequence_as(output_tensor_spec,
                                                 flat_distribution_specs)

  super(ActorDistributionRnnNetwork, self).__init__(
      input_tensor_spec=input_tensor_spec,
      state_spec=rnn_state_spec,
      output_spec=network_output_spec,
      name=name)

  self._conv_layer_params = conv_layer_params
  self._input_layers = pre_lstm_layers
  self._dynamic_unroll = dynamic_unroll_layer.DynamicUnroll(rnn_cell)
  self._output_layers = post_lstm_layers
  self._projection_networks = proj_nets
  self._output_tensor_spec = output_tensor_spec
def train_eval(
    root_dir,
    gpu='1',
    env_load_fn=None,
    model_ids=None,
    eval_env_mode='headless',
    conv_layer_params=None,
    encoder_fc_layers=[256],
    actor_fc_layers=[256, 256],
    value_fc_layers=[256, 256],
    use_rnns=False,
    # Params for collect
    num_environment_steps=10000000,
    collect_episodes_per_iteration=30,
    num_parallel_environments=30,
    replay_buffer_capacity=1001,  # Per-environment
    # Params for train
    num_epochs=25,
    learning_rate=1e-4,
    # Params for eval
    num_eval_episodes=30,
    eval_interval=500,
    eval_only=False,
    eval_deterministic=False,
    num_parallel_environments_eval=1,
    model_ids_eval=None,
    # Params for summaries and logging
    train_checkpoint_interval=500,
    policy_checkpoint_interval=500,
    rb_checkpoint_interval=500,
    log_interval=10,
    summary_interval=50,
    summaries_flush_secs=1,
    debug_summaries=False,
    summarize_grads_and_vars=False,
    eval_metrics_callback=None):
  """A simple train and eval for PPO.

  Builds parallel training and evaluation environments through `env_load_fn`,
  creates actor/value networks that share 'depth_seg' and 'sensor'
  preprocessing stacks, and then alternates collection and training in a
  TF1-style session until `num_environment_steps` environment steps have been
  collected. With `eval_only=True` a single evaluation pass is run, the
  evaluated episodes are saved to `<root_dir>/eval/episodes_eval.pkl`, and the
  function returns without training.

  The session is always closed on exit, including the `eval_only` early
  return and error paths.

  Args:
    root_dir: Directory under which the 'train' and 'eval' sub-directories
      (summaries, checkpoints, saved episodes) are placed. `~` is expanded.
    gpu: Passed through unchanged as the third argument of `env_load_fn`.
    env_load_fn: Callable invoked as `env_load_fn(model_id, mode, gpu)` to
      create one environment.
    model_ids: Optional list with one model id per training environment;
      defaults to `None` for every environment.
    eval_env_mode: Mode passed to `env_load_fn` for evaluation environments
      (training environments always use 'headless'). 'gui' requires
      `num_parallel_environments_eval == 1`.
    conv_layer_params: Convolution parameters for the 'depth_seg'
      preprocessing stack.
    encoder_fc_layers: Fully connected sizes of both preprocessing stacks.
    actor_fc_layers: Fully connected sizes for the actor network.
    value_fc_layers: Fully connected sizes for the value network.
    use_rnns: If True, use the RNN variants of the actor and value networks.
    num_environment_steps: Training stops once this many environment steps
      have been collected.
    collect_episodes_per_iteration: Multiplied by `num_parallel_environments`
      to give the number of episodes collected per iteration.
    num_parallel_environments: Number of parallel training environments.
    replay_buffer_capacity: `max_length` of the replay buffer (per
      environment).
    num_epochs: Passed to `PPOAgent`.
    learning_rate: Learning rate of the Adam optimizer.
    num_eval_episodes: Number of episodes per evaluation.
    eval_interval: Evaluate whenever the global step is a multiple of this.
    eval_only: If True, only evaluate and return.
    eval_deterministic: If True, evaluate `tf_agent.policy`, otherwise
      `tf_agent.collect_policy`.
    num_parallel_environments_eval: Number of parallel eval environments.
    model_ids_eval: Optional list with one model id per eval environment.
    train_checkpoint_interval: Step interval for agent checkpoints.
    policy_checkpoint_interval: Step interval for policy checkpoints.
    rb_checkpoint_interval: Step interval for replay buffer checkpoints.
    log_interval: Step interval for logging loss and throughput.
    summary_interval: Step interval at which train summaries are recorded.
    summaries_flush_secs: Flush period of the summary writers, in seconds.
    debug_summaries: Passed to `PPOAgent`.
    summarize_grads_and_vars: Passed to `PPOAgent`.
    eval_metrics_callback: Passed as `callback` to
      `metric_utils.compute_summaries`.

  Raises:
    AttributeError: If `root_dir` is None.
    AssertionError: If `model_ids` / `model_ids_eval` do not match the
      corresponding number of environments, or if 'gui' mode is requested with
      more than one eval environment.
  """
  if root_dir is None:
    raise AttributeError('train_eval requires a root_dir.')

  root_dir = os.path.expanduser(root_dir)
  train_dir = os.path.join(root_dir, 'train')
  eval_dir = os.path.join(root_dir, 'eval')

  train_summary_writer = tf.compat.v2.summary.create_file_writer(
      train_dir, flush_millis=summaries_flush_secs * 1000)
  train_summary_writer.set_as_default()

  eval_summary_writer = tf.compat.v2.summary.create_file_writer(
      eval_dir, flush_millis=summaries_flush_secs * 1000)
  eval_metrics = [
      batched_py_metric.BatchedPyMetric(
          py_metrics.AverageReturnMetric,
          metric_args={'buffer_size': num_eval_episodes},
          batch_size=num_parallel_environments_eval),
      batched_py_metric.BatchedPyMetric(
          py_metrics.AverageEpisodeLengthMetric,
          metric_args={'buffer_size': num_eval_episodes},
          batch_size=num_parallel_environments_eval),
  ]
  eval_summary_writer_flush_op = eval_summary_writer.flush()

  global_step = tf.compat.v1.train.get_or_create_global_step()
  with tf.compat.v2.summary.record_if(
      lambda: tf.math.equal(global_step % summary_interval, 0)):
    if model_ids is None:
      model_ids = [None] * num_parallel_environments
    else:
      assert len(model_ids) == num_parallel_environments,\
          'model ids provided, but length not equal to num_parallel_environments'

    if model_ids_eval is None:
      model_ids_eval = [None] * num_parallel_environments_eval
    else:
      assert len(model_ids_eval) == num_parallel_environments_eval,\
          'model ids eval provided, but length not equal to num_parallel_environments_eval'

    # `model_id` is bound as a default argument so that each constructor
    # keeps its own id instead of the loop's last value.
    tf_py_env = [lambda model_id=model_ids[i]: env_load_fn(model_id, 'headless', gpu)
                 for i in range(num_parallel_environments)]
    tf_env = tf_py_environment.TFPyEnvironment(parallel_py_environment.ParallelPyEnvironment(tf_py_env))

    if eval_env_mode == 'gui':
      assert num_parallel_environments_eval == 1, 'only one GUI env is allowed'
    eval_py_env = [lambda model_id=model_ids_eval[i]: env_load_fn(model_id, eval_env_mode, gpu)
                   for i in range(num_parallel_environments_eval)]
    eval_py_env = parallel_py_environment.ParallelPyEnvironment(eval_py_env)

    optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate)

    time_step_spec = tf_env.time_step_spec()
    observation_spec = tf_env.observation_spec()
    action_spec = tf_env.action_spec()
    print('observation_spec', observation_spec)
    print('action_spec', action_spec)

    glorot_uniform_initializer = tf.compat.v1.keras.initializers.glorot_uniform()
    preprocessing_layers = {
        'depth_seg': tf.keras.Sequential(mlp_layers(
            conv_layer_params=conv_layer_params,
            fc_layer_params=encoder_fc_layers,
            kernel_initializer=glorot_uniform_initializer,
        )),
        'sensor': tf.keras.Sequential(mlp_layers(
            conv_layer_params=None,
            fc_layer_params=encoder_fc_layers,
            kernel_initializer=glorot_uniform_initializer,
        )),
    }
    preprocessing_combiner = tf.keras.layers.Concatenate(axis=-1)

    if use_rnns:
      actor_net = actor_distribution_rnn_network.ActorDistributionRnnNetwork(
          observation_spec,
          action_spec,
          preprocessing_layers=preprocessing_layers,
          preprocessing_combiner=preprocessing_combiner,
          input_fc_layer_params=actor_fc_layers,
          output_fc_layer_params=None)
      value_net = value_rnn_network.ValueRnnNetwork(
          observation_spec,
          preprocessing_layers=preprocessing_layers,
          preprocessing_combiner=preprocessing_combiner,
          input_fc_layer_params=value_fc_layers,
          output_fc_layer_params=None)
    else:
      actor_net = actor_distribution_network.ActorDistributionNetwork(
          observation_spec,
          action_spec,
          preprocessing_layers=preprocessing_layers,
          preprocessing_combiner=preprocessing_combiner,
          fc_layer_params=actor_fc_layers,
          kernel_initializer=glorot_uniform_initializer
      )
      value_net = value_network.ValueNetwork(
          observation_spec,
          preprocessing_layers=preprocessing_layers,
          preprocessing_combiner=preprocessing_combiner,
          fc_layer_params=value_fc_layers,
          kernel_initializer=glorot_uniform_initializer
      )

    tf_agent = ppo_agent.PPOAgent(
        time_step_spec,
        action_spec,
        optimizer,
        actor_net=actor_net,
        value_net=value_net,
        num_epochs=num_epochs,
        debug_summaries=debug_summaries,
        summarize_grads_and_vars=summarize_grads_and_vars,
        train_step_counter=global_step)

    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.compat.v1.Session(config=config)

    # Everything that follows runs under try/finally so the session is
    # released on the eval-only early return and on errors, not only after a
    # complete training run.
    try:
      replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(
          tf_agent.collect_data_spec,
          batch_size=num_parallel_environments,
          max_length=replay_buffer_capacity)

      if eval_deterministic:
        eval_py_policy = py_tf_policy.PyTFPolicy(tf_agent.policy)
      else:
        eval_py_policy = py_tf_policy.PyTFPolicy(tf_agent.collect_policy)

      environment_steps_metric = tf_metrics.EnvironmentSteps()
      environment_steps_count = environment_steps_metric.result()
      step_metrics = [
          tf_metrics.NumberOfEpisodes(),
          environment_steps_metric,
      ]
      train_metrics = step_metrics + [
          tf_metrics.AverageReturnMetric(
              buffer_size=100,
              batch_size=num_parallel_environments),
          tf_metrics.AverageEpisodeLengthMetric(
              buffer_size=100,
              batch_size=num_parallel_environments),
      ]

      # Add to replay buffer and other agent specific observers.
      replay_buffer_observer = [replay_buffer.add_batch]

      collect_policy = tf_agent.collect_policy

      collect_op = dynamic_episode_driver.DynamicEpisodeDriver(
          tf_env,
          collect_policy,
          observers=replay_buffer_observer + train_metrics,
          num_episodes=collect_episodes_per_iteration * num_parallel_environments).run()

      trajectories = replay_buffer.gather_all()

      train_op, _ = tf_agent.train(experience=trajectories)

      # Chain the ops so that running `train_op` trains first and then clears
      # the replay buffer.
      with tf.control_dependencies([train_op]):
        clear_replay_op = replay_buffer.clear()

      with tf.control_dependencies([clear_replay_op]):
        train_op = tf.identity(train_op)

      train_checkpointer = common.Checkpointer(
          ckpt_dir=train_dir,
          agent=tf_agent,
          global_step=global_step,
          metrics=metric_utils.MetricsGroup(train_metrics, 'train_metrics'))
      policy_checkpointer = common.Checkpointer(
          ckpt_dir=os.path.join(train_dir, 'policy'),
          policy=tf_agent.policy,
          global_step=global_step)
      rb_checkpointer = common.Checkpointer(
          ckpt_dir=os.path.join(train_dir, 'replay_buffer'),
          max_to_keep=1,
          replay_buffer=replay_buffer)

      summary_ops = []
      for train_metric in train_metrics:
        summary_ops.append(train_metric.tf_summaries(
            train_step=global_step, step_metrics=step_metrics))

      with eval_summary_writer.as_default(), tf.compat.v2.summary.record_if(True):
        for eval_metric in eval_metrics:
          eval_metric.tf_summaries(
              train_step=global_step, step_metrics=step_metrics)

      init_agent_op = tf_agent.initialize()

      with sess.as_default():
        # Initialize graph.
        train_checkpointer.initialize_or_restore(sess)
        rb_checkpointer.initialize_or_restore(sess)

        if eval_only:
          metric_utils.compute_summaries(
              eval_metrics,
              eval_py_env,
              eval_py_policy,
              num_episodes=num_eval_episodes,
              global_step=0,
              callback=eval_metrics_callback,
              tf_summaries=False,
              log=True,
          )
          episodes = eval_py_env.get_stored_episodes()
          # Flatten the per-environment episode lists and keep the first
          # `num_eval_episodes`.
          episodes = [episode for sublist in episodes for episode in sublist][:num_eval_episodes]
          metrics = episode_utils.get_metrics(episodes)
          for key in sorted(metrics.keys()):
            print(key, ':', metrics[key])

          save_path = os.path.join(eval_dir, 'episodes_eval.pkl')
          episode_utils.save(episodes, save_path)
          print('EVAL DONE')
          return

        common.initialize_uninitialized_variables(sess)
        sess.run(init_agent_op)
        sess.run(train_summary_writer.init())
        sess.run(eval_summary_writer.init())

        collect_time = 0
        train_time = 0
        timed_at_step = sess.run(global_step)
        steps_per_second_ph = tf.compat.v1.placeholder(
            tf.float32, shape=(), name='steps_per_sec_ph')
        steps_per_second_summary = tf.compat.v2.summary.scalar(
            name='global_steps_per_sec', data=steps_per_second_ph,
            step=global_step)

        global_step_val = sess.run(global_step)
        while sess.run(environment_steps_count) < num_environment_steps:
          global_step_val = sess.run(global_step)
          if global_step_val % eval_interval == 0:
            metric_utils.compute_summaries(
                eval_metrics,
                eval_py_env,
                eval_py_policy,
                num_episodes=num_eval_episodes,
                global_step=global_step_val,
                callback=eval_metrics_callback,
                log=True,
            )
            with eval_summary_writer.as_default(), tf.compat.v2.summary.record_if(True):
              with tf.name_scope('Metrics/'):
                episodes = eval_py_env.get_stored_episodes()
                episodes = [episode for sublist in episodes for episode in sublist][:num_eval_episodes]
                metrics = episode_utils.get_metrics(episodes)
                for key in sorted(metrics.keys()):
                  print(key, ':', metrics[key])
                  metric_op = tf.compat.v2.summary.scalar(name=key,
                                                          data=metrics[key],
                                                          step=global_step_val)
                  sess.run(metric_op)
            sess.run(eval_summary_writer_flush_op)

          start_time = time.time()
          sess.run(collect_op)
          collect_time += time.time() - start_time
          start_time = time.time()
          total_loss, _ = sess.run([train_op, summary_ops])
          train_time += time.time() - start_time

          global_step_val = sess.run(global_step)
          if global_step_val % log_interval == 0:
            logging.info('step = %d, loss = %f', global_step_val, total_loss)
            steps_per_sec = (
                (global_step_val - timed_at_step) / (collect_time + train_time))
            logging.info('%.3f steps/sec', steps_per_sec)
            sess.run(
                steps_per_second_summary,
                feed_dict={steps_per_second_ph: steps_per_sec})
            logging.info('%s', 'collect_time = {}, train_time = {}'.format(
                collect_time, train_time))
            # Restart the timing window for the next throughput report.
            timed_at_step = global_step_val
            collect_time = 0
            train_time = 0

          if global_step_val % train_checkpoint_interval == 0:
            train_checkpointer.save(global_step=global_step_val)

          if global_step_val % policy_checkpoint_interval == 0:
            policy_checkpointer.save(global_step=global_step_val)

          if global_step_val % rb_checkpoint_interval == 0:
            rb_checkpointer.save(global_step=global_step_val)

        # One final eval before exiting.
        metric_utils.compute_summaries(
            eval_metrics,
            eval_py_env,
            eval_py_policy,
            num_episodes=num_eval_episodes,
            global_step=global_step_val,
            callback=eval_metrics_callback,
            log=True,
        )
        sess.run(eval_summary_writer_flush_op)
    finally:
      sess.close()
def __init__(self,
             input_tensor_spec,
             preprocessing_layers=None,
             preprocessing_combiner=None,
             action_fc_layer_params=(200, ),
             joint_fc_layer_params=(100, ),
             lstm_size=(40, ),
             output_fc_layer_params=(200, 100),
             activation_fn=tf.keras.activations.relu,
             name='MultiInputsCriticRnnNetwork'):
  """Creates an instance of `MultiInputsCriticRnnNetwork`.

  Copies the supplied preprocessing layers and combiner, then builds the
  action stack, the joint stack, the LSTM cell and an output stack that ends
  in a single-unit `Dense` layer named 'value'.

  Args:
    input_tensor_spec: A tuple of (observation, action) each of type
      `tensor_spec.TensorSpec` representing the inputs.
    preprocessing_layers: (Optional.) A nest of `tf.keras.layers.Layer`
      representing preprocessing for the different observations. All of these
      layers must not be already built. For more details see the documentation
      of `networks.EncodingNetwork`.
    preprocessing_combiner: (Optional.) A keras layer that takes a flat list
      of tensors and combines them. Good options include
      `tf.keras.layers.Add` and `tf.keras.layers.Concatenate(axis=-1)`. This
      layer must not be already built. For more details see the documentation
      of `networks.EncodingNetwork`.
    action_fc_layer_params: Optional list of parameters for a fully_connected
      layer to apply to the actions, where each item is the number of units in
      the layer.
    joint_fc_layer_params: Optional list of parameters for a fully_connected
      layer to apply after merging observations and actions, where each item
      is the number of units in the layer.
    lstm_size: An iterable of ints specifying the LSTM cell sizes to use.
    output_fc_layer_params: Optional list of fully_connected parameters, where
      each item is the number of units in the layer. This is applied after the
      LSTM cell.
    activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
    name: A string representing name of the network.

  Raises:
    ValueError: If the action spec contains more than one item, or if the
      observation spec contains more than one item and no
      `preprocessing_combiner` is given.
  """
  observation_spec, action_spec = input_tensor_spec

  if len(tf.nest.flatten(action_spec)) > 1:
    raise ValueError(
        'Only a single action is supported by this network.')

  flat_preprocessing_layers = None
  if preprocessing_layers is not None:
    flat_preprocessing_layers = [
        _copy_layer(layer)
        for layer in tf.nest.flatten(preprocessing_layers)
    ]
    # Assert shallow structure is the same. This verifies preprocessing
    # layers can be applied on expected input nests. Because the layers were
    # flattened above, a non-sequence observation spec is wrapped in a list so
    # that the shallow-structure check works.
    if nest.is_sequence(observation_spec):
      observation_nest = observation_spec
    else:
      observation_nest = [observation_spec]
    nest.assert_shallow_structure(preprocessing_layers,
                                  observation_nest,
                                  check_types=False)

  has_multiple_observations = len(tf.nest.flatten(observation_spec)) > 1
  if has_multiple_observations and preprocessing_combiner is None:
    raise ValueError(
        'preprocessing_combiner layer is required when more than 1 '
        'observation_spec is provided.')

  if preprocessing_combiner is not None:
    preprocessing_combiner = _copy_layer(preprocessing_combiner)

  def _new_fan_in_uniform():
    # A fresh initializer instance for each layer stack.
    return tf.compat.v1.keras.initializers.VarianceScaling(
        scale=1. / 3., mode='fan_in', distribution='uniform')

  action_layers = utils.mlp_layers(
      None,
      action_fc_layer_params,
      activation_fn=activation_fn,
      kernel_initializer=_new_fan_in_uniform(),
      name='action_encoding')

  joint_layers = utils.mlp_layers(
      None,
      joint_fc_layer_params,
      activation_fn=activation_fn,
      kernel_initializer=_new_fan_in_uniform(),
      name='joint_mlp')

  # Create RNN cell
  if len(lstm_size) == 1:
    rnn_cell = tf.keras.layers.LSTMCell(lstm_size[0])
  else:
    lstm_cells = [tf.keras.layers.LSTMCell(size) for size in lstm_size]
    rnn_cell = tf.keras.layers.StackedRNNCells(lstm_cells)

  # State specs are numbered 'network_state_0', 'network_state_1', ... in the
  # order in which map_structure visits the state sizes.
  next_state_index = [0]

  def _numbered_state_spec(size):
    index = next_state_index[0]
    next_state_index[0] = index + 1
    return tensor_spec.TensorSpec(size,
                                  dtype=tf.float32,
                                  name='network_state_%d' % index)

  rnn_state_spec = tf.nest.map_structure(_numbered_state_spec,
                                         rnn_cell.state_size)

  output_layers = utils.mlp_layers(
      fc_layer_params=output_fc_layer_params, name='output')
  value_layer = tf.keras.layers.Dense(
      1,
      activation=None,
      kernel_initializer=tf.keras.initializers.RandomUniform(
          minval=-0.003, maxval=0.003),
      name='value')
  output_layers.append(value_layer)

  super(MultiInputsCriticRnnNetwork, self).__init__(
      input_tensor_spec=input_tensor_spec,
      state_spec=rnn_state_spec,
      name=name)

  self._action_layers = action_layers
  self._joint_layers = joint_layers
  self._dynamic_unroll = dynamic_unroll_layer.DynamicUnroll(rnn_cell)
  self._output_layers = output_layers

  # Only the nest structure of the preprocessing layers is kept (every leaf
  # replaced by None); the copied layers themselves are stored flat.
  self._preprocessing_nest = tf.nest.map_structure(
      lambda l: None, preprocessing_layers)
  self._flat_preprocessing_layers = flat_preprocessing_layers
  self._preprocessing_combiner = preprocessing_combiner
def __init__(self,
             input_tensor_spec,
             output_tensor_spec,
             preprocessing_layers=None,
             preprocessing_combiner=None,
             conv_layer_params=None,
             input_fc_layer_params=(200, 100),
             lstm_size=(40,),
             output_fc_layer_params=(200, 100),
             activation_fn=tf.keras.activations.relu,
             name='MultiInputsActorRnnNetwork'):
  """Creates an instance of `MultiInputsActorRnnNetwork`.

  Copies the supplied preprocessing layers and combiner, then builds the
  input stack, the LSTM cell, the output stack and one tanh-activated `Dense`
  layer named 'action' per flattened entry of `output_tensor_spec`.

  Args:
    input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing the
      input observations.
    output_tensor_spec: A nest of `tensor_spec.BoundedTensorSpec` representing
      the actions.
    preprocessing_layers: (Optional.) A nest of `tf.keras.layers.Layer`
      representing preprocessing for the different observations. All of these
      layers must not be already built. For more details see the documentation
      of `networks.EncodingNetwork`.
    preprocessing_combiner: (Optional.) A keras layer that takes a flat list
      of tensors and combines them. Good options include
      `tf.keras.layers.Add` and `tf.keras.layers.Concatenate(axis=-1)`. This
      layer must not be already built. For more details see the documentation
      of `networks.EncodingNetwork`.
    conv_layer_params: Optional list of convolution layers parameters, where
      each item is a length-three tuple indicating (filters, kernel_size,
      stride).
    input_fc_layer_params: Optional list of fully_connected parameters, where
      each item is the number of units in the layer. This is applied before
      the LSTM cell.
    lstm_size: An iterable of ints specifying the LSTM cell sizes to use.
    output_fc_layer_params: Optional list of fully_connected parameters, where
      each item is the number of units in the layer. This is applied after the
      LSTM cell.
    activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
    name: A string representing name of the network.

  Raises:
    ValueError: If `input_tensor_spec` contains more than one observation and
      no `preprocessing_combiner` is given.
  """
  observation_spec = input_tensor_spec

  flat_preprocessing_layers = None
  if preprocessing_layers is not None:
    flat_preprocessing_layers = [
        _copy_layer(layer)
        for layer in tf.nest.flatten(preprocessing_layers)
    ]
    # Assert shallow structure is the same. This verifies preprocessing
    # layers can be applied on expected input nests. Because the layers were
    # flattened above, a non-sequence observation spec is wrapped in a list so
    # that the shallow-structure check works.
    if nest.is_sequence(observation_spec):
      observation_nest = observation_spec
    else:
      observation_nest = [observation_spec]
    nest.assert_shallow_structure(
        preprocessing_layers, observation_nest, check_types=False)

  has_multiple_observations = len(tf.nest.flatten(observation_spec)) > 1
  if has_multiple_observations and preprocessing_combiner is None:
    raise ValueError(
        'preprocessing_combiner layer is required when more than 1 '
        'observation_spec is provided.')

  if preprocessing_combiner is not None:
    preprocessing_combiner = _copy_layer(preprocessing_combiner)

  pre_lstm_layers = utils.mlp_layers(
      conv_layer_params,
      input_fc_layer_params,
      activation_fn=activation_fn,
      kernel_initializer=tf.compat.v1.keras.initializers.glorot_uniform(),
      name='input_mlp')

  # Create RNN cell
  if len(lstm_size) == 1:
    rnn_cell = tf.keras.layers.LSTMCell(lstm_size[0])
  else:
    lstm_cells = [tf.keras.layers.LSTMCell(size) for size in lstm_size]
    rnn_cell = tf.keras.layers.StackedRNNCells(lstm_cells)

  make_state_spec = functools.partial(
      tensor_spec.TensorSpec, dtype=tf.float32, name='network_state_spec')
  rnn_state_spec = tf.nest.map_structure(make_state_spec,
                                         list(rnn_cell.state_size))

  # NOTE(review): `activation_fn` is not forwarded here, so the output stack
  # uses whatever default `utils.mlp_layers` applies -- confirm this is
  # intended.
  post_lstm_layers = utils.mlp_layers(
      fc_layer_params=output_fc_layer_params, name='output')

  flat_action_spec = tf.nest.flatten(output_tensor_spec)
  # One tanh head per flattened action spec, sized to that spec's element
  # count.
  action_heads = []
  for action_entry in flat_action_spec:
    action_heads.append(
        tf.keras.layers.Dense(
            action_entry.shape.num_elements(),
            activation=tf.keras.activations.tanh,
            kernel_initializer=tf.keras.initializers.RandomUniform(
                minval=-0.003, maxval=0.003),
            name='action'))

  super(MultiInputsActorRnnNetwork, self).__init__(
      input_tensor_spec=input_tensor_spec,
      state_spec=rnn_state_spec,
      name=name)

  self._output_tensor_spec = output_tensor_spec
  self._flat_action_spec = flat_action_spec

  self._conv_layer_params = conv_layer_params
  self._input_layers = pre_lstm_layers
  self._dynamic_unroll = dynamic_unroll_layer.DynamicUnroll(rnn_cell)
  self._output_layers = post_lstm_layers
  self._action_layers = action_heads

  # Only the nest structure of the preprocessing layers is kept (every leaf
  # replaced by None); the copied layers themselves are stored flat.
  self._preprocessing_nest = tf.nest.map_structure(lambda l: None,
                                                   preprocessing_layers)
  self._flat_preprocessing_layers = flat_preprocessing_layers
  self._preprocessing_combiner = preprocessing_combiner
def __init__(self,
             input_tensor_spec,
             output_tensor_spec,
             fc_layer_params=(200, 100),
             activation_fn=tf.nn.relu,
             output_activation_fn=None,
             kernel_initializer=None,
             last_kernel_initializer=None,
             discrete_projection_net=_categorical_projection_net,
             continuous_projection_net=_normal_projection_net,
             name='PolicyNetwork'):
    """Creates an instance of `PolicyNetwork`.

    Builds one projection network per entry of `output_tensor_spec`, a
    stack of fully connected layers, and a final dense layer named
    'value'.

    Args:
        input_tensor_spec: A possibly nested container of
            `tensor_spec.TensorSpec` representing the inputs.
        output_tensor_spec: A possibly nested container of
            `tensor_spec.TensorSpec` representing the outputs.
        fc_layer_params: Optional list of fully connected parameters,
            where each item is the number of units in the layer.
        activation_fn: Activation function used by the fully connected
            stack, e.g. tf.nn.relu.
        output_activation_fn: Activation function of the final dense
            layer. Can be used to restrict the output range, e.g.
            tf.keras.activations.sigmoid bounds it to (0, 1).
        kernel_initializer: Kernel initializer for the fully connected
            stack. If None, a uniform fan-in VarianceScaling initializer
            with scale 1/3 is used.
        last_kernel_initializer: Kernel initializer for the final dense
            layer. If None, RandomUniform in [-0.003, 0.003] is used.
        discrete_projection_net: Callable that takes a spec and returns
            the projection network for a discrete spec.
        continuous_projection_net: Callable that takes a spec and returns
            the projection network for a non-discrete spec.
        name: A string representing name of the network.
    """

    def _build_projection(spec):
        # Discrete specs use the discrete factory; all others use the
        # continuous one.
        factory = (discrete_projection_net
                   if tensor_spec.is_discrete(spec)
                   else continuous_projection_net)
        return factory(spec)

    projection_networks = tf.nest.map_structure(_build_projection,
                                                output_tensor_spec)
    output_spec = tf.nest.map_structure(lambda net: net.output_spec,
                                        projection_networks)

    # Width of the final dense layer.
    # NOTE(review): this reads `.maximum`/`.minimum`/`.shape` directly off
    # `output_tensor_spec`, so it presumably expects a single (non-nested)
    # spec here -- confirm against callers.
    if not tensor_spec.is_discrete(output_tensor_spec):
        action_dim = output_tensor_spec.shape.num_elements()
    else:
        action_dim = np.unique(
            output_tensor_spec.maximum - output_tensor_spec.minimum + 1)

    super(PolicyNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec,
        state_spec=(),
        output_spec=output_spec,
        name=name)

    self._flat_specs = tf.nest.flatten(input_tensor_spec)

    hidden_initializer = kernel_initializer
    if hidden_initializer is None:
        hidden_initializer = (
            tf.compat.v1.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform'))

    final_initializer = last_kernel_initializer
    if final_initializer is None:
        final_initializer = tf.keras.initializers.RandomUniform(
            minval=-0.003, maxval=0.003)

    self._fc_layers = utils.mlp_layers(
        None,
        fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=hidden_initializer,
        name='mlp')
    final_layer = tf.keras.layers.Dense(
        action_dim,
        activation=output_activation_fn,
        kernel_initializer=final_initializer,
        name='value')
    self._fc_layers.append(final_layer)

    self._projection_networks = projection_networks
    self._output_tensor_spec = output_tensor_spec
def __init__(self,
             input_tensor_spec,
             observation_preprocessing_layers=None,
             observation_preprocessing_combiner=None,
             observation_conv_layer_params=None,
             observation_fc_layer_params=(200,),
             action_fc_layer_params=(200,),
             joint_fc_layer_params=(100,),
             lstm_size=(40,),
             output_fc_layer_params=(200, 100),
             activation_fn=tf.keras.activations.relu,
             dtype=tf.float32,
             name='CriticRnnNetwork'):
    """Creates an instance of `CriticRnnNetwork`.

    This CriticRnnNetwork supports handling complex observations with
    preprocessing_layer and preprocessing_combiner.

    Args:
        input_tensor_spec: A tuple of (observation, action) each of type
            `tensor_spec.TensorSpec` representing the inputs.
        observation_preprocessing_layers: (Optional.) A nest of
            `tf.keras.layers.Layer` representing preprocessing for the
            different observations. All of these layers must not be
            already built. For more details see the documentation of
            `networks.EncodingNetwork`.
        observation_preprocessing_combiner: (Optional.) A keras layer that
            takes a flat list of tensors and combines them. Good options
            include `tf.keras.layers.Add` and
            `tf.keras.layers.Concatenate(axis=-1)`. This layer must not be
            already built. For more details see the documentation of
            `networks.EncodingNetwork`.
        observation_conv_layer_params: Optional list of convolution layers
            parameters to apply to the observations, where each item is a
            length-three tuple indicating (filters, kernel_size, stride).
        observation_fc_layer_params: List of fully_connected parameters,
            where each item is the number of units in the layer. This is
            applied after the observation convolutional layer. Its last
            entry is read to size the observation encoding spec, so it
            must be a non-empty sequence.
        action_fc_layer_params: Optional list of parameters for a
            fully_connected layer to apply to the actions, where each item
            is the number of units in the layer.
        joint_fc_layer_params: Optional list of parameters for a
            fully_connected layer to apply after merging observations and
            actions, where each item is the number of units in the layer.
        lstm_size: An iterable of ints specifying the LSTM cell sizes to
            use.
        output_fc_layer_params: Optional list of fully_connected
            parameters, where each item is the number of units in the
            layer. This is applied after the LSTM cell.
        activation_fn: Activation function, e.g. tf.nn.relu,
            slim.leaky_relu, ...
        dtype: The dtype forwarded to the observation encoder and to the
            LSTM encoder.
        name: A string representing name of the network.

    Raises:
        ValueError: If `action_spec` contains more than one item.
    """
    # NOTE: the default for `joint_fc_layer_params` used to be written as
    # `(100)`, which is the integer 100 rather than a one-element tuple and
    # therefore is not a valid list of layer sizes.
    observation_spec, action_spec = input_tensor_spec

    if len(tf.nest.flatten(action_spec)) > 1:
        raise ValueError(
            'Only a single action is supported by this network.')

    kernel_initializer = tf.compat.v1.variance_scaling_initializer(
        scale=2.0, mode='fan_in', distribution='truncated_normal')

    obs_encoder = encoding_network.EncodingNetwork(
        observation_spec,
        preprocessing_layers=observation_preprocessing_layers,
        preprocessing_combiner=observation_preprocessing_combiner,
        conv_layer_params=observation_conv_layer_params,
        fc_layer_params=observation_fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=kernel_initializer,
        dtype=dtype,
        name='obs_encoding')

    # The action MLP is wrapped in a single layer so it can be handed to
    # the LSTM encoder as the preprocessing layer for the action input.
    action_layers = sequential_layer.SequentialLayer(
        utils.mlp_layers(
            fc_layer_params=action_fc_layer_params,
            activation_fn=activation_fn,
            kernel_initializer=(
                tf.compat.v1.keras.initializers.VarianceScaling(
                    scale=1. / 3., mode='fan_in', distribution='uniform')),
            name='action_encoding'))

    # Spec of the first LSTM-encoder input: a vector as wide as the last
    # observation fully connected layer.
    obs_encoding_spec = tf.TensorSpec(
        shape=(observation_fc_layer_params[-1],), dtype=tf.float32)

    lstm_encoder = lstm_encoding_network.LSTMEncodingNetwork(
        input_tensor_spec=(obs_encoding_spec, action_spec),
        preprocessing_layers=(tf.keras.layers.Flatten(), action_layers),
        preprocessing_combiner=tf.keras.layers.Concatenate(axis=-1),
        input_fc_layer_params=joint_fc_layer_params,
        lstm_size=lstm_size,
        output_fc_layer_params=output_fc_layer_params,
        activation_fn=activation_fn,
        dtype=dtype,
        name='lstm')

    # Single-unit linear head named 'value'.
    output_layers = [
        tf.keras.layers.Dense(
            1,
            activation=None,
            kernel_initializer=tf.keras.initializers.RandomUniform(
                minval=-0.003, maxval=0.003),
            name='value')
    ]

    super(CriticRnnNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec,
        state_spec=lstm_encoder.state_spec,
        name=name)

    self._obs_encoder = obs_encoder
    self._lstm_encoder = lstm_encoder
    self._output_layers = output_layers
def __init__(self,
             input_tensor_spec,
             output_tensor_spec,
             fc_layer_params=None,
             dropout_layer_params=None,
             conv_layer_params=None,
             activation_fn=tf.keras.activations.relu,
             kernel_initializer=None,
             last_kernel_initializer=None,
             name='ActorNetwork'):
    """Creates an instance of `ActorNetwork`.

    Args:
        input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing
            the inputs.
        output_tensor_spec: A nest of `tensor_spec.BoundedTensorSpec`
            representing the outputs.
        fc_layer_params: Optional list of fully_connected parameters,
            where each item is the number of units in the layer.
        dropout_layer_params: Optional list of dropout layer parameters.
            Each item is either the fraction of input units to drop or a
            dictionary of parameters according to the keras.Dropout
            documentation. The extra parameter `permanent`, when True,
            applies dropout at inference too (approximated Bayesian
            inference). A dropout layer follows each fully connected
            layer unless its entry is None. Must have the same length as
            fc_layer_params, or be None.
        conv_layer_params: Optional list of convolution layers parameters,
            where each item is a length-three tuple indicating
            (filters, kernel_size, stride).
        activation_fn: Activation function, e.g. tf.nn.relu,
            slim.leaky_relu, ...
        kernel_initializer: Kernel initializer for every layer except the
            final 'action' layer. If None, a uniform fan-in
            VarianceScaling initializer with scale 1/3 is used.
        last_kernel_initializer: Kernel initializer for the final
            'action' layer. If None, RandomUniform in [-0.003, 0.003] is
            used.
        name: A string representing name of the network.

    Raises:
        ValueError: If `input_tensor_spec` or `output_tensor_spec`
            contains more than one item, or if the action data type is
            not `float`.
    """
    super(ActorNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec, state_spec=(), name=name)

    num_inputs = len(tf.nest.flatten(input_tensor_spec))
    if num_inputs > 1:
        raise ValueError(
            'Only a single observation is supported by this network')

    flat_action_spec = tf.nest.flatten(output_tensor_spec)
    if len(flat_action_spec) > 1:
        raise ValueError(
            'Only a single action is supported by this network')

    single_action_spec = flat_action_spec[0]
    self._single_action_spec = single_action_spec
    if single_action_spec.dtype not in (tf.float32, tf.float64):
        raise ValueError(
            'Only float actions are supported by this network.')

    hidden_initializer = kernel_initializer
    if hidden_initializer is None:
        hidden_initializer = (
            tf.compat.v1.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform'))

    final_initializer = last_kernel_initializer
    if final_initializer is None:
        final_initializer = tf.keras.initializers.RandomUniform(
            minval=-0.003, maxval=0.003)

    # TODO(kbanoop): Replace mlp_layers with encoding networks.
    self._mlp_layers = utils.mlp_layers(
        conv_layer_params,
        fc_layer_params,
        dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=hidden_initializer,
        name='input_mlp')

    # Final layer: one tanh unit per element of the action spec.
    action_layer = tf.keras.layers.Dense(
        single_action_spec.shape.num_elements(),
        activation=tf.keras.activations.tanh,
        kernel_initializer=final_initializer,
        name='action')
    self._mlp_layers.append(action_layer)

    self._output_tensor_spec = output_tensor_spec
def __init__(self,
             input_tensor_spec,
             gnn,
             observation_fc_layer_params=None,
             observation_dropout_layer_params=None,
             observation_conv_layer_params=None,
             observation_activation_fn=tf.nn.relu,
             action_fc_layer_params=None,
             action_dropout_layer_params=None,
             action_conv_layer_params=None,
             action_activation_fn=tf.nn.relu,
             joint_fc_layer_params=None,
             joint_dropout_layer_params=None,
             joint_activation_fn=tf.nn.relu,
             output_activation_fn=None,
             name='CriticNetwork'):
    """Creates an instance of `GNNCriticNetwork`.

    Every `*_dropout_layer_params` argument follows the same convention:
    each item is either the fraction of input units to drop or a
    dictionary of parameters according to the keras.Dropout documentation.
    The extra parameter `permanent`, when True, applies dropout at
    inference too (approximated Bayesian inference). A dropout layer
    follows each fully connected layer unless its entry is None. The list
    must have the same length as the matching `*_fc_layer_params`, or be
    None.

    Args:
        input_tensor_spec: A tuple of (observation, action) each a nest of
            `tensor_spec.TensorSpec` representing the inputs.
        gnn: Callable invoked as `gnn(name=...)` to build the graph neural
            network that accepts the input observations and computes node
            embeddings.
        observation_fc_layer_params: Optional list of fully connected
            parameters for observations, where each item is the number of
            units in the layer.
        observation_dropout_layer_params: Optional dropout parameters for
            the observation layers (see above).
        observation_conv_layer_params: Optional list of convolution layer
            parameters for observations, where each item is a
            length-three tuple indicating (num_units, kernel_size, stride).
        observation_activation_fn: Activation function applied to the
            observation layers, e.g. tf.nn.relu, slim.leaky_relu, ...
        action_fc_layer_params: Optional list of fully connected
            parameters for actions, where each item is the number of units
            in the layer.
        action_dropout_layer_params: Optional dropout parameters for the
            action layers (see above).
        action_conv_layer_params: Optional list of convolution layer
            parameters for actions, where each item is a length-three
            tuple indicating (num_units, kernel_size, stride).
        action_activation_fn: Activation function applied to the action
            layers, e.g. tf.nn.relu, slim.leaky_relu, ...
        joint_fc_layer_params: Optional list of fully connected parameters
            after merging observations and actions, where each item is the
            number of units in the layer.
        joint_dropout_layer_params: Optional dropout parameters for the
            joint layers (see above).
        joint_activation_fn: Activation function applied to the joint
            layers, e.g. tf.nn.relu, slim.leaky_relu, ...
        output_activation_fn: Activation function for the last layer. Can
            be used to restrict the output range, e.g.
            tf.keras.activations.sigmoid bounds it to (0, 1).
        name: A string representing name of the network.

    Raises:
        ValueError: If `observation_spec` or `action_spec` contains more
            than one item, or if `gnn` is None.
    """
    super(GNNCriticNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec, state_spec=(), name=name)

    observation_spec, action_spec = input_tensor_spec

    num_observations = len(tf.nest.flatten(observation_spec))
    if num_observations > 1:
        raise ValueError(
            'Only a single observation is supported by this network')

    num_actions = len(tf.nest.flatten(action_spec))
    if num_actions > 1:
        raise ValueError(
            'Only a single action is supported by this network')

    if gnn is None:
        raise ValueError('`gnn` must not be `None`.')

    self._gnn = gnn(name=name + '_GNN')

    def _hidden_initializer():
        # A fresh initializer object per layer stack, as in the original
        # inline construction.
        return tf.compat.v1.keras.initializers.VarianceScaling(
            scale=1. / 3., mode='fan_in', distribution='uniform')

    self._observation_layers = utils.mlp_layers(
        observation_conv_layer_params,
        observation_fc_layer_params,
        observation_dropout_layer_params,
        activation_fn=observation_activation_fn,
        kernel_initializer=_hidden_initializer(),
        name='observation_encoding')

    self._action_layers = utils.mlp_layers(
        action_conv_layer_params,
        action_fc_layer_params,
        action_dropout_layer_params,
        activation_fn=action_activation_fn,
        kernel_initializer=_hidden_initializer(),
        name='action_encoding')

    self._joint_layers = utils.mlp_layers(
        None,
        joint_fc_layer_params,
        joint_dropout_layer_params,
        activation_fn=joint_activation_fn,
        kernel_initializer=_hidden_initializer(),
        name='joint_mlp')

    # Single-unit head named 'value' closes the joint stack.
    value_layer = tf.keras.layers.Dense(
        units=1,
        activation=output_activation_fn,
        kernel_initializer=tf.keras.initializers.RandomUniform(
            minval=-0.003, maxval=0.003),
        name='value')
    self._joint_layers.append(value_layer)
def __init__(self,
             observation_spec,
             action_spec,
             observation_conv_layer_params=None,
             observation_fc_layer_params=None,
             action_fc_layer_params=None,
             joint_fc_layer_params=None,
             activation_fn=tf.nn.relu,
             name='CriticNetwork'):
    """Creates an instance of `CriticNetwork`.

    Builds three layer stacks (observation, action, joint) and appends a
    single-unit linear layer named 'value' to the joint stack.

    Args:
        observation_spec: A nest of `tensor_spec.TensorSpec` representing
            the observations.
        action_spec: A nest of `tensor_spec.BoundedTensorSpec`
            representing the actions.
        observation_conv_layer_params: Optional list of convolution layer
            parameters for observations, where each item is a
            length-three tuple indicating (num_units, kernel_size, stride).
        observation_fc_layer_params: Optional list of fully connected
            parameters for observations, where each item is the number of
            units in the layer.
        action_fc_layer_params: Optional list of fully connected
            parameters for actions, where each item is the number of units
            in the layer.
        joint_fc_layer_params: Optional list of fully connected parameters
            after merging observations and actions, where each item is the
            number of units in the layer.
        activation_fn: Activation function, e.g. tf.nn.relu,
            slim.leaky_relu, ...
        name: A string representing name of the network.

    Raises:
        ValueError: If `observation_spec` or `action_spec` contains more
            than one item.
    """
    super(CriticNetwork, self).__init__(
        observation_spec=observation_spec,
        action_spec=action_spec,
        state_spec=(),
        name=name)

    num_observations = len(nest.flatten(observation_spec))
    if num_observations > 1:
        raise ValueError(
            'Only a single observation is supported by this network')

    flat_action_spec = nest.flatten(action_spec)
    if len(flat_action_spec) > 1:
        raise ValueError(
            'Only a single action is supported by this network')
    self._single_action_spec = flat_action_spec[0]

    def _hidden_initializer():
        # A fresh initializer object per layer stack, as in the original
        # inline construction.
        return tf.keras.initializers.VarianceScaling(
            scale=1. / 3., mode='fan_in', distribution='uniform')

    # TODO(kbanoop): Replace mlp_layers with encoding networks.
    self._observation_layers = utils.mlp_layers(
        observation_conv_layer_params,
        observation_fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=_hidden_initializer(),
        name='observation_encoding')

    self._action_layers = utils.mlp_layers(
        None,
        action_fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=_hidden_initializer(),
        name='action_encoding')

    self._joint_layers = utils.mlp_layers(
        None,
        joint_fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=_hidden_initializer(),
        name='joint_mlp')

    value_layer = tf.keras.layers.Dense(
        1,
        activation=None,
        kernel_initializer=tf.keras.initializers.RandomUniform(
            minval=-0.003, maxval=0.003),
        name='value')
    self._joint_layers.append(value_layer)
def __init__(self,
             input_tensor_spec,
             fc_layer_params=(75, 40),
             dropout_layer_params=None,
             conv_layer_params=None,
             activation_fn=tf.keras.activations.relu,
             name='ValueNetwork'):
    """Creates an instance of `ValueNetwork`.

    Network supports calls with shape outer_rank + observation_spec.shape.
    Note outer_rank must be at least 1.

    Args:
        input_tensor_spec: A `tensor_spec.TensorSpec` or a tuple of specs
            representing the input observations.
        fc_layer_params: Optional list of fully_connected parameters,
            where each item is the number of units in the layer.
        dropout_layer_params: Optional list of dropout layer parameters.
            Each item is either the fraction of input units to drop or a
            dictionary of parameters according to the keras.Dropout
            documentation. The extra parameter `permanent`, when True,
            applies dropout at inference too (approximated Bayesian
            inference). A dropout layer follows each fully connected
            layer unless its entry is None. Must have the same length as
            fc_layer_params, or be None.
        conv_layer_params: Optional list of convolution layers parameters,
            where each item is a length-three tuple indicating
            (filters, kernel_size, stride).
        activation_fn: Activation function, e.g.
            tf.keras.activations.relu.
        name: A string representing name of the network.

    Raises:
        ValueError: If `input_tensor_spec` flattens to more than one
            spec.
    """
    super(ValueNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec, state_spec=(), name=name)

    num_inputs = len(tf.nest.flatten(input_tensor_spec))
    if num_inputs > 1:
        raise ValueError(
            'Network only supports observation specs with a single observation.'
        )

    hidden_initializer = tf.compat.v1.keras.initializers.glorot_uniform()
    self._postprocessing_layers = utils.mlp_layers(
        conv_layer_params,
        fc_layer_params,
        dropout_layer_params=dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=hidden_initializer,
        name='input_mlp')

    # Unnamed single-unit linear layer that closes the stack.
    value_initializer = tf.compat.v1.initializers.random_uniform(
        minval=-0.03, maxval=0.03)
    value_layer = tf.keras.layers.Dense(
        1, activation=None, kernel_initializer=value_initializer)
    self._postprocessing_layers.append(value_layer)
def __init__(self,
             input_tensor_spec,
             output_tensor_spec,
             fc_layer_params=None,
             conv_layer_params=None,
             activation_fn=tf.keras.activations.relu,
             name='ActorNetwork'):
    """Creates an instance of `ActorNetwork`.

    Args:
        input_tensor_spec: A nest of `tensor_spec.TensorSpec` representing
            the inputs.
        output_tensor_spec: A nest of `tensor_spec.BoundedTensorSpec`
            representing the outputs.
        fc_layer_params: Optional list of fully_connected parameters,
            where each item is the number of units in the layer.
        conv_layer_params: Optional list of convolution layers parameters,
            where each item is a length-three tuple indicating
            (filters, kernel_size, stride).
        activation_fn: Activation function, e.g. tf.nn.relu,
            slim.leaky_relu, ...
        name: A string representing name of the network.

    Raises:
        ValueError: If `input_tensor_spec` or `output_tensor_spec`
            contains more than one item, or if the action data type is
            not `float`.
    """
    super(ActorNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec, state_spec=(), name=name)

    num_inputs = len(tf.nest.flatten(input_tensor_spec))
    if num_inputs > 1:
        raise ValueError(
            'Only a single observation is supported by this network')

    flat_action_spec = tf.nest.flatten(output_tensor_spec)
    if len(flat_action_spec) > 1:
        raise ValueError(
            'Only a single action is supported by this network')

    single_action_spec = flat_action_spec[0]
    self._single_action_spec = single_action_spec
    if single_action_spec.dtype not in (tf.float32, tf.float64):
        raise ValueError(
            'Only float actions are supported by this network.')

    # TODO(kbanoop): Replace mlp_layers with encoding networks.
    hidden_initializer = tf.compat.v1.keras.initializers.VarianceScaling(
        scale=1. / 3., mode='fan_in', distribution='uniform')
    self._mlp_layers = utils.mlp_layers(
        conv_layer_params,
        fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=hidden_initializer,
        name='input_mlp')

    # Final layer: one tanh unit per element of the action spec.
    action_layer = tf.keras.layers.Dense(
        single_action_spec.shape.num_elements(),
        activation=tf.keras.activations.tanh,
        kernel_initializer=tf.keras.initializers.RandomUniform(
            minval=-0.003, maxval=0.003),
        name='action')
    self._mlp_layers.append(action_layer)

    self._output_tensor_spec = output_tensor_spec
def __init__(self,
             input_tensor_spec,
             preprocessing_layers,
             preprocessing_combiner,
             joint_fc_layer_params=None,
             joint_dropout_layer_params=None,
             joint_activation_fn=tf.nn.relu,
             output_activation_fn=None,
             kernel_initializer=None,
             last_kernel_initializer=None,
             name='CriticNetwork'):
    """Creates an instance of `MultiObservationCriticNetwork`.

    Observations are handled by an `EncodingNetwork` built from the given
    preprocessing layers and combiner (with no conv or fully connected
    layers of its own). A joint fully connected stack ending in a
    single-unit layer named 'value' is built alongside it.

    Args:
        input_tensor_spec: A tuple of (observation, action) each a nest of
            `tensor_spec.TensorSpec` representing the inputs.
        preprocessing_layers: Forwarded to `EncodingNetwork` as its
            `preprocessing_layers` argument.
        preprocessing_combiner: Forwarded to `EncodingNetwork` as its
            `preprocessing_combiner` argument.
        joint_fc_layer_params: Optional list of fully connected parameters
            for the joint stack, where each item is the number of units in
            the layer.
        joint_dropout_layer_params: Optional list of dropout layer
            parameters. Each item is either the fraction of input units to
            drop or a dictionary of parameters according to the
            keras.Dropout documentation. The extra parameter `permanent`,
            when True, applies dropout at inference too (approximated
            Bayesian inference). A dropout layer follows each fully
            connected layer unless its entry is None. Must have the same
            length as joint_fc_layer_params, or be None.
        joint_activation_fn: Activation function applied to the joint
            layers, e.g. tf.nn.relu, slim.leaky_relu, ...
        output_activation_fn: Activation function for the last layer. Can
            be used to restrict the output range, e.g.
            tf.keras.activations.sigmoid bounds it to (0, 1).
        kernel_initializer: Kernel initializer for all layers except the
            value regression layer. If None, a uniform fan-in
            VarianceScaling initializer with scale 1/3 is used.
        last_kernel_initializer: Kernel initializer for the value
            regression layer. If None, RandomUniform in [-0.003, 0.003]
            is used.
        name: A string representing name of the network.
    """
    super(MultiObservationCriticNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec, state_spec=(), name=name)

    observation_spec, action_spec = input_tensor_spec

    # Only the first flattened action spec is kept; no length check is
    # performed here.
    self._single_action_spec = tf.nest.flatten(action_spec)[0]

    # Resolve default initializers.
    hidden_initializer = kernel_initializer
    if hidden_initializer is None:
        hidden_initializer = (
            tf.compat.v1.keras.initializers.VarianceScaling(
                scale=1. / 3., mode='fan_in', distribution='uniform'))

    final_initializer = last_kernel_initializer
    if final_initializer is None:
        final_initializer = tf.keras.initializers.RandomUniform(
            minval=-0.003, maxval=0.003)

    # Observation encoder: preprocessing and combining only.
    self._encoder = encoding_network.EncodingNetwork(
        observation_spec,
        preprocessing_layers=preprocessing_layers,
        preprocessing_combiner=preprocessing_combiner,
        conv_layer_params=None,
        fc_layer_params=None,
        dropout_layer_params=None,
        activation_fn=tf.keras.activations.relu,
        kernel_initializer=hidden_initializer,
        batch_squash=False)

    # TODO(kbanoop): Replace mlp_layers with encoding networks.
    self._joint_layers = utils.mlp_layers(
        None,
        joint_fc_layer_params,
        joint_dropout_layer_params,
        activation_fn=joint_activation_fn,
        kernel_initializer=hidden_initializer,
        name='joint_mlp')

    value_layer = tf.keras.layers.Dense(
        1,
        activation=output_activation_fn,
        kernel_initializer=final_initializer,
        name='value')
    self._joint_layers.append(value_layer)
def __init__(self,
             observation_spec,
             action_spec,
             observation_conv_layer_params=None,
             observation_fc_layer_params=(200,),
             action_fc_layer_params=(200,),
             joint_fc_layer_params=(100,),
             lstm_size=(40,),
             output_fc_layer_params=(200, 100),
             activation_fn=tf.keras.activations.relu,
             name='CriticRnnNetwork'):
    """Creates an instance of `CriticRnnNetwork`.

    Args:
        observation_spec: A nest of `tensor_spec.TensorSpec` representing
            the observations.
        action_spec: A nest of `tensor_spec.BoundedTensorSpec`
            representing the actions.
        observation_conv_layer_params: Optional list of convolution layers
            parameters to apply to the observations, where each item is a
            length-three tuple indicating (filters, kernel_size, stride).
        observation_fc_layer_params: Optional list of fully_connected
            parameters, where each item is the number of units in the
            layer. This is applied after the observation convolutional
            layer.
        action_fc_layer_params: Optional list of parameters for a
            fully_connected layer to apply to the actions, where each item
            is the number of units in the layer.
        joint_fc_layer_params: Optional list of parameters for a
            fully_connected layer to apply after merging observations and
            actions, where each item is the number of units in the layer.
        lstm_size: An iterable of ints specifying the LSTM cell sizes to
            use.
        output_fc_layer_params: Optional list of fully_connected
            parameters, where each item is the number of units in the
            layer. This is applied after the LSTM cell.
        activation_fn: Activation function, e.g. tf.nn.relu,
            slim.leaky_relu, ...
        name: A string representing name of the network.

    Raises:
        ValueError: If `observation_spec` or `action_spec` contains more
            than one item.
    """
    # NOTE: the default for `joint_fc_layer_params` used to be written as
    # `(100)`, which is the integer 100 rather than a one-element tuple and
    # therefore is not a valid list of layer sizes.
    if len(nest.flatten(observation_spec)) > 1:
        raise ValueError(
            'Only a single observation is supported by this network.')

    if len(nest.flatten(action_spec)) > 1:
        raise ValueError(
            'Only a single action is supported by this network.')

    observation_layers = utils.mlp_layers(
        observation_conv_layer_params,
        observation_fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=tf.keras.initializers.VarianceScaling(
            scale=1. / 3., mode='fan_in', distribution='uniform'),
        name='observation_encoding')

    action_layers = utils.mlp_layers(
        None,
        action_fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=tf.keras.initializers.VarianceScaling(
            scale=1. / 3., mode='fan_in', distribution='uniform'),
        name='action_encoding')

    joint_layers = utils.mlp_layers(
        None,
        joint_fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=tf.keras.initializers.VarianceScaling(
            scale=1. / 3., mode='fan_in', distribution='uniform'),
        name='joint_mlp')

    # Create RNN cell: a single LSTM cell, or a stack when several sizes
    # are given.
    if len(lstm_size) == 1:
        cell = tf.keras.layers.LSTMCell(lstm_size[0])
    else:
        cell = tf.keras.layers.StackedRNNCells(
            [tf.keras.layers.LSTMCell(size) for size in lstm_size])

    # One float32 TensorSpec per entry of the cell's state size.
    state_spec = nest.map_structure(
        functools.partial(
            tensor_spec.TensorSpec,
            dtype=tf.float32,
            name='network_state_spec'),
        list(cell.state_size))

    output_layers = utils.mlp_layers(
        fc_layer_params=output_fc_layer_params, name='output')

    # Single-unit linear head named 'value'.
    output_layers.append(
        tf.keras.layers.Dense(
            1,
            activation=None,
            kernel_initializer=tf.keras.initializers.RandomUniform(
                minval=-0.003, maxval=0.003),
            name='value'))

    super(CriticRnnNetwork, self).__init__(
        observation_spec=observation_spec,
        action_spec=action_spec,
        state_spec=state_spec,
        name=name)

    self._observation_layers = observation_layers
    self._action_layers = action_layers
    self._joint_layers = joint_layers
    self._dynamic_unroll = dynamic_unroll_layer.DynamicUnroll(cell)
    self._output_layers = output_layers
def __init__(self,
             input_tensor_spec,
             preprocessing_combiner=None,
             observation_conv_layer_params=None,
             observation_fc_layer_params=None,
             observation_dropout_layer_params=None,
             action_fc_layer_params=None,
             action_dropout_layer_params=None,
             joint_fc_layer_params=None,
             joint_dropout_layer_params=None,
             activation_fn=tf.nn.relu,
             output_activation_fn=None,
             mask_xy=False,
             name='CriticNetwork'):
    """Creates an instance of `CriticNetwork`.

    Builds three layer stacks (observation, action, joint) and appends a
    single-unit layer named 'value' to the joint stack.

    Args:
        input_tensor_spec: A tuple of (observation, action) specs.
        preprocessing_combiner: Stored unchanged as
            `self._preprocessing_combiner`; it is not used by the
            constructor itself.
        observation_conv_layer_params: Convolution parameters passed to
            `utils.mlp_layers` for the observation stack.
        observation_fc_layer_params: Fully connected parameters for the
            observation stack.
        observation_dropout_layer_params: Dropout parameters for the
            observation stack.
        action_fc_layer_params: Fully connected parameters for the action
            stack.
        action_dropout_layer_params: Dropout parameters for the action
            stack.
        joint_fc_layer_params: Fully connected parameters for the joint
            stack.
        joint_dropout_layer_params: Dropout parameters for the joint
            stack.
        activation_fn: Activation function shared by all three stacks.
        output_activation_fn: Activation function of the final 'value'
            layer.
        mask_xy: Stored unchanged as `self._mask_xy`; it is not used by
            the constructor itself.
        name: A string representing name of the network.

    Raises:
        ValueError: If the action spec contains more than one item.
    """
    super(CriticNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec, state_spec=(), name=name)

    self._mask_xy = mask_xy

    # The observation half of the spec is not inspected here.
    _, action_spec = input_tensor_spec

    flat_action_spec = tf.nest.flatten(action_spec)
    if len(flat_action_spec) > 1:
        raise ValueError(
            'Only a single action is supported by this network')
    self._single_action_spec = flat_action_spec[0]

    def _hidden_initializer():
        # A fresh initializer object per layer stack, as in the original
        # inline construction.
        return tf.compat.v1.keras.initializers.VarianceScaling(
            scale=1. / 3., mode='fan_in', distribution='uniform')

    self._observation_layers = utils.mlp_layers(
        observation_conv_layer_params,
        observation_fc_layer_params,
        observation_dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=_hidden_initializer(),
        name='observation_encoding')

    self._action_layers = utils.mlp_layers(
        None,
        action_fc_layer_params,
        action_dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=_hidden_initializer(),
        name='action_encoding')

    self._joint_layers = utils.mlp_layers(
        None,
        joint_fc_layer_params,
        joint_dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=_hidden_initializer(),
        name='joint_mlp')

    value_layer = tf.keras.layers.Dense(
        1,
        activation=output_activation_fn,
        kernel_initializer=tf.keras.initializers.RandomUniform(
            minval=-0.003, maxval=0.003),
        name='value')
    self._joint_layers.append(value_layer)

    self._preprocessing_combiner = preprocessing_combiner
def __init__(self,
             observation_spec,
             action_spec,
             conv_layer_params=None,
             input_fc_layer_params=(200, 100),
             lstm_size=(40, ),
             output_fc_layer_params=(200, 100),
             activation_fn=tf.keras.activations.relu,
             name='ActorRnnNetwork'):
    """Creates an instance of `ActorRnnNetwork`.

    The constructor builds, in order: an input stack (optional convolutions
    followed by fully connected layers), an LSTM cell (stacked when several
    sizes are given), an output stack, and one `Dense` projection per
    flattened action spec.

    Args:
      observation_spec: A nest of `tensor_spec.TensorSpec` representing the
        observations. It must flatten to at most one entry.
      action_spec: A nest of `tensor_spec.BoundedTensorSpec` representing the
        actions. One projection layer is created per flattened entry, sized
        by `shape.num_elements()` of that entry.
      conv_layer_params: Optional list of convolution layer parameters for
        the input stack, where each item is a length-three tuple indicating
        (filters, kernel_size, stride).
      input_fc_layer_params: Optional list of fully connected parameters for
        the input stack (applied before the LSTM cell), where each item is
        the number of units in the layer.
      lstm_size: A sized sequence of ints giving the LSTM cell sizes. A
        single entry yields one `LSTMCell`; otherwise the cells are wrapped
        in `StackedRNNCells`.
      output_fc_layer_params: Optional list of fully connected parameters for
        the output stack (applied after the LSTM cell). `activation_fn` is
        not forwarded to this stack.
      activation_fn: Activation function used by the input stack.
      name: A string representing the name of the network.

    Raises:
      ValueError: If `observation_spec` contains more than one observation.
    """
    if len(nest.flatten(observation_spec)) > 1:
        raise ValueError(
            'Only a single observation is supported by this network')

    input_layers = utils.mlp_layers(
        conv_layer_params,
        input_fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=tf.keras.initializers.glorot_uniform(),
        name='input_mlp')

    # Several sizes mean a stack of cells; a single size needs no wrapper.
    if len(lstm_size) != 1:
        cell = tf.keras.layers.StackedRNNCells(
            [tf.keras.layers.LSTMCell(units) for units in lstm_size])
    else:
        cell = tf.keras.layers.LSTMCell(lstm_size[0])

    def _state_entry_spec(size):
        # Every entry of the recurrent state shares the same dtype and name.
        return tensor_spec.TensorSpec(
            size, dtype=tf.float32, name='network_state_spec')

    state_spec = nest.map_structure(_state_entry_spec, list(cell.state_size))

    output_layers = utils.mlp_layers(
        fc_layer_params=output_fc_layer_params, name='output')

    flat_action_spec = nest.flatten(action_spec)
    action_layers = []
    for single_action_spec in flat_action_spec:
        projection = tf.keras.layers.Dense(
            single_action_spec.shape.num_elements(),
            activation=tf.keras.activations.tanh,
            kernel_initializer=tf.keras.initializers.RandomUniform(
                minval=-0.003, maxval=0.003),
            name='action')
        action_layers.append(projection)

    # The base class needs the state spec, which is only known once the cell
    # exists, so it is initialized after the layers have been created.
    super(ActorRnnNetwork, self).__init__(
        observation_spec=observation_spec,
        action_spec=action_spec,
        state_spec=state_spec,
        name=name)

    self._flat_action_spec = flat_action_spec
    self._conv_layer_params = conv_layer_params
    self._input_layers = input_layers
    self._cell = cell
    self._output_layers = output_layers
    self._action_layers = action_layers
def train_eval(
        root_dir,
        gpu=0,
        env_load_fn=None,
        model_ids=None,
        eval_env_mode='headless',
        num_iterations=1000000,
        conv_layer_params=None,
        # NOTE: the list defaults below are kept for interface compatibility;
        # this function only passes them on and does not modify them itself.
        encoder_fc_layers=[256],
        actor_fc_layers=[400, 300],
        critic_obs_fc_layers=[400],
        critic_action_fc_layers=None,
        critic_joint_fc_layers=[300],
        # Params for collect
        initial_collect_steps=1000,
        collect_steps_per_iteration=1,
        num_parallel_environments=1,
        replay_buffer_capacity=100000,
        ou_stddev=0.2,
        ou_damping=0.15,
        # Params for target update
        target_update_tau=0.05,
        target_update_period=5,
        # Params for train
        train_steps_per_iteration=1,
        batch_size=64,
        actor_learning_rate=1e-4,
        critic_learning_rate=1e-3,
        dqda_clipping=None,
        td_errors_loss_fn=tf.compat.v1.losses.huber_loss,
        gamma=0.995,
        reward_scale_factor=1.0,
        gradient_clipping=None,
        # Params for eval
        num_eval_episodes=10,
        eval_interval=10000,
        eval_only=False,
        eval_deterministic=False,
        num_parallel_environments_eval=1,
        model_ids_eval=None,
        # Params for checkpoints, summaries, and logging
        train_checkpoint_interval=10000,
        policy_checkpoint_interval=10000,
        rb_checkpoint_interval=50000,
        log_interval=100,
        summary_interval=1000,
        summaries_flush_secs=10,
        debug_summaries=False,
        summarize_grads_and_vars=False,
        eval_metrics_callback=None):
    """A simple train and eval for DDPG, run through a TF1-style session.

    Builds parallel training and evaluation environments, a DDPG agent whose
    actor and critic are given the same 'depth_seg' and 'sensor' encoder
    layers, a replay buffer and checkpointers, then alternates data
    collection and training for `num_iterations` iterations with periodic
    logging, checkpointing and evaluation. The session is always closed
    before returning, including on the `eval_only` early return.

    Args:
      root_dir: Base output directory (`~` is expanded). Summaries and
        checkpoints go under its 'train' and 'eval' subdirectories.
      gpu: Value passed as the third argument of `env_load_fn`.
      env_load_fn: Callable invoked as `env_load_fn(model_id, mode, gpu)` to
        create each environment. Required in practice, since it is called for
        every environment.
      model_ids: Optional list with one entry per training environment;
        defaults to `None` for each environment.
      eval_env_mode: Mode passed to `env_load_fn` for the evaluation
        environments ('gui' requires a single evaluation environment).
        Training environments always use 'headless'.
      num_iterations: Number of collect/train iterations.
      conv_layer_params: Convolution parameters of the 'depth_seg' encoder.
      encoder_fc_layers: Fully connected sizes of both encoders.
      actor_fc_layers: Fully connected sizes of the actor network.
      critic_obs_fc_layers: Observation fully connected sizes of the critic.
      critic_action_fc_layers: Action fully connected sizes of the critic.
      critic_joint_fc_layers: Joint fully connected sizes of the critic.
      initial_collect_steps: Random-policy steps collected before training,
        multiplied by `num_parallel_environments`.
      collect_steps_per_iteration: Collect-policy steps per iteration,
        multiplied by `num_parallel_environments`.
      num_parallel_environments: Number of training environments.
      replay_buffer_capacity: `max_length` of the replay buffer.
      ou_stddev: Forwarded to `DdpgAgent`.
      ou_damping: Forwarded to `DdpgAgent`.
      target_update_tau: Forwarded to `DdpgAgent`.
      target_update_period: Forwarded to `DdpgAgent`.
      train_steps_per_iteration: Train-op executions per iteration.
      batch_size: Training batch size.
      actor_learning_rate: Learning rate of the actor Adam optimizer.
      critic_learning_rate: Learning rate of the critic Adam optimizer.
      dqda_clipping: Forwarded to `DdpgAgent`.
      td_errors_loss_fn: Forwarded to `DdpgAgent`.
      gamma: Forwarded to `DdpgAgent`.
      reward_scale_factor: Forwarded to `DdpgAgent`.
      gradient_clipping: Forwarded to `DdpgAgent`.
      num_eval_episodes: Episodes per evaluation; also the metric buffer
        size and the cap on stored episodes that get summarized.
      eval_interval: Evaluate whenever the global step is a multiple of this.
      eval_only: If True, restore the train checkpoint, evaluate once, print
        the episode metrics, save the episodes to
        `<root_dir>/eval/episodes_vis.pkl` and return without training.
      eval_deterministic: If True, evaluate a `GreedyPolicy` wrapper of the
        agent policy instead of the agent policy itself.
      num_parallel_environments_eval: Number of evaluation environments.
      model_ids_eval: Optional list with one entry per evaluation
        environment; defaults to `None` for each environment.
      train_checkpoint_interval: Global-step interval for agent checkpoints.
      policy_checkpoint_interval: Global-step interval for policy
        checkpoints.
      rb_checkpoint_interval: Global-step interval for replay buffer
        checkpoints.
      log_interval: Global-step interval for loss and throughput logging.
      summary_interval: Training summaries are recorded when the global step
        is a multiple of this.
      summaries_flush_secs: Flush period of both summary writers, in seconds.
      debug_summaries: Forwarded to `DdpgAgent`.
      summarize_grads_and_vars: Forwarded to `DdpgAgent`.
      eval_metrics_callback: Forwarded to `metric_utils.compute_summaries`
        as `callback`.

    Returns:
      None.
    """
    root_dir = os.path.expanduser(root_dir)
    train_dir = os.path.join(root_dir, 'train')
    eval_dir = os.path.join(root_dir, 'eval')

    train_summary_writer = tf.compat.v2.summary.create_file_writer(
        train_dir, flush_millis=summaries_flush_secs * 1000)
    train_summary_writer.set_as_default()

    eval_summary_writer = tf.compat.v2.summary.create_file_writer(
        eval_dir, flush_millis=summaries_flush_secs * 1000)
    eval_metrics = [
        batched_py_metric.BatchedPyMetric(
            py_metrics.AverageReturnMetric,
            metric_args={'buffer_size': num_eval_episodes},
            batch_size=num_parallel_environments_eval),
        batched_py_metric.BatchedPyMetric(
            py_metrics.AverageEpisodeLengthMetric,
            metric_args={'buffer_size': num_eval_episodes},
            batch_size=num_parallel_environments_eval),
    ]
    eval_summary_flush_op = eval_summary_writer.flush()

    global_step = tf.compat.v1.train.get_or_create_global_step()
    with tf.compat.v2.summary.record_if(
            lambda: tf.math.equal(global_step % summary_interval, 0)):
        if model_ids is None:
            model_ids = [None] * num_parallel_environments
        else:
            assert len(model_ids) == num_parallel_environments, \
                'model ids provided, but length not equal to num_parallel_environments'

        if model_ids_eval is None:
            model_ids_eval = [None] * num_parallel_environments_eval
        else:
            assert len(model_ids_eval) == num_parallel_environments_eval,\
                'model ids eval provided, but length not equal to num_parallel_environments_eval'

        # The default argument binds each model id at lambda creation time,
        # so every constructor loads its own model.
        tf_py_env = [
            lambda model_id=model_ids[i]: env_load_fn(
                model_id, 'headless', gpu)
            for i in range(num_parallel_environments)
        ]
        tf_env = tf_py_environment.TFPyEnvironment(
            parallel_py_environment.ParallelPyEnvironment(tf_py_env))

        if eval_env_mode == 'gui':
            assert num_parallel_environments_eval == 1, 'only one GUI env is allowed'
        eval_py_env = [
            lambda model_id=model_ids_eval[i]: env_load_fn(
                model_id, eval_env_mode, gpu)
            for i in range(num_parallel_environments_eval)
        ]
        eval_py_env = parallel_py_environment.ParallelPyEnvironment(
            eval_py_env)

        # Get the data specs from the environment
        time_step_spec = tf_env.time_step_spec()
        observation_spec = time_step_spec.observation
        action_spec = tf_env.action_spec()
        print('observation_spec', observation_spec)
        print('action_spec', action_spec)

        glorot_uniform_initializer = \
            tf.compat.v1.keras.initializers.glorot_uniform()
        preprocessing_layers = {
            'depth_seg': tf.keras.Sequential(
                mlp_layers(
                    conv_layer_params=conv_layer_params,
                    fc_layer_params=encoder_fc_layers,
                    kernel_initializer=glorot_uniform_initializer,
                )),
            'sensor': tf.keras.Sequential(
                mlp_layers(
                    conv_layer_params=None,
                    fc_layer_params=encoder_fc_layers,
                    kernel_initializer=glorot_uniform_initializer,
                )),
        }
        preprocessing_combiner = tf.keras.layers.Concatenate(axis=-1)

        # The same encoder layer objects are handed to both networks.
        actor_net = actor_network.ActorNetwork(
            observation_spec,
            action_spec,
            preprocessing_layers=preprocessing_layers,
            preprocessing_combiner=preprocessing_combiner,
            fc_layer_params=actor_fc_layers,
            kernel_initializer=glorot_uniform_initializer,
        )

        critic_net = critic_network.CriticNetwork(
            (observation_spec, action_spec),
            preprocessing_layers=preprocessing_layers,
            preprocessing_combiner=preprocessing_combiner,
            observation_fc_layer_params=critic_obs_fc_layers,
            action_fc_layer_params=critic_action_fc_layers,
            joint_fc_layer_params=critic_joint_fc_layers,
            kernel_initializer=glorot_uniform_initializer,
        )

        tf_agent = ddpg_agent.DdpgAgent(
            tf_env.time_step_spec(),
            tf_env.action_spec(),
            actor_network=actor_net,
            critic_network=critic_net,
            actor_optimizer=tf.compat.v1.train.AdamOptimizer(
                learning_rate=actor_learning_rate),
            critic_optimizer=tf.compat.v1.train.AdamOptimizer(
                learning_rate=critic_learning_rate),
            ou_stddev=ou_stddev,
            ou_damping=ou_damping,
            target_update_tau=target_update_tau,
            target_update_period=target_update_period,
            dqda_clipping=dqda_clipping,
            td_errors_loss_fn=td_errors_loss_fn,
            gamma=gamma,
            reward_scale_factor=reward_scale_factor,
            gradient_clipping=gradient_clipping,
            debug_summaries=debug_summaries,
            summarize_grads_and_vars=summarize_grads_and_vars,
            train_step_counter=global_step)

        config = tf.compat.v1.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.compat.v1.Session(config=config)

        replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(
            data_spec=tf_agent.collect_data_spec,
            batch_size=tf_env.batch_size,
            max_length=replay_buffer_capacity)
        replay_observer = [replay_buffer.add_batch]

        if eval_deterministic:
            eval_py_policy = py_tf_policy.PyTFPolicy(
                greedy_policy.GreedyPolicy(tf_agent.policy))
        else:
            eval_py_policy = py_tf_policy.PyTFPolicy(tf_agent.policy)

        step_metrics = [
            tf_metrics.NumberOfEpisodes(),
            tf_metrics.EnvironmentSteps(),
        ]
        train_metrics = step_metrics + [
            tf_metrics.AverageReturnMetric(
                buffer_size=100, batch_size=num_parallel_environments),
            tf_metrics.AverageEpisodeLengthMetric(
                buffer_size=100, batch_size=num_parallel_environments),
        ]

        collect_policy = tf_agent.collect_policy
        initial_collect_policy = random_tf_policy.RandomTFPolicy(
            time_step_spec, action_spec)

        initial_collect_op = dynamic_step_driver.DynamicStepDriver(
            tf_env,
            initial_collect_policy,
            observers=replay_observer + train_metrics,
            num_steps=initial_collect_steps *
            num_parallel_environments).run()

        collect_op = dynamic_step_driver.DynamicStepDriver(
            tf_env,
            collect_policy,
            observers=replay_observer + train_metrics,
            num_steps=collect_steps_per_iteration *
            num_parallel_environments).run()

        # Prepare replay buffer as dataset with invalid transitions filtered.
        def _filter_invalid_transition(trajectories, unused_arg1):
            return ~trajectories.is_boundary()[0]

        # Dataset generates trajectories with shape [Bx2x...]
        dataset = replay_buffer.as_dataset(
            num_parallel_calls=5,
            sample_batch_size=5 * batch_size,
            num_steps=2).apply(tf.data.experimental.unbatch()).filter(
                _filter_invalid_transition).batch(batch_size).prefetch(5)
        dataset_iterator = tf.compat.v1.data.make_initializable_iterator(
            dataset)
        trajectories, unused_info = dataset_iterator.get_next()
        train_op = tf_agent.train(trajectories)

        summary_ops = [
            train_metric.tf_summaries(train_step=global_step,
                                      step_metrics=step_metrics)
            for train_metric in train_metrics
        ]

        with eval_summary_writer.as_default(), \
                tf.compat.v2.summary.record_if(True):
            for eval_metric in eval_metrics:
                eval_metric.tf_summaries(train_step=global_step,
                                         step_metrics=step_metrics)

        train_checkpointer = common.Checkpointer(
            ckpt_dir=train_dir,
            agent=tf_agent,
            global_step=global_step,
            metrics=metric_utils.MetricsGroup(train_metrics,
                                              'train_metrics'))
        policy_checkpointer = common.Checkpointer(
            ckpt_dir=os.path.join(train_dir, 'policy'),
            policy=tf_agent.policy,
            global_step=global_step)
        rb_checkpointer = common.Checkpointer(
            ckpt_dir=os.path.join(train_dir, 'replay_buffer'),
            max_to_keep=1,
            replay_buffer=replay_buffer)

        init_agent_op = tf_agent.initialize()

        # `as_default()` does not close the session on exit, so the explicit
        # `finally` guarantees the close on every path, including the
        # `eval_only` early return below.
        try:
            with sess.as_default():
                # Initialize the graph.
                train_checkpointer.initialize_or_restore(sess)

                if eval_only:
                    metric_utils.compute_summaries(
                        eval_metrics,
                        eval_py_env,
                        eval_py_policy,
                        num_episodes=num_eval_episodes,
                        global_step=0,
                        callback=eval_metrics_callback,
                        tf_summaries=False,
                        log=True,
                    )
                    episodes = eval_py_env.get_stored_episodes()
                    episodes = [
                        episode for sublist in episodes
                        for episode in sublist
                    ][:num_eval_episodes]
                    metrics = episode_utils.get_metrics(episodes)
                    for key in sorted(metrics.keys()):
                        print(key, ':', metrics[key])

                    save_path = os.path.join(eval_dir, 'episodes_vis.pkl')
                    episode_utils.save(episodes, save_path)
                    print('EVAL DONE')
                    return

                # Initialize training.
                rb_checkpointer.initialize_or_restore(sess)
                sess.run(dataset_iterator.initializer)
                common.initialize_uninitialized_variables(sess)
                sess.run(init_agent_op)
                sess.run(train_summary_writer.init())
                sess.run(eval_summary_writer.init())

                global_step_val = sess.run(global_step)
                if global_step_val == 0:
                    # Initial eval of randomly initialized policy
                    metric_utils.compute_summaries(
                        eval_metrics,
                        eval_py_env,
                        eval_py_policy,
                        num_episodes=num_eval_episodes,
                        global_step=0,
                        callback=eval_metrics_callback,
                        tf_summaries=True,
                        log=True,
                    )
                    # Run initial collect.
                    logging.info(
                        'Global step %d: Running initial collect op.',
                        global_step_val)
                    sess.run(initial_collect_op)

                    # Checkpoint the initial replay buffer contents.
                    rb_checkpointer.save(global_step=global_step_val)

                    logging.info('Finished initial collect.')
                else:
                    logging.info(
                        'Global step %d: Skipping initial collect op.',
                        global_step_val)

                collect_call = sess.make_callable(collect_op)
                train_step_call = sess.make_callable([train_op, summary_ops])
                global_step_call = sess.make_callable(global_step)

                timed_at_step = sess.run(global_step)
                time_acc = 0
                steps_per_second_ph = tf.compat.v1.placeholder(
                    tf.float32, shape=(), name='steps_per_sec_ph')
                steps_per_second_summary = tf.compat.v2.summary.scalar(
                    name='global_steps_per_sec',
                    data=steps_per_second_ph,
                    step=global_step)

                for _ in range(num_iterations):
                    start_time = time.time()
                    collect_call()
                    for _ in range(train_steps_per_iteration):
                        loss_info_value, _ = train_step_call()
                    time_acc += time.time() - start_time
                    global_step_val = global_step_call()

                    if global_step_val % log_interval == 0:
                        logging.info('step = %d, loss = %f', global_step_val,
                                     loss_info_value.loss)
                        steps_per_sec = (
                            global_step_val - timed_at_step) / time_acc
                        logging.info('%.3f steps/sec', steps_per_sec)
                        sess.run(
                            steps_per_second_summary,
                            feed_dict={steps_per_second_ph: steps_per_sec})
                        timed_at_step = global_step_val
                        time_acc = 0

                    if global_step_val % train_checkpoint_interval == 0:
                        train_checkpointer.save(global_step=global_step_val)

                    if global_step_val % policy_checkpoint_interval == 0:
                        policy_checkpointer.save(global_step=global_step_val)

                    if global_step_val % rb_checkpoint_interval == 0:
                        rb_checkpointer.save(global_step=global_step_val)

                    if global_step_val % eval_interval == 0:
                        # Report the real training step to the log output
                        # and the callback (not a constant 0).
                        metric_utils.compute_summaries(
                            eval_metrics,
                            eval_py_env,
                            eval_py_policy,
                            num_episodes=num_eval_episodes,
                            global_step=global_step_val,
                            callback=eval_metrics_callback,
                            tf_summaries=True,
                            log=True,
                        )
                        with eval_summary_writer.as_default(), \
                                tf.compat.v2.summary.record_if(True):
                            with tf.name_scope('Metrics/'):
                                episodes = eval_py_env.get_stored_episodes()
                                episodes = [
                                    episode for sublist in episodes
                                    for episode in sublist
                                ][:num_eval_episodes]
                                metrics = episode_utils.get_metrics(episodes)
                                for key in sorted(metrics.keys()):
                                    print(key, ':', metrics[key])
                                    # NOTE: a new summary op is created for
                                    # every key on every evaluation.
                                    metric_op = tf.compat.v2.summary.scalar(
                                        name=key,
                                        data=metrics[key],
                                        step=global_step_val)
                                    sess.run(metric_op)
                        sess.run(eval_summary_flush_op)
        finally:
            sess.close()
def __init__(self,
             input_tensor_spec,
             observation_conv_layer_params=None,
             observation_fc_layer_params=(200, ),
             action_fc_layer_params=(200, ),
             joint_fc_layer_params=(100, ),
             lstm_size=None,
             output_fc_layer_params=(200, 100),
             activation_fn=tf.keras.activations.relu,
             kernel_initializer=None,
             last_kernel_initializer=None,
             rnn_construction_fn=None,
             rnn_construction_kwargs=None,
             name='CriticRnnNetwork'):
    """Creates an instance of `CriticRnnNetwork`.

    Args:
      input_tensor_spec: A tuple of (observation, action) each of type
        `tensor_spec.TensorSpec` representing the inputs.
      observation_conv_layer_params: Optional list of convolution layer
        parameters to apply to the observations, where each item is a
        length-three tuple indicating (filters, kernel_size, stride).
      observation_fc_layer_params: Optional list of fully connected
        parameters, where each item is the number of units in the layer.
        This is applied after the observation convolutional layers.
      action_fc_layer_params: Optional list of parameters for fully connected
        layers to apply to the actions, where each item is the number of
        units in the layer.
      joint_fc_layer_params: Optional list of parameters for fully connected
        layers to apply after merging observations and actions, where each
        item is the number of units in the layer.
      lstm_size: A sized sequence of ints specifying the LSTM cell sizes to
        use. A single entry yields one `LSTMCell`; otherwise the cells are
        wrapped in `StackedRNNCells`.
      output_fc_layer_params: Optional list of fully connected parameters,
        where each item is the number of units in the layer. This is applied
        after the LSTM cell. Neither `activation_fn` nor `kernel_initializer`
        is forwarded to this stack.
      activation_fn: Activation function of the observation, action and
        joint stacks.
      kernel_initializer: Kernel initializer of the observation, action and
        joint stacks. If None, a uniform fan-in `VarianceScaling` initializer
        with scale 1/3 is used.
      last_kernel_initializer: Kernel initializer of the final single-unit
        value layer. If None, `RandomUniform(-0.003, 0.003)` is used.
      rnn_construction_fn: (Optional.) Alternate RNN construction function.
        The object it returns must expose `cell.state_size`. It is invalid
        to provide both `rnn_construction_fn` and `lstm_size`.
      rnn_construction_kwargs: (Optional.) Dictionary of arguments to pass to
        `rnn_construction_fn`. The RNN will be constructed via:

        ```
        rnn_layer = rnn_construction_fn(**rnn_construction_kwargs)
        ```
      name: A string representing the name of the network.

    Raises:
      ValueError: If the observation or action part of `input_tensor_spec`
        contains more than one item.
      ValueError: If neither `lstm_size` nor `rnn_construction_fn` are
        provided.
      ValueError: If both `lstm_size` and `rnn_construction_fn` are provided.
    """
    if lstm_size is None and rnn_construction_fn is None:
        raise ValueError(
            'Need to provide either custom rnn_construction_fn or '
            'lstm_size.')
    if lstm_size and rnn_construction_fn:
        raise ValueError(
            'Cannot provide both custom rnn_construction_fn and '
            'lstm_size.')

    observation_spec, action_spec = input_tensor_spec

    if len(tf.nest.flatten(observation_spec)) > 1:
        raise ValueError(
            'Only a single observation is supported by this network.')

    if len(tf.nest.flatten(action_spec)) > 1:
        raise ValueError(
            'Only a single action is supported by this network.')

    if kernel_initializer is None:
        kernel_initializer = tf.compat.v1.keras.initializers.VarianceScaling(
            scale=1. / 3., mode='fan_in', distribution='uniform')
    if last_kernel_initializer is None:
        last_kernel_initializer = tf.keras.initializers.RandomUniform(
            minval=-0.003, maxval=0.003)

    observation_layers = utils.mlp_layers(
        observation_conv_layer_params,
        observation_fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=kernel_initializer,
        name='observation_encoding')

    action_layers = utils.mlp_layers(
        None,
        action_fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=kernel_initializer,
        name='action_encoding')

    joint_layers = utils.mlp_layers(
        None,
        joint_fc_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=kernel_initializer,
        name='joint_mlp')

    # Create the recurrent part: a caller-supplied layer when a construction
    # function is given, otherwise an unrolled LSTM cell (or stack of cells).
    if rnn_construction_fn:
        rnn_construction_kwargs = rnn_construction_kwargs or {}
        lstm_network = rnn_construction_fn(**rnn_construction_kwargs)
    else:
        if len(lstm_size) != 1:
            cell = tf.keras.layers.StackedRNNCells(
                [tf.keras.layers.LSTMCell(units) for units in lstm_size])
        else:
            cell = tf.keras.layers.LSTMCell(lstm_size[0])
        lstm_network = dynamic_unroll_layer.DynamicUnroll(cell)

    # One spec per entry of the cell state, numbered in flattened order and
    # then restored to the structure of `state_size`.
    state_size = lstm_network.cell.state_size
    numbered_specs = [
        tensor_spec.TensorSpec(
            size, dtype=tf.float32, name='network_state_%d' % index)
        for index, size in enumerate(tf.nest.flatten(state_size))
    ]
    state_spec = tf.nest.pack_sequence_as(state_size, numbered_specs)

    output_layers = utils.mlp_layers(
        fc_layer_params=output_fc_layer_params, name='output')
    value_layer = tf.keras.layers.Dense(
        1,
        activation=None,
        kernel_initializer=last_kernel_initializer,
        name='value')
    output_layers.append(value_layer)

    # The base class needs the state spec, which is only known once the
    # recurrent layer exists, so it is initialized after layer construction.
    super(CriticRnnNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec,
        state_spec=state_spec,
        name=name)

    self._observation_layers = observation_layers
    self._action_layers = action_layers
    self._joint_layers = joint_layers
    self._lstm_network = lstm_network
    self._output_layers = output_layers
def __init__(
    self,
    input_tensor_spec,
    observation_preprocessing_layers=None,
    observation_preprocessing_combiner=None,
    observation_conv_layer_params=None,
    observation_fc_layer_params=(200, 200),
    observation_dropout_layer_params=None,
    action_fc_layer_params=None,
    action_dropout_layer_params=None,
    joint_fc_layer_params=None,
    joint_dropout_layer_params=None,
    activation_fn=tf.keras.activations.relu,
    kernel_initializer=None,
    batch_squash=True,
    dtype=tf.float32,
    name="CriticNetwork",
):
    """Creates the observation encoder and the action and joint stacks.

    Args:
      input_tensor_spec: A two-item structure whose first item is the spec
        given to the observation `EncodingNetwork`. The second item is not
        inspected by this constructor.
      observation_preprocessing_layers: Forwarded to the encoder as
        `preprocessing_layers`.
      observation_preprocessing_combiner: Forwarded to the encoder as
        `preprocessing_combiner`.
      observation_conv_layer_params: Forwarded to the encoder as
        `conv_layer_params`.
      observation_fc_layer_params: Forwarded to the encoder as
        `fc_layer_params`.
      observation_dropout_layer_params: Forwarded to the encoder as
        `dropout_layer_params`.
      action_fc_layer_params: Fully connected parameters of the action stack.
      action_dropout_layer_params: Dropout parameters of the action stack.
      joint_fc_layer_params: Fully connected parameters of the joint stack.
      joint_dropout_layer_params: Dropout parameters of the joint stack.
      activation_fn: Activation used by the encoder and by both stacks.
      kernel_initializer: Forwarded to the encoder only. The action and
        joint stacks always use a uniform fan-in `VarianceScaling`
        initializer with scale 1/3.
      batch_squash: Forwarded to the encoder.
      dtype: Forwarded to the encoder.
      name: Network name passed to the base class.
    """
    super(CriticNetwork, self).__init__(
        input_tensor_spec=input_tensor_spec, state_spec=(), name=name
    )

    observation_spec, _ = input_tensor_spec

    encoder_kwargs = dict(
        preprocessing_layers=observation_preprocessing_layers,
        preprocessing_combiner=observation_preprocessing_combiner,
        conv_layer_params=observation_conv_layer_params,
        fc_layer_params=observation_fc_layer_params,
        dropout_layer_params=observation_dropout_layer_params,
        activation_fn=activation_fn,
        kernel_initializer=kernel_initializer,
        batch_squash=batch_squash,
        dtype=dtype,
        name="observation_encoding",
    )
    self._encoder = encoding_network.EncodingNetwork(
        observation_spec, **encoder_kwargs
    )

    def _dense_stack(fc_params, dropout_params, stack_name):
        # Each stack receives its own initializer instance.
        fan_in_uniform = tf.compat.v1.keras.initializers.VarianceScaling(
            scale=1.0 / 3.0, mode="fan_in", distribution="uniform"
        )
        return utils.mlp_layers(
            conv_layer_params=None,
            fc_layer_params=fc_params,
            dropout_layer_params=dropout_params,
            activation_fn=activation_fn,
            kernel_initializer=fan_in_uniform,
            name=stack_name,
        )

    self._action_layers = _dense_stack(
        action_fc_layer_params, action_dropout_layer_params, "action_encoding"
    )
    self._joint_layers = _dense_stack(
        joint_fc_layer_params, joint_dropout_layer_params, "joint_mlp"
    )

    # The joint stack ends in a linear single-unit layer with a narrow
    # uniform initialization.
    value_layer = tf.keras.layers.Dense(
        1,
        activation=None,
        kernel_initializer=tf.keras.initializers.RandomUniform(
            minval=-0.003, maxval=0.003
        ),
        name="value",
    )
    self._joint_layers.append(value_layer)