def call(self, observations, step_type=(), network_state=()):
  # observations = self._bn_layer(observations)
  tempt = self._dense1(observations)
  tempt = self._dense2(tempt)
  tempt = self._dense3(tempt)
  actions = self._action_projection_layer(tempt)
  actions = common_utils.scale_to_spec(actions, self._action_spec)
  return actions, network_state
def call(self, observations, step_type=(), network_state=()):
  del step_type  # unused.
  observations = nest.flatten(observations)
  output = tf.cast(observations[0], tf.float32)
  for layer in self._mlp_layers:
    output = layer(output)
  actions = common_utils.scale_to_spec(output, self._single_action_spec)
  return nest.pack_sequence_as(self._action_spec, [actions]), network_state
def call(self, observations, step_type=(), network_state=(), training=False):
  del step_type  # unused.
  observations = tf.nest.flatten(observations)
  output = tf.cast(observations[0], tf.float32)
  for layer in self._mlp_layers:
    output = layer(output, training=training)
  actions = common.scale_to_spec(output, self._single_action_spec)
  output_actions = tf.nest.pack_sequence_as(self._output_tensor_spec, [actions])
  return output_actions, network_state
def call(self, observations, step_type=(), network_state=()):
  del step_type  # unused.
  observations = tf.cast(nest.flatten(observations)[0], tf.float32)
  output = self._layer(observations)
  actions = tf.reshape(output, [-1] + self._single_action_spec.shape.as_list())
  if not self._unbounded_actions:
    actions = common_utils.scale_to_spec(actions, self._single_action_spec)
  output_actions = nest.pack_sequence_as(self._output_tensor_spec, [actions])
  return output_actions, network_state
def call(self, observation, step_type, network_state=(), training=False):
  num_outer_dims = nest_utils.get_outer_rank(observation, self.input_tensor_spec)
  if num_outer_dims not in (1, 2):
    raise ValueError(
        'Input observation must have a batch or batch x time outer shape.')

  has_time_dim = num_outer_dims == 2
  if not has_time_dim:
    # Add a time dimension to the inputs.
    observation = tf.nest.map_structure(lambda t: tf.expand_dims(t, 1),
                                        observation)
    step_type = tf.nest.map_structure(lambda t: tf.expand_dims(t, 1), step_type)

  states = tf.cast(tf.nest.flatten(observation)[0], tf.float32)
  batch_squash = utils.BatchSquash(2)  # Squash B, and T dims.
  states = batch_squash.flatten(states)  # [B, T, ...] -> [B x T, ...]

  for layer in self._input_layers:
    states = layer(states, training=training)

  states = batch_squash.unflatten(states)  # [B x T, ...] -> [B, T, ...]

  with tf.name_scope('reset_mask'):
    reset_mask = tf.equal(step_type, time_step.StepType.FIRST)

  # Unroll over the time sequence.
  states, network_state = self._dynamic_unroll(
      states,
      reset_mask=reset_mask,
      initial_state=network_state,
      training=training)

  states = batch_squash.flatten(states)  # [B, T, ...] -> [B x T, ...]

  for layer in self._output_layers:
    states = layer(states, training=training)

  actions = []
  for layer, spec in zip(self._action_layers, self._flat_action_spec):
    action = layer(states, training=training)
    action = common.scale_to_spec(action, spec)
    action = batch_squash.unflatten(action)  # [B x T, ...] -> [B, T, ...]
    if not has_time_dim:
      action = tf.squeeze(action, axis=1)
    actions.append(action)

  output_actions = tf.nest.pack_sequence_as(self._output_tensor_spec, actions)
  return output_actions, network_state
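# The recurrent call above squashes the batch and time dimensions before
# applying per-step layers. A minimal illustrative sketch of that reshape in
# plain TensorFlow (not the tf_agents BatchSquash helper;
# `flatten_batch_and_time` and `unflatten_batch_and_time` are hypothetical
# names used only here):
import tensorflow as tf

def flatten_batch_and_time(t):
  # [B, T, ...] -> [B x T, ...]
  shape = tf.shape(t)
  return tf.reshape(t, tf.concat([[shape[0] * shape[1]], shape[2:]], axis=0))

def unflatten_batch_and_time(t, batch_size, time_steps):
  # [B x T, ...] -> [B, T, ...]
  shape = tf.shape(t)
  return tf.reshape(t, tf.concat([[batch_size, time_steps], shape[1:]], axis=0))

x = tf.zeros([4, 7, 3])                          # [B=4, T=7, features=3]
flat = flatten_batch_and_time(x)                 # shape [28, 3]
restored = unflatten_batch_and_time(flat, 4, 7)  # shape [4, 7, 3]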
def testScaleToSpec(self):
  value = tf.constant([[1, -1], [0.5, -0.5], [1.0, 0.0]])
  spec = tensor_spec.BoundedTensorSpec(
      (3, 2), tf.float32,
      [[-5, -5], [-4, -4], [-2, -6]],
      [[5, 5], [4, 4], [2, 6]],
  )
  expected_scaled_value = np.array([[[5, -5], [2.0, -2.0], [2.0, 0.0]]])
  scaled_value = common.scale_to_spec(value, spec)
  scaled_value_ = self.evaluate(scaled_value)
  self.assertAllClose(expected_scaled_value, scaled_value_)
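# The expected values in testScaleToSpec above are consistent with a plain
# linear map from [-1, 1] onto [spec.minimum, spec.maximum]: midpoint plus
# half-range times the input. A NumPy-only sketch under that assumption;
# `linear_scale_to_spec` is a hypothetical helper, not the tf_agents API:
import numpy as np

def linear_scale_to_spec(value, minimum, maximum):
  means = (np.asarray(maximum) + np.asarray(minimum)) / 2.0
  magnitudes = (np.asarray(maximum) - np.asarray(minimum)) / 2.0
  return means + magnitudes * np.asarray(value)

value = np.array([[1.0, -1.0], [0.5, -0.5], [1.0, 0.0]])
minimum = np.array([[-5, -5], [-4, -4], [-2, -6]])
maximum = np.array([[5, 5], [4, 4], [2, 6]])
print(linear_scale_to_spec(value, minimum, maximum))
# [[ 5. -5.]
#  [ 2. -2.]
#  [ 2.  0.]]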
def call(self, inputs, step_type=(), network_state=()):
  observations, actions = inputs
  observations = self._preprocess_observation_layer(observations)
  actions = self._preprocess_action_layer(actions)
  tempt = self._combine_layer([observations, actions])
  tempt = self._dense1(tempt)
  tempt = self._dense2(tempt)
  q_values = self._q_value_projection_layer(tempt)
  q_values = common_utils.scale_to_spec(q_values, self._q_value_spec)
  return q_values, network_state
def call(self, observations, step_type=(), network_state=()):
  outer_rank = nest_utils.get_outer_rank(observations, self.input_tensor_spec)
  # We use batch_squash here in case the observations have a time sequence
  # component.
  batch_squash = utils.BatchSquash(outer_rank)
  observations = tf.nest.map_structure(batch_squash.flatten, observations)
  state, network_state = self._encoder(
      observations, step_type=step_type, network_state=network_state)
  actions = self._action_projection_layer(state)
  actions = common_utils.scale_to_spec(actions, self._single_action_spec)
  actions = batch_squash.unflatten(actions)
  return tf.nest.pack_sequence_as(self._action_spec, [actions]), network_state
def call(self, observations, step_type=(), network_state=(), training=False):
  state, network_state = self._encoder(
      observations,
      step_type=step_type,
      network_state=network_state,
      training=training)
  output = self._action_layer(state, training=training)
  actions = common.scale_to_spec(output, self._single_action_spec)
  output_actions = tf.nest.pack_sequence_as(self._output_tensor_spec, [actions])
  return output_actions, network_state
def call(self, observations, step_type=(), network_state=()):
  outer_rank = nest_utils.get_outer_rank(observations, self.input_tensor_spec)
  batch_squash = BatchSquash(outer_rank)
  observations = nest.map_structure(batch_squash.flatten, observations)
  state, network_state = self._encoder(
      observations, step_type=step_type, network_state=network_state)
  actions = self._action_projection_layer(state)
  actions = scale_to_spec(actions, self._single_action_spec)
  actions = batch_squash.unflatten(actions)
  return nest.pack_sequence_as(self._action_spec, [actions]), network_state
def call(self, observation, step_type, network_state=None):
  outer_rank = nest_utils.get_outer_rank(observation, self.input_tensor_spec)
  batch_squash = utils.BatchSquash(outer_rank)
  observation, network_state = self._lstm_encoder(
      observation, step_type=step_type, network_state=network_state)
  states = batch_squash.flatten(observation)
  actions = []
  for layer, spec in zip(self._action_layers, self._flat_action_spec):
    action = layer(states)
    action = common.scale_to_spec(action, spec)
    action = batch_squash.unflatten(action)
    actions.append(action)
  output_actions = tf.nest.pack_sequence_as(self._output_tensor_spec, actions)
  return output_actions, network_state
def create_actor_network(fc_layer_units, action_spec):
  """Create an actor network for DDPG."""
  flat_action_spec = tf.nest.flatten(action_spec)
  if len(flat_action_spec) > 1:
    raise ValueError(
        'Only a single action tensor is supported by this network')
  flat_action_spec = flat_action_spec[0]

  fc_layers = [dense(num_units) for num_units in fc_layer_units]

  num_actions = flat_action_spec.shape.num_elements()
  action_fc_layer = tf.keras.layers.Dense(
      num_actions,
      activation=tf.keras.activations.tanh,
      kernel_initializer=tf.keras.initializers.RandomUniform(
          minval=-0.003, maxval=0.003))

  scaling_layer = tf.keras.layers.Lambda(
      lambda x: common.scale_to_spec(x, flat_action_spec))
  return sequential.Sequential(fc_layers + [action_fc_layer, scaling_layer])
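# Hypothetical usage sketch for create_actor_network above. The spec values are
# made up for illustration, and `dense` in the factory is assumed to be a
# module-level helper that returns a tf.keras.layers.Dense layer:
import tensorflow as tf
from tf_agents.specs import tensor_spec

example_action_spec = tensor_spec.BoundedTensorSpec(
    (2,), tf.float32, minimum=-1.0, maximum=1.0)
example_actor_net = create_actor_network(
    fc_layer_units=(400, 300), action_spec=example_action_spec)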
def call(self, observations, step_type=(), network_state=()):
  outer_rank = nest_utils.get_outer_rank(observations, self.input_tensor_spec)
  # We use batch_squash here in case the observations have a time sequence
  # component.
  batch_squash = utils.BatchSquash(outer_rank)
  observations = tf.nest.map_structure(batch_squash.flatten, observations)
  state, network_state = self._encoder(
      observations, step_type=step_type, network_state=network_state)
  actions = self._action_projection_layer(state)
  actions = common_utils.scale_to_spec(actions, self._action_spec)
  actions = batch_squash.unflatten(actions)
  return tf.nest.pack_sequence_as(self._action_spec, [actions]), network_state


####ACTOR TEST####
#action_spec = array_spec.BoundedArraySpec((6,), np.float32, minimum=0, maximum=10)
#observation_spec = array_spec.BoundedArraySpec((64, 64, 3), np.float32, minimum=0,
#                                               maximum=255)
#
#random_env = random_py_environment.RandomPyEnvironment(observation_spec, action_spec=action_spec)
#
## Convert the environment to a TFEnv to generate tensors.
#tf_env = tf_py_environment.TFPyEnvironment(random_env)
#
##preprocessing_layers = {
##    'image': tf.keras.models.Sequential([tf.keras.layers.Conv2D(8, 4),
##                                         tf.keras.layers.Flatten()]),
##    'vector': tf.keras.layers.Dense(5)
##    }
##preprocessing_combiner = tf.keras.layers.Concatenate(axis=-1)
#actor = ActorNetwork(tf_env.observation_spec(),
#                     tf_env.action_spec())
#
#time_step = tf_env.reset()
##print(actor(time_step.observation, time_step.step_type))
def call(self, observation, step_type, network_state=None, training=False):
  # Preprocess for multiple observations.
  if self._flat_preprocessing_layers is None:
    processed = observation
  else:
    processed = []
    for obs, layer in zip(
        nest.flatten_up_to(
            self._preprocessing_nest, observation, check_types=False),
        self._flat_preprocessing_layers):
      processed.append(layer(obs, training=training))
    if len(processed) == 1 and self._preprocessing_combiner is None:
      # If only one observation is passed and the preprocessing_combiner
      # is unspecified, use the preprocessed version of this observation.
      processed = processed[0]
  observation = processed

  if self._preprocessing_combiner is not None:
    observation = self._preprocessing_combiner(observation)

  observation_spec = tensor_spec.TensorSpec((observation.shape[-1],),
                                            dtype=observation.dtype)
  num_outer_dims = nest_utils.get_outer_rank(observation, observation_spec)
  if num_outer_dims not in (1, 2):
    raise ValueError(
        'Input observation must have a batch or batch x time outer shape.')

  has_time_dim = num_outer_dims == 2
  if not has_time_dim:
    # Add a time dimension to the inputs.
    observation = tf.nest.map_structure(lambda t: tf.expand_dims(t, 1),
                                        observation)
    step_type = tf.nest.map_structure(lambda t: tf.expand_dims(t, 1), step_type)

  states = tf.cast(tf.nest.flatten(observation)[0], tf.float32)
  batch_squash = utils.BatchSquash(2)  # Squash B, and T dims.
  states = batch_squash.flatten(states)  # [B, T, ...] -> [B x T, ...]

  for layer in self._input_layers:
    states = layer(states, training=training)

  states = batch_squash.unflatten(states)  # [B x T, ...] -> [B, T, ...]

  with tf.name_scope('reset_mask'):
    reset_mask = tf.equal(step_type, time_step.StepType.FIRST)

  # Unroll over the time sequence.
  states, network_state = self._dynamic_unroll(
      states, reset_mask, initial_state=network_state, training=training)

  states = batch_squash.flatten(states)  # [B, T, ...] -> [B x T, ...]

  for layer in self._output_layers:
    states = layer(states, training=training)

  actions = []
  for layer, spec in zip(self._action_layers, self._flat_action_spec):
    action = layer(states, training=training)
    action = common.scale_to_spec(action, spec)
    action = batch_squash.unflatten(action)  # [B x T, ...] -> [B, T, ...]
    if not has_time_dim:
      action = tf.squeeze(action, axis=1)
    actions.append(action)

  output_actions = tf.nest.pack_sequence_as(self._output_tensor_spec, actions)
  return output_actions, network_state