def test_network_can_preprocess_and_combine_no_time_dim(self):
  # Exercises QRnnNetwork with per-input preprocessing layers and an Add()
  # combiner when the inputs carry only a batch dimension (no time axis).
  # Skipped under eager execution; see b/123776211.
  if tf.executing_eagerly():
    self.skipTest('b/123776211')

  batch_size = 3
  num_actions = 2
  lstm_size = 5

  # One rank-1 observation per batch entry and one scalar observation.
  vector_obs = tf.random.uniform([batch_size, 1])
  scalar_obs = tf.random.uniform([batch_size])
  states = (vector_obs, scalar_obs)

  vector_branch = tf.keras.layers.Dense(4)
  scalar_branch = tf.keras.Sequential([
      # Convert to vec size (1,).
      expand_dims_layer.ExpandDims(-1),
      tf.keras.layers.Dense(4),
  ])

  observation_spec = (
      tensor_spec.TensorSpec([1], tf.float32),
      tensor_spec.TensorSpec([], tf.float32),
  )
  combiner = tf.keras.layers.Add()
  bounded_action_spec = tensor_spec.BoundedTensorSpec(
      [1], tf.int32, 0, num_actions - 1)
  network = q_rnn_network.QRnnNetwork(
      input_tensor_spec=observation_spec,
      preprocessing_layers=(vector_branch, scalar_branch),
      preprocessing_combiner=combiner,
      lstm_size=(lstm_size,),
      action_spec=bounded_action_spec)

  first_steps = [time_step.StepType.FIRST] * batch_size
  empty_step_type = tf.constant(first_steps)
  q_values, _ = network(states, empty_step_type)

  # Processed 1 time step and the time axis was squeezed back.
  expected_shape = [batch_size, num_actions]
  self.assertAllEqual(q_values.shape.as_list(), expected_shape)

  # At least 2 variables each for the preprocessing layers.
  trainable_count = len(network.trainable_variables)
  self.assertGreater(trainable_count, 4)
def test_network_can_preprocess_and_combine(self):
  # Exercises QRnnNetwork with per-input preprocessing layers and an Add()
  # combiner on inputs shaped [batch, frames, ...].
  batch_size = 3
  frames = 5
  num_actions = 2
  lstm_size = 6

  # One rank-1 observation and one scalar observation per (batch, frame).
  vector_obs = tf.random.uniform([batch_size, frames, 1])
  scalar_obs = tf.random.uniform([batch_size, frames])
  states = (vector_obs, scalar_obs)

  vector_branch = tf.keras.layers.Dense(4)
  scalar_branch = tf.keras.Sequential([
      # Convert to vec size (1,).
      expand_dims_layer.ExpandDims(-1),
      tf.keras.layers.Dense(4),
  ])

  observation_spec = (
      tensor_spec.TensorSpec([1], tf.float32),
      tensor_spec.TensorSpec([], tf.float32),
  )
  combiner = tf.keras.layers.Add()
  bounded_action_spec = tensor_spec.BoundedTensorSpec(
      [1], tf.int32, 0, num_actions - 1)
  network = q_rnn_network.QRnnNetwork(
      input_tensor_spec=observation_spec,
      preprocessing_layers=(vector_branch, scalar_branch),
      preprocessing_combiner=combiner,
      lstm_size=(lstm_size,),
      action_spec=bounded_action_spec)

  # Every (batch, frame) entry is marked as a FIRST step.
  steps_per_row = [time_step.StepType.FIRST] * frames
  empty_step_type = tf.constant([steps_per_row] * batch_size)
  q_values, _ = network(states, empty_step_type)

  expected_shape = [batch_size, frames, num_actions]
  self.assertAllEqual(q_values.shape.as_list(), expected_shape)

  # At least 2 variables each for the preprocessing layers.
  trainable_count = len(network.trainable_variables)
  self.assertGreater(trainable_count, 4)
def testTrainWithNN(self, is_convert, is_distribution_network):
  # Trains a BehavioralCloningAgent on a fixed trajectory and checks that the
  # loss after the final training step is lower than after the first one.
  #
  # is_convert: when true, batch and time axes of the trajectory are swapped.
  # is_distribution_network: when true, the cloning network is a Sequential
  #   ending in a Categorical distribution; otherwise a QNetwork is used.

  # Hard code a trajectory shaped (time=6, batch=1, ...).
  traj, time_step_spec, action_spec = create_arbitrary_trajectory()
  if is_convert:
    # Convert to single step trajectory of shapes (batch=6, 1, ...).
    traj = tf.nest.map_structure(common.transpose_batch_time, traj)

  if not is_distribution_network:
    cloning_net = q_network.QNetwork(time_step_spec.observation, action_spec)
  else:
    # One logit per action value in [minimum, maximum].
    num_logits = action_spec.maximum - action_spec.minimum + 1
    cloning_net = sequential.Sequential([
        expand_dims_layer.ExpandDims(-1),
        tf.keras.layers.Dense(num_logits),
        tf.keras.layers.Lambda(
            lambda t: tfp.distributions.Categorical(logits=t)),
    ])

  adam = tf.compat.v1.train.AdamOptimizer(learning_rate=0.001)
  agent = behavioral_cloning_agent.BehavioralCloningAgent(
      time_step_spec,
      action_spec,
      cloning_network=cloning_net,
      optimizer=adam,
      num_outer_dims=2)
  # Disable clipping to make sure we can see the difference in behavior
  agent.policy._clip = False

  # TODO(b/123883319)
  if tf.executing_eagerly():
    # In eager mode self.evaluate is handed a callable, so each evaluation
    # performs a fresh training step.
    def train_and_loss():
      return agent.train(traj)
  else:
    train_and_loss = agent.train(traj)
  self.evaluate(tf.compat.v1.global_variables_initializer())

  initial_loss = self.evaluate(train_and_loss).loss
  remaining_steps = TRAIN_ITERATIONS - 1
  for _ in range(remaining_steps):
    final_loss = self.evaluate(train_and_loss).loss

  # We don't necessarily converge to the same actions as in trajectory after
  # 10 steps of an untuned optimizer, but the loss should go down.
  self.assertGreater(initial_loss, final_loss)
def get_observation_processing_layer_creator(quantile_file_dir,
                                             with_z_score_normalization=False,
                                             eps=1e-8):
  """Returns a factory that builds a preprocessing layer per observation spec.

  Args:
    quantile_file_dir: Passed to `_build_quantile_map`; the resulting map is
      keyed by observation name and yields `(quantile, mean, std)` entries.
    with_z_score_normalization: If true, a z-score feature
      `(obs - mean) / (std + eps)` is appended to the normalized features.
    eps: Added to `std` in the z-score denominator.

  Returns:
    A function taking an observation spec and returning a
    `tf.keras.layers.Lambda` layer.
  """
  stats_by_name = _build_quantile_map(quantile_file_dir)
  # A single ExpandDims layer instance is shared by every created Lambda.
  add_trailing_axis = expand_dims_layer.ExpandDims(-1)

  def observation_processing_layer(obs_spec):
    """Builds the Lambda layer that processes observations for obs_spec."""
    feature_name = obs_spec.name

    if feature_name not in stats_by_name:

      def discard_feature(obs):
        # An empty output would make tf.keras.layers.Concatenate fail, so a
        # zero tensor of the expanded shape is emitted instead.
        return tf.zeros_like(add_trailing_axis(obs), dtype=tf.float32)

      return tf.keras.layers.Lambda(discard_feature)

    quantile, mean, std = stats_by_name[feature_name]

    def normalization(obs):
      expanded = add_trailing_axis(obs)
      # Bucket index divided by the number of boundaries.
      bucket_ids = tf.raw_ops.Bucketize(input=expanded, boundaries=quantile)
      scaled = tf.cast(bucket_ids, tf.float32) / len(quantile)
      root = tf.sqrt(scaled)
      squared = scaled * scaled
      pieces = [scaled, root, squared]
      if with_z_score_normalization:
        raw = tf.cast(expanded, tf.float32)
        pieces.append((raw - mean) / (std + eps))
      return tf.concat(pieces, axis=-1)

    return tf.keras.layers.Lambda(normalization)

  return observation_processing_layer