def test_ppo_model_dc_visual():
    """Smoke test: discrete-action PPO model with two visual observations runs a forward pass."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            brain_params = make_brain_parameters(discrete_action=True, visual_inputs=2)
            model = PPOModel(brain_params)
            sess.run(tf.global_variables_initializer())

            # Tensors we expect to be computable in one forward pass.
            fetches = [
                model.output,
                model.all_log_probs,
                model.value,
                model.entropy,
                model.learning_rate,
            ]
            # Batch of 2 fake vector observations; both visual inputs get the
            # same all-ones image, which is behaviorally equivalent to two
            # separate identical arrays (placeholders only read the data).
            vec_obs = np.array([[1, 2, 3, 1, 2, 3], [3, 4, 5, 3, 4, 5]])
            visual_obs = np.ones([2, 40, 30, 3], dtype=np.float32)
            feeds = {
                model.batch_size: 2,
                model.sequence_length: 1,
                model.vector_in: vec_obs,
                model.visual_in[0]: visual_obs,
                model.visual_in[1]: visual_obs,
                model.action_masks: np.ones([2, 2], dtype=np.float32),
            }
            sess.run(fetches, feed_dict=feeds)
def test_ppo_model_cc_vector_rnn():
    """Smoke test: continuous-action recurrent PPO model runs a forward pass with memory."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            memory_size = 128
            brain_params = make_brain_parameters(discrete_action=False, visual_inputs=0)
            model = PPOModel(
                brain_params,
                use_recurrent=True,
                m_size=memory_size,
            )
            sess.run(tf.global_variables_initializer())

            # memory_out must also be computable when use_recurrent=True.
            fetches = [
                model.output,
                model.all_log_probs,
                model.value,
                model.entropy,
                model.learning_rate,
                model.memory_out,
            ]
            # One sequence of length 2 (batch_size 1 x sequence_length 2).
            feeds = {
                model.batch_size: 1,
                model.sequence_length: 2,
                model.memory_in: np.zeros((1, memory_size), dtype=np.float32),
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3], [3, 4, 5, 3, 4, 5]]),
                model.epsilon: np.array([[0, 1]]),
            }
            sess.run(fetches, feed_dict=feeds)
def test_tanh_distribution():
    """Test GaussianDistribution with tanh squashing.

    Checks that samples and log-probs have the expected width, that squashed
    actions stay inside [-1, 1], and that total_log_probs has batch size 1.
    """
    with tf.Graph().as_default():
        logits = tf.Variable(initial_value=[[0, 0]], trainable=True, dtype=tf.float32)
        distribution = GaussianDistribution(
            logits,
            act_size=VECTOR_ACTION_SPACE,
            reparameterize=False,
            tanh_squash=True,
        )
        # Fix: the original created a stray `sess = tf.Session()` here that was
        # immediately shadowed by the `with` block and never closed (session leak).
        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)
            output = sess.run(distribution.sample)
            for _ in range(10):
                output = sess.run([distribution.sample, distribution.log_probs])
                for out in output:
                    assert out.shape[1] == VECTOR_ACTION_SPACE[0]
                # Assert action never exceeds [-1,1]
                action = output[0][0]
                for act in action:
                    assert act >= -1 and act <= 1
                output = sess.run([distribution.total_log_probs])
                assert output[0].shape[0] == 1
def test_average_gradients(mock_get_devices, dummy_config):
    """Check MultiGpuPPOPolicy.average_gradients averages per-tower gradients."""
    tf.reset_default_graph()
    # Pretend four GPUs are available so the policy builds four towers.
    mock_get_devices.return_value = [
        "/device:GPU:0",
        "/device:GPU:1",
        "/device:GPU:2",
        "/device:GPU:3",
    ]

    trainer_parameters = dummy_config
    trainer_parameters["model_path"] = ""
    trainer_parameters["keep_checkpoints"] = 3
    brain = create_mock_brainparams()

    with tf.Session() as sess:
        policy = MultiGpuPPOPolicy(0, brain, trainer_parameters, False, False)
        shared_var = tf.Variable(0)
        # One (gradient, variable) pair per simulated tower.
        per_tower_grads = [
            [(tf.constant(g), shared_var)] for g in (0.1, 0.2, 0.3, 0.4)
        ]
        averaged = policy.average_gradients(per_tower_grads)
        sess.run(tf.global_variables_initializer())
        # mean(0.1, 0.2, 0.3, 0.4) == 0.25; variable value is its initial 0.
        assert sess.run(averaged) == [(0.25, 0)]
def test_gaussian_distribution():
    """Test GaussianDistribution without tanh squashing.

    Checks sample/log-prob shapes, total_log_probs batch size, and that the
    entropy with log_std == 1.0 matches the analytic value (~2.42).
    """
    with tf.Graph().as_default():
        logits = tf.Variable(initial_value=[[1, 1]], trainable=True, dtype=tf.float32)
        distribution = GaussianDistribution(
            logits,
            act_size=VECTOR_ACTION_SPACE,
            reparameterize=False,
            tanh_squash=False,
        )
        # Fix: the original created a stray `sess = tf.Session()` here that was
        # immediately shadowed by the `with` block and never closed (session leak).
        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)
            output = sess.run(distribution.sample)
            for _ in range(10):
                output = sess.run([distribution.sample, distribution.log_probs])
                for out in output:
                    assert out.shape[1] == VECTOR_ACTION_SPACE[0]
                output = sess.run([distribution.total_log_probs])
                assert output[0].shape[0] == 1
            # Test entropy is correct
            log_std_tensor = tf.get_default_graph().get_tensor_by_name(
                "log_std/BiasAdd:0"
            )
            feed_dict = {log_std_tensor: [[1.0, 1.0]]}
            entropy = sess.run([distribution.entropy], feed_dict=feed_dict)
            # Entropy with log_std of 1.0 should be 2.42
            assert pytest.approx(entropy[0], 0.01) == 2.42
def test_multicategorical_distribution():
    """Test MultiCategoricalDistribution sampling, log-probs, entropy, and masking.

    With a full mask, samples must stay within each branch's action count and
    entropy should be near-maximal; with all but the first action masked per
    branch, samples must collapse to action 0/1.
    """
    with tf.Graph().as_default():
        logits = tf.Variable(initial_value=[[0, 0]], trainable=True, dtype=tf.float32)
        action_masks = tf.Variable(
            initial_value=[[1 for _ in range(sum(DISCRETE_ACTION_SPACE))]],
            trainable=True,
            dtype=tf.float32,
        )
        distribution = MultiCategoricalDistribution(
            logits, act_size=DISCRETE_ACTION_SPACE, action_masks=action_masks
        )
        # Fix: the original created a stray `sess = tf.Session()` here that was
        # immediately shadowed by the `with` block and never closed (session leak).
        with tf.Session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)
            output = sess.run(distribution.sample)
            for _ in range(10):
                sample, log_probs, entropy = sess.run(
                    [distribution.sample, distribution.log_probs, distribution.entropy]
                )
                assert len(log_probs[0]) == sum(DISCRETE_ACTION_SPACE)
                # One sampled action per branch, each within its branch's range.
                assert len(sample[0]) == len(DISCRETE_ACTION_SPACE)
                for i, act in enumerate(sample[0]):
                    assert act >= 0 and act <= DISCRETE_ACTION_SPACE[i]
                output = sess.run([distribution.total_log_probs])
                assert output[0].shape[0] == 1
                # Make sure entropy is correct
                assert entropy[0] > 3.8

            # Test masks: allow only the first action in each branch.
            mask = []
            for space in DISCRETE_ACTION_SPACE:
                mask.append(1)
                for _action_space in range(1, space):
                    mask.append(0)
            for _ in range(10):
                sample, log_probs = sess.run(
                    [distribution.sample, distribution.log_probs],
                    feed_dict={action_masks: [mask]},
                )
                for act in sample[0]:
                    assert act >= 0 and act <= 1
                output = sess.run([distribution.total_log_probs])
def test_sac_model_cc_vector():
    """Smoke test: continuous-action SAC model with vector obs runs a forward pass."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            brain_params = make_brain_parameters(discrete_action=False, visual_inputs=0)
            model = SACModel(brain_params)
            sess.run(tf.global_variables_initializer())

            fetches = [model.output, model.value, model.entropy, model.learning_rate]
            feeds = {
                model.batch_size: 2,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3], [3, 4, 5, 3, 4, 5]]),
            }
            sess.run(fetches, feed_dict=feeds)
def test_visual_cc_bc_model():
    """Smoke test: continuous-action BC model with two visual inputs runs a forward pass."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            brain_params = make_brain_parameters(discrete_action=False, visual_inputs=2)
            model = BehavioralCloningModel(brain_params)
            sess.run(tf.global_variables_initializer())

            fetches = [model.sample_action, model.policy]
            # Both visual placeholders receive the same all-ones image data.
            visual_obs = np.ones([2, 40, 30, 3], dtype=np.float32)
            feeds = {
                model.batch_size: 2,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3], [3, 4, 5, 3, 4, 5]]),
                model.visual_in[0]: visual_obs,
                model.visual_in[1]: visual_obs,
            }
            sess.run(fetches, feed_dict=feeds)
def test_dc_bc_model():
    """Smoke test: discrete-action BC model with vector obs runs a forward pass."""
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            brain_params = make_brain_parameters(discrete_action=True, visual_inputs=0)
            model = BehavioralCloningModel(brain_params)
            sess.run(tf.global_variables_initializer())

            fetches = [model.sample_action, model.action_probs]
            feeds = {
                model.batch_size: 2,
                model.dropout_rate: 1.0,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3], [3, 4, 5, 3, 4, 5]]),
                model.action_masks: np.ones([2, 2]),
            }
            sess.run(fetches, feed_dict=feeds)
def _initialize_graph(self):
    """Create the checkpoint saver and initialize all graph variables."""
    with self.graph.as_default():
        # Saver is retained on self so later checkpoint saves/restores can use it.
        self.saver = tf.train.Saver(max_to_keep=self._keep_checkpoints)
        self.sess.run(tf.global_variables_initializer())
def initialize(self):
    """Run global variable initialization on this object's session and graph."""
    with self.graph.as_default():
        self.sess.run(tf.global_variables_initializer())