def create_ppo_policy_mock(
    mock_env, dummy_config, reward_signal_config, use_rnn, use_discrete, use_visual
):
    """Build a mocked Unity environment plus a PPOPolicy wired to it.

    Deduplicated: the original repeated every brain/braininfo keyword in both
    the visual and non-visual branches; here the shared kwargs are built once
    and only the observation-related keys vary.

    :param mock_env: mock class standing in for UnityEnvironment.
    :param dummy_config: trainer-parameter dict fixture (mutated in place).
    :param reward_signal_config: reward-signal settings merged into the config.
    :param use_rnn: whether the policy uses a recurrent network.
    :param use_discrete: discrete vs. continuous action space.
    :param use_visual: visual observations (1 camera) vs. vector observations.
    :return: tuple of (instantiated mock env, PPOPolicy).
    """
    action_space = DISCRETE_ACTION_SPACE if use_discrete else VECTOR_ACTION_SPACE
    brain_kwargs = {
        "vector_action_space_type": "discrete" if use_discrete else "continuous",
        "vector_action_space_size": action_space,
    }
    info_kwargs = {
        "num_agents": NUM_AGENTS,
        "num_vector_acts": sum(action_space),
        "discrete": use_discrete,
    }
    if use_visual:
        brain_kwargs["vector_observation_space_size"] = 0
        brain_kwargs["number_visual_observations"] = 1
        info_kwargs["num_vis_observations"] = 1
    else:
        brain_kwargs["vector_observation_space_size"] = VECTOR_OBS_SPACE
        info_kwargs["num_vector_observations"] = VECTOR_OBS_SPACE
    mock_brain = mb.create_mock_brainparams(**brain_kwargs)
    mock_braininfo = mb.create_mock_braininfo(**info_kwargs)
    mb.setup_mock_unityenvironment(mock_env, mock_brain, mock_braininfo)
    env = mock_env()

    trainer_parameters = dummy_config
    # The brain name doubles as the model path in these tests.
    trainer_parameters["model_path"] = env.brain_names[0]
    trainer_parameters["keep_checkpoints"] = 3
    trainer_parameters["reward_signals"].update(reward_signal_config)
    trainer_parameters["use_recurrent"] = use_rnn
    policy = PPOPolicy(0, mock_brain, trainer_parameters, False, False)
    return env, policy
def test_update(mock_get_devices, mock_construct_feed_dict, mock_execute_model, dummy_config):
    """Verify MultiGpuPPOPolicy.update fans the minibatch out to every GPU.

    With two mocked devices, the minibatch and feed-dict construction must
    each run once per device, and the averaged losses must be reported under
    the expected keys.
    """
    tf.reset_default_graph()
    devices = ["/device:GPU:0", "/device:GPU:1"]
    mock_get_devices.return_value = devices
    mock_construct_feed_dict.return_value = {}
    mock_execute_model.return_value = {
        "value_loss": 0.1,
        "policy_loss": 0.3,
        "update_batch": None,
    }

    params = dummy_config
    params["model_path"] = ""
    params["keep_checkpoints"] = 3
    policy = MultiGpuPPOPolicy(0, create_mock_brainparams(), params, False, False)

    fake_batch = mock.Mock()
    fake_batch.items.return_value = [("action", [1, 2]), ("value", [3, 4])]
    run_out = policy.update(fake_batch, 1)

    # One minibatch split and one feed dict per device.
    assert fake_batch.items.call_count == len(devices)
    assert mock_construct_feed_dict.call_count == len(devices)
    assert run_out["Losses/Value Loss"] == 0.1
    assert run_out["Losses/Policy Loss"] == 0.3
def test_average_gradients(mock_get_devices, dummy_config):
    """Check that average_gradients means per-variable gradients across towers.

    Four towers each contribute one gradient (0.1 .. 0.4) for the same
    variable; the averaged result must be [(0.25, 0)].
    """
    tf.reset_default_graph()
    mock_get_devices.return_value = [f"/device:GPU:{i}" for i in range(4)]

    params = dummy_config
    params["model_path"] = ""
    params["keep_checkpoints"] = 3
    brain = create_mock_brainparams()

    with tf.Session() as sess:
        policy = MultiGpuPPOPolicy(0, brain, params, False, False)
        var = tf.Variable(0)
        tower_grads = [[(tf.constant(g), var)] for g in (0.1, 0.2, 0.3, 0.4)]
        avg_grads = policy.average_gradients(tower_grads)
        sess.run(tf.global_variables_initializer())
        assert sess.run(avg_grads) == [(0.25, 0)]
def create_mock_3dball_brain():
    """Return mock brain parameters in a 3DBall-style setup:
    8 vector observations and a 2-dimensional continuous action space.
    """
    return mb.create_mock_brainparams(
        vector_action_space_type="continuous",
        vector_action_space_size=[2],
        vector_observation_space_size=8,
    )
def create_mock_brain():
    """Return generic mock brain parameters: 8 vector observations, one
    visual observation, and a 2-dimensional continuous action space.
    """
    return mb.create_mock_brainparams(
        vector_action_space_type="continuous",
        vector_action_space_size=[2],
        vector_observation_space_size=8,
        number_visual_observations=1,
    )
def create_mock_banana_brain():
    """Return mock brain parameters in a Banana-style setup: one visual
    observation, no vector observations, and a multi-branch discrete
    action space of sizes [3, 3, 3, 2].
    """
    return mb.create_mock_brainparams(
        number_visual_observations=1,
        vector_action_space_type="discrete",
        vector_action_space_size=[3, 3, 3, 2],
        vector_observation_space_size=0,
    )
def test_create_model(mock_get_devices, dummy_config):
    """Ensure MultiGpuPPOPolicy builds exactly one model tower per GPU."""
    tf.reset_default_graph()
    mock_get_devices.return_value = [f"/device:GPU:{i}" for i in range(4)]

    params = dummy_config
    params["model_path"] = ""
    params["keep_checkpoints"] = 3
    policy = MultiGpuPPOPolicy(0, create_mock_brainparams(), params, False, False)

    assert len(policy.towers) == len(mock_get_devices.return_value)