def test_ppo_model_cc_visual(mock_communicator, mock_launcher):
    """Smoke-test the continuous-action PPO model with two visual observations.

    Builds the model against a mocked Unity brain, then runs one forward pass
    to check the graph wires up (no value assertions — construction test only).
    """
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=2)
            env = UnityEnvironment(' ')
            model = PPOModel(env.brains["RealFakeBrain"])
            sess.run(tf.global_variables_initializer())
            fetches = [
                model.output,
                model.log_probs,
                model.value,
                model.entropy,
                model.learning_rate,
            ]
            feed = {
                model.batch_size: 2,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
                # Two camera inputs, batch of 2, 40x30 RGB each.
                model.visual_in[0]: np.ones([2, 40, 30, 3]),
                model.visual_in[1]: np.ones([2, 40, 30, 3]),
                model.epsilon: np.array([[0, 1], [2, 3]]),
            }
            sess.run(fetches, feed_dict=feed)
            env.close()
def test_ppo_model_dc_vector_curio(mock_communicator, mock_launcher):
    """Smoke-test the discrete-action PPO model with curiosity enabled.

    Runs a single forward pass, including the intrinsic-reward output that
    the curiosity module adds; construction/wiring test only.
    """
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=True, visual_inputs=0)
            env = UnityEnvironment(' ')
            model = PPOModel(env.brains["RealFakeBrain"], use_curiosity=True)
            sess.run(tf.global_variables_initializer())
            fetches = [
                model.output,
                model.all_log_probs,
                model.value,
                model.entropy,
                model.learning_rate,
                model.intrinsic_reward,
            ]
            obs = np.array([[1, 2, 3, 1, 2, 3], [3, 4, 5, 3, 4, 5]])
            feed = {
                model.batch_size: 2,
                model.sequence_length: 1,
                # Curiosity needs both the current and the next observation.
                model.vector_in: obs,
                model.next_vector_in: obs,
                model.action_holder: [[0], [0]],
                model.action_masks: np.ones([2, 2]),
            }
            sess.run(fetches, feed_dict=feed)
            env.close()
def test_initialize_trainers(mock_communicator, mock_launcher, dummy_config,
                             dummy_offline_bc_config, dummy_online_bc_config,
                             dummy_bad_config):
    """TrainerController builds the right trainer type per config.

    Checks PPO and online-BC trainer construction, and that an unknown
    trainer name raises UnityEnvironmentException.
    """
    open_name = 'mlagents.trainers.trainer_controller' + '.open'
    with mock.patch('yaml.load') as mock_load, \
            mock.patch(open_name, create=True):
        mock_communicator.return_value = MockCommunicator(
            discrete_action=True, visual_inputs=1)
        tc = TrainerController(' ', ' ', 1, None, True, False, False, 1, 1,
                               1, 1, '', "tests/test_mlagents.trainers.py",
                               False)

        # PPO trainer.
        mock_load.return_value = dummy_config
        config = tc._load_config()
        tf.reset_default_graph()
        tc._initialize_trainers(config)
        assert len(tc.trainers) == 1
        assert isinstance(tc.trainers['RealFakeBrain'], PPOTrainer)

        # Online behavioral-cloning trainer.
        mock_load.return_value = dummy_online_bc_config
        config = tc._load_config()
        tf.reset_default_graph()
        tc._initialize_trainers(config)
        assert isinstance(tc.trainers['RealFakeBrain'], OnlineBCTrainer)

        # An unrecognized trainer name must raise.
        mock_load.return_value = dummy_bad_config
        config = tc._load_config()
        tf.reset_default_graph()
        with pytest.raises(UnityEnvironmentException):
            tc._initialize_trainers(config)
def test_ppo_model_cc_vector_rnn(mock_communicator, mock_launcher):
    """Smoke-test the continuous-action PPO model with a recurrent body.

    Feeds a zeroed memory state and a sequence of length 2, fetching
    memory_out alongside the usual heads; construction/wiring test only.
    """
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=0)
            env = UnityEnvironment(' ')
            memory_size = 128
            model = PPOModel(env.brains["RealFakeBrain"],
                             use_recurrent=True, m_size=memory_size)
            sess.run(tf.global_variables_initializer())
            fetches = [
                model.output,
                model.all_log_probs,
                model.value,
                model.entropy,
                model.learning_rate,
                model.memory_out,
            ]
            feed = {
                # One agent, unrolled over a sequence of two steps.
                model.batch_size: 1,
                model.sequence_length: 2,
                model.memory_in: np.zeros((1, memory_size)),
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
                model.epsilon: np.array([[0, 1]]),
            }
            sess.run(fetches, feed_dict=feed)
            env.close()
def test_step(mock_communicator, mock_launcher):
    """Exercise UnityEnvironment.step with valid and invalid action vectors.

    A wrongly-sized action must raise UnityActionException; after the mocked
    episode ends, stepping again must also raise. Finally, the last BrainInfo
    is validated for shape and done flags.
    """
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(' ')
    brain = env.brains['RealFakeBrain']
    act_size = brain.vector_action_space_size[0]

    brain_info = env.reset()
    brain_info = env.step(
        [0] * act_size * len(brain_info['RealFakeBrain'].agents))
    with pytest.raises(UnityActionException):
        env.step([0])  # wrong length
    brain_info = env.step(
        [-1] * act_size * len(brain_info['RealFakeBrain'].agents))
    # The mocked episode is now globally done, so stepping again must raise.
    with pytest.raises(UnityActionException):
        env.step([0] * act_size * len(brain_info['RealFakeBrain'].agents))
    env.close()

    assert env.global_done
    assert isinstance(brain_info, dict)
    info = brain_info['RealFakeBrain']
    assert isinstance(info, BrainInfo)
    assert isinstance(info.visual_observations, list)
    assert isinstance(info.vector_observations, np.ndarray)
    assert len(info.visual_observations) == brain.number_visual_observations
    assert info.vector_observations.shape[0] == len(info.agents)
    assert info.vector_observations.shape[1] == \
        brain.vector_observation_space_size * \
        brain.num_stacked_vector_observations
    print("\n\n\n\n\n\n\n" + str(brain_info['RealFakeBrain'].local_done))
    assert not info.local_done[0]
    assert info.local_done[2]
def test_close(mock_communicator, mock_launcher):
    """Closing the environment flips _loaded and closes the communicator."""
    comm = MockCommunicator(discrete_action=False, visual_inputs=0)
    mock_communicator.return_value = comm
    env = UnityEnvironment(' ')
    assert env._loaded
    env.close()
    assert not env._loaded
    assert comm.has_been_closed
def test_initialization(mock_communicator, mock_launcher):
    """A fresh environment exposes the mocked brain and rejects step()
    before reset()."""
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(' ')
    # Stepping before the first reset is an error.
    with pytest.raises(UnityActionException):
        env.step([0])
    assert env.brain_names[0] == 'RealFakeBrain'
    env.close()
def test_load_config(mock_communicator, mock_launcher, dummy_config):
    """TrainerController._load_config returns the (mocked) YAML as a dict."""
    open_name = 'mlagents.trainers.trainer_controller' + '.open'
    with mock.patch('yaml.load') as mock_load, \
            mock.patch(open_name, create=True):
        mock_load.return_value = dummy_config
        mock_communicator.return_value = MockCommunicator(
            discrete_action=True, visual_inputs=1)
        tc = TrainerController(' ', ' ', 1, None, True, True, False, 1,
                               1, 1, 1, '', '', False)
        config = tc._load_config()
        assert len(config) == 1
        assert config['default']['trainer'] == "ppo"
def test_ppo_policy_evaluate(mock_communicator, mock_launcher, dummy_config):
    """PPOPolicy.evaluate returns an action array of the expected shape
    (3 mocked agents, 2 continuous action dims)."""
    tf.reset_default_graph()
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(' ')
    brain_name = env.brain_names[0]
    brain_info = env.reset()[brain_name]

    params = dummy_config
    params['model_path'] = brain_name
    params['keep_checkpoints'] = 3
    policy = PPOPolicy(0, env.brains[brain_name], params, False, False)
    result = policy.evaluate(brain_info)
    assert result['action'].shape == (3, 2)
    env.close()
def test_cc_bc_model(mock_communicator, mock_launcher):
    """Smoke-test the continuous-action behavioral-cloning model.

    Constructs the model against a mocked brain and runs one forward pass.
    """
    tf.reset_default_graph()
    with tf.Session() as sess:
        with tf.variable_scope("FakeGraphScope"):
            mock_communicator.return_value = MockCommunicator(
                discrete_action=False, visual_inputs=0)
            env = UnityEnvironment(' ')
            model = BehavioralCloningModel(env.brains["RealFakeBrain"])
            sess.run(tf.global_variables_initializer())
            fetches = [model.sample_action, model.policy]
            feed = {
                model.batch_size: 2,
                model.sequence_length: 1,
                model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                           [3, 4, 5, 3, 4, 5]]),
            }
            sess.run(fetches, feed_dict=feed)
            env.close()
def test_multi_agent(mock_communicator, mock_launcher):
    """Gym wrapper in multi-agent mode returns per-agent lists from step().

    Also checks that a two-agent environment rejects multiagent=False.
    """
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0,
                                                      stack=False,
                                                      num_agents=2)
    # Two agents with multiagent=False is a configuration error.
    with pytest.raises(UnityGymException):
        UnityEnv(' ', multiagent=False)

    env = UnityEnv(' ', use_visual=False, multiagent=True)
    assert isinstance(env.reset(), list)
    actions = [env.action_space.sample() for _ in range(env.number_agents)]
    obs, rew, done, info = env.step(actions)
    assert isinstance(obs, list)
    assert isinstance(rew, list)
    assert isinstance(done, list)
    assert isinstance(info, dict)
def test_ppo_policy_evaluate(mock_communicator, mock_launcher):
    """PPOPolicy.evaluate returns an action array of shape (3 agents, 2 dims).

    NOTE(review): this duplicates the name of another test_ppo_policy_evaluate
    and constructs PPOPolicy with (trainer_parameters, sess, False) rather than
    (trainer_parameters, False, False) — looks like an older API variant;
    confirm which version of PPOPolicy this file targets.
    """
    tf.reset_default_graph()
    with tf.Session() as sess:
        mock_communicator.return_value = MockCommunicator(
            discrete_action=False, visual_inputs=0)
        env = UnityEnvironment(' ')
        brain_name = env.brain_names[0]
        brain_info = env.reset()[brain_name]

        params = dummy_config()
        params['graph_scope'] = brain_name
        policy = PPOPolicy(0, env.brains[brain_name], params, sess, False)
        sess.run(tf.global_variables_initializer())
        result = policy.evaluate(brain_info)
        assert result['action'].shape == (3, 2)
        env.close()
def test_reset(mock_communicator, mock_launcher):
    """env.reset() yields a per-brain BrainInfo dict with consistent shapes."""
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0)
    env = UnityEnvironment(' ')
    brain = env.brains['RealFakeBrain']
    brain_info = env.reset()
    env.close()

    assert not env.global_done
    assert isinstance(brain_info, dict)
    info = brain_info['RealFakeBrain']
    assert isinstance(info, BrainInfo)
    assert isinstance(info.visual_observations, list)
    assert isinstance(info.vector_observations, np.ndarray)
    assert len(info.visual_observations) == brain.number_visual_observations
    # One observation row per agent, width = obs size x stacking factor.
    assert info.vector_observations.shape[0] == len(info.agents)
    assert info.vector_observations.shape[1] == \
        brain.vector_observation_space_size * \
        brain.num_stacked_vector_observations
def test_gym_wrapper(mock_communicator, mock_launcher):
    """Gym wrapper in single-agent mode returns scalar-style step() results.

    Also checks that a one-agent environment rejects multiagent=True.
    """
    mock_communicator.return_value = MockCommunicator(discrete_action=False,
                                                      visual_inputs=0,
                                                      stack=False,
                                                      num_agents=1)
    # One agent with multiagent=True is a configuration error.
    with pytest.raises(UnityGymException):
        UnityEnv(' ', use_visual=False, multiagent=True)

    env = UnityEnv(' ', use_visual=False)
    assert isinstance(env, UnityEnv)
    assert isinstance(env.reset(), np.ndarray)
    actions = env.action_space.sample()
    assert actions.shape[0] == 2
    obs, rew, done, info = env.step(actions)
    assert isinstance(obs, np.ndarray)
    assert isinstance(rew, float)
    assert isinstance(done, bool)
    assert isinstance(info, dict)
def test_initialize_offline_trainers(mock_communicator, mock_launcher,
                                     dummy_config, dummy_offline_bc_config,
                                     dummy_online_bc_config,
                                     dummy_bad_config):
    """TrainerController builds an OfflineBCTrainer from the offline-BC
    config (uses the Ball3DBrain mock with an 8-dim vector observation)."""
    open_name = 'mlagents.trainers.trainer_controller' + '.open'
    with mock.patch('yaml.load') as mock_load, \
            mock.patch(open_name, create=True):
        mock_communicator.return_value = MockCommunicator(
            discrete_action=False, stack=False, visual_inputs=0,
            brain_name="Ball3DBrain", vec_obs_size=8)
        tc = TrainerController(' ', ' ', 1, None, True, False, False, 1, 1,
                               1, 1, '', "tests/test_mlagents.trainers.py",
                               False)

        mock_load.return_value = dummy_offline_bc_config
        config = tc._load_config()
        tf.reset_default_graph()
        tc._initialize_trainers(config)
        assert isinstance(tc.trainers['Ball3DBrain'], OfflineBCTrainer)
def test_initialization(mock_communicator, mock_launcher):
    """TrainerController picks up the mocked environment's brain name."""
    mock_communicator.return_value = MockCommunicator(discrete_action=True,
                                                      visual_inputs=1)
    tc = TrainerController(' ', ' ', 1, None, True, True, False, 1, 1, 1,
                           1, '', "tests/test_mlagents.trainers.py", False)
    assert tc.env.brain_names[0] == 'RealFakeBrain'