def test_step(mock_communicator, mock_launcher):
    """Step a mocked continuous-action environment and check the last BrainInfo.

    Resets the environment, steps it with correctly sized actions, and checks
    that ``UnityActionException`` is raised both for a wrongly sized action
    and for a correctly sized action sent after the ``-1`` step (presumably
    because the environment is done by then -- see the ``global_done``
    assertion). The structure of the last ``BrainInfo`` is then validated.

    Args:
        mock_communicator: Mock whose ``return_value`` is replaced with a
            ``MockCommunicator`` (continuous actions, no visual inputs).
        mock_launcher: Not used in the body; presumably a patch that keeps a
            real executable from being launched -- TODO confirm.
    """
    mock_communicator.return_value = MockCommunicator(
        discrete_action=False, visual_inputs=0)
    env = UnityEnvironment(' ')
    # Close the environment even if one of the steps fails unexpectedly.
    try:
        brain = env.brains['RealFakeBrain']
        action_size = brain.vector_action_space_size[0]
        brain_info = env.reset()
        brain_info = env.step(
            [0] * action_size * len(brain_info['RealFakeBrain'].agents))
        # A single-element action does not match the expected action length.
        with pytest.raises(UnityActionException):
            env.step([0])
        brain_info = env.step(
            [-1] * action_size * len(brain_info['RealFakeBrain'].agents))
        # A correctly sized action is rejected after the -1 step.
        with pytest.raises(UnityActionException):
            env.step(
                [0] * action_size * len(brain_info['RealFakeBrain'].agents))
    finally:
        env.close()

    assert env.global_done
    assert isinstance(brain_info, dict)
    info = brain_info['RealFakeBrain']
    assert isinstance(info, BrainInfo)
    assert isinstance(info.visual_observations, list)
    assert isinstance(info.vector_observations, np.ndarray)
    assert len(info.visual_observations) == brain.number_visual_observations
    assert len(info.vector_observations) == len(info.agents)
    assert len(info.vector_observations[0]) == \
        brain.vector_observation_space_size * brain.num_stacked_vector_observations
    # Done flags of the first and third agents in the last result.
    assert not info.local_done[0]
    assert info.local_done[2]
def test_ppo_model_dc_vector(mock_communicator, mock_launcher):
    """Build a PPO model for a discrete-action, vector-only brain and run it.

    The test passes when graph construction, variable initialization and a
    single ``session.run`` over the model outputs complete without raising.

    Args:
        mock_communicator: Mock whose ``return_value`` is replaced with a
            ``MockCommunicator`` (discrete actions, no visual inputs).
        mock_launcher: Not used in the body; presumably a launcher patch --
            TODO confirm.
    """
    tf.reset_default_graph()
    with tf.Session() as session, tf.variable_scope("FakeGraphScope"):
        mock_communicator.return_value = MockCommunicator(
            discrete_action=True, visual_inputs=0)
        env = UnityEnvironment(" ")

        model = PPOModel(env.brains["RealFakeBrain"])
        session.run(tf.global_variables_initializer())

        fetches = [
            model.output,
            model.all_log_probs,
            model.value,
            model.entropy,
            model.learning_rate,
        ]
        # Two observation rows, matching batch_size=2 with sequence_length=1.
        feeds = {
            model.batch_size: 2,
            model.sequence_length: 1,
            model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                       [3, 4, 5, 3, 4, 5]]),
            model.action_masks: np.ones([2, 2]),
        }
        session.run(fetches, feed_dict=feeds)
        env.close()
def test_ppo_model_cc_vector_curio(mock_communicator, mock_launcher):
    """Build a curiosity-enabled PPO model for a continuous brain and run it.

    In addition to the usual PPO outputs, ``model.intrinsic_reward`` is
    fetched. The test passes when the single ``session.run`` does not raise.

    Args:
        mock_communicator: Mock whose ``return_value`` is replaced with a
            ``MockCommunicator`` (continuous actions, no visual inputs).
        mock_launcher: Not used in the body; presumably a launcher patch --
            TODO confirm.
    """
    tf.reset_default_graph()
    with tf.Session() as session, tf.variable_scope("FakeGraphScope"):
        mock_communicator.return_value = MockCommunicator(
            discrete_action=False, visual_inputs=0)
        env = UnityEnvironment(' ')

        model = PPOModel(env.brains["RealFakeBrain"], use_curiosity=True)
        session.run(tf.global_variables_initializer())

        fetches = [
            model.output,
            model.all_log_probs,
            model.value,
            model.entropy,
            model.learning_rate,
            model.intrinsic_reward,
        ]
        # Current and next observations are fed with identical values, and
        # model.output is fed explicitly as well as fetched.
        feeds = {
            model.batch_size: 2,
            model.sequence_length: 1,
            model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                       [3, 4, 5, 3, 4, 5]]),
            model.next_vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                            [3, 4, 5, 3, 4, 5]]),
            model.output: [[0.0, 0.0], [0.0, 0.0]],
            model.epsilon: np.array([[0, 1], [2, 3]]),
        }
        session.run(fetches, feed_dict=feeds)
        env.close()
def test_ppo_model_cc_vector_rnn(mock_communicator, mock_launcher):
    """Build a recurrent PPO model for a continuous brain and run it once.

    The model is created with ``use_recurrent=True`` and a memory size of
    128; ``model.memory_out`` is fetched along with the usual PPO outputs.
    The test passes when the single ``session.run`` does not raise.

    Args:
        mock_communicator: Mock whose ``return_value`` is replaced with a
            ``MockCommunicator`` (continuous actions, no visual inputs).
        mock_launcher: Not used in the body; presumably a launcher patch --
            TODO confirm.
    """
    tf.reset_default_graph()
    with tf.Session() as session, tf.variable_scope("FakeGraphScope"):
        mock_communicator.return_value = MockCommunicator(
            discrete_action=False, visual_inputs=0)
        env = UnityEnvironment(" ")

        memory_size = 128
        model = PPOModel(
            env.brains["RealFakeBrain"],
            use_recurrent=True,
            m_size=memory_size,
        )
        session.run(tf.global_variables_initializer())

        fetches = [
            model.output,
            model.all_log_probs,
            model.value,
            model.entropy,
            model.learning_rate,
            model.memory_out,
        ]
        # One sequence of two steps: batch_size=1, sequence_length=2, with a
        # zeroed memory row as the recurrent input.
        feeds = {
            model.batch_size: 1,
            model.sequence_length: 2,
            model.memory_in: np.zeros((1, memory_size)),
            model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                       [3, 4, 5, 3, 4, 5]]),
            model.epsilon: np.array([[0, 1]]),
        }
        session.run(fetches, feed_dict=feeds)
        env.close()
def test_step(mock_communicator, mock_launcher):
    """Step a mocked environment through the agent-group API and check results.

    Sets correctly sized actions, verifies that an action batch with one row
    too few raises ``UnityActionException``, then validates the last
    ``BatchedStepResult`` that was fetched before the final step.

    Args:
        mock_communicator: Mock whose ``return_value`` is replaced with a
            ``MockCommunicator`` (continuous actions, no visual inputs).
        mock_launcher: Not used in the body; presumably a launcher patch --
            TODO confirm.
    """
    group = "RealFakeBrain"
    mock_communicator.return_value = MockCommunicator(
        discrete_action=False, visual_inputs=0)
    env = UnityEnvironment(" ")
    spec = env.get_agent_group_spec(group)

    env.step()
    step_result = env.get_step_result(group)
    agent_count = step_result.n_agents()
    zero_actions = np.zeros((agent_count, spec.action_size), dtype=np.float32)
    env.set_actions(group, zero_actions)
    env.step()

    # One row fewer than the number of agents must be rejected.
    with pytest.raises(UnityActionException):
        too_few = np.zeros(
            (agent_count - 1, spec.action_size), dtype=np.float32)
        env.set_actions(group, too_few)

    step_result = env.get_step_result(group)
    agent_count = step_result.n_agents()
    minus_one_actions = -1 * np.ones(
        (agent_count, spec.action_size), dtype=np.float32)
    env.set_actions(group, minus_one_actions)
    env.step()
    env.close()

    assert isinstance(step_result, BatchedStepResult)
    assert len(spec.observation_shapes) == len(step_result.obs)
    # Every observation array carries a leading agent dimension.
    for expected_shape, observation in zip(spec.observation_shapes,
                                           step_result.obs):
        assert (agent_count, ) + expected_shape == observation.shape
    assert not step_result.done[0]
    assert step_result.done[2]
def test_ppo_get_value_estimates(mock_communicator, mock_launcher, dummy_config):
    """Check ``PPOPolicy.get_value_estimates`` for done and not-done agents.

    Three cases are exercised on the first external brain:
      * ``done=False``: every key is a ``str`` and every value a ``float``.
      * ``done=True``: every value equals ``0.0``.
      * ``done=True`` after setting ``use_terminal_states = False`` on the
        ``"extrinsic"`` reward signal: every value differs from ``0.0``.

    Args:
        mock_communicator: Mock whose ``return_value`` is replaced with a
            ``MockCommunicator`` (continuous actions, no visual inputs).
        mock_launcher: Not used in the body; presumably a launcher patch --
            TODO confirm.
        dummy_config: Trainer parameter mapping; ``model_path`` and
            ``keep_checkpoints`` are written into it in place.
    """
    tf.reset_default_graph()
    mock_communicator.return_value = MockCommunicator(
        discrete_action=False, visual_inputs=0)
    env = UnityEnvironment(" ")
    # Close the environment even when an assertion below fails.
    try:
        brain_infos = env.reset()
        brain_name = env.external_brain_names[0]
        brain_info = brain_infos[brain_name]

        trainer_parameters = dummy_config
        trainer_parameters["model_path"] = brain_name
        trainer_parameters["keep_checkpoints"] = 3
        # NOTE(review): the two trailing flags are positional; presumably
        # is_training and load -- confirm against PPOPolicy's signature.
        policy = PPOPolicy(
            0, env.brains[brain_name], trainer_parameters, False, False)

        # NOTE(review): exact-type checks are kept as written; isinstance
        # would also accept subclasses such as numpy.float64.
        run_out = policy.get_value_estimates(brain_info, 0, done=False)
        for key, val in run_out.items():
            assert type(key) is str
            assert type(val) is float

        run_out = policy.get_value_estimates(brain_info, 0, done=True)
        for key, val in run_out.items():
            assert type(key) is str
            assert val == 0.0

        # Check if we ignore terminal states properly
        policy.reward_signals["extrinsic"].use_terminal_states = False
        run_out = policy.get_value_estimates(brain_info, 0, done=True)
        for key, val in run_out.items():
            assert type(key) is str
            assert val != 0.0
    finally:
        env.close()
def test_initialization(mock_communicator, mock_launcher):
    """Check a freshly created environment's brain name and step validation.

    Verifies that stepping with ``[0]`` raises ``UnityActionException`` and
    that the first brain name is ``'RealFakeBrain'``.

    Args:
        mock_communicator: Mock whose ``return_value`` is replaced with a
            ``MockCommunicator`` (continuous actions, no visual inputs).
        mock_launcher: Not used in the body; presumably a launcher patch --
            TODO confirm.
    """
    mock_communicator.return_value = MockCommunicator(
        discrete_action=False, visual_inputs=0)
    env = UnityEnvironment(' ')
    # Close the environment even when a check below fails.
    try:
        with pytest.raises(UnityActionException):
            env.step([0])
        assert env.brain_names[0] == 'RealFakeBrain'
    finally:
        env.close()
def test_close(mock_communicator, mock_launcher):
    """Check that closing the environment also closes its communicator.

    ``env._loaded`` must be truthy before ``close()`` and falsy afterwards,
    and the communicator must report ``has_been_closed``.

    Args:
        mock_communicator: Mock whose ``return_value`` is replaced with the
            ``MockCommunicator`` instance inspected at the end.
        mock_launcher: Not used in the body; presumably a launcher patch --
            TODO confirm.
    """
    # Keep a handle on the communicator so its state can be checked later.
    communicator = MockCommunicator(discrete_action=False, visual_inputs=0)
    mock_communicator.return_value = communicator

    env = UnityEnvironment(' ')
    assert env._loaded

    env.close()
    assert not env._loaded
    assert communicator.has_been_closed
def test_reset(mock_communicator, mock_launcher):
    """Reset a mocked environment and validate the resulting step result.

    After ``reset()``, the step result for ``"RealFakeBrain"`` must be a
    ``BatchedStepResult`` with one observation array per declared
    observation shape, each prefixed by the agent dimension.

    Args:
        mock_communicator: Mock whose ``return_value`` is replaced with a
            ``MockCommunicator`` (continuous actions, no visual inputs).
        mock_launcher: Not used in the body; presumably a launcher patch --
            TODO confirm.
    """
    group = "RealFakeBrain"
    mock_communicator.return_value = MockCommunicator(
        discrete_action=False, visual_inputs=0)
    env = UnityEnvironment(" ")
    spec = env.get_agent_group_spec(group)
    env.reset()
    step_result = env.get_step_result(group)
    env.close()

    assert isinstance(step_result, BatchedStepResult)
    assert len(spec.observation_shapes) == len(step_result.obs)
    agent_count = step_result.n_agents()
    for expected_shape, observation in zip(spec.observation_shapes,
                                           step_result.obs):
        assert (agent_count, ) + expected_shape == observation.shape
def test_ppo_policy_evaluate(mock_communicator, mock_launcher, dummy_config):
    """Check the action shape returned by ``PPOPolicy.evaluate``.

    Builds a policy for the first external brain and asserts that
    ``run_out["action"]`` has shape ``(3, 2)``.

    Args:
        mock_communicator: Mock whose ``return_value`` is replaced with a
            ``MockCommunicator`` (continuous actions, no visual inputs).
        mock_launcher: Not used in the body; presumably a launcher patch --
            TODO confirm.
        dummy_config: Trainer parameter mapping; ``model_path`` and
            ``keep_checkpoints`` are written into it in place.
    """
    tf.reset_default_graph()
    mock_communicator.return_value = MockCommunicator(
        discrete_action=False, visual_inputs=0)
    env = UnityEnvironment(" ")
    # Close the environment even when the assertion below fails.
    try:
        brain_infos = env.reset()
        brain_name = env.external_brain_names[0]
        brain_info = brain_infos[brain_name]

        trainer_parameters = dummy_config
        trainer_parameters["model_path"] = brain_name
        trainer_parameters["keep_checkpoints"] = 3
        # NOTE(review): the two trailing flags are positional; presumably
        # is_training and load -- confirm against PPOPolicy's signature.
        policy = PPOPolicy(
            0, env.brains[brain_name], trainer_parameters, False, False)

        run_out = policy.evaluate(brain_info)
        # Presumably 3 agents x 2 action dimensions from the mock -- TODO confirm.
        assert run_out["action"].shape == (3, 2)
    finally:
        env.close()
def test_reset(mock_communicator, mock_launcher):
    """Reset a mocked environment and validate the returned ``BrainInfo``.

    Checks container types, the number of visual observations, one vector
    observation row per agent, and the width of the first observation row.

    Args:
        mock_communicator: Mock whose ``return_value`` is replaced with a
            ``MockCommunicator`` (continuous actions, no visual inputs).
        mock_launcher: Not used in the body; presumably a launcher patch --
            TODO confirm.
    """
    mock_communicator.return_value = MockCommunicator(
        discrete_action=False, visual_inputs=0)
    env = UnityEnvironment(" ")
    brain = env.brains["RealFakeBrain"]
    brain_info = env.reset()
    env.close()

    assert isinstance(brain_info, dict)
    info = brain_info["RealFakeBrain"]
    assert isinstance(info, BrainInfo)
    assert isinstance(info.visual_observations, list)
    assert isinstance(info.vector_observations, np.ndarray)

    visual_count = len(info.visual_observations)
    assert visual_count == brain.number_visual_observations

    row_count = len(info.vector_observations)
    assert row_count == len(info.agents)

    row_width = len(info.vector_observations[0])
    assert row_width == brain.vector_observation_space_size
def test_reset(mock_communicator, mock_launcher):
    """Reset a mocked environment and validate the returned ``BrainInfo``.

    Checks that the environment is not ``global_done`` after reset, the
    container types, the number of visual observations, one vector
    observation row per agent, and that the first row's width equals the
    observation size times the number of stacked observations.

    Args:
        mock_communicator: Mock whose ``return_value`` is replaced with a
            ``MockCommunicator`` (continuous actions, no visual inputs).
        mock_launcher: Not used in the body; presumably a launcher patch --
            TODO confirm.
    """
    mock_communicator.return_value = MockCommunicator(
        discrete_action=False, visual_inputs=0)
    env = UnityEnvironment(' ')
    brain = env.brains['RealFakeBrain']
    brain_info = env.reset()
    env.close()

    assert not env.global_done
    assert isinstance(brain_info, dict)
    info = brain_info['RealFakeBrain']
    assert isinstance(info, BrainInfo)
    assert isinstance(info.visual_observations, list)
    assert isinstance(info.vector_observations, np.ndarray)

    visual_count = len(info.visual_observations)
    assert visual_count == brain.number_visual_observations

    row_count = len(info.vector_observations)
    assert row_count == len(info.agents)

    row_width = len(info.vector_observations[0])
    assert row_width == \
        brain.vector_observation_space_size * brain.num_stacked_vector_observations
def test_cc_bc_model(mock_communicator, mock_launcher):
    """Build a behavioral-cloning model for a continuous brain and run it.

    Fetches ``model.sample_action`` and ``model.policy`` for a batch of two
    vector observations. The test passes when nothing raises.

    Args:
        mock_communicator: Mock whose ``return_value`` is replaced with a
            ``MockCommunicator`` (continuous actions, no visual inputs).
        mock_launcher: Not used in the body; presumably a launcher patch --
            TODO confirm.
    """
    tf.reset_default_graph()
    with tf.Session() as session, tf.variable_scope("FakeGraphScope"):
        mock_communicator.return_value = MockCommunicator(
            discrete_action=False, visual_inputs=0)
        env = UnityEnvironment(" ")

        model = BehavioralCloningModel(env.brains["RealFakeBrain"])
        session.run(tf.global_variables_initializer())

        fetches = [model.sample_action, model.policy]
        feeds = {
            model.batch_size: 2,
            model.sequence_length: 1,
            model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                       [3, 4, 5, 3, 4, 5]]),
        }
        session.run(fetches, feed_dict=feeds)
        env.close()
def test_ppo_policy_evaluate(mock_communicator, mock_launcher, dummy_config):
    """Check the action shape returned by ``PPOPolicy.evaluate``.

    Converts the first agent group's step result and spec into a brain info
    and brain parameters, builds a policy from them, and asserts that
    ``run_out["action"]`` has shape ``(3, 2)``.

    Args:
        mock_communicator: Mock whose ``return_value`` is replaced with a
            ``MockCommunicator`` (continuous actions, no visual inputs).
        mock_launcher: Not used in the body; presumably a launcher patch --
            TODO confirm.
        dummy_config: Trainer parameter mapping; ``model_path`` and
            ``keep_checkpoints`` are written into it in place.
    """
    tf.reset_default_graph()
    mock_communicator.return_value = MockCommunicator(
        discrete_action=False, visual_inputs=0)
    env = UnityEnvironment(" ")
    # Close the environment even when the assertion below fails.
    try:
        env.reset()
        brain_name = env.get_agent_groups()[0]
        step_result = env.get_step_result(brain_name)
        # The spec is needed for both conversions, so fetch it once.
        group_spec = env.get_agent_group_spec(brain_name)
        brain_info = step_result_to_brain_info(step_result, group_spec)
        brain_params = group_spec_to_brain_parameters(brain_name, group_spec)

        trainer_parameters = dummy_config
        trainer_parameters["model_path"] = brain_name
        trainer_parameters["keep_checkpoints"] = 3
        # NOTE(review): the two trailing flags are positional; presumably
        # is_training and load -- confirm against PPOPolicy's signature.
        policy = PPOPolicy(0, brain_params, trainer_parameters, False, False)

        run_out = policy.evaluate(brain_info)
        # Presumably 3 agents x 2 action dimensions from the mock -- TODO confirm.
        assert run_out["action"].shape == (3, 2)
    finally:
        env.close()
def test_visual_dc_bc_model(mock_communicator, mock_launcher):
    """Build a behavioral-cloning model with two visual inputs and run it.

    The brain uses discrete actions and two visual observations. Fetches
    ``model.sample_action`` and ``model.action_probs`` for a batch of two.
    The test passes when nothing raises.

    Args:
        mock_communicator: Mock whose ``return_value`` is replaced with a
            ``MockCommunicator`` (discrete actions, two visual inputs).
        mock_launcher: Not used in the body; presumably a launcher patch --
            TODO confirm.
    """
    tf.reset_default_graph()
    with tf.Session() as session, tf.variable_scope("FakeGraphScope"):
        mock_communicator.return_value = MockCommunicator(
            discrete_action=True, visual_inputs=2)
        env = UnityEnvironment(' ')

        model = BehavioralCloningModel(env.brains["RealFakeBrain"])
        session.run(tf.global_variables_initializer())

        fetches = [model.sample_action, model.action_probs]
        # Each visual input is fed a batch of two 40x30x3 arrays of ones.
        feeds = {
            model.batch_size: 2,
            model.dropout_rate: 1.0,
            model.sequence_length: 1,
            model.vector_in: np.array([[1, 2, 3, 1, 2, 3],
                                       [3, 4, 5, 3, 4, 5]]),
            model.visual_in[0]: np.ones([2, 40, 30, 3]),
            model.visual_in[1]: np.ones([2, 40, 30, 3]),
            model.action_masks: np.ones([2, 2]),
        }
        session.run(fetches, feed_dict=feeds)
        env.close()
def test_initialization(mock_communicator, mock_launcher):
    """Check that a new environment reports exactly the mocked agent group.

    Args:
        mock_communicator: Mock whose ``return_value`` is replaced with a
            ``MockCommunicator`` (continuous actions, no visual inputs).
        mock_launcher: Not used in the body; presumably a launcher patch --
            TODO confirm.
    """
    mock_communicator.return_value = MockCommunicator(
        discrete_action=False, visual_inputs=0)
    env = UnityEnvironment(" ")
    # Close the environment even when the assertion below fails.
    try:
        assert env.get_agent_groups() == ["RealFakeBrain"]
    finally:
        env.close()
def test_initialization(mock_communicator, mock_launcher):
    """Check that a new environment's first brain name is the mocked one.

    Args:
        mock_communicator: Mock whose ``return_value`` is replaced with a
            ``MockCommunicator`` (continuous actions, no visual inputs).
        mock_launcher: Not used in the body; presumably a launcher patch --
            TODO confirm.
    """
    mock_communicator.return_value = MockCommunicator(
        discrete_action=False, visual_inputs=0)
    env = UnityEnvironment(" ")
    # Close the environment even when the assertion below fails.
    try:
        assert env.brain_names[0] == "RealFakeBrain"
    finally:
        env.close()