Example #1
def test_check_observations_async_vector_env(shared_memory):
    # CubeCrash-v0 - observation_space: Box(40, 32, 3)
    env_fns = [make_env("CubeCrash-v0", i) for i in range(8)]
    # MemorizeDigits-v0 - observation_space: Box(24, 32, 3)
    env_fns[1] = make_env("MemorizeDigits-v0", 1)
    with pytest.raises(RuntimeError):
        env = AsyncVectorEnv(env_fns, shared_memory=shared_memory)
        env.close(terminate=True)
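These test snippets are shown without their module header. A minimal sketch of the imports and the make_env helper they assume, based on the gym test suite (exact names and import paths may vary across gym versions):

import numpy as np
import pytest
import gym
from gym.spaces import Box
from gym.error import AlreadyPendingCallError, NoAsyncCallError, ClosedEnvironmentError
from gym.vector.async_vector_env import AsyncVectorEnv
from gym.vector.sync_vector_env import SyncVectorEnv

def make_env(env_name, seed):
    # Return a thunk; the vector env calls it to build and seed each sub-env.
    def _make():
        env = gym.make(env_name)
        env.seed(seed)
        return env
    return _make

The shared_memory and use_single_action_space arguments seen in these tests are supplied by pytest, typically via @pytest.mark.parametrize("shared_memory", [True, False]).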
def test_check_observations_sync_vector_env():
    # CubeCrash-v0 - observation_space: Box(40, 32, 3)
    env_fns = [make_env('CubeCrash-v0', i) for i in range(8)]
    # MemorizeDigits-v0 - observation_space: Box(24, 32, 3)
    env_fns[1] = make_env('MemorizeDigits-v0', 1)
    with pytest.raises(RuntimeError):
        env = SyncVectorEnv(env_fns)
        env.close()
def test_step_sync_vector_env(use_single_action_space):
    env_fns = [make_env('CubeCrash-v0', i) for i in range(8)]
    try:
        env = SyncVectorEnv(env_fns)
        observations = env.reset()
        if use_single_action_space:
            actions = [env.single_action_space.sample() for _ in range(8)]
        else:
            actions = env.action_space.sample()
        observations, rewards, dones, _ = env.step(actions)
    finally:
        env.close()

    assert isinstance(env.observation_space, Box)
    assert isinstance(observations, np.ndarray)
    assert observations.dtype == env.observation_space.dtype
    assert observations.shape == (8, ) + env.single_observation_space.shape
    assert observations.shape == env.observation_space.shape

    assert isinstance(rewards, np.ndarray)
    assert isinstance(rewards[0], (float, np.floating))
    assert rewards.ndim == 1
    assert rewards.size == 8

    assert isinstance(dones, np.ndarray)
    assert dones.dtype == np.bool_
    assert dones.ndim == 1
    assert dones.size == 8
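Outside of pytest, the same batched reset/step API can be exercised directly. A minimal standalone sketch, assuming an older gym release where step returns (observations, rewards, dones, infos) as in the snippets above:

import gym
from gym.vector import SyncVectorEnv

env = SyncVectorEnv([lambda: gym.make("CartPole-v0") for _ in range(4)])
observations = env.reset()                   # shape: (4,) + single_observation_space.shape
actions = env.action_space.sample()          # one batched action per sub-env
observations, rewards, dones, infos = env.step(actions)
env.close()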
Example #4
def test_step_out_of_order_async_vector_env(shared_memory):
    env_fns = [make_env("CubeCrash-v0", i) for i in range(4)]
    with pytest.raises(NoAsyncCallError):
        try:
            env = AsyncVectorEnv(env_fns, shared_memory=shared_memory)
            actions = env.action_space.sample()
            observations = env.reset()
            observations, rewards, dones, infos = env.step_wait()
        except NoAsyncCallError as exception:
            assert exception.name == "step"
            raise
        finally:
            env.close(terminate=True)

    with pytest.raises(AlreadyPendingCallError):
        try:
            env = AsyncVectorEnv(env_fns, shared_memory=shared_memory)
            actions = env.action_space.sample()
            env.reset_async()
            env.step_async(actions)
        except AlreadyPendingCallError as exception:
            assert exception.name == "reset"
            raise
        finally:
            env.close(terminate=True)
def test_create_sync_vector_env():
    env_fns = [make_env('CubeCrash-v0', i) for i in range(8)]
    try:
        env = SyncVectorEnv(env_fns)
    finally:
        env.close()

    assert env.num_envs == 8
Example #6
def test_create_async_vector_env(shared_memory):
    env_fns = [make_env("CubeCrash-v0", i) for i in range(8)]
    try:
        env = AsyncVectorEnv(env_fns, shared_memory=shared_memory)
    finally:
        env.close()

    assert env.num_envs == 8
Example #7
def test_no_copy_async_vector_env(shared_memory):
    env_fns = [make_env("CubeCrash-v0", i) for i in range(8)]
    try:
        env = AsyncVectorEnv(env_fns, shared_memory=shared_memory, copy=False)
        observations = env.reset()
        observations[0] = 128
        assert np.all(env.observations[0] == 128)
    finally:
        env.close()
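The no-copy test above works because AsyncVectorEnv's copy flag controls whether reset() and step() return a copy of the internal observation buffer: with copy=False the buffer itself (env.observations) is returned, so in-place writes to the returned array are visible on the env; with the default copy=True they are not.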
def test_reset_sync_vector_env():
    env_fns = [make_env('CubeCrash-v0', i) for i in range(8)]
    try:
        env = SyncVectorEnv(env_fns)
        observations = env.reset()
    finally:
        env.close()

    assert isinstance(env.observation_space, Box)
    assert isinstance(observations, np.ndarray)
    assert observations.dtype == env.observation_space.dtype
    assert observations.shape == (8, ) + env.single_observation_space.shape
    assert observations.shape == env.observation_space.shape
def main():
    np.set_printoptions(suppress=True,
                        formatter={'float_kind': '{:0.2f}'.format})
    env_fns = [make_env('MountainCar-v0', i) for i in range(4)]
    # Keep the vectorized env open for the whole training run; it is closed after the loop.
    env = SyncVectorEnv(env_fns)

    state_size = env.observation_space.shape[1]
    action_size = env.single_action_space.n

    NUM_EPISODES = 1000
    STEPS_PER_EPISODE = 200
    batch_size = 32
    eps_mean_reward = [0.0] * NUM_EPISODES

    agent = DQNAgent(state_size, action_size)
    start_time = datetime.now()
    for ep_count in range(NUM_EPISODES):
        episode_rew = 0
        state = env.reset()
        if (ep_count == 0):
            print("ep={} state.shape={}".format(ep_count, state.shape))
        #state = np.reshape(state, [-1, state_size])
        ep_start_time = datetime.now()
        for time in range(STEPS_PER_EPISODE):
            # env.render()
            action = agent.act(state)
            next_state, reward, done, _ = env.step(action)
            episode_rew += np.sum(reward)
            #next_state = np.reshape(next_state, [-1, state_size])
            if (time == 0):
                print("ep={} time={} action.len={} next_state.shape={} elaps_time={}".format( \
                    ep_count, time, len(action), next_state.shape, (datetime.now() - ep_start_time)) )
            #add to DQN buffer
            for idx in range(0, env.num_envs):
                agent.memorize(state[idx], action[idx], reward[idx],
                               next_state[idx], done[idx])
            state = next_state
            if time >= STEPS_PER_EPISODE - 1:
                eps_mean_reward[ep_count] = np.mean(episode_rew) / time
                print("ep: {}/{}, mean_avg_reward: {}, exec_time= {}".format( \
                    ep_count , NUM_EPISODES, eps_mean_reward[ep_count], (datetime.now() - ep_start_time)))
            #update DQN model if there are enough samples
            if len(agent.memory) > batch_size and time % 8 == 0:
                agent.replay(batch_size)
        #if ep_count % 2 == 0:
        #    agent.save(str(os.path.join(save_path,'ma-foraging-dqn.h5')))
    print("Finish train DQN Agent with {} episodes in {}".format(
        NUM_EPISODES, (datetime.now() - start_time)))
def test_vector_env_equal(shared_memory):
    env_fns = [make_env('CubeCrash-v0', i) for i in range(4)]
    num_steps = 100
    try:
        async_env = AsyncVectorEnv(env_fns, shared_memory=shared_memory)
        sync_env = SyncVectorEnv(env_fns)

        async_env.seed(0)
        sync_env.seed(0)

        assert async_env.num_envs == sync_env.num_envs
        assert async_env.observation_space == sync_env.observation_space
        assert async_env.single_observation_space == sync_env.single_observation_space
        assert async_env.action_space == sync_env.action_space
        assert async_env.single_action_space == sync_env.single_action_space

        async_observations = async_env.reset()
        sync_observations = sync_env.reset()
        assert np.all(async_observations == sync_observations)

        for _ in range(num_steps):
            actions = async_env.action_space.sample()
            assert actions in sync_env.action_space

            async_observations, async_rewards, async_dones, _ = async_env.step(
                actions)
            sync_observations, sync_rewards, sync_dones, _ = sync_env.step(
                actions)

            assert np.all(async_observations == sync_observations)
            assert np.all(async_rewards == sync_rewards)
            assert np.all(async_dones == sync_dones)

    finally:
        async_env.close()
        sync_env.close()
Example #11
import time
import multiprocessing as mp
import threading

import gym
import numpy as np

from gym.vector.tests.utils import make_env, make_slow_env
from gym.vector.async_vector_env import AsyncVectorEnv

import concurrent.futures

from agent import Agent
from agent_test import AgentTest

print("Cores", mp.cpu_count())
if __name__ == '__main__':
    #Number of agents working in parallel
    num_agents = 100
    env_fns = [make_env('CartPole-v0', num_agents) for _ in range(num_agents)]
    env = AsyncVectorEnv(env_fns)
    agent = Agent(env, state_size=4, action_size=2, num_agents=num_agents)

    env_test = gym.make('CartPole-v0')
    agent_test = AgentTest(env_test, state_size=4, action_size=2)

    one_set_of_weights = 0.1*np.random.randn(agent.get_weights_dim())
    all_sets_of_weights = []
    for i in range(num_agents):
        all_sets_of_weights.append(one_set_of_weights)

    start_time = time.time()
    for i in range(100):
        rewards = agent.evaluate(all_sets_of_weights, num_agents)
    print("Time needed for VecEnv approach: ", time.time() - start_time)

    # Shut down the worker processes explicitly instead of relying on garbage collection.
    env.close()
Example #12
def test_already_closed_async_vector_env(shared_memory):
    env_fns = [make_env("CubeCrash-v0", i) for i in range(4)]
    with pytest.raises(ClosedEnvironmentError):
        env = AsyncVectorEnv(env_fns, shared_memory=shared_memory)
        env.close()
        observations = env.reset()