Example #1
    def test_make_vec_env(self):
        venv1 = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 5, 1,
                             True)
        venv2 = make_vec_env(ParallelVecEnv, make_gym_env, 'CartPole-v1', 5, 1,
                             True)
        assert isinstance(venv1, VecEnv) and isinstance(venv1, SerialVecEnv)
        assert isinstance(venv2, VecEnv) and isinstance(venv2, ParallelVecEnv)
        assert venv1.num_env == venv2.num_env
        env_spec1 = EnvSpec(venv1)
        assert env_spec1.num_env == venv1.num_env
        env_spec2 = EnvSpec(venv2)
        assert env_spec2.num_env == venv2.num_env
        assert venv1.observation_space == venv2.observation_space
        assert venv1.action_space == venv2.action_space
        assert venv1.reward_range == venv2.reward_range
        assert venv1.T == venv2.T
        o1 = venv1.reset()
        o2 = venv2.reset()
        # Both environments share the same random seeds, so the same actions
        # should produce the same results
        assert np.allclose(o1, o2)
        a = [1] * 5
        o1, r1, d1, _ = venv1.step(a)
        o2, r2, d2, _ = venv2.step(a)
        assert np.allclose(o1, o2)
        assert np.allclose(r1, r2)
        assert np.allclose(d1, d2)
        assert not venv1.closed
        venv1.close()
        assert venv1.closed
        assert not venv2.closed
        venv2.close()
        assert venv2.closed
Example #2
    def test_make_gym_env(self):
        env = make_gym_env(env_id='CartPole-v1', seed=0, monitor=False)
        assert isinstance(env, Env)
        assert not isinstance(env, gym.Env)
        assert isinstance(env, Wrapper)
        assert isinstance(env.observation_space, Box)
        assert isinstance(env.action_space, Discrete)
        env_spec = EnvSpec(env)
        assert env_spec.control_type == 'Discrete'
        assert env_spec.T == 500
        assert env_spec.max_episode_reward == 475.0
        assert env_spec.reward_range == (-float('inf'), float('inf'))
        assert not env_spec.is_vec_env

        with pytest.raises(TypeError):
            env_spec.num_env

        assert env.reset().shape == (4, )
        assert len(env.step(env.action_space.sample())) == 4

        del env
        del env_spec

        # Pendulum, continuous control
        # skip the parts already covered above
        env = make_gym_env('Pendulum-v0', seed=0)
        assert isinstance(env, Env)
        env_spec = EnvSpec(env)
        assert isinstance(env_spec.action_space, Box)
        assert env_spec.T == 200
        assert env_spec.control_type == 'Continuous'

        assert env.reset().shape == (3, )
        assert len(env.step(env.action_space.sample())) == 4
Example #3
def test_random_agent():
    env = make_gym_env('Pendulum-v0', 0)
    env_spec = EnvSpec(env)
    agent = RandomAgent(None, env_spec)
    out = agent.choose_action(env.reset())
    assert isinstance(out, dict)
    assert 'action' in out and out['action'].shape == (1,)

    venv = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 3, 0)
    env_spec = EnvSpec(venv)
    agent = RandomAgent(None, env_spec)
    out = agent.choose_action(venv.reset())
    assert isinstance(out, dict)
    assert 'action' in out and len(out['action']) == 3 and isinstance(out['action'][0], int)
Example #4
def test_random_policy():
    env = make_gym_env('Pendulum-v0', 0)
    env_spec = EnvSpec(env)
    policy = RandomPolicy(None, env_spec)
    out = policy(env.reset())
    assert isinstance(out, dict)
    assert 'action' in out and out['action'].shape == (1, )

    venv = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v0', 3, 0, False)
    env_spec = EnvSpec(venv)
    policy = RandomPolicy(None, env_spec)
    out = policy(venv.reset())
    assert isinstance(out, dict)
    assert 'action' in out and len(out['action']) == 3 and isinstance(
        out['action'][0], int)
Example #5
def test_terminal_state_from_episode():
    env = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 3, 0)
    env_spec = EnvSpec(env)

    with pytest.raises(AssertionError):
        terminal_state_from_episode([1, 2, 3])

    D = BatchEpisode(env_spec)
    D.obs[0] = [0.1, 0.2, 1.3]
    D.done[0] = [False, False, True]
    D.info[0] = [{}, {}, {'terminal_observation': 0.3}]

    D.obs[1] = [1, 2, 3, 4, 5, 6, 7, 8, 9]
    D.done[1] = [False] * 9

    D.obs[2] = [10, 15]
    D.done[2] = [False, True]
    D.info[2] = [{}, {'terminal_observation': 20}]

    terminal_states = terminal_state_from_episode(D)
    assert terminal_states.shape == (2, ) + env_spec.observation_space.shape
    assert np.allclose(terminal_states[0], 0.3)
    assert np.allclose(terminal_states[1], 20)

    D.done[0][-1] = False
    D.done[2][-1] = False
    assert terminal_state_from_episode(D) is None

    with pytest.raises(AssertionError):
        terminal_state_from_segment(D)
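The assertions above imply that terminal_state_from_episode gathers info['terminal_observation'] from each sub-environment whose last recorded step has done=True, returning None when no episode terminated. A minimal numpy sketch of that gathering logic (an illustration, not the library's implementation):

import numpy as np

def terminal_states(done, info):
    # Collect the stored terminal observation of every episode whose
    # last recorded step is done=True
    return np.asarray([i[-1]['terminal_observation']
                       for d, i in zip(done, info) if d[-1]])

done = [[False, False, True], [False] * 9, [False, True]]
info = [[{}, {}, {'terminal_observation': 0.3}], [], [{}, {'terminal_observation': 20}]]
assert np.allclose(terminal_states(done, info), [0.3, 20])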
Example #6
def test_returns_from_episode():
    env = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 3, 0)
    env_spec = EnvSpec(env)

    D = BatchEpisode(env_spec)
    D.r[0] = [1, 2, 3]
    D.done[0] = [False, False, True]
    D.r[1] = [1, 2, 3, 4, 5]
    D.done[1] = [False, False, False, False, False]
    D.r[2] = [1, 2, 3, 4, 5, 6, 7, 8]
    D.done[2] = [False, False, False, False, False, False, False, True]

    out = returns_from_episode(D, 1.0)
    assert out.shape == (3, D.maxT)
    assert np.allclose(out[0], [6, 5, 3, 0, 0, 0, 0, 0])
    assert np.allclose(out[1], [15, 14, 12, 9, 5, 0, 0, 0])
    assert np.allclose(out[2], [36, 35, 33, 30, 26, 21, 15, 8])
    del out

    out = returns_from_episode(D, 0.1)
    assert out.shape == (3, D.maxT)
    assert np.allclose(out[0], [1.23, 2.3, 3, 0, 0, 0, 0, 0])
    assert np.allclose(out[1], [1.2345, 2.345, 3.45, 4.5, 5, 0, 0, 0])
    assert np.allclose(
        out[2], [1.2345678, 2.345678, 3.45678, 4.5678, 5.678, 6.78, 7.8, 8])

    with pytest.raises(AssertionError):
        returns_from_segment(D, 0.1)
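The expected values follow the standard discounted-return recursion R_t = r_t + gamma * R_{t+1}, applied independently per episode. A minimal numpy sketch (not the library code) that reproduces the gamma=0.1 row of the first episode:

import numpy as np

def discounted_returns(rewards, gamma):
    # R_t = r_t + gamma * R_{t+1}, computed backwards over one episode
    out = np.zeros(len(rewards))
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        out[t] = running
    return out

assert np.allclose(discounted_returns([1, 2, 3], 0.1), [1.23, 2.3, 3])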
Example #7
def test_segment_runner(env_id):
    env = make_vec_env(SerialVecEnv, make_gym_env, env_id, 3, 0, True)
    env_spec = EnvSpec(env)

    agent = RandomAgent(None, env_spec)

    runner = SegmentRunner(None, agent, env)
    D = runner(4)

    assert len(D) == 3
    assert all([isinstance(d, Segment) for d in D])
    assert all([d.T == 4 for d in D])

    # Check if s in transition is equal to s_next in previous transition
    for d in D:
        for s1, s2 in zip(d.transitions[:-1], d.transitions[1:]):
            assert np.allclose(s1.s_next, s2.s)

    # Long horizon
    D = runner(T=1000)
    for d in D:
        assert d.T == 1000

    with pytest.raises(AssertionError):
        env = make_vec_env(SerialVecEnv, make_gym_env, env_id, 3, 0, False)
        SegmentRunner(None, agent, env)
Example #8
def test_bootstrapped_returns_from_segment():
    env = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 3, 0)
    env_spec = EnvSpec(env)

    D = BatchSegment(env_spec, 5)
    D.r[0] = [1, 2, 3, 4, 5]
    D.done[0] = [False, False, False, False, False]
    D.r[1] = [1, 2, 3, 4, 5]
    D.done[1] = [False, False, True, False, False]
    D.r[2] = [1, 2, 3, 4, 5]
    D.done[2] = [True, False, False, False, True]

    last_Vs = torch.tensor([10, 20, 30]).unsqueeze(1)

    out = bootstrapped_returns_from_segment(D, last_Vs, 1.0)
    assert out.shape == (3, 5)
    assert np.allclose(out[0], [25, 24, 22, 19, 15])
    assert np.allclose(out[1], [6, 5, 3, 29, 25])
    assert np.allclose(out[2], [1, 14, 12, 9, 5])
    del out

    out = bootstrapped_returns_from_segment(D, last_Vs, 0.1)
    assert out.shape == (3, 5)
    assert np.allclose(out[0], [1.2346, 2.346, 3.46, 4.6, 6])
    assert np.allclose(out[1], [1.23, 2.3, 3, 4.7, 7])
    assert np.allclose(out[2], [1, 2.345, 3.45, 4.5, 5])

    with pytest.raises(AssertionError):
        bootstrapped_returns_from_episode(D, last_Vs, 0.1)
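These values are consistent with the masked recursion R_t = r_t + gamma * (1 - done_t) * R_{t+1}, seeded with the bootstrap value R_T = last_V. A sketch under that assumption, checked against the second row above:

import numpy as np

def bootstrapped_returns(rewards, dones, last_V, gamma):
    # R_t = r_t + gamma * (1 - done_t) * R_{t+1}, with R_T = last_V
    out = np.zeros(len(rewards))
    running = last_V
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * (1.0 - dones[t]) * running
        out[t] = running
    return out

assert np.allclose(
    bootstrapped_returns([1, 2, 3, 4, 5], [False, False, True, False, False], 20, 1.0),
    [6, 5, 3, 29, 25])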
Example #9
def test_returns_from_segment():
    env = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 3, 0)
    env_spec = EnvSpec(env)

    D = BatchSegment(env_spec, 5)
    D.r[0] = [1, 2, 3, 4, 5]
    D.done[0] = [False, False, False, False, False]
    D.r[1] = [1, 2, 3, 4, 5]
    D.done[1] = [False, False, True, False, False]
    D.r[2] = [1, 2, 3, 4, 5]
    D.done[2] = [True, False, False, False, True]

    out = returns_from_segment(D, 1.0)
    assert out.shape == (3, 5)
    assert np.allclose(out[0], [15, 14, 12, 9, 5])
    assert np.allclose(out[1], [6, 5, 3, 9, 5])
    assert np.allclose(out[2], [1, 14, 12, 9, 5])
    del out

    out = returns_from_segment(D, 0.1)
    assert out.shape == (3, 5)
    assert np.allclose(out[0], [1.2345, 2.345, 3.45, 4.5, 5])
    assert np.allclose(out[1], [1.23, 2.3, 3, 4.5, 5])
    assert np.allclose(out[2], [1, 2.345, 3.45, 4.5, 5])

    with pytest.raises(AssertionError):
        returns_from_episode(D, 0.1)
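returns_from_segment behaves like the bootstrapped variant of Example #8 with a zero bootstrap value; a self-contained sketch of that reading, checked against the second row:

import numpy as np

def segment_returns(rewards, dones, gamma):
    # Same recursion as the bootstrapped sketch above, with R_T = 0
    out, running = np.zeros(len(rewards)), 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * (1.0 - dones[t]) * running
        out[t] = running
    return out

assert np.allclose(segment_returns([1, 2, 3, 4, 5], [False, False, True, False, False], 1.0),
                   [6, 5, 3, 9, 5])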
Example #10
def test_bootstrapped_returns_from_episode():
    env = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 3, 0)
    env_spec = EnvSpec(env)

    D = BatchEpisode(env_spec)
    D.r[0] = [1, 2, 3]
    D.done[0] = [False, False, True]
    D.completed[0] = True

    D.r[1] = [1, 2, 3, 4, 5]
    D.done[1] = [False, False, False, False, False]
    D.completed[1] = False

    D.r[2] = [1, 2, 3, 4, 5, 6, 7, 8]
    D.done[2] = [False, False, False, False, False, False, False, True]
    D.completed[2] = True

    last_Vs = torch.tensor([10, 20, 30]).unsqueeze(1)

    out = bootstrapped_returns_from_episode(D, last_Vs, 1.0)
    assert out.shape == (3, D.maxT)
    assert np.allclose(out[0], [6, 5, 3, 0, 0, 0, 0, 0])
    assert np.allclose(out[1], [35, 34, 32, 29, 25, 0, 0, 0])
    assert np.allclose(out[2], [36, 35, 33, 30, 26, 21, 15, 8])
    del out

    out = bootstrapped_returns_from_episode(D, last_Vs, 0.1)
    assert out.shape == (3, D.maxT)
    assert np.allclose(out[0], [1.23, 2.3, 3, 0, 0, 0, 0, 0])
    assert np.allclose(out[1], [1.2347, 2.347, 3.47, 4.7, 7, 0, 0, 0])
    assert np.allclose(
        out[2], [1.2345678, 2.345678, 3.45678, 4.5678, 5.678, 6.78, 7.8, 8])

    with pytest.raises(AssertionError):
        bootstrapped_returns_from_segment(D, last_Vs, 0.1)
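For episodes (unlike segments) the bootstrap value appears to be used only when the episode was not completed: episode 0 terminates and its returns ignore last_V=10, while episode 1 is cut off and absorbs last_V=20. A sketch of that rule, with hypothetical helper names:

def episode_bootstrapped_returns(rewards, completed, last_V, gamma):
    # Bootstrap with last_V only when the episode did not terminate
    running = 0.0 if completed else last_V
    out = []
    for r in reversed(rewards):
        running = r + gamma * running
        out.append(running)
    return out[::-1]

assert episode_bootstrapped_returns([1, 2, 3], True, 10, 1.0) == [6, 5, 3]
assert episode_bootstrapped_returns([1, 2, 3, 4, 5], False, 20, 1.0) == [35, 34, 32, 29, 25]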
Example #11
def test_final_state_from_segment():
    env = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 3, 0)
    env_spec = EnvSpec(env)

    with pytest.raises(AssertionError):
        final_state_from_segment([1, 2, 3])

    D = BatchSegment(env_spec, 4)
    D.obs = np.random.randn(*D.obs.shape)
    D.done.fill(False)

    D.done[0, -1] = True
    D.info[0] = [{}, {}, {}, {'terminal_observation': [0.1, 0.2, 0.3, 0.4]}]

    D.done[1, 2] = True
    D.info[1] = [{}, {}, {'terminal_observation': [1, 2, 3, 4]}, {}]

    D.done[2, -1] = True
    D.info[2] = [{}, {}, {}, {'terminal_observation': [10, 20, 30, 40]}]

    final_states = final_state_from_segment(D)
    assert final_states.shape == (3, ) + env_spec.observation_space.shape
    assert np.allclose(final_states[0], [0.1, 0.2, 0.3, 0.4])
    assert np.allclose(final_states[1], D.numpy_observations[1, -1, ...])
    assert not np.allclose(final_states[1], [1, 2, 3, 4])
    assert np.allclose(final_states[2], [10, 20, 30, 40])

    with pytest.raises(AssertionError):
        final_state_from_episode(D)
Example #12
    def __init__(self, config, agent, env):
        super().__init__(config, agent, env)

        self.env_spec = EnvSpec(self.env)

        self.obs_buffer = None  # for next call
        self.done_buffer = None  # masking
Example #13
def test_sticky_agent():
    sticky_action = 0
    
    env = make_gym_env('CartPole-v1', 0)
    env_spec = EnvSpec(env)
    agent = StickyAgent(None, env_spec, sticky_action)
    out = agent.choose_action(env.reset())
    assert isinstance(out, dict)
    assert 'action' in out and isinstance(out['action'], int)
    assert out['action'] == sticky_action
    
    venv = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 3, 0)
    env_spec = EnvSpec(venv)
    agent = StickyAgent(None, env_spec, sticky_action)
    out = agent.choose_action(venv.reset())
    assert isinstance(out, dict)
    assert 'action' in out and len(out['action']) == 3 and isinstance(out['action'][0], int)
    assert np.allclose(out['action'], [0, 0, 0])
Example #14
def test_categorical_head():
    with pytest.raises(AssertionError):
        env = make_gym_env('Pendulum-v0', 0)
        env_spec = EnvSpec(env)
        CategoricalHead(None, None, 30, env_spec)

    env = make_gym_env('CartPole-v1', 0)
    env_spec = EnvSpec(env)
    head = CategoricalHead(None, None, 30, env_spec)
    assert head.feature_dim == 30
    assert isinstance(head.action_head, nn.Linear)
    assert head.action_head.in_features == 30 and head.action_head.out_features == 2
    dist = head(torch.randn(3, 30))
    assert isinstance(dist, Categorical)
    assert list(dist.batch_shape) == [3]
    assert list(dist.probs.shape) == [3, 2]
    action = dist.sample()
    assert action.shape == (3, )
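What the test exercises can be reproduced with a plain linear layer feeding a Categorical distribution; a self-contained sketch of that idea (a stand-in, not CategoricalHead itself):

import torch
import torch.nn as nn
from torch.distributions import Categorical

feature_dim, num_actions = 30, 2  # CartPole-v1 has 2 discrete actions
action_head = nn.Linear(feature_dim, num_actions)
dist = Categorical(logits=action_head(torch.randn(3, feature_dim)))
assert list(dist.batch_shape) == [3]
assert list(dist.probs.shape) == [3, 2]
assert dist.sample().shape == (3,)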
Example #15
        def check(agent_name, env_name):
            # Create environment
            list_make_env = make_envs(make_env=make_gym_env, 
                                      env_id=env_name, 
                                      num_env=2, 
                                      init_seed=0)
            env = SerialVecEnv(list_make_env=list_make_env)
            env_spec = EnvSpec(env)
            assert env.num_env == 2

            # Create agent
            if agent_name == 'random':
                agent = RandomAgent(env_spec=env_spec, config=None)
            elif agent_name == 'agent1':
                agent = Agent1(config=None)
            elif agent_name == 'agent2':
                agent = Agent2(config=None)
            else:
                raise ValueError('Wrong agent name')

            # Create runner
            runner = SegmentRunner(agent=agent, env=env, gamma=1.0)

            # Small batch
            D = runner(T=3, reset=False)

            assert len(D) == 2
            assert all([isinstance(d, Segment) for d in D])
            assert all([d.T == 3 for d in D])
            assert all([d.gamma == 1.0 for d in D])

            # Check additional information
            for d in D:
                for t in d.transitions:
                    if agent_name != 'random':
                        assert 'action_logprob' in t.info

            # Check if s in transition is equal to s_next in previous transition
            for d in D:
                for s1, s2 in zip(d.transitions[:-1], d.transitions[1:]):
                    assert np.allclose(s1.s_next, s2.s)

            # Take one more step, test rolling effect, i.e. first state should be same as last state in previous D
            D2 = runner(T=1, reset=False)
            assert len(D2) == 2
            assert all([d.T == 1 for d in D2])
            for d, d2 in zip(D, D2):
                assert np.allclose(d2.all_s[0][0], d.transitions[-1].s_next)

            # Long horizon
            D = runner(T=200, reset=True)
            # Segment with identical time steps
            assert all([d.T == 200 for d in D])
            # For CartPole within 200 time steps, done=True should occur somewhere
            if env_name == 'CartPole-v1':
                assert any([True in d.all_done for d in D])
                assert all([len(d.trajectories) > 1 for d in D])
Example #16
    def make_env_spec(self):
        list_make_env = make_envs(make_env=make_gym_env,
                                  env_id='CartPole-v1',
                                  num_env=3,
                                  init_seed=0)
        venv = SerialVecEnv(list_make_env=list_make_env, rolling=True)
        env_spec = EnvSpec(venv)

        return env_spec
Example #17
    def make_env_spec(self):
        list_make_env = make_envs(make_env=make_gym_env,
                                  env_id='Pendulum-v0',
                                  num_env=1,
                                  init_seed=0)
        venv = SerialVecEnv(list_make_env=list_make_env)
        env_spec = EnvSpec(venv)

        return env_spec
Example #18
        def check(agent_name, env_name):
            # Create environment
            list_make_env = make_envs(make_env=make_gym_env, 
                                      env_id=env_name, 
                                      num_env=1, 
                                      init_seed=0)
            env = SerialVecEnv(list_make_env=list_make_env)
            env_spec = EnvSpec(env)
            
            # Create agent
            if agent_name == 'random':
                agent = RandomAgent(env_spec=env_spec, config=None)
            elif agent_name == 'agent1':
                agent = Agent1(config=None)
            elif agent_name == 'agent2':
                agent = Agent2(config=None)
            else:
                raise ValueError('Wrong agent name')
            
            # Test: not allowed more than one environment for TrajectoryRunner
            with pytest.raises(AssertionError):
                list_make_env2 = make_envs(make_env=make_gym_env,
                                           env_id=env_name,
                                           num_env=2,
                                           init_seed=0)
                env2 = SerialVecEnv(list_make_env=list_make_env2)

                runner2 = TrajectoryRunner(agent=agent, env=env2, gamma=1.0)
            
            # Create runner
            runner = TrajectoryRunner(agent=agent, env=env, gamma=1.0)

            # Small batch
            D = runner(N=3, T=4)

            assert len(D) == 3
            assert all([isinstance(d, Trajectory) for d in D])
            assert all([d.T == 4 for d in D])
            assert all([d.gamma == 1.0 for d in D])

            # Check additional information
            for d in D:
                for t in d.transitions:
                    if agent_name != 'random':
                        assert 'action_logprob' in t.info

            # Check if s in transition is equal to s_next in previous transition
            for d in D:
                for s1, s2 in zip(d.transitions[:-1], d.transitions[1:]):
                    assert np.allclose(s1.s_next, s2.s)
        
            # Long horizon
            D = runner(N=3, T=1000)
            for d in D:
                if d.T < 1000:
                    assert d.all_done[-1] == True
Example #19
def test_diag_gaussian_head():
    with pytest.raises(AssertionError):
        env = make_gym_env('CartPole-v1', 0)
        env_spec = EnvSpec(env)
        DiagGaussianHead(None, None, 30, env_spec)

    env = make_gym_env('Pendulum-v0', 0)
    env_spec = EnvSpec(env)
    head = DiagGaussianHead(None, None, 30, env_spec)
    assert head.feature_dim == 30
    assert isinstance(head.mean_head, nn.Linear)
    assert isinstance(head.logstd_head, nn.Parameter)
    assert head.mean_head.in_features == 30 and head.mean_head.out_features == 1
    assert list(head.logstd_head.shape) == [1]
    assert torch.eq(head.logstd_head, torch.tensor(-0.510825624))
    dist = head(torch.randn(3, 30))
    assert isinstance(dist, Independent) and isinstance(dist.base_dist, Normal)
    assert list(dist.batch_shape) == [3]
    action = dist.sample()
    assert list(action.shape) == [3, 1]

    head = DiagGaussianHead(None, None, 30, env_spec, std_style='softplus')
    dist = head(torch.randn(3, 30))
    action = dist.sample()
    assert list(action.shape) == [3, 1]
    assert torch.eq(head.logstd_head, torch.tensor(-0.19587036834631966))

    head = DiagGaussianHead(None, None, 30, env_spec, std_style='sigmoidal')
    assert torch.eq(head.logstd_head, torch.tensor(-0.871222446472449))

    head = DiagGaussianHead(None, None, 30, env_spec, std_state_dependent=True)
    dist = head(torch.randn(3, 30))
    action = dist.sample()
    assert list(action.shape) == [3, 1]

    head = DiagGaussianHead(None, None, 30, env_spec, constant_std=0.3)
    dist = head(torch.randn(3, 30))
    action = dist.sample()
    assert list(action.shape) == [3, 1]
    assert not head.logstd_head.requires_grad
    assert torch.eq(head.logstd_head, torch.tensor([-1.2039728]))
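The magic constants asserted above are consistent with an initial standard deviation of 0.6 (log(0.6) ≈ -0.5108, and the softplus pre-activation yielding 0.6 is log(exp(0.6) - 1) ≈ -0.1959), and with log(0.3) for the constant_std=0.3 case; a quick arithmetic check under that assumption:

import math

assert math.isclose(math.log(0.6), -0.510825624, rel_tol=1e-6)
assert math.isclose(math.log(math.exp(0.6) - 1), -0.19587036834631966, rel_tol=1e-6)
assert math.isclose(math.log(0.3), -1.2039728, rel_tol=1e-6)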
Example #20
    def __call__(self, config):
        # Set random seeds: PyTorch, numpy.random, random
        set_global_seeds(seed=config['seed'])
        
        # Create environment and seed it
        env = make_env(seed=config['seed'], 
                       monitor=False, 
                       monitor_dir=None)
        # Create environment specification
        env_spec = EnvSpec(env)  # TODO: integrate within make_env globally
        
        # Create device
        device = torch.device('cuda' if config['cuda'] else 'cpu')
        
        # Create logger
        logger = Logger(name='logger')
        
        # Create policy
        network = MLP(config=config)
        policy = CategoricalPolicy(network=network, env_spec=env_spec)
        policy.network = policy.network.to(device)

        # Create optimizer
        optimizer = optim.Adam(policy.network.parameters(), lr=config['lr'])
        # Learning rate scheduler
        max_epoch = config['train_iter']  # max number of LR decay steps; note where lr_scheduler is placed
        lambda_f = lambda epoch: 1 - epoch/max_epoch  # linearly decay the learning rate each training epoch
        lr_scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda_f)
        
        # Create agent
        agent_class = ActorCriticAgent  # alternatively: REINFORCEAgent
        agent = agent_class(policy=policy, 
                            optimizer=optimizer, 
                            config=config, 
                            lr_scheduler=lr_scheduler, 
                            device=device)
        
        # Create runner
        runner = Runner(agent=agent, 
                        env=env, 
                        gamma=config['gamma'])
        
        # Create engine
        engine = Engine(agent=agent, 
                        runner=runner, 
                        config=config, 
                        logger=logger)
        
        # Training
        train_output = engine.train()
        np.save('logs/returns_ActorCritic', train_output)
        
        return None
Example #21
def test_constraint_action():
    env = make_gym_env('Pendulum-v0', 0)
    env_spec = EnvSpec(env)

    action = torch.tensor([1.5])
    assert torch.eq(constraint_action(env_spec, action), torch.tensor([1.5]))

    action = torch.tensor([3.0])
    assert torch.eq(constraint_action(env_spec, action), torch.tensor([2.0]))

    action = torch.tensor([-10.0])
    assert torch.eq(constraint_action(env_spec, action), torch.tensor([-2.0]))
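The asserted behavior matches clamping actions to the action-space bounds, which for Pendulum-v0 are [-2, 2]; a sketch of that interpretation using torch.clamp (an assumption about what constraint_action does internally):

import torch

low, high = -2.0, 2.0  # Pendulum-v0 action bounds
assert torch.eq(torch.clamp(torch.tensor([3.0]), low, high), torch.tensor([2.0]))
assert torch.eq(torch.clamp(torch.tensor([-10.0]), low, high), torch.tensor([-2.0]))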
Example #22
    def test_env_spec(self):
        env = gym.make('CartPole-v1')
        env = GymWrapper(env)
        env.seed(0)

        env_spec = EnvSpec(env)
        assert isinstance(env_spec.observation_space, Box)
        assert isinstance(env_spec.action_space, Discrete)
        assert env_spec.control_type == 'Discrete'
        assert env_spec.T == 500
        assert env_spec.max_episode_reward == 475.0
        assert env_spec.reward_range == (-float('inf'), float('inf'))
Example #23
def test_gae_from_episode():
    env = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 3, 0)
    env_spec = EnvSpec(env)

    D = BatchEpisode(env_spec)
    D.r[0] = [1, 2, 3]
    D.done[0] = [False, False, True]
    D.completed[0] = True

    D.r[1] = [1, 2, 3, 4, 5]
    D.done[1] = [False, False, False, False, False]
    D.completed[1] = False

    D.r[2] = [1, 2, 3, 4, 5, 6, 7, 8]
    D.done[2] = [False, False, False, False, False, False, False, True]
    D.completed[2] = True

    all_Vs = [
        torch.tensor([[0.1], [0.5], [1.0]]),
        torch.tensor([[1.1], [1.5], [2.0]]),
        torch.tensor([[2.1], [2.5], [3.0]]),
        torch.tensor([[3.1], [3.5], [4.0]]),
        torch.tensor([[4.1], [4.5], [5.0]]),
        torch.tensor([[5.1], [5.5], [6.0]]),
        torch.tensor([[6.1], [6.5], [7.0]]),
        torch.tensor([[7.1], [7.5], [8.0]])
    ]
    last_Vs = torch.tensor([10, 20, 30]).unsqueeze(1)

    all_Vs = torch.stack(all_Vs, 1)

    out = gae_from_episode(D, all_Vs, last_Vs, 1.0, 0.5)
    assert out.shape == (3, D.maxT)
    assert np.allclose(out[0], [3.725, 3.45, 0.9, 0, 0, 0, 0, 0])
    assert np.allclose(out[1], [6.40625, 8.8125, 11.625, 15.25, 20.5, 0, 0, 0])
    assert np.allclose(out[2],
                       [5.84375, 7.6875, 9.375, 10.75, 11.5, 11., 8, 0.])
    del out

    out = gae_from_episode(D, all_Vs, last_Vs, 0.1, 0.2)
    assert out.shape == (3, D.maxT)
    assert np.allclose(out[0], [1.03256, 1.128, 0.9, 0, 0, 0, 0, 0])
    assert np.allclose(out[1], [0.665348, 0.7674, 0.87, 1, 2.5, 0, 0, 0])
    assert np.allclose(out[2], [
        0.206164098, 0.308204915, 0.410245728, 0.5122864, 0.61432, 0.716, 0.8,
        0
    ])

    with pytest.raises(AssertionError):
        gae_from_segment(D, all_Vs, last_Vs, 0.1, 0.2)
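The expected numbers follow generalized advantage estimation: delta_t = r_t + gamma * V_{t+1} - V_t (with the value after a terminal step taken as 0, or as last_V when the episode was cut off), then A_t = delta_t + gamma * lambda * A_{t+1}. A numpy sketch (not the library code) checked against the first episode:

import numpy as np

def gae(rewards, Vs, next_V, gamma, lam):
    # delta_t = r_t + gamma * V_{t+1} - V_t, with V_T = next_V
    # A_t = delta_t + gamma * lam * A_{t+1}
    Vs = list(Vs) + [next_V]
    out = np.zeros(len(rewards))
    running = 0.0
    for t in reversed(range(len(rewards))):
        delta = rewards[t] + gamma * Vs[t + 1] - Vs[t]
        running = delta + gamma * lam * running
        out[t] = running
    return out

# Episode 0 terminates, so the value after its last step is 0
assert np.allclose(gae([1, 2, 3], [0.1, 1.1, 2.1], 0.0, 1.0, 0.5), [3.725, 3.45, 0.9])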
Example #24
def test_episode_runner(vec_env, env_id):
    env = make_vec_env(vec_env, make_gym_env, env_id, 3, 0)
    env_spec = EnvSpec(env)

    if env_id == 'CartPole-v1':
        sticky_action = 1
    elif env_id == 'Pendulum-v0':
        sticky_action = [0.1]

    T = 30

    agent = StickyAgent(None, env_spec, sticky_action)
    runner = EpisodeRunner(None, agent, env)
    D = runner(T)

    assert D.N == 3
    assert D.maxT == max(D.Ts)

    seeder = Seeder(0)
    seed1, seed2, seed3 = seeder(3)
    env1 = make_gym_env(env_id, seed1)
    env2 = make_gym_env(env_id, seed2)
    env3 = make_gym_env(env_id, seed3)

    for n, ev in enumerate([env1, env2, env3]):
        obs = ev.reset()
        assert np.allclose(obs, D.observations[n][0])
        assert np.allclose(obs, D.numpy_observations[n, 0, ...])
        for t in range(T):
            obs, reward, done, info = ev.step(sticky_action)

            assert np.allclose(reward, D.rewards[n][t])
            assert np.allclose(reward, D.numpy_rewards[n, t])
            assert np.allclose(done, D.dones[n][t])
            assert done == D.numpy_dones[n, t]
            assert int(not done) == D.masks[n][t]
            assert int(not done) == D.numpy_masks[n, t]

            if done:
                assert np.allclose(obs, D.infos[n][t]['terminal_observation'])
                assert D.completes[n]
                assert np.allclose(0.0, D.numpy_observations[n, t + 1 + 1:,
                                                             ...])
                assert np.allclose(0.0, D.numpy_actions[n, t + 1:, ...])
                assert np.allclose(0.0, D.numpy_rewards[n, t + 1:])
                assert np.allclose(True, D.numpy_dones[n, t + 1:])
                assert np.allclose(0.0, D.numpy_masks[n, t + 1:])
                break
            else:
                assert np.allclose(obs, D.observations[n][t + 1])
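Note the bookkeeping these assertions encode: mask_t is simply 1 - done_t, and once a sub-environment terminates, the later slots of the batched arrays stay zero-padded (with dones padded as True). For instance:

import numpy as np

dones = np.array([False, False, True])
masks = 1 - dones.astype(int)
assert np.array_equal(masks, [1, 1, 0])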
Example #25
def test_td0_error_from_episode():
    env = make_vec_env(SerialVecEnv, make_gym_env, 'CartPole-v1', 3, 0)
    env_spec = EnvSpec(env)

    D = BatchEpisode(env_spec)
    D.r[0] = [1, 2, 3]
    D.done[0] = [False, False, True]
    D.completed[0] = True

    D.r[1] = [1, 2, 3, 4, 5]
    D.done[1] = [False, False, False, False, False]
    D.completed[1] = False

    D.r[2] = [1, 2, 3, 4, 5, 6, 7, 8]
    D.done[2] = [False, False, False, False, False, False, False, True]
    D.completed[2] = True

    all_Vs = [
        torch.tensor([[0.1], [0.5], [1.0]]),
        torch.tensor([[1.1], [1.5], [2.0]]),
        torch.tensor([[2.1], [2.5], [3.0]]),
        torch.tensor([[3.1], [3.5], [4.0]]),
        torch.tensor([[4.1], [4.5], [5.0]]),
        torch.tensor([[5.1], [5.5], [6.0]]),
        torch.tensor([[6.1], [6.5], [7.0]]),
        torch.tensor([[7.1], [7.5], [8.0]])
    ]
    last_Vs = torch.tensor([10, 20, 30]).unsqueeze(1)

    all_Vs = torch.stack(all_Vs, 1)

    out = td0_error_from_episode(D, all_Vs, last_Vs, 1.0)
    assert out.shape == (3, D.maxT)
    assert np.allclose(out[0], [2.0, 3, 0.9, 0, 0, 0, 0, 0])
    assert np.allclose(out[1], [2, 3, 4, 5, 20.5, 0, 0, 0])
    assert np.allclose(out[2], [2, 3, 4, 5, 6, 7, 8, 0])
    del out

    out = td0_error_from_episode(D, all_Vs, last_Vs, 0.1)
    assert out.shape == (3, D.maxT)
    assert np.allclose(out[0], [1.01, 1.11, 0.9, 0, 0, 0, 0, 0])
    assert np.allclose(out[1], [0.65, 0.75, 0.85, 0.95, 2.5, 0, 0, 0])
    assert np.allclose(out[2], [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0])

    with pytest.raises(AssertionError):
        td0_error_from_segment(D, all_Vs, last_Vs, 0.1)
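The expected rows are plain TD(0) errors, delta_t = r_t + gamma * V_{t+1} - V_t, with the value after a terminal step taken as 0 and last_V used when the episode is cut off; a short numpy sketch checked against the first episode:

import numpy as np

def td0_errors(rewards, Vs, next_V, gamma):
    # delta_t = r_t + gamma * V_{t+1} - V_t, with V_T = next_V
    Vs = list(Vs) + [next_V]
    return np.array([rewards[t] + gamma * Vs[t + 1] - Vs[t]
                     for t in range(len(rewards))])

assert np.allclose(td0_errors([1, 2, 3], [0.1, 1.1, 2.1], 0.0, 1.0), [2.0, 3.0, 0.9])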
Example #26
    def init(self, seed, config):
        # Make environment
        # Remember to seed it in each working function!
        self.env = make_vec_env(vec_env_class=SerialVecEnv,
                                make_env=make_gym_env,
                                env_id=config['env.id'],
                                num_env=1,
                                init_seed=seed)
        self.env_spec = EnvSpec(self.env)

        # Make agent
        self.network = Network(config=config, env_spec=self.env_spec)
        if self.env_spec.control_type == 'Discrete':
            self.policy = CategoricalPolicy(config=config, network=self.network, env_spec=self.env_spec)
        elif self.env_spec.control_type == 'Continuous':
            self.policy = GaussianPolicy(config=config, network=self.network, env_spec=self.env_spec)
        self.agent = Agent(policy=self.policy, config=config)
Example #27
File: algo.py Project: vin136/lagom
    def _prepare(self, config):
        self.env = make_vec_env(SerialVecEnv, make_gym_env, config['env.id'],
                                config['train.N'], 0)
        self.env = VecClipAction(self.env)
        if config['env.standardize']:
            self.env = VecStandardize(self.env,
                                      use_obs=True,
                                      use_reward=False,
                                      clip_obs=10.0,
                                      clip_reward=10.0,
                                      gamma=0.99,
                                      eps=1e-08)
        self.env_spec = EnvSpec(self.env)

        self.device = torch.device('cpu')

        self.agent = Agent(config, self.env_spec, self.device)
Example #28
def test_vec_monitor(env_id, vec_env_class, num_env, seed, runner_class):
    env = make_vec_env(vec_env_class, make_gym_env, env_id, num_env, seed)
    env_spec = EnvSpec(env)
    env = VecMonitor(env)
    agent = RandomAgent(None, env_spec)
    runner = runner_class(None, agent, env)
    D = runner(1050)

    for infos in D.infos:
        for info in infos:
            if 'terminal_observation' in info:
                assert 'episode' in info
                assert 'return' in info['episode'] and isinstance(
                    info['episode']['return'], np.float32)
                assert 'horizon' in info['episode'] and isinstance(
                    info['episode']['horizon'], np.int32)
                assert 'time' in info['episode'] and isinstance(
                    info['episode']['time'], float)
Example #29
def test_rolling_segment_runner(vec_env, env_id):
    env = make_vec_env(vec_env, make_gym_env, env_id, 3, 0)
    env_spec = EnvSpec(env)

    if env_id == 'CartPole-v1':
        sticky_action = 1
    elif env_id == 'Pendulum-v0':
        sticky_action = [0.1]

    T = 30

    agent = StickyAgent(None, env_spec, sticky_action)
    runner = RollingSegmentRunner(None, agent, env)
    D = runner(T)

    assert D.N == 3
    assert D.T == T

    seeder = Seeder(0)
    seed1, seed2, seed3 = seeder(3)
    env1 = make_gym_env(env_id, seed1)
    env2 = make_gym_env(env_id, seed2)
    env3 = make_gym_env(env_id, seed3)

    for n, ev in enumerate([env1, env2, env3]):
        obs = ev.reset()
        assert np.allclose(obs, D.numpy_observations[n, 0, ...])
        for t in range(T):
            obs, reward, done, info = ev.step(sticky_action)
            if done:
                info['terminal_observation'] = obs
                obs = ev.reset()

            assert np.allclose(obs, D.numpy_observations[n, t + 1, ...])
            assert np.allclose(sticky_action, D.numpy_actions[n, t, ...])
            assert np.allclose(reward, D.numpy_rewards[n, t])
            assert done == D.numpy_dones[n, t]
            assert int(not done) == D.numpy_masks[n, t]

            if done:
                assert np.allclose(info['terminal_observation'],
                                   D.infos[n][t]['terminal_observation'])
Example #30
    def test_vec_env(self, vec_env_class):
        # unpack class
        v_id, vec_env_class = vec_env_class

        venv = make_vec_env(vec_env_class, make_gym_env, 'CartPole-v1', 5, 1,
                            True)
        assert isinstance(venv, VecEnv)
        assert v_id in [0, 1]
        if v_id == 0:
            assert isinstance(venv, SerialVecEnv)
        elif v_id == 1:
            assert isinstance(venv, ParallelVecEnv)

        assert venv.num_env == 5
        assert not venv.closed and venv.viewer is None
        assert venv.unwrapped is venv
        assert isinstance(venv.observation_space, Box)
        assert isinstance(venv.action_space, Discrete)
        assert venv.T == 500
        assert venv.max_episode_reward == 475.0
        assert venv.reward_range == (-float('inf'), float('inf'))
        obs = venv.reset()
        assert len(obs) == 5
        assert np.asarray(obs).shape == (5, 4)
        assert all([not np.allclose(obs[0], obs[i]) for i in [1, 2, 3, 4]])
        a = [1] * 5
        obs, rewards, dones, infos = venv.step(a)
        assert all([len(item) == 5 for item in [obs, rewards, dones, infos]])
        assert all([not np.allclose(obs[0], obs[i]) for i in [1, 2, 3, 4]])

        # EnvSpec
        env_spec = EnvSpec(venv)
        assert isinstance(env_spec.action_space, Discrete)
        assert isinstance(env_spec.observation_space, Box)
        assert env_spec.control_type == 'Discrete'
        assert env_spec.T == 500
        assert env_spec.max_episode_reward == 475.0
        assert env_spec.reward_range == (-float('inf'), float('inf'))
        assert env_spec.is_vec_env

        venv.close()
        assert venv.closed