Example #1
        def check(agent_name, env_name):
            # Create environment
            list_make_env = make_envs(make_env=make_gym_env, 
                                      env_id=env_name, 
                                      num_env=1, 
                                      init_seed=0)
            env = SerialVecEnv(list_make_env=list_make_env)
            env_spec = EnvSpec(env)
            
            # Create agent
            if agent_name == 'random':
                agent = RandomAgent(env_spec=env_spec, config=None)
            elif agent_name == 'agent1':
                agent = Agent1(config=None)
            elif agent_name == 'agent2':
                agent = Agent2(config=None)
            else:
                raise ValueError('Wrong agent name')
            
            # TrajectoryRunner does not allow more than one environment
            with pytest.raises(AssertionError):
                list_make_env2 = make_envs(make_env=make_gym_env, 
                                          env_id=env_name, 
                                          num_env=2, 
                                          init_seed=0)
                env2 = SerialVecEnv(list_make_env=list_make_env2)

                runner2 = TrajectoryRunner(agent=agent, env=env2, gamma=1.0)
            
            # Create runner
            runner = TrajectoryRunner(agent=agent, env=env, gamma=1.0)

            # Small batch
            D = runner(N=3, T=4)

            assert len(D) == 3
            assert all([isinstance(d, Trajectory) for d in D])
            assert all([d.T == 4 for d in D])
            assert all([d.gamma == 1.0 for d in D])

            # Check additional information
            for d in D:
                for t in d.transitions:
                    if agent_name != 'random':
                        assert 'action_logprob' in t.info

            # Check that s in each transition equals s_next of the previous transition
            for d in D:
                for s1, s2 in zip(d.transitions[:-1], d.transitions[1:]):
                    assert np.allclose(s1.s_next, s2.s)
        
            # Long horizon
            D = runner(N=3, T=1000)
            for d in D:
                if d.T < 1000:
                    assert d.all_done[-1]
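The check helper in Example #1 is clearly meant to be driven over several agent/environment combinations. Below is a minimal sketch of such a driver using pytest.mark.parametrize; it assumes check is available at module scope, and the environment ids are illustrative rather than taken from the test suite.

import pytest

# Hypothetical driver for the check() helper above. The agent names mirror
# the branches inside check(); the environment ids are illustrative.
@pytest.mark.parametrize('env_name', ['CartPole-v1', 'Pendulum-v0'])
@pytest.mark.parametrize('agent_name', ['random', 'agent1', 'agent2'])
def test_trajectory_runner(agent_name, env_name):
    check(agent_name, env_name)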
Example #2
def test_sanity_check_env():
    env = SerialVecEnv(make_sanity_envs([2, 3]))
    assert isinstance(env.observation_space, Box) and isinstance(
        env.action_space, Discrete)
    obs = env.reset()
    assert np.allclose(obs, [[0.01], [0.01]])
    obs, reward, done, info = env.step([0, 1])
    assert np.allclose(obs, [[1.01], [1.01]])
    assert np.allclose(reward, [0.1, 0.1])
    assert np.allclose(done, [False, False])
    assert all(len(i) == 0 for i in info)

    obs, reward, done, info = env.step([1, 0])
    assert np.allclose(obs, [[0.01], [2.01]])
    assert np.allclose(reward, [0.2, 0.2])
    assert np.allclose(done, [True, False])
    assert info[0]['terminal_observation'] == [2.01] and len(info[1]) == 0

    obs, reward, done, info = env.step([1, 1])
    assert np.allclose(obs, [[1.01], [0.01]])
    assert np.allclose(reward, [0.1, 0.3])
    assert np.allclose(done, [False, True])
    assert len(info[0]) == 0 and info[1]['terminal_observation'] == [3.01]

    obs, reward, done, info = env.step([0, 0])
    assert np.allclose(obs, [[0.01], [1.01]])
    assert np.allclose(reward, [0.2, 0.1])
    assert np.allclose(done, [True, False])
    assert info[0]['terminal_observation'] == [2.01] and len(info[1]) == 0
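The assertions in Example #2 rely on the auto-reset convention of the vectorized environment: once a sub-environment reports done=True, the returned observation already belongs to the next episode, and the last observation of the finished episode is kept under info['terminal_observation']. The sketch below spells out that convention for a single gym-style environment; it illustrates the behaviour the test checks and is not lagom's actual implementation.

def step_with_autoreset(env, action):
    # Step one gym-style environment and apply the auto-reset convention
    # exercised by the test above (sketch only).
    obs, reward, done, info = env.step(action)
    if done:
        # Stash the terminal observation, then reset immediately so the
        # caller always receives a valid current observation.
        info['terminal_observation'] = obs
        obs = env.reset()
    return obs, reward, done, info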
Example #3
        def check(agent_name, env_name):
            # Create environment
            list_make_env = make_envs(make_env=make_gym_env, 
                                      env_id=env_name, 
                                      num_env=2, 
                                      init_seed=0)
            env = SerialVecEnv(list_make_env=list_make_env)
            env_spec = EnvSpec(env)
            assert env.num_env == 2

            # Create agent
            if agent_name == 'random':
                agent = RandomAgent(env_spec=env_spec, config=None)
            elif agent_name == 'agent1':
                agent = Agent1(config=None)
            elif agent_name == 'agent2':
                agent = Agent2(config=None)
            else:
                raise ValueError('Wrong agent name')

            # Create runner
            runner = SegmentRunner(agent=agent, env=env, gamma=1.0)

            # Small batch
            D = runner(T=3, reset=False)

            assert len(D) == 2
            assert all([isinstance(d, Segment) for d in D])
            assert all([d.T == 3 for d in D])
            assert all([d.gamma == 1.0 for d in D])

            # Check additional information
            for d in D:
                for t in d.transitions:
                    if agent_name != 'random':
                        assert 'action_logprob' in t.info

            # Check that s in each transition equals s_next of the previous transition
            for d in D:
                for s1, s2 in zip(d.transitions[:-1], d.transitions[1:]):
                    assert np.allclose(s1.s_next, s2.s)

            # Take one more step to test the rolling effect: the first state should equal the last s_next in the previous D
            D2 = runner(T=1, reset=False)
            assert len(D2) == 2
            assert all([d.T == 1 for d in D2])
            for d, d2 in zip(D, D2):
                assert np.allclose(d2.all_s[0][0], d.transitions[-1].s_next)

            # Long horizon
            D = runner(T=200, reset=True)
            # Segment with identical time steps
            assert all([d.T == 200 for d in D])
            # For CartPole, within 200 time steps there should be at least one done=True
            if env_name == 'CartPole-v1':
                assert any([True in d.all_done for d in D])
                assert all([len(d.trajectories) > 1 for d in D])
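Read against Example #1, this test also documents the division of labour between the two runners: TrajectoryRunner only accepts a single environment and returns N variable-length Trajectory objects of at most T steps each, while SegmentRunner accepts any number of environments and returns one fixed-length Segment per environment, rolling over episode boundaries when reset=False. A short usage sketch for the runner constructed above, with illustrative T values:

# Usage sketch for the SegmentRunner built in check() above; the T values
# are illustrative assumptions.
D = runner(T=5, reset=False)        # one 5-step Segment per environment
D_next = runner(T=5, reset=False)   # continues rolling from where D left off
D_fresh = runner(T=5, reset=True)   # starts from freshly reset environments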
Example #4
    def make_env_spec(self):
        list_make_env = make_envs(make_env=make_gym_env,
                                  env_id='CartPole-v1',
                                  num_env=3,
                                  init_seed=0)
        venv = SerialVecEnv(list_make_env=list_make_env, rolling=True)
        env_spec = EnvSpec(venv)

        return env_spec
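A sketch of how a helper like make_env_spec might be consumed; since it is defined as a method, the call through self is assumed, and the RandomAgent usage mirrors Example #1.

# Hypothetical consumer of make_env_spec() above (sketch only); RandomAgent
# usage mirrors Example #1.
env_spec = self.make_env_spec()
agent = RandomAgent(env_spec=env_spec, config=None)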
Example #5
    def make_env_spec(self):
        list_make_env = make_envs(make_env=make_gym_env,
                                  env_id='Pendulum-v0',
                                  num_env=1,
                                  init_seed=0)
        venv = SerialVecEnv(list_make_env=list_make_env)
        env_spec = EnvSpec(venv)

        return env_spec
Example #6
        def algorithm(config, seed, device):
            logdir = Path(config['log.dir']) / str(config['ID']) / str(seed)
            seeder = Seeder(seed)
            seeds = seeder(size=config['env.count'])
            env_constructors = []
            for seed in seeds:
                env_constructors.append(partial(CraftingEnv, seed))
            env = VecStandardize(SerialVecEnv(env_constructors),
                                 clip_reward=100.0)
            env_spec = EnvSpec(env)

            agent = Agent(config, env_spec, device)
            runner = RollingSegmentRunner(config, agent, env)
            engine = Engine(agent, runner, env)

            for i in range(config['train.iter']):
                training_result = engine.train(i)
                print(f'Training iteration {i} complete.')
                if i % config['log.interval'] == 0:
                    logs = engine.log_train(training_result)
                    pickle_dump(obj=logs, f=logdir / f'iter_{i}_train_logs', ext='.pkl')
                    torch.save(engine.agent.policy.state_dict(),
                               logdir / 'trained_params')
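The algorithm function reads a handful of configuration keys; the minimal dict below would satisfy the accesses visible above. The keys are taken from the code, the values are illustrative assumptions.

# Minimal config for algorithm() above; keys come from the code,
# values are illustrative.
config = {
    'ID': 0,
    'log.dir': 'logs',
    'env.count': 4,
    'train.iter': 100,
    'log.interval': 10,
}
# algorithm(config, seed=0, device=torch.device('cpu'))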
Example #7
from dial_control_rl import agent
from dial_control_rl.env import CraftingEnv

from lagom.utils import Seeder
from lagom.envs.vec_env import SerialVecEnv, VecStandardize
from lagom.envs import EnvSpec
from functools import partial

import torch

env = CraftingEnv()

seeder = Seeder(0)
seeds = seeder(size=1)
env_constructors = []
for seed in seeds:
    env_constructors.append(partial(CraftingEnv, seed))
env = VecStandardize(SerialVecEnv(env_constructors), clip_reward=100.0)
env_spec = EnvSpec(env)

policy = agent.Policy({'algo.rl': 0}, env_spec, torch.device('cpu'))
policy.load_state_dict(params)
policy = policy.double()


def V(x):
    out = policy(torch.tensor(x), ['V'])
    return out['V'][0]


def Q(x):
    out = policy(torch.tensor(x), ['action_dist'])
    out = out['action_dist']
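The params passed to load_state_dict above is not defined in this snippet; it is presumably the state dict saved under 'trained_params' by the training loop in Example #6. A hedged sketch of loading it, with the log directory path as an assumption:

from pathlib import Path

import torch

# Hypothetical source of `params`: the state dict written by Example #6
# (the log directory layout is an assumption).
params = torch.load(Path('logs') / '0' / '0' / 'trained_params',
                    map_location='cpu')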
Example #8
    def __call__(self, config):
        # Set random seeds: PyTorch, numpy.random, random
        set_global_seeds(seed=config['seed'])

        # Make a list of make_env functions
        list_make_env = make_envs(make_env=make_gym_env,
                                  env_id=config['env:id'],
                                  num_env=config['train:N'],
                                  init_seed=config['seed'] * 2)
        # Create vectorized environment
        env = SerialVecEnv(list_make_env=list_make_env)
        # Create environment specification
        env_spec = EnvSpec(env)

        # Create device
        device = torch.device(
            f'cuda:{config["cuda_id"]}' if config['cuda'] else 'cpu')

        # Create policy
        network = MLP(config=config).to(device)
        policy = CategoricalPolicy(network=network, env_spec=env_spec)

        # Create optimizer
        optimizer = optim.Adam(policy.network.parameters(),
                               lr=config['algo:lr'])
        # Create learning rate scheduler
        if config['algo:use_lr_scheduler']:
            max_epoch = config['train:iter']  # maximum number of LR decays; note where the lr_scheduler is created
            lambda_f = lambda epoch: 1 - epoch / max_epoch  # linearly decay the learning rate each training epoch
            lr_scheduler = optim.lr_scheduler.LambdaLR(optimizer,
                                                       lr_lambda=lambda_f)

        # Create agent
        kwargs = {'device': device}
        if config['algo:use_lr_scheduler']:
            kwargs['lr_scheduler'] = lr_scheduler
        agent = A2CAgent(policy=policy,
                         optimizer=optimizer,
                         config=config,
                         **kwargs)

        # Create runner
        runner = SegmentRunner(agent=agent,
                               env=env,
                               gamma=config['algo:gamma'])

        # Create engine
        engine = Engine(agent=agent, runner=runner, config=config, logger=None)

        # Training and evaluation
        train_logs = []
        eval_logs = []
        for i in range(config['train:iter']):
            train_output = engine.train(i)

            # Logging and evaluation
            if i == 0 or (i + 1) % config['log:interval'] == 0:
                # Log training and record the loggings
                train_logger = engine.log_train(train_output)
                train_logs.append(train_logger.logs)
                # Log evaluation and record the loggings
                eval_output = engine.eval(i)
                eval_logger = engine.log_eval(eval_output)
                eval_logs.append(eval_logger.logs)

        # Save the loggings
        np.save(
            Path(config['log:dir']) / str(config['ID']) / 'train', train_logs)
        np.save(
            Path(config['log:dir']) / str(config['ID']) / 'eval', eval_logs)

        return None
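As in Example #6, the configuration keys that __call__ expects can be read directly from the accesses above. A minimal illustrative config follows; the keys come from the code, while the values (and any extra keys MLP may need) are assumptions.

# Minimal config for __call__() above; keys come from the code,
# values are illustrative. MLP(config=config) may require further keys.
config = {
    'ID': 0,
    'seed': 0,
    'cuda': False,
    'cuda_id': 0,
    'env:id': 'CartPole-v1',
    'train:N': 4,
    'train:iter': 100,
    'algo:lr': 1e-3,
    'algo:use_lr_scheduler': True,
    'algo:gamma': 0.99,
    'log:interval': 10,
    'log:dir': 'logs',
}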