Example #1
def test_vec_env_monitor_kwargs():
    env = make_vec_env("MountainCarContinuous-v0",
                       n_envs=1,
                       seed=0,
                       monitor_kwargs={"allow_early_resets": False})
    assert env.get_attr("allow_early_resets")[0] is False

    env = make_atari_env("BreakoutNoFrameskip-v4",
                         n_envs=1,
                         seed=0,
                         monitor_kwargs={"allow_early_resets": False})
    assert env.get_attr("allow_early_resets")[0] is False

    env = make_vec_env("MountainCarContinuous-v0",
                       n_envs=1,
                       seed=0,
                       monitor_kwargs={"allow_early_resets": True})
    assert env.get_attr("allow_early_resets")[0] is True

    env = make_atari_env(
        "BreakoutNoFrameskip-v4",
        n_envs=1,
        seed=0,
        monitor_kwargs={"allow_early_resets": True},
    )
    assert env.get_attr("allow_early_resets")[0] is True
Example #2
def main(config: str, agent: str):
    with open(config) as fp:
        json_data = json.load(fp)

    config = GameConfig.deserialize(json_data)
    log_dir = config.agents_config[agent]["save_path"]
    if agent == "DQN":
        env = make_atari_env(config.game_name, n_envs=1,
                             seed=0, monitor_dir=log_dir)

    elif agent == "PPO":
        env = make_atari_env(config.game_name, n_envs=8,
                             seed=0, monitor_dir=log_dir)

    else:
        env = make_atari_env(config.game_name, n_envs=16,
                             seed=0, monitor_dir=log_dir)

    env = VecFrameStack(env, n_stack=4)

    agent = AgentLoader.get_agent(agent, config.agents_config, env)

    reward_callback = SaveOnBestTrainingRewardCallback(
        check_freq=100, log_dir=log_dir)

    start_time = time.time()
    steps = 10_000_000
    with ProgressBarManager_new(steps) as progress_callback:
        agent.agent.learn(total_timesteps=steps, callback=[
                          reward_callback, progress_callback])
        # agent.save()
        env.close()

    elapsed = time.time() - start_time
    print(f"\nTraining took {elapsed:.2f} seconds")
Example #3
def test_make_atari_env(env_id, n_envs, wrapper_kwargs):
    env_id = "BreakoutNoFrameskip-v4"
    env = make_atari_env(env_id,
                         n_envs,
                         wrapper_kwargs=wrapper_kwargs,
                         monitor_dir=None,
                         seed=0)

    assert env.num_envs == n_envs

    obs = env.reset()

    new_obs, reward, _, _ = env.step(
        [env.action_space.sample() for _ in range(n_envs)])

    assert obs.shape == new_obs.shape

    # Wrapped into DummyVecEnv
    wrapped_atari_env = env.envs[0]
    if wrapper_kwargs is not None:
        assert obs.shape == (n_envs, 60, 60, 1)
        assert wrapped_atari_env.observation_space.shape == (60, 60, 1)
        assert not isinstance(wrapped_atari_env.env, ClipRewardEnv)
    else:
        assert obs.shape == (n_envs, 84, 84, 1)
        assert wrapped_atari_env.observation_space.shape == (84, 84, 1)
        assert isinstance(wrapped_atari_env.env, ClipRewardEnv)
        assert np.max(np.abs(reward)) < 1.0
Example #4
def eval_env_constructor(n_envs=1):
    """
    Evaluation should be in a scalar environment.
    """
    env = make_atari_env("MontezumaRevenge-v0", n_envs=n_envs)
    env = VecFrameStack(env, n_stack=4)
    env = ScalarizeEnvWrapper(env)
    return env
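# A hedged usage note (not part of the original snippet): the scalarized evaluation env
# can be handed to helpers that expect a single, non-vectorized environment, e.g.
#     from stable_baselines3.common.evaluation import evaluate_policy
#     mean_reward, std_reward = evaluate_policy(model, eval_env_constructor(), n_eval_episodes=5)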
Example #5
def create_environment(config):
    if config.atari_wrapper:
        env = make_atari_env(config.environment, n_envs=config.workers)
        env = VecFrameStack(env, n_stack=1)
    else:
        env = make_vec_env(config.environment, n_envs=config.workers)
    env = DummyEnvWrapper(env, config.add_stoch)
    return env
Example #6
def get_env():
    env = make_atari_env(atari_env_name('pong',
                                        'image',
                                        'v4',
                                        no_frame_skip=True),
                         n_envs=4,
                         seed=0)
    env = VecFrameStack(env, n_stack=4)
    return env
Example #7
def atari_make(env_name, scalarize=True, **kwargs):
    from stable_baselines3.common.env_util import make_atari_env
    from stable_baselines3.common.vec_env import VecFrameStack
    env = make_atari_env(env_id=env_name, **kwargs)
    env = VecFrameStack(env, n_stack=4)
    if scalarize:
        from rlberry.wrappers.scalarize import ScalarizeEnvWrapper
        env = ScalarizeEnvWrapper(env)
    return env
Example #8
def train_and_test_ec(config, video_length_=1000, total_timesteps_=10000):
    print(config)
    if config.atari_wrapper:
        train_env = make_atari_env(config.environment, n_envs=config.workers)
        train_env = VecFrameStack(train_env, n_stack=1)
        shape = (84, 84, 1)
    else:
        train_env = make_vec_env(config.environment, n_envs=config.workers)
        shape = train_env.observation_space.shape

    rnet = RNetwork(shape, config.ensemble_size)
    vec_episodic_memory = [
        EpisodicMemory([64],
                       rnet.embedding_similarity,
                       replacement='random',
                       capacity=200) for _ in range(config.workers)
    ]
    target_image_shape = list(shape)
    #assert type(config.add_stoch) == bool, "Please indicate whether or not you want stoch added"
    train_env = CuriosityEnvWrapper(train_env, vec_episodic_memory,
                                    rnet.embed_observation, target_image_shape,
                                    config.add_stoch)
    r_network_trainer = RNetworkTrainer(rnet,
                                        learning_rate=config.rnet_lr,
                                        observation_history_size=2000,
                                        training_interval=1000)
    train_env.add_observer(r_network_trainer)
    tb_dir = os.path.join(config.log_dir, config.tb_subdir)
    model = config.agent(config.policy_model,
                         train_env,
                         config,
                         verbose=config.verbose,
                         tensorboard_log=tb_dir)

    model.learn(total_timesteps=total_timesteps_)

    print("stopped to learn")
    #model.save("models/"+config.experiment)

    obs = train_env.reset()

    for i in range(video_length_ + 1):

        action, _states = model.predict(obs, deterministic=True)
        obs, reward, done, info = train_env.step(action)
        train_env.render()
        if done.any():
            obs = train_env.reset()

    train_env.close()
Example #9
def atari_games_example():
    # There already exists an environment generator that will make and wrap atari environments correctly.
    # Here we are also multi-worker training (n_envs=4 => 4 environments).
    env = make_atari_env("PongNoFrameskip-v4", n_envs=4, seed=0)
    # Frame-stacking with 4 frames.
    env = VecFrameStack(env, n_stack=4)

    model = A2C("CnnPolicy", env, verbose=1)
    model.learn(total_timesteps=25_000)

    obs = env.reset()
    while True:
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        env.render()
def run_dqn_baseline():
    env = make_atari_env('BreakoutNoFrameskip-v4', n_envs=1, seed=0)
    env = VecFrameStack(env, n_stack=4)
    tensorboard_log = os.path.join(os.path.dirname(__file__), 'runs_baseline')
    buffer_size = 100000
    num_training_steps = 1000000

    model = DQN('CnnPolicy',
                env,
                verbose=0,
                buffer_size=buffer_size,
                learning_starts=50000,
                optimize_memory_usage=False,
                tensorboard_log=tensorboard_log)
    model.learn(total_timesteps=num_training_steps)

    obs = env.reset()
    while True:
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        env.render()
def record_video(env_id,
                 model,
                 video_length=500,
                 prefix='',
                 video_folder='videos/'):
    """
    :param env_id: (str)
    :param model: (RL model)
    :param video_length: (int)
    :param prefix: (str)
    :param video_folder: (str)
    """
    print("Did you even try?")
    eval_env = make_atari_env(env_id, n_envs=nEnv, seed=0)  # nEnv is a module-level global defined later in this snippet
    eval_env = VecFrameStack(eval_env, n_stack=4)

    obs = eval_env.reset()
    for _ in range(video_length):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = eval_env.step(action)
        eval_env.render()

    # Close the video recorder
    # eval_env.close()
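    # The docstring above mentions `prefix` and `video_folder`, but this body never records
    # anything. A minimal sketch of actual recording (an assumption, not the original code)
    # would wrap the eval env in SB3's VecVideoRecorder before the rollout loop, e.g.:
    #     from stable_baselines3.common.vec_env import VecVideoRecorder
    #     eval_env = VecVideoRecorder(eval_env, video_folder,
    #                                 record_video_trigger=lambda step: step == 0,
    #                                 video_length=video_length,
    #                                 name_prefix=prefix)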
def train_and_test_icm(config, video_length_=1000, total_timesteps_=10000):
    if config.atari_wrapper:
        train_env = make_atari_env(config.environment, n_envs=config.workers)
    else:
        train_env = make_vec_env(config.environment, n_envs=config.workers)

    icm = ICM(train_env.observation_space.shape,
              config.action_shape,
              ensemble_size=config.ensemble_size,
              use_atari_wrapper=config.atari_wrapper)
    is_atari_environment = True
    target_image_shape = list(train_env.observation_space.shape)
    #assert type(config.add_stoch) == bool, "Please indicate whether or not you want stoch added"
    train_env = ICMCuriosityEnvWrapper(train_env, icm.reward, icm.forward,
                                       target_image_shape, config.add_stoch)
    icm_trainer = ICMTrainer(icm,
                             observation_history_size=2000,
                             training_interval=500)
    train_env.add_observer(icm_trainer)
    tb_dir = os.path.join(config.log_dir, config.tb_subdir)
    model = config.agent(config.policy_model,
                         train_env,
                         config,
                         verbose=config.verbose,
                         tensorboard_log=tb_dir)

    #model.learn(total_timesteps=config.total_timesteps)

    model.learn(total_timesteps=total_timesteps_)

    obs = train_env.reset()
    for i in range(video_length_ + 1):

        action, _states = model.predict(obs, deterministic=True)
        obs, reward, done, info = train_env.step(action)
        train_env.render()
        if done.any():
            obs = train_env.reset()
    train_env.close()


# Stack 4 frames
env_id = 'PongNoFrameskip-v4'
video_folder = 'logs/videos/'
video_length = 1000
nEnv = 8
startFresh = False
if startFresh:
    env = make_atari_env(env_id, n_envs=nEnv, seed=0)
    env = VecFrameStack(env, n_stack=4)
    env.reset()
    model = A2C('CnnPolicy', env, verbose=1)
    model.learn(total_timesteps=25000)
    model.save("a2c_pong_{}".format(model.num_timesteps))
    record_video(env_id,
                 model,
                 video_length=500,
                 prefix='ac2_' + env_id,
                 video_folder='videos/')
else:
    env = make_atari_env(env_id, n_envs=nEnv, seed=0)
    env = VecFrameStack(env, n_stack=4)
    env.reset()
    trained_model = A2C.load("a2c_pong_200000", verbose=1)
Example #14
def env_constructor(n_envs=4):
    env = make_atari_env("MontezumaRevenge-v0", n_envs=n_envs)
    env = VecFrameStack(env, n_stack=4)
    return env
Example #15
- There is also NoFrameskip-v4 with no frame skip and no action repeat stochasticity.

Further details are given in the exercise statement.
"""

atari_env_name = 'Berzerk-v4'
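# The notebook text above also mentions the NoFrameskip-v4 naming scheme. As an illustration
# (not part of the original notebook), the corresponding id for this game would be
# 'BerzerkNoFrameskip-v4'; that variant disables the built-in frame skip so the AtariWrapper
# applied by make_atari_env below can control frame skipping itself, e.g.:
# atari_env_name = 'BerzerkNoFrameskip-v4'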

"""## Δημιουργία περιβάλλοντος"""

from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack

# The functions that follow perform the same preprocessing as DeepMind

# make_atari_env also supports multi-worker training (e.g. n_envs=4 => 4 environments); the model must support multiprocessing. Here we use a single environment.
env = make_atari_env(atari_env_name, n_envs=1, seed=0)
# Frame-stacking with 4 frames. With 1 frame the algorithm knows the position of objects, with 2 frames their velocity, with 3 their acceleration and with 4 their jerk
env = VecFrameStack(env, n_stack=4)
# The test environment must be a separate instance
test_env = make_atari_env(atari_env_name, n_envs=1, seed=0)
# Frame-stacking with 4 frames
test_env = VecFrameStack(test_env, n_stack=4)

"""## Εκπαίδευση

Θα εκπαιδεύσουμε ένα δίκτυο deep q-learning (DQN) όπως αυτό της Deepmind. Σημειώστε ότι τα timesteps είναι πολύ λίγα και ότι δεν κάνουμε διερεύνηση στις παραμέτρους του μοντέλου που μπορείτε να βρείτε [εδώ](https://stable-baselines3.readthedocs.io/en/master/modules/dqn.html#parameters).
"""

import datetime # For filenames while logging
from stable_baselines3 import DQN
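# The example is cut off at this point. According to the text above it goes on to train a
# DQN similar to DeepMind's; a minimal sketch of what that might look like (an assumption,
# not the original code, with illustrative hyperparameters) is:
model = DQN("CnnPolicy", env, verbose=1, buffer_size=100_000, learning_starts=50_000)
model.learn(total_timesteps=100_000)
model.save("dqn_" + atari_env_name + "_" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))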
Example #16
# Working example using the PPO algorithm
import gym

from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3 import PPO

env = make_atari_env('Assault-v0', n_envs=1, seed=0)
env = VecFrameStack(env, n_stack=1)

model = PPO('MlpPolicy', env, verbose=1, tensorboard_log='./PPO_log/')
model.learn(total_timesteps=int(3e4))

obs = env.reset()

while True:
    # Predict on the current observation each step; SB3's predict() accepts the VecEnv
    # observation directly. (The original snippet transposed the initial observation once
    # before the loop and kept predicting on that stale copy.)
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    env.render()
Example #17
import stable_baselines3
import gym

from stable_baselines3 import A2C
from stable_baselines3.common.callbacks import StopTrainingOnRewardThreshold, EvalCallback
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack

env = make_atari_env('PongNoFrameskip-v4', n_envs=32, seed=0)
# Frame stacking (n_stack=1 here, i.e. no actual stacking)
env = VecFrameStack(env, n_stack=1)

# # Create environment
# env_id = 'CartPole-v1'
# eval_env = gym.make(env_id)
# env = make_vec_env(env_id, n_envs=16, seed=0)
# # Instantiate the agent
# model = A2C('MlpPolicy', env, verbose=1,seed=0)
model = A2C('CnnPolicy', env, verbose=1, seed=0)
# callback_on_best = StopTrainingOnRewardThreshold(reward_threshold=475, verbose=1)
# eval_callback = EvalCallback(eval_env, callback_on_new_best=callback_on_best, verbose=1)
# Train the agent
model.learn(total_timesteps=int(1e7))
model.save("a2c_pong")