def test_vec_env_monitor_kwargs():
    env = make_vec_env(
        "MountainCarContinuous-v0", n_envs=1, seed=0, monitor_kwargs={"allow_early_resets": False}
    )
    assert env.get_attr("allow_early_resets")[0] is False

    env = make_atari_env(
        "BreakoutNoFrameskip-v4", n_envs=1, seed=0, monitor_kwargs={"allow_early_resets": False}
    )
    assert env.get_attr("allow_early_resets")[0] is False

    env = make_vec_env(
        "MountainCarContinuous-v0", n_envs=1, seed=0, monitor_kwargs={"allow_early_resets": True}
    )
    assert env.get_attr("allow_early_resets")[0] is True

    env = make_atari_env(
        "BreakoutNoFrameskip-v4", n_envs=1, seed=0, monitor_kwargs={"allow_early_resets": True}
    )
    assert env.get_attr("allow_early_resets")[0] is True
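For reference, entries in monitor_kwargs are forwarded to the Monitor wrapper, which is why the assertions above read a Monitor attribute. A minimal sketch of the direct equivalent (the manual gym.make call is illustrative, not part of the test):

import gym
from stable_baselines3.common.monitor import Monitor

# Wrapping manually with the same keyword reproduces what monitor_kwargs forwards
env = Monitor(gym.make("MountainCarContinuous-v0"), allow_early_resets=False)
assert env.allow_early_resets is False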
def main(config: str, agent: str):
    with open(config) as fp:
        json_data = json.load(fp)
    config = GameConfig.deserialize(json_data)
    log_dir = config.agents_config[agent]["save_path"]

    if agent == "DQN":
        env = make_atari_env(config.game_name, n_envs=1, seed=0, monitor_dir=log_dir)
    elif agent == "PPO":
        env = make_atari_env(config.game_name, n_envs=8, seed=0, monitor_dir=log_dir)
    else:
        env = make_atari_env(config.game_name, n_envs=16, seed=0, monitor_dir=log_dir)
    env = VecFrameStack(env, n_stack=4)

    agent = AgentLoader.get_agent(agent, config.agents_config, env)
    reward_callback = SaveOnBestTrainingRewardCallback(check_freq=100, log_dir=log_dir)

    start_time = time.time()
    steps = 10_000_000
    with ProgressBarManager_new(steps) as progress_callback:
        agent.agent.learn(total_timesteps=steps, callback=[reward_callback, progress_callback])
    # agent.save()
    env.close()

    elapsed = time.time() - start_time
    print(f"\nThe training took {elapsed} seconds")
def test_make_atari_env(env_id, n_envs, wrapper_kwargs):
    env_id = "BreakoutNoFrameskip-v4"
    env = make_atari_env(env_id, n_envs, wrapper_kwargs=wrapper_kwargs, monitor_dir=None, seed=0)
    assert env.num_envs == n_envs

    obs = env.reset()
    new_obs, reward, _, _ = env.step([env.action_space.sample() for _ in range(n_envs)])

    assert obs.shape == new_obs.shape

    # Wrapped into DummyVecEnv
    wrapped_atari_env = env.envs[0]
    if wrapper_kwargs is not None:
        assert obs.shape == (n_envs, 60, 60, 1)
        assert wrapped_atari_env.observation_space.shape == (60, 60, 1)
        assert not isinstance(wrapped_atari_env.env, ClipRewardEnv)
    else:
        assert obs.shape == (n_envs, 84, 84, 1)
        assert wrapped_atari_env.observation_space.shape == (84, 84, 1)
        assert isinstance(wrapped_atari_env.env, ClipRewardEnv)

    assert np.max(np.abs(reward)) < 1.0
def eval_env_constructor(n_envs=1):
    """Evaluation should be in a scalar environment."""
    env = make_atari_env("MontezumaRevenge-v0", n_envs=n_envs)
    env = VecFrameStack(env, n_stack=4)
    env = ScalarizeEnvWrapper(env)
    return env
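Since ScalarizeEnvWrapper exposes the vectorized env through a single-env interface, a plain evaluation loop works without batched actions. A minimal sketch, assuming the wrapper's gym-like step/reset signature (the random policy is a placeholder):

# Sketch: the scalarized env behaves like a regular, non-vectorized gym env
eval_env = eval_env_constructor()
obs = eval_env.reset()
done = False
while not done:
    action = eval_env.action_space.sample()  # placeholder for a trained policy
    obs, reward, done, info = eval_env.step(action)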
def create_environment(config):
    if config.atari_wrapper:
        env = make_atari_env(config.environment, n_envs=config.workers)
        env = VecFrameStack(env, n_stack=1)
    else:
        env = make_vec_env(config.environment, n_envs=config.workers)
    env = DummyEnvWrapper(env, config.add_stoch)
    return env
def get_env():
    env = make_atari_env(atari_env_name("pong", "image", "v4", no_frame_skip=True), n_envs=4, seed=0)
    env = VecFrameStack(env, n_stack=4)
    return env
def atari_make(env_name, scalarize=True, **kwargs):
    from stable_baselines3.common.env_util import make_atari_env
    from stable_baselines3.common.vec_env import VecFrameStack

    env = make_atari_env(env_id=env_name, **kwargs)
    env = VecFrameStack(env, n_stack=4)
    if scalarize:
        from rlberry.wrappers.scalarize import ScalarizeEnvWrapper

        env = ScalarizeEnvWrapper(env)
    return env
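Extra keyword arguments pass straight through to make_atari_env. An assumed usage sketch (the env ids and kwargs below are illustrative):

# Assumed usage: kwargs such as n_envs and seed are forwarded to make_atari_env.
vec_env = atari_make("PongNoFrameskip-v4", scalarize=False, n_envs=4, seed=0)
single_env = atari_make("PongNoFrameskip-v4")  # scalarized, single-env interface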
def train_and_test_ec(config, video_length_=1000, total_timesteps_=10000):
    print(config)
    if config.atari_wrapper:
        train_env = make_atari_env(config.environment, n_envs=config.workers)
        train_env = VecFrameStack(train_env, n_stack=1)
        shape = (84, 84, 1)
    else:
        train_env = make_vec_env(config.environment, n_envs=config.workers)
        shape = train_env.observation_space.shape

    rnet = RNetwork(shape, config.ensemble_size)
    vec_episodic_memory = [
        EpisodicMemory([64], rnet.embedding_similarity, replacement="random", capacity=200)
        for _ in range(config.workers)
    ]
    target_image_shape = list(shape)
    # assert type(config.add_stoch) == bool, "Please indicate whether or not you want stochasticity added"
    train_env = CuriosityEnvWrapper(
        train_env, vec_episodic_memory, rnet.embed_observation, target_image_shape, config.add_stoch
    )
    r_network_trainer = RNetworkTrainer(
        rnet, learning_rate=config.rnet_lr, observation_history_size=2000, training_interval=1000
    )
    train_env.add_observer(r_network_trainer)

    tb_dir = os.path.join(config.log_dir, config.tb_subdir)
    model = config.agent(config.policy_model, train_env, config, verbose=config.verbose, tensorboard_log=tb_dir)
    model.learn(total_timesteps=total_timesteps_)
    print("finished learning")
    # model.save("models/" + config.experiment)

    obs = train_env.reset()
    for i in range(video_length_ + 1):
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, done, info = train_env.step(action)
        train_env.render()
        if done.any():
            obs = train_env.reset()
    train_env.close()
def atari_games_example():
    # There already exists an environment generator that will make and wrap Atari environments correctly.
    # Here we are also multi-worker training (n_envs=4 => 4 environments).
    env = make_atari_env("PongNoFrameskip-v4", n_envs=4, seed=0)
    # Frame-stacking with 4 frames.
    env = VecFrameStack(env, n_stack=4)

    model = A2C("CnnPolicy", env, verbose=1)
    model.learn(total_timesteps=25_000)

    obs = env.reset()
    while True:
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        env.render()
def run_dqn_baseline():
    env = make_atari_env("BreakoutNoFrameskip-v4", n_envs=1, seed=0)
    env = VecFrameStack(env, n_stack=4)

    tensorboard_log = os.path.join(os.path.dirname(__file__), "runs_baseline")
    buffer_size = 100000
    num_training_steps = 1000000
    model = DQN(
        "CnnPolicy",
        env,
        verbose=0,
        buffer_size=buffer_size,
        learning_starts=50000,
        optimize_memory_usage=False,
        tensorboard_log=tensorboard_log,
    )
    model.learn(total_timesteps=num_training_steps)

    obs = env.reset()
    while True:
        action, _states = model.predict(obs)
        obs, rewards, dones, info = env.step(action)
        env.render()
def record_video(env_id, model, video_length=500, prefix="", video_folder="videos/"):
    """
    :param env_id: (str)
    :param model: (RL model)
    :param video_length: (int)
    :param prefix: (str)
    :param video_folder: (str)
    """
    eval_env = make_atari_env(env_id, n_envs=nEnv, seed=0)  # nEnv is a module-level global
    eval_env = VecFrameStack(eval_env, n_stack=4)
    obs = eval_env.reset()
    for _ in range(video_length):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = eval_env.step(action)
        eval_env.render()
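The docstring promises a recording, but the body only renders to screen. A sketch of a variant that actually writes a video file using SB3's VecVideoRecorder; the name record_video_to_disk is ours, not the original's:

from stable_baselines3.common.vec_env import VecVideoRecorder

def record_video_to_disk(env_id, model, video_length=500, prefix="", video_folder="videos/"):
    """Sketch: like record_video above, but saves the rollout via VecVideoRecorder."""
    eval_env = make_atari_env(env_id, n_envs=1, seed=0)
    eval_env = VecFrameStack(eval_env, n_stack=4)
    eval_env = VecVideoRecorder(
        eval_env,
        video_folder,
        record_video_trigger=lambda step: step == 0,  # start recording immediately
        video_length=video_length,
        name_prefix=prefix,
    )
    obs = eval_env.reset()
    for _ in range(video_length):
        action, _states = model.predict(obs)
        obs, rewards, dones, info = eval_env.step(action)
    eval_env.close()  # flushes the video to disk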
def train_and_test_icm(config, video_length_=1000, total_timesteps_=10000):
    if config.atari_wrapper:
        train_env = make_atari_env(config.environment, n_envs=config.workers)
    else:
        train_env = make_vec_env(config.environment, n_envs=config.workers)

    icm = ICM(
        train_env.observation_space.shape,
        config.action_shape,
        ensemble_size=config.ensemble_size,
        use_atari_wrapper=config.atari_wrapper,
    )
    target_image_shape = list(train_env.observation_space.shape)
    # assert type(config.add_stoch) == bool, "Please indicate whether or not you want stochasticity added"
    train_env = ICMCuriosityEnvWrapper(train_env, icm.reward, icm.forward, target_image_shape, config.add_stoch)
    icm_trainer = ICMTrainer(icm, observation_history_size=2000, training_interval=500)
    train_env.add_observer(icm_trainer)

    tb_dir = os.path.join(config.log_dir, config.tb_subdir)
    model = config.agent(config.policy_model, train_env, config, verbose=config.verbose, tensorboard_log=tb_dir)
    model.learn(total_timesteps=total_timesteps_)

    obs = train_env.reset()
    for i in range(video_length_ + 1):
        action, _states = model.predict(obs, deterministic=True)
        obs, reward, done, info = train_env.step(action)
        train_env.render()
        if done.any():
            obs = train_env.reset()
    train_env.close()
    action, _states = model.predict(obs)
    obs, rewards, dones, info = eval_env.step(action)
    eval_env.render()
# Close the video recorder
# eval_env.close()


# Stack 4 frames
env_id = "PongNoFrameskip-v4"
video_folder = "logs/videos/"
video_length = 1000
nEnv = 8
startFresh = False

if startFresh:
    env = make_atari_env(env_id, n_envs=nEnv, seed=0)
    env = VecFrameStack(env, n_stack=4)
    env.reset()
    model = A2C("CnnPolicy", env, verbose=1)
    model.learn(total_timesteps=25000)
    model.save("a2c_pong_{}".format(model.num_timesteps))
    record_video(env_id, model, video_length=500, prefix="a2c_" + env_id, video_folder="videos/")
else:
    env = make_atari_env(env_id, n_envs=nEnv, seed=0)
    env = VecFrameStack(env, n_stack=4)
    env.reset()
    trained_model = A2C.load("a2c_pong_200000", verbose=1)
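The snippet ends right after loading the saved weights; presumably the loaded agent is then recorded with the helper defined earlier. An assumed continuation of the else branch, not present in the original:

    # Assumed continuation (not in the original): record the loaded agent
    record_video(env_id, trained_model, video_length=video_length,
                 prefix="a2c_" + env_id, video_folder=video_folder)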
def env_constructor(n_envs=4):
    env = make_atari_env("MontezumaRevenge-v0", n_envs=n_envs)
    env = VecFrameStack(env, n_stack=4)
    return env
- There is also NoFrameskip-v4 with no frame skip and no action-repeat stochasticity. More details are in the exercise statement.
"""

atari_env_name = "Berzerk-v4"

"""## Creating the environment"""

from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack

# The functions that follow apply the same preprocessing as DeepMind.
# Here we are also multi-worker training (n_envs=4 => 4 environments); the model must support multiprocessing.
env = make_atari_env(atari_env_name, n_envs=1, seed=0)
# Frame-stacking with 4 frames. With 1 frame the algorithm knows the position of things,
# with 2 frames the velocity, with 3 the acceleration, and with 4 the jerk.
env = VecFrameStack(env, n_stack=4)

# The test environment must be separate
test_env = make_atari_env(atari_env_name, n_envs=1, seed=0)
# Frame-stacking with 4 frames
test_env = VecFrameStack(test_env, n_stack=4)

"""## Training

We will train a deep Q-learning (DQN) network like DeepMind's. Note that the timesteps are very few and that we do not tune the model's hyperparameters, which you can find [here](https://stable-baselines3.readthedocs.io/en/master/modules/dqn.html#parameters).
"""

import datetime  # For filenames while logging

from stable_baselines3 import DQN
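A minimal sketch of the training cell the text describes, assuming the env defined above; the hyperparameters and log path are illustrative placeholders, not the notebook's actual values:

# Sketch of the training step (placeholder hyperparameters and log path)
run_name = datetime.datetime.now().strftime("DQN_%Y%m%d_%H%M%S")  # timestamped log name
model = DQN("CnnPolicy", env, verbose=1, buffer_size=10_000, tensorboard_log="./logs/")
model.learn(total_timesteps=100_000, tb_log_name=run_name)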
# Effective code with the PPO package
import gym
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack

env = make_atari_env("Assault-v0", n_envs=1, seed=0)
env = VecFrameStack(env, n_stack=1)

model = PPO("MlpPolicy", env, verbose=1, tensorboard_log="./PPO_log/")
model.learn(total_timesteps=int(3e4))

obs = env.reset()
while True:
    # Transpose each new observation; the original transposed once outside the
    # loop and kept predicting on the stale first observation.
    obs_ = obs.transpose(3, 0, 1, 2)
    action, _states = model.predict(obs_)
    obs, rewards, dones, info = env.step(action)
    env.render()
import gym
import stable_baselines3
from stable_baselines3 import A2C
from stable_baselines3.common.callbacks import EvalCallback, StopTrainingOnRewardThreshold
from stable_baselines3.common.env_util import make_atari_env, make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.vec_env import VecFrameStack

env = make_atari_env("PongNoFrameskip-v4", n_envs=32, seed=0)
# Stack frames (n_stack=1 keeps a single frame, i.e. no effective stacking)
env = VecFrameStack(env, n_stack=1)

# # Create environment
# env_id = 'CartPole-v1'
# eval_env = gym.make(env_id)
# env = make_vec_env(env_id, n_envs=16, seed=0)

# # Instantiate the agent
# model = A2C('MlpPolicy', env, verbose=1, seed=0)
model = A2C("CnnPolicy", env, verbose=1, seed=0)

# callback_on_best = StopTrainingOnRewardThreshold(reward_threshold=475, verbose=1)
# eval_callback = EvalCallback(eval_env, callback_on_new_best=callback_on_best, verbose=1)

# Train the agent
model.learn(total_timesteps=int(1e7))
model.save("a2c_pong")
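evaluate_policy is imported above but never used; a short sketch of how the trained agent could be scored (the episode count is illustrative):

# Sketch: score the trained agent with the already-imported evaluate_policy
mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward:.2f}")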