# NOTE: the top of this script was truncated; the header below is an assumed
# reconstruction, inferred from the body and the sibling rogue-gym example.
# RogueEnvExt, rogue_config, and EXPAND are taken to be local helpers.
import os

import rainy
import rainy.utils.cli as cli
from rainy import Config
from rogue_gym.envs import RogueEnv
from torch.optim import Adam


def config() -> Config:
    c = Config()
    c.save_freq = None
    c.eval_freq = None
    c.eval_env = RogueEnvExt(
        RogueEnv(
            config_dict=rogue_config(2),
            max_steps=500,
            stair_reward=50.0,
            image_setting=EXPAND,
        )
    )
    c.set_optimizer(lambda params: Adam(params, lr=1.0e-4, eps=1.0e-8))
    CNN_PARAM = [(8, 1), (4, 1), (3, 1)]
    c.set_net_fn(
        "actor-critic",
        rainy.net.actor_critic.ac_conv(
            kernel_and_strides=CNN_PARAM,
            output_dim=256,
        ),
    )
    c.nworkers = 32
    c.nsteps = 125
    c.value_loss_weight = 0.5
    c.gae_lambda = 0.95
    c.ppo_minibatch_size = (c.nworkers * c.nsteps) // 4
    return c


if __name__ == "__main__":
    cli.run_cli(config, rainy.agents.PPOAgent, script_path=os.path.realpath(__file__))
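# The `(c.nworkers * c.nsteps) // 4` line above is worth unpacking: each PPO
# rollout collects nworkers * nsteps transitions, which are then split into
# minibatches for the optimization epochs. A quick standalone check of the
# arithmetic (plain Python, not rainy code):
nworkers, nsteps = 32, 125
rollout = nworkers * nsteps       # 4000 transitions gathered per rollout
minibatch = rollout // 4          # 1000 -> exactly 4 minibatches per epoch
assert rollout % minibatch == 0   # the minibatches tile the rollout evenly
print(rollout, minibatch)         # 4000 1000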
# NOTE: missing header; the imports below are inferred from the body and the
# sibling examples in this section.
import os

import rainy.utils.cli as cli
from rainy import Config, net
from rainy.agents import A2cAgent
from rainy.envs import Atari, atari_parallel
from torch.optim import RMSprop


def config() -> Config:
    c = Config()
    env_use = "Pong"
    c.set_env(lambda: Atari(env_use, frame_stack=False))
    c.set_optimizer(lambda params: RMSprop(params, lr=7e-4, alpha=0.99, eps=1e-5))
    # Recurrent head: a GRU replaces the feed-forward layer after the CNN.
    c.set_net_fn('actor-critic', net.actor_critic.ac_conv(rnn=net.GruBlock))
    # c.set_net_fn('actor-critic', net.actor_critic.ac_conv())  # feed-forward variant
    c.nworkers = 16
    c.nsteps = 5
    c.set_parallel_env(atari_parallel())
    c.grad_clip = 0.5
    c.value_loss_weight = 0.5
    c.use_gae = False
    c.max_steps = int(2e7)
    c.eval_env = Atari(env_use)
    c.use_reward_monitor = True
    c.eval_deterministic = False
    c.episode_log_freq = 100
    c.eval_freq = None
    c.save_freq = None
    print("GRU on Pong!")
    return c


if __name__ == '__main__':
    cli.run_cli(config(), A2cAgent, script_path=os.path.realpath(__file__))
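# `rnn=net.GruBlock` above swaps the feed-forward layer after the CNN for a
# recurrent one, letting the policy carry information across frames (which
# matters once frame_stack=False removes the stacked-frame history). Below is
# a minimal standalone sketch of that idea in plain PyTorch; RecurrentHead is
# an illustrative stand-in, not rainy's actual GruBlock.
import torch
from torch import nn


class RecurrentHead(nn.Module):
    """Toy recurrent actor-critic head: CNN features -> GRU -> (logits, value)."""

    def __init__(self, feature_dim: int, hidden: int, n_actions: int) -> None:
        super().__init__()
        self.gru = nn.GRUCell(feature_dim, hidden)
        self.policy = nn.Linear(hidden, n_actions)
        self.value = nn.Linear(hidden, 1)

    def forward(self, feature: torch.Tensor, h: torch.Tensor):
        h = self.gru(feature, h)  # the hidden state carries history across steps
        return self.policy(h), self.value(h), h


head = RecurrentHead(feature_dim=256, hidden=128, n_actions=6)
h = torch.zeros(16, 128)             # one hidden state per worker
feature = torch.randn(16, 256)       # CNN features for the current frame
logits, value, h = head(feature, h)  # thread h through successive steps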
import os
from rainy import Config
from rainy.agents import PpoAgent
import rainy.utils.cli as cli
from rogue_gym.envs import ImageSetting, StatusFlag, DungeonType
from torch.optim import Adam
from env import set_env
from net import a2c_conv

EXPAND = ImageSetting(dungeon=DungeonType.SYMBOL, status=StatusFlag.EMPTY)
AGENT = PpoAgent


def config() -> Config:
    c = Config()
    set_env(c, EXPAND)
    c.set_optimizer(lambda params: Adam(params, lr=2.5e-4, eps=1.0e-4))
    c.set_net_fn('actor-critic', a2c_conv())
    c.grad_clip = 0.5
    c.episode_log_freq = 100
    c.eval_deterministic = False
    return c


if __name__ == '__main__':
    cli.run_cli(config(), AGENT, script_path=os.path.realpath(__file__))
import os
import rainy.utils.cli as cli
from rainy.envs import Atari, atari_parallel
from int_rew import rnd


def config(envname: str = "MontezumaRevenge") -> rnd.RNDConfig:
    c = rnd.RNDConfig()
    c.set_env(lambda: Atari(envname, cfg=rnd.atari_config(), frame_stack=False))
    c.set_parallel_env(atari_parallel())
    c.max_steps = int(1e8) * 6
    c.grad_clip = 1.0
    # ppo params
    c.nworkers = 64
    c.nsteps = 128
    c.value_loss_weight = 0.5
    c.gae_lambda = 0.95
    c.ppo_minibatch_size = (c.nworkers * c.nsteps) // 4
    c.auxloss_use_ratio = min(1.0, 32.0 / c.nworkers)
    c.use_reward_monitor = True
    # eval settings
    c.eval_env = Atari(envname, cfg=rnd.atari_config())
    c.episode_log_freq = 1000
    c.eval_freq = None
    c.save_freq = int(1e8)
    return c


if __name__ == "__main__":
    cli.run_cli(config, rnd.RNDAgent, script_path=os.path.realpath(__file__))
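# What RNDConfig is configuring, in one picture: random network distillation
# (Burda et al., 2018) keeps a fixed, randomly initialized target network and
# trains a predictor to match it; the predictor's error on an observation is
# the intrinsic reward, large exactly on states seen rarely so far. The sketch
# below is illustrative only, not the int_rew package's implementation.
import torch
from torch import nn

obs_dim, embed_dim = 64, 32
target = nn.Sequential(nn.Linear(obs_dim, 128), nn.ReLU(), nn.Linear(128, embed_dim))
predictor = nn.Sequential(nn.Linear(obs_dim, 128), nn.ReLU(), nn.Linear(128, embed_dim))
for p in target.parameters():
    p.requires_grad_(False)  # the target stays fixed and random forever

opt = torch.optim.Adam(predictor.parameters(), lr=1e-4)


def intrinsic_reward(obs: torch.Tensor) -> torch.Tensor:
    """One predictor update; returns per-state novelty bonuses."""
    err = (predictor(obs) - target(obs)).pow(2).mean(dim=-1)
    opt.zero_grad()
    err.mean().backward()
    opt.step()
    return err.detach()  # add this to the extrinsic reward as an exploration bonus


bonus = intrinsic_reward(torch.randn(8, obs_dim))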
from os.path import realpath
import ppo_atari
import rainy
from rainy.envs import Atari, atari_parallel
import rainy.utils.cli as cli


def config() -> rainy.Config:
    c = ppo_atari.config()
    # Flickering Breakout: frames are randomly blanked, so a single frame is
    # not Markovian; pair it with a GRU head instead of frame stacking.
    c.set_env(lambda: Atari('Breakout', flicker_frame=True, frame_stack=False))
    c.set_parallel_env(atari_parallel(frame_stack=False))
    c.set_net_fn('actor-critic', rainy.net.actor_critic.ac_conv(rnn=rainy.net.GruBlock))
    c.eval_env = Atari('Breakout', frame_stack=False)
    return c


if __name__ == '__main__':
    cli.run_cli(config(), rainy.agents.PpoAgent, script_path=realpath(__file__))
import os
import rainy
from rainy.utils.cli import run_cli
from rainy.envs import MultiProcEnv
from torch.optim import Adam


def config() -> rainy.Config:
    c = rainy.Config()
    c.max_steps = int(1e6)
    c.nworkers = 8
    c.nsteps = 32
    c.set_parallel_env(MultiProcEnv)
    c.set_optimizer(lambda params: Adam(params, lr=2.5e-4, eps=1.0e-4))
    c.value_loss_weight = 0.1
    c.grad_clip = 0.1
    c.gae_lambda = 0.95
    c.ppo_minibatch_size = 64
    c.use_gae = True
    c.ppo_clip = 0.2
    c.eval_freq = None
    # c.set_net_fn('actor-critic', rainy.net.actor_critic.fc_shared(rnn=rainy.net.GruBlock))
    return c


if __name__ == '__main__':
    run_cli(config(), rainy.agents.PpoAgent, script_path=os.path.realpath(__file__))
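# `use_gae = True` with `gae_lambda = 0.95` selects generalized advantage
# estimation: A_t = sum_l (gamma * lam)^l * delta_{t+l}, where delta_t is the
# one-step TD error r_t + gamma * V(s_{t+1}) - V(s_t). A reference
# implementation of the backward recursion for a single rollout (illustrative,
# ignoring episode ends; not rainy's internal code):
def gae(rewards, values, last_value, gamma=0.99, lam=0.95):
    advantages = [0.0] * len(rewards)
    running = 0.0
    next_value = last_value
    for t in reversed(range(len(rewards))):
        delta = rewards[t] + gamma * next_value - values[t]  # TD error at t
        running = delta + gamma * lam * running              # discounted-sum recursion
        advantages[t] = running
        next_value = values[t]
    return advantages


# e.g. a 4-step rollout with constant reward 1 and zero value estimates:
print(gae([1.0] * 4, [0.0] * 4, last_value=0.0))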
# NOTE: the top of this script was truncated; the header and the opening of
# config() below are an assumed reconstruction based on the plain RND example
# above (same body, plus a tunable --rnd-lr passed through to config()).
import os

import click
import rainy.utils.cli as cli
from rainy.envs import Atari, atari_parallel
from torch.optim import Adam

from int_rew import rnd


def config(envname: str = "MontezumaRevenge", rnd_lr: float = 5.0e-5) -> rnd.RNDConfig:
    c = rnd.RNDConfig()
    c.set_env(lambda: Atari(envname, cfg=rnd.atari_config(), frame_stack=False))
    c.set_optimizer(lambda params: Adam(params, lr=rnd_lr), key="rnd_separated")
    c.set_parallel_env(atari_parallel())
    c.max_steps = int(1e8) * 6
    c.grad_clip = 1.0
    # ppo params
    c.nworkers = 64
    c.nsteps = 128
    c.value_loss_weight = 0.5
    c.gae_lambda = 0.95
    c.ppo_minibatch_size = (c.nworkers * c.nsteps) // 4
    c.auxloss_use_ratio = min(1.0, 32.0 / c.nworkers)
    c.use_reward_monitor = True
    # eval settings
    c.eval_env = Atari(envname, cfg=rnd.atari_config())
    c.episode_log_freq = 100
    c.eval_freq = None
    c.save_freq = int(1e8)
    return c


if __name__ == "__main__":
    options = [click.Option(["--rnd-lr"], type=float, default=5.0e-5)]
    cli.run_cli(
        config,
        rnd.TunedRNDAgent,
        os.path.realpath(__file__),
        options,
    )
# NOTE: missing header; `os`, `Config`, and `net` imports below are inferred
# from the body and the sibling examples in this section.
import os

import rainy.utils.cli as cli
from rainy import Config, net
from rainy.agents import DoubleDqnAgent
from rainy.envs import Atari
from rainy.lib.explore import EpsGreedy, LinearCooler
from torch.optim import RMSprop


def config() -> Config:
    c = Config()
    c.set_env(lambda: Atari('Breakout'))
    c.set_optimizer(
        lambda params: RMSprop(params, lr=0.00025, alpha=0.95, eps=0.01, centered=True)
    )
    c.set_explorer(lambda: EpsGreedy(1.0, LinearCooler(1.0, 0.1, int(1e6))))
    c.set_net_fn('value', net.value.dqn_conv())
    c.replay_size = int(1e6)
    c.batch_size = 32
    c.train_start = 50000
    c.sync_freq = 10000
    c.max_steps = int(2e7)
    c.eval_env = Atari('Breakout', episodic_life=False)
    c.eval_freq = None
    c.seed = 1
    c.use_reward_monitor = True
    return c


if __name__ == '__main__':
    cli.run_cli(config(), DoubleDqnAgent, script_path=os.path.realpath(__file__))
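# `EpsGreedy(1.0, LinearCooler(1.0, 0.1, int(1e6)))` reads as: act randomly
# with probability eps, where eps anneals linearly from 1.0 down to 0.1 over
# the first 1e6 steps and then holds. The standalone sketch below reproduces
# that schedule under the assumption that LinearCooler's arguments map to
# (initial, minimal, decay steps); the real semantics live in
# rainy.lib.explore.
import random


def epsilon(step: int, begin: float = 1.0, end: float = 0.1, span: int = 10**6) -> float:
    frac = min(step, span) / span        # 0.0 at the start, 1.0 after `span` steps
    return begin + (end - begin) * frac  # linear interpolation, then clamped


def act(q_values, step: int) -> int:
    if random.random() < epsilon(step):
        return random.randrange(len(q_values))                   # explore
    return max(range(len(q_values)), key=q_values.__getitem__)   # exploit greedily


for step in (0, 500_000, 1_000_000, 2_000_000):
    print(step, round(epsilon(step), 3))  # 1.0, 0.55, 0.1, 0.1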