""" import torch import ptan from lib import data, utils, model from tensorboardX import SummaryWriter if __name__=='__main__': message = '*'*10 + ' A2C on Atari ' +'*'*10 args = utils.argpars_dqn(message) params = data.params[args.env] utils.update_params(params, args) params.n_envs = max(params.n_envs, 8) device = 'cuda' if args.cuda else 'cpu' envs = utils.createEnvs(params, stack_frames=2) shape = envs[0].observation_space.shape actions = envs[0].action_space.n net = model.A2CNet(shape, actions) net.to(device) agent = ptan.agent.ActorCriticAgent(net, device=device, apply_softmax=True) exp_src = ptan.experience.ExperienceSourceFirstLast(envs, agent, params.gamma,steps_count=params.steps) generator = utils.BatchGenerator(exp_src, params) mean_monitor = utils.MeanRewardsMonitor(envs[0], net, 'A2C', params.solve_rewards) writer = SummaryWriter(logdir=mean_monitor.runs_dir,comment=params.frame_stack) optimizer = torch.optim.Adam(net.parameters(), lr=params.lr) # lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', factor=0.75, patience=20000,
parser.add_argument('--env',
                    default='pong', choices=GAMES,
                    help='name of the game: pong (default), invaders, breakout')
parser.add_argument('--cuda', action='store_true',
                    help='Train on GPU when available')
args = parser.parse_args()
device = 'cuda' if (args.cuda and torch.cuda.is_available()) else 'cpu'

# shared counters updated by the worker processes
frames = mp.Value('i', 0)
episodes = mp.Value('i', 0)

params = data.params[args.env]
env = utils.createEnvs(params)[0]
shape = env.observation_space.shape
actions = env.action_space.n

net = model.DDQN(shape, actions).to(device)
net.share_memory()
print(net)
tgt_net = ptan.agent.TargetNet(net)
selector = ptan.actions.EpsilonGreedyActionSelector()
agent = ptan.agent.DQNAgent(net, selector, device=device)
buffer = ptan.experience.ExperienceReplayBuffer(None, params.buffer_size)
optimizer = torch.optim.Adam(net.parameters(), lr=params.lr)

exp_queue = mp.Queue(THREADS)
proc_list = []
for n in range(THREADS):
# mp.set_start_method('spawn')
os.environ['OMP_NUM_THREADS'] = '1'
os.environ['MKL_THREADING_LAYER'] = 'GNU'

message = '*' * 10 + ' A3C on Atari ' + '*' * 10
args = utils.argpars_dqn(message)
params = data.params[args.env]
utils.update_params(params, args)
# For A2C/A3C to converge we need a high number of environments
params.n_envs = max(params.n_envs, 8)
device = 'cuda' if args.cuda else 'cpu'

env = utils.createEnvs(params, stack_frames=params.frame_stack)[0]  # we can get rid of this one
shape = env.observation_space.shape
actions = env.action_space.n

net = model.A2CNet(shape, actions)
net.to(device)
net.share_memory()
optimizer = torch.optim.Adam(net.parameters(), lr=params.lr)

mean_monitor = utils.MeanRewardsMonitor(env, net, ALGORITHM, params.solve_rewards)
writer = SummaryWriter(logdir=mean_monitor.runs_dir, comment=params.frame_stack)
parser.add_argument('-s', '--steps', default=4, type=int,
                    help='Number of steps to unroll for n-step returns (steps_count)')
parser.add_argument('-n', '--envs', default=3, type=int,
                    help='Number of environments to run simultaneously')
parser.add_argument('-g', '--game', default='invaders', choices=GAMES,
                    help='OpenAI Gym environment name')
parser.add_argument('--play', action='store_true',
                    help='Play a game when the environment is solved')
args = parser.parse_args()
device = 'cuda' if args.cuda else 'cpu'

params = data.params[args.game]
envs = utils.createEnvs(params)
shape = envs[0].observation_space.shape
actions = envs[0].action_space.n

net = model.DDQN(shape, actions).to(device)
tgt_net = ptan.agent.TargetNet(net)
selector = ptan.actions.EpsilonGreedyActionSelector(epsilon=params.eps_start)
eps_tracker = ptan.actions.EpsilonTracker(
    selector, params.eps_start, params.eps_final, params.eps_frames * args.envs)
agent = ptan.agent.DQNAgent(net, selector, device=device)
exp_source = ptan.experience.ExperienceSourceFirstLast(
    envs, agent, params.gamma, steps_count=args.steps)
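# The excerpt above stops after building the experience source. For reference,
# this is a sketch of the double-DQN loss such a setup typically minimizes.
# The batch unpacking (states, actions, rewards, dones, next_states as tensors
# on the right device) is an assumption, not the repository's own helper code;
# tgt_net is the ptan TargetNet wrapper, whose frozen copy is tgt_net.target_model.
import torch
import torch.nn.functional as F


def double_dqn_loss(batch, net, tgt_net, gamma, steps):
    """Illustrative double-DQN loss for FirstLast (n-step) transitions."""
    states, actions, rewards, dones, next_states = batch

    # Q(s, a) of the actions that were actually taken
    q_values = net(states).gather(1, actions.unsqueeze(1)).squeeze(1)

    with torch.no_grad():
        # double DQN: the online net selects the next action,
        # the target net evaluates it
        next_actions = net(next_states).argmax(dim=1)
        next_q = tgt_net.target_model(next_states).gather(
            1, next_actions.unsqueeze(1)).squeeze(1)
        next_q[dones] = 0.0
        # ExperienceSourceFirstLast already folds `steps` rewards into `rewards`,
        # so the bootstrap value is discounted by gamma ** steps
        target = rewards + (gamma ** steps) * next_q

    return F.mse_loss(q_values, target)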
mp.set_start_method('spawn')
os.environ['OMP_NUM_THREADS'] = '1'
os.environ['MKL_THREADING_LAYER'] = 'GNU'

message = '*' * 10 + ' A3C GRU on Atari ' + '*' * 10
args = utils.argpars_dqn(message)
params = data.params[args.env]
utils.update_params(params, args)
# For A2C/A3C to converge we need a lot of environments to draw observations from;
# this keeps the samples approximately i.i.d.
params.n_envs = max(params.n_envs, 8)
device = 'cuda' if args.cuda else 'cpu'

env = utils.createEnvs(params, stack_frames=params.frame_stack)[0]
shape = env.observation_space.shape
actions = env.action_space.n

net = model.A2Cgru(shape, actions)
net.to(device)
net.share_memory()
optimizer = torch.optim.Adam(net.parameters(), lr=params.lr)

mean_monitor = utils.MeanRewardsMonitor(env, net, ALGORITHM, params.solve_rewards)
writer = SummaryWriter(logdir=mean_monitor.runs_dir, comment=params.frame_stack)
lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
parser.add_argument('--cuda',
                    action='store_true',
                    help='Activate GPU in training')
parser.add_argument('--stack', default=4, type=int,
                    help='Stack N frames in each observation (this changes the network input shape)')
args = parser.parse_args()

params = data.params[args.env]
torch.manual_seed(params.seed)
device = 'cuda' if (args.cuda and torch.cuda.is_available()) else 'cpu'

envs = utils.createEnvs(params, stack_frames=args.stack)
shape = envs[0].observation_space.shape
actions = envs[0].action_space.n

net = model.DDQN(shape, actions).to(device)
tgt_net = ptan.agent.TargetNet(net)
selector = ptan.actions.EpsilonGreedyActionSelector()
eps_tracker = ptan.actions.EpsilonTracker(
    selector, params.eps_start, params.eps_final, params.eps_frames)
agent = ptan.agent.DQNAgent(net, selector, device=device)
exp_src = ptan.experience.ExperienceSourceFirstLast(
    envs, agent, params.gamma, steps_count=params.steps)
parser.add_argument('--skip',
                    type=int,
                    help='Frames to skip when stacking. Must specify when selecting --lw')
parser.add_argument('--model', type=str,
                    help='Path to the trained state dict model')
parser.add_argument('--record', action='store_true',
                    help='Record a video of the game and store it in ~/Videos')
args = parser.parse_args()

params = data.params[args.env]
params.max_steps = None
env = utils.createEnvs(params,
                       stack_frames=args.stack,
                       episodic_life=False,
                       reward_clipping=False,
                       skip=args.skip)[0]
# recording the game
if args.record:
    env = gym.wrappers.Monitor(env, "Videos", force=True)

shape, actions = env.observation_space.shape, env.action_space.n
net = model.A2Cgru(shape, actions)
print(net)
if args.model:
    net.load_state_dict(load(args.model, map_location='cpu'))

selector = ProbabilityActionSelector()
agent = ActorCriticAgent(net, selector, apply_softmax=True)
utils.play(env, agent, wait=args.wait, render=args.render)