def __init__(self, config, logger=None):
    """
    Prepare the output directory, logger and environments, then build the model.

    Args:
        config: object holding the hyperparameters; this method reads
            output_path, log_path, env_names, overwrite_render and recon
        logger: logger instance from the logging module; when None, one is
            obtained from get_logger(config.log_path)
    """
    # make sure the directory for training outputs is there
    output_dir = config.output_path
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # keep the hyper params and settle on a logger
    self.config = config
    if logger is None:
        logger = get_logger(config.log_path)
    self.logger = logger

    # one DQN-wrapped, greyscale 84x84x1 environment per configured name
    self.envs = [
        PreproWrapper(
            wrap_dqn(gym.make(name)),
            prepro=greyscale,
            shape=(84, 84, 1),
            overwrite_render=config.overwrite_render,
        )
        for name in config.env_names
    ]

    self.recon = self.config.recon

    # build model
    self.build()
def record(self):
    """
    Re-create the current env and record a video for one episode.

    The environment named by self.config.env_names[self.index] is created
    from scratch, wrapped in a gym Monitor that writes to
    self.config.record_path (every episode is recorded, resume=True), then
    given the DQN wrappers and the 84x84x1 greyscale preprocessing. One
    episode is run through self.evaluate, and the environment is closed
    afterwards, even if the evaluation raises.
    """
    env = gym.make(self.config.env_names[self.index])
    env = gym.wrappers.Monitor(
        env,
        self.config.record_path,
        video_callable=lambda x: True,  # record every episode
        resume=True,
    )
    env = wrap_dqn(env)
    env = PreproWrapper(env, prepro=greyscale, shape=(84, 84, 1),
                        overwrite_render=self.config.overwrite_render)
    try:
        self.evaluate(env, 1)
    finally:
        # Close explicitly so the Monitor finalizes its output deterministically
        # instead of whenever the env happens to be garbage collected.
        # NOTE(review): assumes PreproWrapper and the wrap_dqn wrappers forward
        # close() to the wrapped env, as gym.Wrapper does -- confirm.
        env.close()
import utils.utils as utils
from utils.srl_algorithms import ls_step, ls_step_dueling
import utils.wrappers as wrappers
import numpy as np
import random
import copy

if __name__ == "__main__":
    # hyperparameter set used for this run
    params = HYPERPARAMS['boxing']

    # run on the first CUDA device when one is available, otherwise on the CPU
    if torch.cuda.is_available():
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')

    # conditional_update (bool):
    # when True, the updated weights are tested before they replace the old
    # ones, and they replace them only if they perform better
    conditional_update = True

    # training environment with the DQN wrappers applied
    env = wrappers.wrap_dqn(gym.make(params['env_name']))
    if conditional_update:
        # second, independently created environment
        # (presumably the one the updated weights are tested on -- confirm)
        test_env = wrappers.wrap_dqn(gym.make(params['env_name']))

    training_random_seed = 2019
    save_freq = 50000
    n_drl = 100000  # number of DRL steps between SRL steps
    n_srl = params['replay_size']  # batch size used in an SRL step
    num_srl_updates = 3  # how many SRL updates to perform
    use_double_dqn = False
    use_dueling_dqn = True
    use_boosting = True
    use_ls_dqn = True
    use_constant_seed = True  # lets runs be compared independently of the randomness
    save_for_analysis = False  # also save the replay buffer for later analysis
You'll find the results, log and video recordings of your agent every 250k under the corresponding file in the results folder. A good way to monitor the progress of the training is to use Tensorboard. The starter code writes summaries of different variables. To launch tensorboard, open a Terminal window and run tensorboard --logdir=results/ Then, connect remotely to address-ip-of-the-server:6006 6006 is the default port used by tensorboard. """ if __name__ == '__main__': # make env env = gym.make(config.env_name) # env = MaxAndSkipEnv(env, skip=config.skip_frame) env = wrap_dqn(env) env = PreproWrapper(env, prepro=greyscale, shape=(84, 84, 1), overwrite_render=config.overwrite_render) # exploration strategy exp_schedule = LinearExploration(env, config.eps_begin, config.eps_end, config.eps_nsteps) # learning rate schedule lr_schedule = LinearSchedule(config.lr_begin, config.lr_end, config.lr_nsteps) # train model model = NatureQN(env, config) model.run(exp_schedule, lr_schedule)
"--env", default=BOXING_ENV_NAME, help="Environment name to use, default=" + DEFAULT_ENV_NAME) parser.add_argument("-r", "--record", help="Directory to store video recording") parser.add_argument("--no-visualize", default=True, action='store_false', dest='visualize', help="Disable visualization of the game play") args = parser.parse_args() use_dueling = True env = gym.make(args.env) env = wrappers.wrap_dqn(env) device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') # env = wrappers.make_env(DEFAULT_ENV_NAME) if args.record: env = gym.wrappers.Monitor(env, args.record) if use_dueling: net = dqn_model.DuelingLSDQN(env.observation_space.shape, env.action_space.n).to(device) else: net = dqn_model.LSDQN(env.observation_space.shape, env.action_space.n).to(device) # net.load_state_dict(torch.load(args.model, map_location=lambda storage, loc: storage)) path_to_model_ckpt = './agent_ckpt/agent_ls_dqn_-boxing.pth' exists = os.path.isfile(path_to_model_ckpt) if exists: if not torch.cuda.is_available():