    config.lwf = args.lwf
    config.lwf_loss = args.lwf_loss
    config.lwf_weight = args.lwf_weight
    config.num_old_actions = int(args.num_old_actions)
    return config


def print_config(config):
    print('Current config:\n')
    variables = zip(vars(config).keys(), vars(config).values())
    for var, val in sorted(variables):
        print(var + ' = ' + str(val))


if __name__ == '__main__':
    args = parse_args()
    my_config = modify_config(args)
    print_config(my_config)

    with tf.device('/gpu:' + str(args.gpu)):
        # make env
        env = gym.make(my_config.env_name)
        env = wrap_dqn(env)

        # evaluate the trained model (and optionally record a video)
        model = NatureQN(env, my_config)
        model.initialize_eval()
        model.evaluate()
        if my_config.record:
            model.record()
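# The script above assumes a parse_args() helper exposing the flags it reads
# (args.lwf, args.lwf_loss, args.lwf_weight, args.num_old_actions, args.gpu).
# Below is a minimal sketch of what such a helper could look like with
# argparse; the defaults, types, and help strings are assumptions for
# illustration, not the repository's actual definitions.
import argparse

def parse_args():
    parser = argparse.ArgumentParser(description='Evaluate a trained DQN agent')
    parser.add_argument('--lwf', action='store_true',
                        help='enable the learning-without-forgetting loss (assumed flag)')
    parser.add_argument('--lwf_loss', type=str, default='l2',
                        help='which LwF distillation loss to use (assumed default)')
    parser.add_argument('--lwf_weight', type=float, default=1.0,
                        help='weight on the LwF loss term (assumed default)')
    parser.add_argument('--num_old_actions', type=int, default=0,
                        help='number of actions in the old task head (assumed default)')
    parser.add_argument('--gpu', type=int, default=0,
                        help='index of the GPU to run on')
    return parser.parse_args()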
A good way to monitor the progress of the training is to use Tensorboard.
The starter code writes summaries of different variables.

To launch tensorboard, open a Terminal window and run

    tensorboard --logdir=results/

Then, connect remotely to

    address-ip-of-the-server:6006

6006 is the default port used by tensorboard.
"""

if __name__ == '__main__':
    # make env
    starttime = time.time()
    env = gym.make(config.env_name)
    env = MaxAndSkipEnv(env, skip=config.skip_frame)
    env = PreproWrapper(env, prepro=greyscale, shape=(80, 80, 1),
                        overwrite_render=config.overwrite_render)

    # exploration strategy
    exp_schedule = LinearExploration(env, config.eps_begin,
                                     config.eps_end, config.eps_nsteps)

    # learning rate schedule
    lr_schedule = LinearSchedule(config.lr_begin, config.lr_end,
                                 config.lr_nsteps)

    # train model
    model = NatureQN(env, config)
    model.run(exp_schedule, lr_schedule)
    print('Total render time : {:.2f}'.format(time.time() - starttime))
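# LinearExploration and LinearSchedule above anneal a value (epsilon, learning
# rate) from a start value to an end value over a fixed number of steps. The
# sketch below shows only the linear interpolation such a schedule is assumed
# to perform; the real classes in the starter code also handle epsilon-greedy
# action sampling and updating their internal step counter.
def linear_interp(t, val_begin, val_end, nsteps):
    """Value of a linear schedule at step t, clamped to val_end after nsteps."""
    if t >= nsteps:
        return val_end
    frac = float(t) / nsteps
    return val_begin + frac * (val_end - val_begin)

# example: epsilon annealed from 1.0 to 0.1 over 1,000,000 steps
# linear_interp(500000, 1.0, 0.1, 1000000) -> 0.55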
A good way to monitor the progress of the training is to use Tensorboard.
The starter code writes summaries of different variables.

To launch tensorboard, open a Terminal window and run

    tensorboard --logdir=results/

Then, connect remotely to

    address-ip-of-the-server:6006

6006 is the default port used by tensorboard.
"""

if __name__ == '__main__':
    # make env
    env = gym.make(config.env_name)
    env = MaxAndSkipEnv(env, skip=config.skip_frame)
    env = PreproWrapper(env, prepro=greyscale, shape=(80, 80, 1),
                        overwrite_render=config.overwrite_render)

    # exploration strategy
    exp_schedule = LinearExploration(env, config.eps_begin,
                                     config.eps_end, config.eps_nsteps)

    # learning rate schedule
    lr_schedule = LinearSchedule(config.lr_begin, config.lr_end,
                                 config.lr_nsteps)

    # train model
    model = NatureQN(env, config)
    model.original_schedule = config.original_schedule
    model.logger.info('original schedule: {}'.format(model.original_schedule))
    model.run(exp_schedule, lr_schedule)
    env = PreproWrapper(env, prepro=greyscale, shape=(80, 80, 1),
                        overwrite_render=config.overwrite_render)

    rewards = []

    # meta graphs and checkpoints of the pre-trained expert models
    experts_meta_lis = [
        './core/checkpoints/q_learning/skip_connection/q5_train_atari_nature/deepdqn_weights/.meta',
        './core/checkpoints/q_learning/skip_connection/q5_train_atari_nature/resnet_weights/.meta',
        './core/checkpoints/policy_gradients/policy_network.ckpt.meta']
    experts_chkpt_lis = [
        './core/checkpoints/q_learning/skip_connection/q5_train_atari_nature/deepdqn_weights/',
        './core/checkpoints/q_learning/skip_connection/q5_train_atari_nature/resnet_weights/',
        './core/checkpoints/policy_gradients/policy_network.ckpt']

    experts = []
    # temp_sess = None
    for meta_path, chkpt_path in zip(experts_meta_lis, experts_chkpt_lis):
        # debug: dump the names of all nodes currently in the default graph
        print([n.name for n in tf.get_default_graph().as_graph_def().node])
        if "deepdqn" in meta_path:
            model = NatureQN(env, config)
        if "resnet" in meta_path:
            model = ResnetQN(env, config)
        if "policy" in meta_path:
            continue
        # if temp_sess is None:
        #     temp_sess = model.sess
        model.initialize(meta_path, chkpt_path)
        experts.append(model)
        # with model.graph.as_default():

    print("LOADED ALL MODELS")

    for i in range(len(experts)):
        guide = experts[i]
        guide_experience = [[]]
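# The call model.initialize(meta_path, chkpt_path) above is expected to rebuild
# each expert's graph from its .meta file and restore its weights from the
# corresponding checkpoint. A minimal sketch of that restore step with the TF1
# API is shown below; restore_expert is a hypothetical helper for illustration,
# not a function from this repository.
import tensorflow as tf

def restore_expert(meta_path, chkpt_path):
    """Load a saved graph definition and its weights into a fresh session."""
    graph = tf.Graph()
    with graph.as_default():
        saver = tf.train.import_meta_graph(meta_path)   # rebuild the graph
        sess = tf.Session(graph=graph)
        # chkpt_path may be a directory (use the latest checkpoint inside it)
        # or a concrete checkpoint prefix such as policy_network.ckpt
        ckpt = tf.train.latest_checkpoint(chkpt_path) or chkpt_path
        saver.restore(sess, ckpt)                        # load the weights
    return sess, graph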
If so, please report your hyperparameters.

You'll find the results, logs and video recordings of your agent every 250k steps
under the corresponding file in the results folder. A good way to monitor the
progress of the training is to use Tensorboard. The starter code writes summaries
of different variables.

To launch tensorboard, open a Terminal window and run

    tensorboard --logdir=results/

Then, connect remotely to

    address-ip-of-the-server:6006

6006 is the default port used by tensorboard.
"""

if __name__ == '__main__':
    # make env
    env = gym.make(config.env_name)
    env = MaxAndSkipEnv(env, skip=config.skip_frame)
    env = PreproWrapper(env, prepro=greyscale, shape=(80, 80, 1),
                        overwrite_render=config.overwrite_render)

    # load model
    model = NatureQN(env, config)
    model.initialize()
    loaded = load_model(model)
    assert loaded, "Loading failed"

    # evaluate one episode of data
    model.evaluate(env, 1)
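# load_model is not defined in this fragment. The sketch below shows one way
# such a helper could restore the most recent checkpoint into the model's
# session; the attribute names model.sess and model.config.model_output are
# assumptions about the starter code, not confirmed by it.
import tensorflow as tf

def load_model(model):
    """Restore the latest checkpoint; return False if none is found."""
    ckpt = tf.train.latest_checkpoint(model.config.model_output)
    if ckpt is None:
        return False
    saver = tf.train.Saver()
    saver.restore(model.sess, ckpt)
    return True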