def watch(sess_):
    # Run the current policy in a separate rendered environment for visual inspection.
    with sess_.as_default():
        env_monitor = GymEnvironment(env_name=env_name, log_path=None, render=True)
        info_ = env_monitor.reset()[brain_name]
        trainer_monitor = Trainer(ppo_model, sess_, info_, is_continuous, use_observations, use_states, False)
        steps_ = sess_.run([ppo_model.global_step])
        print("Starting watcher.")
        while True:
            done = False
            info_ = env_monitor.reset()[brain_name]
            while not done:
                # stochastic=False: act on the policy mean rather than sampling.
                info_ = trainer_monitor.take_action(info_, env_monitor, brain_name, steps_, normalize,
                                                    stochastic=False)
                done = info_.local_done[0]
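# Hypothetical usage sketch (not part of the original code): the watcher is meant to be
# launched with a session in which trained weights have been restored. The checkpoint
# path "./models" and the helper name `run_watcher` are assumptions for illustration.
def run_watcher(model_path="./models"):
    saver = tf.train.Saver()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        ckpt = tf.train.latest_checkpoint(model_path)
        if ckpt is not None:
            saver.restore(sess, ckpt)
        watch(sess)  # runs until interrupted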
# summary-freq=<n>   Frequency at which to save training statistics [default: 10000].
summary_freq = buffer_size * 5
# save-freq=<n>      Frequency at which to save model [default: 50000].
save_freq = summary_freq
# train              Whether to train model, or only run inference [default: False].
train_model = True
# render             Render the environment to display progress.
render = True
# record             Save recordings of episodes.
record = False

# "-1" disables CUDA; set to "0" to use the first GPU (a GPU is not efficient for this small model).
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

env_name = 'RocketLander-v0'
env = GymEnvironment(env_name=env_name, log_path="./PPO_log", skip_frames=5)
env_render = GymEnvironment(env_name=env_name, log_path="./PPO_log_render", render=True, record=record)
fps = env_render.env.metadata.get('video.frames_per_second', 30)
print(str(env))
brain_name = env.external_brain_names[0]
tf.reset_default_graph()
ppo_model = create_agent_model(env, lr=learning_rate, h_size=hidden_units, epsilon=epsilon,
                               beta=beta, max_step=max_steps, normalize=normalize, num_layers=num_layers)
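# Illustrative only: how `summary_freq` and `save_freq` are typically consumed inside the
# training loop (the real loop lives in the trainer; `sess`, `saver`, `summary_writer`,
# `steps`, `model_path`, and the helper name `maybe_log_and_save` are assumed names).
def maybe_log_and_save(sess, saver, summary_writer, steps, model_path="./models"):
    # Flush training statistics every `summary_freq` steps and checkpoint every `save_freq` steps.
    if steps > 0 and steps % summary_freq == 0:
        summary_writer.flush()
    if steps > 0 and steps % save_freq == 0 and train_model:
        saver.save(sess, model_path + "/model-" + str(steps) + ".ckpt")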
hidden_units = 128
# lambd=<n>          Lambda parameter for GAE [default: 0.95].
lambd = 0.95
# learning-rate=<rate>  Model learning rate [default: 3e-4].
learning_rate = 3e-4
# normalize          Whether to normalize the state input using running statistics [default: False].
normalize = False
# num-epoch=<n>      Number of gradient descent steps per batch of experiences [default: 5].
num_epoch = 5
# num-layers=<n>     Number of hidden layers between state/observation and outputs [default: 2].
num_layers = 1
# time-horizon=<n>   How many steps to collect per agent before adding to buffer [default: 2048].
time_horizon = 1024

env_name = 'RocketLander-v0'
env = GymEnvironment(env_name=env_name, log_path="./PPO_log")
print(str(env))
brain_name = env.external_brain_names[0]
tf.reset_default_graph()
ppo_model = create_agent_model(env, lr=learning_rate, h_size=hidden_units, epsilon=epsilon,
                               beta=beta, max_step=max_steps, normalize=normalize, num_layers=num_layers)
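# Illustrative only: a minimal NumPy sketch of Generalized Advantage Estimation, showing
# what the `lambd` parameter above controls over one `time_horizon`-length rollout.
# This is not the trainer's actual buffer code; the function name `gae_advantages` and
# the `gamma`/`bootstrap_value` arguments are assumptions for illustration.
import numpy as np

def gae_advantages(rewards, values, bootstrap_value, gamma=0.99, lambd=0.95):
    # delta_t = r_t + gamma * V(s_{t+1}) - V(s_t), then accumulated with weight (gamma * lambd).
    rewards = np.asarray(rewards, dtype=np.float32)
    values = np.append(np.asarray(values, dtype=np.float32), bootstrap_value)
    deltas = rewards + gamma * values[1:] - values[:-1]
    advantages = np.zeros(len(rewards), dtype=np.float32)
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = deltas[t] + gamma * lambd * running
        advantages[t] = running
    return advantages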