Example #1
File: ppo.py Project: AliBaheri/PPO
def watch(sess_):
    """Run the trained policy in a separate rendered environment for visual inspection."""
    with sess_.as_default():
        # Rendered environment instance used only for monitoring (no logging).
        env_monitor = GymEnvironment(env_name=env_name,
                                     log_path=None,
                                     render=True)
        info_ = env_monitor.reset()[brain_name]
        # Trainer in inference mode: the final False disables training updates.
        trainer_monitor = Trainer(ppo_model, sess_, info_, is_continuous,
                                  use_observations, use_states, False)
        # Current global step (returned as a single-element list).
        steps_ = sess_.run([ppo_model.global_step])

        print("Starting watcher.")
        while True:
            done = False
            info_ = env_monitor.reset()[brain_name]
            while not done:
                # Act deterministically (stochastic=False) and step the environment.
                info_ = trainer_monitor.take_action(info_,
                                                    env_monitor,
                                                    brain_name,
                                                    steps_,
                                                    normalize,
                                                    stochastic=False)
                done = info_.local_done[0]
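
A minimal sketch of how watch could be driven, assuming the graph and ppo_model have already been built as in the other examples; the checkpoint directory ./PPO_model and the restore logic are illustrative assumptions, not taken from the project:

# Illustrative only: restore the latest checkpoint (if any) and start the watcher.
# The "./PPO_model" path is an assumption, not project code.
import tensorflow as tf

saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state("./PPO_model")
    if ckpt is not None and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
    watch(sess)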
Example #2
# summary-freq=<n>         Frequency at which to save training statistics [default: 10000].
summary_freq = buffer_size * 5
# save-freq=<n>            Frequency at which to save model [default: 50000].
save_freq = summary_freq
# train                    Whether to train model, or only run inference [default: False].
train_model = True
# render environment to display progress
render = True
# save recordings of episodes
record = False

# Force CPU-only execution; the GPU is not efficient for this workload
# (set to "0" to run on the first GPU instead).
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

env_name = 'RocketLander-v0'
env = GymEnvironment(env_name=env_name, log_path="./PPO_log", skip_frames=5)
env_render = GymEnvironment(env_name=env_name,
                            log_path="./PPO_log_render",
                            render=True,
                            record=record)
fps = env_render.env.metadata.get('video.frames_per_second', 30)

print(str(env))
brain_name = env.external_brain_names[0]

tf.reset_default_graph()

ppo_model = create_agent_model(env,
                               lr=learning_rate,
                               h_size=hidden_units,
                               epsilon=epsilon,
                               beta=beta,
                               max_step=max_steps,
                               normalize=normalize,
                               num_layers=num_layers)
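
The snippet above only configures save_freq and summary_freq; how they are consumed is not shown. As a rough, hypothetical sketch of the kind of TF1 bookkeeping a training loop could do with them (the checkpoint path, the loop structure, and the placement of the elided PPO update are assumptions, not project code):

# Hypothetical sketch: save a checkpoint every save_freq global steps and
# report statistics every summary_freq steps. "./PPO_model" is an assumption.
saver = tf.train.Saver(max_to_keep=5)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    while train_model:
        # ... one iteration of experience collection and PPO updates ...
        steps = sess.run(ppo_model.global_step)
        if steps % summary_freq == 0:
            print("Step {}: writing training statistics".format(steps))
        if steps % save_freq == 0:
            saver.save(sess, "./PPO_model/model-{}.ckpt".format(steps))
        if steps >= max_steps:
            break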
Example #3
File: ppo.py Project: AliBaheri/PPO
hidden_units = 128
# lambd=<n>                Lambda parameter for GAE [default: 0.95].
lambd = 0.95
# learning-rate=<rate>     Model learning rate [default: 3e-4].
learning_rate = 3e-4
# normalize                Whether to normalize the state input using running statistics [default: False].
normalize = False
# num-epoch=<n>            Number of gradient descent steps per batch of experiences [default: 5].
num_epoch = 5
# num-layers=<n>           Number of hidden layers between state/observation and outputs [default: 2].
num_layers = 1
# time-horizon=<n>         How many steps to collect per agent before adding to buffer [default: 2048].
time_horizon = 1024

env_name = 'RocketLander-v0'
env = GymEnvironment(env_name=env_name, log_path="./PPO_log")

print(str(env))
brain_name = env.external_brain_names[0]

tf.reset_default_graph()

ppo_model = create_agent_model(env,
                               lr=learning_rate,
                               h_size=hidden_units,
                               epsilon=epsilon,
                               beta=beta,
                               max_step=max_steps,
                               normalize=normalize,
                               num_layers=num_layers)
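
The lambd hyperparameter above is the λ of Generalized Advantage Estimation (GAE). As a self-contained illustration of what it controls (the gamma value, the NumPy helper, and the toy rollout below are illustrative assumptions, not taken from the project):

import numpy as np

def gae_advantages(rewards, values, bootstrap_value, gamma=0.99, lambd=0.95):
    """Compute GAE advantages: A_t = sum_l (gamma*lambd)^l * delta_{t+l},
    where delta_t = r_t + gamma * V(s_{t+1}) - V(s_t)."""
    values = np.append(values, bootstrap_value)
    deltas = rewards + gamma * values[1:] - values[:-1]
    advantages = np.zeros_like(deltas)
    running = 0.0
    # Accumulate the exponentially weighted sum of TD errors backwards in time.
    for t in reversed(range(len(deltas))):
        running = deltas[t] + gamma * lambd * running
        advantages[t] = running
    return advantages

# Toy example: a 4-step rollout with constant rewards and value estimates.
print(gae_advantages(np.array([1.0, 1.0, 1.0, 1.0]),
                     np.array([0.5, 0.5, 0.5, 0.5]),
                     bootstrap_value=0.0))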