# (head of this DDPG call reconstructed from the identical call further below;
#  the original snippet is truncated at this point)
model = DDPG(
    'MlpPolicy',
    env,
    action_noise=action_noise,
    verbose=1,
    tensorboard_log="./h={}/".format(horizons[rank]),
    gamma=0.99,
    learning_rate=0.0003,
)
# model = DDPG.load("Model_DDPG_FS_30.zip")
# model.learning_rate = 0.0003
# model.gamma = 0.99
# action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=0.05*np.ones(n_actions))
# action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.075 * np.ones(n_actions))
# model.action_noise = action_noise
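### If the commented-out checkpoint path above is re-enabled, the loaded model
### also needs the current (wrapped) environment attached before retraining;
### model.set_env is the standard SB3 call for that (sketch, not part of the
### original run):
# model.set_env(env)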
trainer = Trainer(env)
trainer.retrain_rl(model,
                   episodes=20000,
                   path="./h={}/".format(horizons[rank]))

# ## Training on horizon observations
# env = HorizonObservationWrapper(gym.make("reference_environment:reference-environment-v0"),
#                               horizon_length=horizons[rank],
#                               transform_name="Standard")
# trainer = Trainer(env)
# trainer.train_rl(models_to_train=1, episodes_per_model=20000, path='./h={}/'.format(horizons[rank]))

# ## Testing random action wrapper
# env = JoesActionWrapper(gym.make("reference_environment:reference-environment-v0"))
# trainer = Trainer(env)
# trainer.train_rl(models_to_train=1, episodes_per_model=20000)

### Testing phase reward wrapper
import gym
import numpy as np

from stable_baselines3 import DDPG
from stable_baselines3.common.callbacks import EvalCallback
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise
# Project-local helpers used below (import path assumed, adjust to the project layout):
# from util import Trainer, RelativeActionWrapper, HorizonObservationWrapper, PhaseRewardWrapper
env_action = RelativeActionWrapper(gym.make("reference_environment:reference-environment-v0"))
env_horizon = HorizonObservationWrapper(
    env_action,
    horizon_length=4,
    transform_name="Deltas",
)
env = PhaseRewardWrapper(env_horizon, phase="Full")          # Set Phase to Full
eval_callback = EvalCallback(env, best_model_save_path='./logs/',
                             log_path='./logs/', eval_freq=500,
                             deterministic=True, render=False)
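### Note: eval_callback is built here but never passed to a learn() call in this
### snippet; training instead goes through Trainer.retrain_rl below. If SB3 were
### driven directly, the callback would be wired in roughly like this (sketch,
### assuming a timestep budget rather than the episode count used below):
# model.learn(total_timesteps=100_000, callback=eval_callback)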


### DDPG Noise
### Try increasing the noise when retraining.
### Try less noise based on the policy plot.
n_actions = env.action_space.shape[-1]
action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=1 * np.ones(n_actions))
# action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions))
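### A possible refinement (not in the original): scale sigma to the action-space
### range so the exploration magnitude stays proportional to the actual action
### bounds instead of using a fixed value.
# act_range = env.action_space.high - env.action_space.low
# action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * act_range)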

model = DDPG(
    'MlpPolicy',
    env,
    action_noise=action_noise,
    verbose=1,
    tensorboard_log="./logs",
    gamma=0.99,
    learning_rate=0.0003,
)
# model = DDPG.load("Model_DDPG_FS_30.zip")
# model.learning_rate = 0.0003
# model.gamma = 0.99
# action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=0.05*np.ones(n_actions))
# action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.075 * np.ones(n_actions))
# model.action_noise = action_noise
trainer = Trainer(env)
trainer.retrain_rl(model, episodes=20000)
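### After retraining, the model can be saved and sanity-checked with SB3's
### evaluate_policy (a sketch with an illustrative filename; Trainer.retrain_rl
### may already handle saving):
# from stable_baselines3.common.evaluation import evaluate_policy
# model.save("Model_DDPG_phase_full")
# mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10, deterministic=True)
# print(f"mean reward: {mean_reward:.2f} +/- {std_reward:.2f}")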