    env,
    action_noise=action_noise,
    verbose=1,
    tensorboard_log="./h={}/".format(horizons[rank]),
    gamma=0.99,
    learning_rate=0.0003,
)
# model = DDPG.load("Model_DDPG_FS_30.zip")
# model.learning_rate = 0.0003
# model.gamma = 0.99
# action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=0.05 * np.ones(n_actions))
# action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.075 * np.ones(n_actions))
# model.action_noise = action_noise

trainer = Trainer(env)
trainer.retrain_rl(model, episodes=20000, path="./h={}/".format(horizons[rank]))

# ## Training on horizon observations
# env = HorizonObservationWrapper(gym.make("reference_environment:reference-environment-v0"),
#                                 horizon_length=horizons[rank],
#                                 transform_name="Standard")
# trainer = Trainer(env)
# trainer.train_rl(models_to_train=1, episodes_per_model=20000, path='./h={}/'.format(horizons[rank]))

# ## Testing random action wrapper
# env = JoesActionWrapper(gym.make("reference_environment:reference-environment-v0"))
# trainer = Trainer(env)
# trainer.train_rl(models_to_train=1, episodes_per_model=20000)

### Testing phase reward wrapper
from stable_baselines3.common.callbacks import EvalCallback

env_action = RelativeActionWrapper(gym.make("reference_environment:reference-environment-v0"))
env_horizon = HorizonObservationWrapper(env_action, horizon_length=4, transform_name="Deltas")
env = PhaseRewardWrapper(env_horizon, phase="Full")  # Set Phase to Full

eval_callback = EvalCallback(
    env,
    best_model_save_path="./logs/",
    log_path="./logs/",
    eval_freq=500,
    deterministic=True,
    render=False,
)

### DDPG Noise
### Try increasing the noise when retraining.
### Try less noise based on the policy plot.
n_actions = env.action_space.shape[-1]
action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=1 * np.ones(n_actions))
# action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions))

model = DDPG(
    'MlpPolicy',
    env,
    action_noise=action_noise,
    verbose=1,
    tensorboard_log="./logs",
    gamma=0.99,
    learning_rate=0.0003,
)
# model = DDPG.load("Model_DDPG_FS_30.zip")
# model.learning_rate = 0.0003
# model.gamma = 0.99
# action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=0.05 * np.ones(n_actions))
# action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.075 * np.ones(n_actions))
# model.action_noise = action_noise

trainer = Trainer(env)
trainer.retrain_rl(model, episodes=20000)
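
# Note: eval_callback above is constructed but never handed to the training
# call, so no periodic evaluation actually runs; whether Trainer.retrain_rl
# accepts a callback is not verified here. A minimal sketch of wiring it up
# through the standard Stable-Baselines3 API instead, assuming a hypothetical
# total-timestep budget and (as the SB3 docs recommend) a separate evaluation
# environment built with the same wrapper stack as the training env:
#
# eval_env = PhaseRewardWrapper(
#     HorizonObservationWrapper(
#         RelativeActionWrapper(gym.make("reference_environment:reference-environment-v0")),
#         horizon_length=4,
#         transform_name="Deltas",
#     ),
#     phase="Full",
# )
# eval_callback = EvalCallback(eval_env, best_model_save_path="./logs/",
#                              log_path="./logs/", eval_freq=500,
#                              deterministic=True, render=False)
# model.learn(total_timesteps=1_000_000, callback=eval_callback)  # budget is illustrative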