# --- DDPG training + evaluation script for the SPM battery environment ---
# `env` and `MlpPolicy` are expected to be defined earlier in the file.
# Commented-out lines preserve alternative env wrappers / a TD3 variant.

# env = make_vec_env(env_id, n_envs=1, seed=0)
# env = VecCheckNan(env, raise_exception=True)
# env = check_env(env)

# The noise objects for DDPG: zero-mean Gaussian exploration noise,
# sigma = 0.1 per action dimension.
n_actions = env.action_space.shape[-1]
action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=.1 * np.ones(n_actions))

# model = TD3(MlpPolicy, env, action_noise=action_noise, verbose=1, tensorboard_log="./TD3_spm_v2_SOC_point5_two_state/")
model = DDPG(MlpPolicy, env, action_noise=action_noise, verbose=1, tensorboard_log="./DDPG_spm_v2_SOC_point5_two_state/")

model.learn(total_timesteps=25000, tb_log_name='DDPG_test_run_3_SOCpoint5_two_state')
# model.save('DDPG_test_3_SOC_point5_two_states')
# # # model.load('DDPG_test_2_SOC_point5_two_states')

# Average return of the trained policy over 10 evaluation episodes.
mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=10)
print("Mean Reward = ", mean_reward)

# Rollout buffers (created here; presumably appended to later — not visible
# in this chunk, so they stay empty within this block).
epsi_sp_list = []
action_list = []
soc_list = []
Concentration_list = []
Concentration_list1 = []

# One deterministic rollout of 3600 environment steps.
obs = env.reset()
for _ in range(3600):
    action, _states = model.predict(obs, deterministic=True)
    obs, rewards, done, info = env.step(action)
# --- DDPG training + evaluation script (second experiment variant) ---
# `env` is expected to be defined earlier in the file.

# NOTE(review): `lr` is defined but never passed to the model below —
# confirm whether it is consumed later in the file before removing it.
lr = 3e-4

# Instantiate Model
# Exploration noise: zero-mean Gaussian, sigma = 0.75 per action dimension.
# (The original wrote `mean=-30 * np.zeros(n_actions)`: a scalar times a zero
# array is still the zero array, so the -30 factor was a no-op that only
# suggested a nonexistent -30 noise mean. Removed for clarity — behavior
# is unchanged.)
n_actions = env.action_space.shape[-1]
action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=.75 * np.ones(n_actions))

model = DDPG('MlpPolicy', env, action_noise=action_noise, verbose=1)
# model = PPO('MlpPolicy', env, tensorboard_log=log_dir)

# Train OR Load Model
model.learn(total_timesteps=25000)
# model.save(model_dir_description)

# Average return of the trained policy over 10 evaluation episodes.
mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=10)
print("Mean Reward = ", mean_reward)

# Rollout buffers (created here; appended to later, outside this block).
epsi_sp_list = []
action_list = []
soc_list = []
Concentration_list = []
Concentration_list1 = []

# Roll out the learned policy for 3600 steps.
# deterministic=True: evaluation rollouts should not sample exploration
# noise (consistent with the other evaluation rollout in this file and
# with the stable-baselines3 evaluation guidance).
obs = env.reset()
for _ in range(3600):
    action, _states = model.predict(obs, deterministic=True)
    obs, rewards, done, info = env.step(action)
# --- Self-contained DDPG demo on MountainCarContinuous-v0 ---
import gym
import numpy as np

from stable_baselines3 import DDPG
from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise

env = gym.make('MountainCarContinuous-v0')

# The noise objects for DDPG: zero-mean Gaussian exploration noise,
# sigma = 0.1 per action dimension.
n_actions = env.action_space.shape[-1]
action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions))

model = DDPG('MlpPolicy', env, action_noise=action_noise, verbose=1)
model.learn(total_timesteps=1000000, log_interval=10)

# NOTE(review): the save name says "pendulum" while the env is
# MountainCarContinuous — presumably copied from the SB3 docs example.
# Kept as-is so the matching load below still works; confirm before renaming.
model.save("ddpg_pendulum")
env = model.get_env()

del model  # remove to demonstrate saving and loading

model = DDPG.load("ddpg_pendulum")

# Render the reloaded policy indefinitely (demo loop; interrupt to stop).
obs = env.reset()
while True:
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    env.render()