def new_adhdp(capacity=2, off_policy=False):
    replay_hdp = ReplayBuffer(capacity=capacity)
    env_ADHDP = Thickener()
    #exploration = No_Exploration()
    exploration = EGreedy(env_ADHDP.external_u_bounds, epsilon_start=0.5,
                          epsilon_final=0, epsilon_decay=1000)

    adhdp = ADHDP(
        replay_buffer=replay_hdp,
        u_bounds=env_ADHDP.u_bounds,
        #exploration = None,
        exploration=exploration,
        env=env_ADHDP,
        gamma=0.7,
        batch_size=capacity,
        predict_batch_size=32,
        critic_nn_error_limit=0.02,
        actor_nn_error_limit=0.001,
        actor_nn_lr=0.01,
        critic_nn_lr=0.01,
        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_critic=6,
        hidden_actor=6,
        max_iter_c=50,
        off_policy=off_policy,
    )
    return adhdp
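
# Sketch of how a factory like new_adhdp() is typically driven, mirroring
# the run() pattern used elsewhere in these demos. The max_step and
# exp_name values are illustrative assumptions, and reading the env back
# off the controller as `adhdp.env` assumes the controller stores the
# `env=` argument it was constructed with.
def run_adhdp_exp():
    adhdp = new_adhdp(capacity=2, off_policy=False)
    exp = OneRoundExp(env=adhdp.env, controller=adhdp,
                      max_step=300, exp_name="ADHDP")
    res = exp.run()
    eval_res = OneRoundEvaluation(res_list=[res])
    eval_res.plot_all()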
def run():
    # Define the integral penalty term
    penalty = IntegralPenalty(weight_matrix=[200, 0.02], S=[0.1, 0.1])
    #penalty = IntegralPenalty(weight_matrix=[1,1], S=[0.00001,0.00001])

    # Parameters for initializing the env object
    env_para = {
        "dt": 1,
        "normalize": False,
        "noise_in": False,
        "penalty_calculator": penalty,
        "y_star": [1.55, 650],
        "y_start": [1.4, 680]
        #"y_star": np.array([17.32, 0.84], dtype=float)
    }
    env = Thickener(**env_para)
    env.reset()

    # Replay buffer capacity is 1, so batch_size is also 1
    replaybuff = ReplayBuffer(capacity=1)

    # Parameters taken from the paper
    controller = ILPL(env=env,
                      u_bounds=env.u_bounds,
                      replay_buffer=replaybuff,
                      Vm=np.diag([1, 0.01, 0.1, 0.1, 0.1, 0.1]),
                      Lm=np.diag([1, 0.01]),
                      Va=np.diag([1, 0.01, 1, 0.01, 0.1, 0.1]),
                      La=np.diag([1, 1]),
                      Vc=np.diag([1, 0.01, 1, 0.01, 0.1, 0.1]),
                      Lc=np.diag([0.1]),
                      predict_training_rounds=5000,
                      gamma=0.6,
                      batch_size=1,
                      predict_batch_size=32,
                      model_nn_error_limit=0.08,
                      critic_nn_error_limit=0.1,
                      actor_nn_loss=0.6,
                      u_iter=30,
                      u_begin=[80, 38],
                      indice_y=[2, 3],
                      indice_y_star=[0, 1],
                      u_first=[80, 38])

    # Define the experiment block
    exp = OneRoundExp(env=env, controller=controller,
                      max_step=300, exp_name="ILPL")
    res = exp.run()
    eval_res = OneRoundEvaluation(res_list=[res])
    eval_res.plot_all()
def new_vi(capacity=2, batch_size=2):
    predict_round = 3000
    u_optim = 'adam'
    gamma = 0.6
    replay_vi = ReplayBuffer(capacity=capacity)
    env_VI = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    exploration = No_Exploration()

    print('make new vi controller')
    vi = VI(
        replay_buffer=replay_vi,
        u_bounds=env_VI.u_bounds,
        #exploration = None,
        exploration=exploration,
        env=env_VI,
        predict_training_rounds=predict_round,
        gamma=gamma,
        batch_size=batch_size,
        predict_batch_size=32,
        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.001,
        actor_nn_error_limit=0.001,
        actor_nn_lr=0.005,
        critic_nn_lr=0.01,
        model_nn_lr=0.01,
        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=14,
        hidden_actor=14,
        predict_epoch=30,
        Nc=1000,
        u_optim=u_optim,
        img_path=EXP_NAME,
    )
    env_VI.reset()
    vi.train_identification_model()
    #vi.test_predict_model(test_rounds=100)
    return vi
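
# It can help to sanity-check the identification model that
# train_identification_model() just fit before running control
# experiments; the test_predict_model() call commented out above (and
# used live in new_vi_sample below) is assumed to evaluate prediction
# quality over the given number of rounds. A minimal sketch:
def check_vi_model():
    vi = new_vi(capacity=2, batch_size=2)
    vi.test_predict_model(test_rounds=100)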
def new_dhp_vi():
    capacity = 20
    predict_round = 6000
    gamma = 0.6
    replay_DhpVI = ReplayBuffer(capacity=capacity)
    env_DhpVI = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    exploration = No_Exploration()

    print('make new dhp_vi controller')
    dhp_vi = DhpVI(
        replay_buffer=replay_DhpVI,
        u_bounds=env_DhpVI.u_bounds,
        #exploration = None,
        exploration=exploration,
        env=env_DhpVI,
        predict_training_rounds=predict_round,
        gamma=gamma,
        batch_size=20,
        predict_batch_size=32,
        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.01,
        actor_nn_error_limit=0.001,  # 0.005
        actor_nn_lr=0.005,
        critic_nn_lr=0.001,
        model_nn_lr=0.01,
        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=12,
        hidden_actor=14,
        predict_epoch=30,
        Na=2000,
        Nc=100,
        test_period=3,
        max_u_iters=2000,
        policy_visual_period=400,
        img_path=EXP_NAME,
    )
    env_DhpVI.reset()
    dhp_vi.train_identification_model()
    return dhp_vi
def new_dhp():
    capacity = 1
    predict_round = 6000
    gamma = 0.6
    replay_DHP = ReplayBuffer(capacity=capacity)
    env_DHP = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    exploration = No_Exploration()

    print('make new dhp controller')
    dhp = DHP(
        replay_buffer=replay_DHP,
        u_bounds=env_DHP.u_bounds,
        #exploration = None,
        exploration=exploration,
        env=env_DHP,
        predict_training_rounds=predict_round,
        gamma=gamma,
        batch_size=1,
        predict_batch_size=32,
        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.01,
        actor_nn_error_limit=0.001,  # 0.005
        actor_nn_lr=0.005,
        critic_nn_lr=0.001,
        model_nn_lr=0.01,
        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=12,
        hidden_actor=14,
        predict_epoch=30,
        Na=220,
        Nc=100,
        test_period=3,
        img_path=EXP_NAME,
    )
    env_DHP.reset()
    dhp.train_identification_model()
    return dhp
def new_vi_ub():
    capacity = 2
    predict_round = 3000
    u_optim = 'sgd'
    replay_vi = ReplayBuffer(capacity=capacity)
    env_VI = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    exploration = No_Exploration()

    print('make new vi_ub controller')
    vi = VIub(
        replay_buffer=replay_vi,
        u_bounds=env_VI.u_bounds,
        #exploration = None,
        exploration=exploration,
        env=env_VI,
        predict_training_rounds=predict_round,
        gamma=0.6,
        batch_size=capacity,
        predict_batch_size=32,
        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.001,
        actor_nn_error_limit=0.001,
        actor_nn_lr=0.005,
        critic_nn_lr=0.01,
        model_nn_lr=0.01,
        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=14,
        hidden_actor=14,
        predict_epoch=30,
        u_optim=u_optim,
        find_lr=0.4,
        find_time_max=20,
    )
    env_VI.reset()
    vi.train_identification_model()
    return vi
def new_hdp():
    predict_round = 3000
    gamma = 0.6
    replay_hdp = ReplayBuffer(capacity=2)
    env_HDP = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    exploration = No_Exploration()

    print('make new hdp controller')
    hdp = HDP(
        replay_buffer=replay_hdp,
        u_bounds=env_HDP.u_bounds,
        #exploration = None,
        exploration=exploration,
        env=env_HDP,
        predict_training_rounds=predict_round,
        gamma=gamma,
        batch_size=2,
        predict_batch_size=32,
        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.001,
        actor_nn_error_limit=0.001,  # 0.005
        actor_nn_lr=0.003,
        critic_nn_lr=0.02,
        model_nn_lr=0.01,
        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=14,
        hidden_actor=14,
        predict_epoch=30,
        Na=220,
        Nc=500,
        img_path=EXP_NAME,
    )
    env_HDP.reset()
    hdp.train_identification_model()
    return hdp
def new_adhdp(random_act=False):
    period = 20
    capacity = period
    train_period = period
    batch_size = period
    off_policy = False
    replay_hdp = ReplayBuffer(capacity=capacity)
    env_ADHDP = Thickener()
    #exploration = No_Exploration()
    #exploration = EGreedy(env_ADHDP.external_u_bounds, epsilon_start=0.6, epsilon_final=0, epsilon_decay=10)
    exploration = GaussianExploration(
        action_bounds=env_ADHDP.external_u_bounds,
        min_sigma=0.00,
        max_sigma=0.01,
        decay_period=600)
    if random_act:
        exploration = EGreedy(action_bounds=env_ADHDP.external_u_bounds,
                              epsilon_start=1,
                              epsilon_final=1,
                              epsilon_decay=100)
        train_period = 20

    adhdp = ADHDP(
        replay_buffer=replay_hdp,
        u_bounds=env_ADHDP.u_bounds,
        #exploration = None,
        exploration=exploration,
        env=env_ADHDP,
        gamma=0.8,
        batch_size=batch_size,
        predict_batch_size=32,
        critic_nn_error_limit=0.05,
        actor_nn_error_limit=0.001,
        actor_nn_lr=0.003,
        critic_nn_lr=0.05,
        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_critic=16,
        hidden_actor=20,
        off_policy=off_policy,
        Nc=1000,
        Na=50,
        train_period=train_period,
        test_period=1)
    return adhdp
def new_vi_sample(capacity=2, predict_round=3000):
    replay_vi_sample = ReplayBuffer(capacity=capacity)
    env_VI_sample = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    exploration = No_Exploration()

    print('make new vi_sample controller')
    vi_sample = ViSample(
        replay_buffer=replay_vi_sample,
        u_bounds=env_VI_sample.u_bounds,
        #exploration = None,
        exploration=exploration,
        env=env_VI_sample,
        predict_training_rounds=predict_round,
        gamma=0.4,
        batch_size=capacity,
        predict_batch_size=32,
        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.001,
        actor_nn_error_limit=0.001,
        actor_nn_lr=0.005,
        critic_nn_lr=0.01,
        model_nn_lr=0.01,
        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=14,
        hidden_actor=14,
        predict_epoch=30,
    )
    env_VI_sample.reset()
    vi_sample.train_identification_model()
    vi_sample.test_predict_model(test_rounds=100)
    return vi_sample
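
# The factories above make side-by-side comparisons straightforward:
# run each controller for one round and hand all the results to
# OneRoundEvaluation. This is a sketch under two assumptions: that the
# controller exposes its env as `.env`, and that res_list accepts
# several results (only the single-result form appears in this file;
# the name and list type suggest it). max_step is illustrative.
def compare_controllers():
    results = []
    for name, make in [("HDP", new_hdp), ("DHP", new_dhp), ("VI", new_vi)]:
        controller = make()
        exp = OneRoundExp(env=controller.env, controller=controller,
                          max_step=300, exp_name=name)
        results.append(exp.run())
    OneRoundEvaluation(res_list=results).plot_all()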
import numpy as np
import math
import Control_Exp1001 as CE
import os
import json
from Control_Exp1001.demo.thickener.adhdp import ADHDP
from Control_Exp1001.simulation.thickener import Thickener
from Control_Exp1001.common.penaltys.demo_penalty import DemoPenalty
import matplotlib.pyplot as plt
from Control_Exp1001.demo.thickener.one_round_exp import OneRoundExp
from Control_Exp1001.demo.thickener.one_round_evaluation import OneRoundEvaluation
from Control_Exp1001.common.action_noise.e_greedy import EGreedy
from Control_Exp1001.common.replay.replay_buffer import ReplayBuffer

replay_hdp = ReplayBuffer(capacity=20)
env_ADHDP = Thickener()
exploration = EGreedy(epsilon_start=1,
                      epsilon_final=0.0001,
                      epsilon_decay=300,
                      action_bounds=env_ADHDP.u_bounds)

adhdp = ADHDP(
    replay_buffer=replay_hdp,
    u_bounds=env_ADHDP.u_bounds,
    #exploration = None,
    exploration=exploration,
    env=env_ADHDP,
    gamma=0.1,
    batch_size=10,
    predict_batch_size=32,
import numpy as np
import math
import Control_Exp1001 as CE
import os
import json
from Control_Exp1001.demo.thickener.hdp_sample import HDP_sample
from Control_Exp1001.simulation.thickener import Thickener
from Control_Exp1001.common.penaltys.demo_penalty import DemoPenalty
import matplotlib.pyplot as plt
from Control_Exp1001.demo.thickener.one_round_exp import OneRoundExp
from Control_Exp1001.demo.thickener.one_round_evaluation import OneRoundEvaluation
from Control_Exp1001.common.action_noise.e_greedy import EGreedy
from Control_Exp1001.common.replay.replay_buffer import ReplayBuffer

replay_hdp_sample = ReplayBuffer(capacity=30)
env_HDP_sample = Thickener(noise_p=0.01, noise_in=True)
exploration = EGreedy(epsilon_start=0.0,
                      epsilon_final=0.0000,
                      epsilon_decay=100,
                      action_bounds=env_HDP_sample.u_bounds)

hdp_sample = HDP_sample(
    replay_buffer=replay_hdp_sample,
    u_bounds=env_HDP_sample.u_bounds,
    #exploration = None,
    exploration=exploration,
    env=env_HDP_sample,
    predict_training_rounds=3000,
    gamma=0.1,
    batch_size=10,
           y_low=[-15, -15],
           y_high=[15, 15],
           u_high=[2, 2],
           u_low=[-2, -2],
           reward_calculator=reward2,
           normalize=False)
env3 = Env(dt=1,
           size_yudc=[2, 2, 0, 2],
           y_low=[-15, -15],
           y_high=[15, 15],
           u_high=[2, 2],
           u_low=[-2, -2],
           reward_calculator=reward3,
           normalize=False)

replay_buffer1 = ReplayBuffer(1000)
replay_buffer2 = ReplayBuffer(100)

exploration_noise1 = EGreedy(
    action_bounds=env1.u_bounds,
    epsilon_start=0.5,
    epsilon_final=0.4,
    epsilon_decay=100000,
)
# The EGreedy noise above is immediately overridden: exploration for
# env1 is effectively disabled.
exploration_noise1 = No_Exploration()

exploration_noise2 = GaussianExploration(
    action_bounds=env2.external_u_bounds,
    min_sigma=1.0,
    max_sigma=1.01,
    decay_period=100000)
def run():
    # Define the integral penalty term
    penalty = IntegralPenalty(weight_matrix=[1, 1], S=[0.1, 0.1])
    #penalty = IntegralPenalty(weight_matrix=[1,1], S=[0.00001,0.00001])

    # Parameters for initializing the env object
    env_para = {
        "dt": 20,
        "normalize": False,
        "penalty_calculator": penalty,
        "one_step_length": 0.005,
        #"y_star": np.array([17.32, 0.84], dtype=float)
        "y_star": np.array([17.3, 0.8], dtype=float)
    }

    # Check how the system converges under env_para
    # Flotation.flotation_test(init_para=env_para)
    # simulation_test(Flotation, init_para=env_para, mode="const", const_u=[[1.8, 19]])
    # return

    env = Flotation(**env_para)
    # Redefine y_star
    env.y_star = np.array([17.32, 0.84])

    # Replay buffer capacity is 1, so batch_size is also 1
    replaybuff = ReplayBuffer(capacity=1)

    # Parameters taken from the paper
    controller = ILPL(env=env,
                      u_bounds=env.u_bounds,
                      replay_buffer=replaybuff,
                      Vm=np.diag([0.1, 1, 1, 0.1]),
                      Lm=np.diag([0.1, 1]),
                      Va=np.diag([0.1, 1, 0.1, 1]),
                      La=np.diag([0.1, 1]),
                      Vc=np.diag([0.1, 1, 0.1, 1]),
                      Lc=np.diag([0.1]),
                      predict_training_rounds=5000,
                      gamma=0.6,
                      batch_size=1,
                      predict_batch_size=32,
                      model_nn_error_limit=0.08,
                      critic_nn_error_limit=0.1,
                      actor_nn_loss=0.6,
                      u_iter=30,
                      u_begin=[1.5, 20],
                      indice_y=[2, 3],
                      indice_y_star=[0, 1],
                      )

    # Define the experiment block
    exp = FlotationExp(
        env=env,
        controller=controller,
        max_step=200,
        exp_name="ILPL"
    )
    exp.run()