def new_vi(capacity=2, batch_size=2):
    """Build, initialize, and return a VI controller on a fresh Thickener env.

    The environment is created with input noise enabled, the controller's
    identification model is pre-trained before returning.

    Args:
        capacity: replay-buffer capacity for the VI controller.
        batch_size: mini-batch size used by the controller's training.

    Returns:
        A VI controller whose identification model has been trained.
    """
    predict_round = 3000
    u_optim = 'adam'
    gamma = 0.6
    replay_vi = ReplayBuffer(capacity=capacity)
    env_VI = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    exploration = No_Exploration()
    print('make new vi controller')
    vi = VI(
        replay_buffer=replay_vi,
        u_bounds=env_VI.u_bounds,
        exploration=exploration,
        env=env_VI,
        predict_training_rounds=predict_round,
        gamma=gamma,
        batch_size=batch_size,
        predict_batch_size=32,
        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.001,
        actor_nn_error_limit=0.001,
        actor_nn_lr=0.005,
        critic_nn_lr=0.01,
        model_nn_lr=0.01,
        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=14,
        hidden_actor=14,
        predict_epoch=30,
        Nc=1000,
        u_optim=u_optim,
        img_path=EXP_NAME,
    )
    env_VI.reset()
    # Pre-train the system-identification model before handing the
    # controller to the caller.
    vi.train_identification_model()
    return vi
def new_dhp_vi():
    """Create a DhpVI controller on a new noisy Thickener environment.

    The environment is reset and the controller's identification model is
    trained before the controller is returned.
    """
    buffer_size = 20
    training_rounds = 6000
    discount = 0.6

    buffer = ReplayBuffer(capacity=buffer_size)
    env = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    explorer = No_Exploration()

    print('make new dhp_vi controller')
    controller = DhpVI(
        replay_buffer=buffer,
        u_bounds=env.u_bounds,
        #exploration = None,
        exploration=explorer,
        env=env,
        predict_training_rounds=training_rounds,
        gamma=discount,
        batch_size=20,
        predict_batch_size=32,
        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.01,
        actor_nn_error_limit=0.001,  # 0.005
        actor_nn_lr=0.005,
        critic_nn_lr=0.001,
        model_nn_lr=0.01,
        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=12,
        hidden_actor=14,
        predict_epoch=30,
        Na=2000,
        Nc=100,
        test_period=3,
        max_u_iters=2000,
        policy_visual_period=400,
        img_path=EXP_NAME,
    )

    env.reset()
    controller.train_identification_model()
    return controller
def new_dhp():
    """Create a DHP controller on a new noisy Thickener environment.

    Resets the environment and pre-trains the identification model
    before returning the controller.
    """
    buffer_size = 1
    training_rounds = 6000
    discount = 0.6

    buffer = ReplayBuffer(capacity=buffer_size)
    env = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    explorer = No_Exploration()

    print('make new dhp controller')
    controller = DHP(
        replay_buffer=buffer,
        u_bounds=env.u_bounds,
        #exploration = None,
        exploration=explorer,
        env=env,
        predict_training_rounds=training_rounds,
        gamma=discount,
        batch_size=1,
        predict_batch_size=32,
        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.01,
        actor_nn_error_limit=0.001,  # 0.005
        actor_nn_lr=0.005,
        critic_nn_lr=0.001,
        model_nn_lr=0.01,
        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=12,
        hidden_actor=14,
        predict_epoch=30,
        Na=220,
        Nc=100,
        test_period=3,
        img_path=EXP_NAME,
    )

    env.reset()
    controller.train_identification_model()
    return controller
def new_vi_ub():
    """Create a VIub controller (SGD u-optimizer) on a noisy Thickener env.

    The replay-buffer capacity doubles as the training batch size.
    Pre-trains the identification model before returning.
    """
    buffer_size = 2
    training_rounds = 3000
    optimizer_name = 'sgd'

    buffer = ReplayBuffer(capacity=buffer_size)
    env = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    explorer = No_Exploration()

    print('make new viuk controller')
    controller = VIub(
        replay_buffer=buffer,
        u_bounds=env.u_bounds,
        #exploration = None,
        exploration=explorer,
        env=env,
        predict_training_rounds=training_rounds,
        gamma=0.6,
        batch_size=buffer_size,
        predict_batch_size=32,
        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.001,
        actor_nn_error_limit=0.001,
        actor_nn_lr=0.005,
        critic_nn_lr=0.01,
        model_nn_lr=0.01,
        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=14,
        hidden_actor=14,
        predict_epoch=30,
        u_optim=optimizer_name,
        find_lr=0.4,
        find_time_max=20,
    )

    env.reset()
    controller.train_identification_model()
    return controller
def new_hdp():
    """Create an HDP controller on a new noisy Thickener environment.

    Resets the environment and pre-trains the identification model
    before returning the controller.
    """
    training_rounds = 3000
    discount = 0.6

    buffer = ReplayBuffer(capacity=2)
    env = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    explorer = No_Exploration()

    print('make new hdp controller')
    controller = HDP(
        replay_buffer=buffer,
        u_bounds=env.u_bounds,
        #exploration = None,
        exploration=explorer,
        env=env,
        predict_training_rounds=training_rounds,
        gamma=discount,
        batch_size=2,
        predict_batch_size=32,
        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.001,
        actor_nn_error_limit=0.001,  # 0.005
        actor_nn_lr=0.003,
        critic_nn_lr=0.02,
        model_nn_lr=0.01,
        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=14,
        hidden_actor=14,
        predict_epoch=30,
        Na=220,
        Nc=500,
        img_path=EXP_NAME,
    )

    env.reset()
    controller.train_identification_model()
    return controller
def new_vi_sample(capacity=2, predict_round=3000):
    """Create a ViSample controller on a new noisy Thickener environment.

    Args:
        capacity: replay-buffer capacity; also used as the batch size.
        predict_round: number of identification-model training rounds.

    Returns:
        A ViSample controller with its identification model trained and
        evaluated over 100 prediction test rounds.
    """
    buffer = ReplayBuffer(capacity=capacity)
    env = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    explorer = No_Exploration()

    print('make new vi_sample controller')
    controller = ViSample(
        replay_buffer=buffer,
        u_bounds=env.u_bounds,
        #exploration = None,
        exploration=explorer,
        env=env,
        predict_training_rounds=predict_round,
        gamma=0.4,
        batch_size=capacity,
        predict_batch_size=32,
        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.001,
        actor_nn_error_limit=0.001,
        actor_nn_lr=0.005,
        critic_nn_lr=0.01,
        model_nn_lr=0.01,
        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=14,
        hidden_actor=14,
        predict_epoch=30,
    )

    env.reset()
    controller.train_identification_model()
    controller.test_predict_model(test_rounds=100)
    return controller
y_low=[-15, -15], y_high=[15, 15], u_high=[2, 2], u_low=[-2, -2], reward_calculator=reward3, normalize=False) replay_buffer1 = ReplayBuffer(1000) replay_buffer2 = ReplayBuffer(100) exploration_noise1 = EGreedy( action_bounds=env1.u_bounds, epsilon_start=0.5, epsilon_final=0.4, epsilon_decay=100000, ) exploration_noise1 = No_Exploration() exploration_noise2 = GaussianExploration( action_bounds=env2.external_u_bounds, min_sigma=1.0, max_sigma=1.01, decay_period=100000) controller1 = Td3(gpu_id=1, num_inputs=env1.observation_size(), num_actions=2, act_hidden_size=16, val_hidden_size=16, replay_buffer=replay_buffer1, u_bounds=env1.u_bounds, exploration=exploration_noise1,