# Standard/third-party imports used by the functions below; the project-specific
# classes (ReplayBuffer, Thickener, No_Exploration, VI, DHP, ILPL, Quadratic,
# OneRoundExp) and module-level globals (EXP_NAME, penalty_para, thickner_para,
# new_hdp) are assumed to be imported or defined elsewhere in this module.
import random

import numpy as np
import torch


def new_vi(capacity=2, batch_size=2):
    predict_round = 3000
    u_optim = 'adam'
    gamma = 0.6
    replay_vi = ReplayBuffer(capacity=capacity)
    env_VI = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    exploration = No_Exploration()

    print('make new vi controller')
    vi = VI(
        replay_buffer=replay_vi,
        u_bounds=env_VI.u_bounds,
        #exploration = None,
        exploration=exploration,
        env=env_VI,
        predict_training_rounds=predict_round,
        gamma=gamma,
        batch_size=batch_size,
        predict_batch_size=32,
        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.001,
        actor_nn_error_limit=0.001,
        actor_nn_lr=0.005,
        critic_nn_lr=0.01,
        model_nn_lr=0.01,
        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=14,
        hidden_actor=14,
        predict_epoch=30,
        Nc=1000,
        u_optim=u_optim,
        img_path=EXP_NAME,
    )
    env_VI.reset()
    # Pre-train the system identification (model) network before control starts.
    vi.train_identification_model()
    #vi.test_predict_model(test_rounds=100)
    return vi
def run_vi(
        rounds=1000,
        seed=random.randint(0, 1000000),
        name='VI',
        capacity=2,
        predict_round=3000,
        u_optim='adam',
):
    print('seed :', seed)
    torch.manual_seed(seed)
    vi_para = {'gamma': 0.2}  # currently unused; new_vi() uses its own defaults
    vi = new_vi(capacity=capacity)
    penalty = Quadratic(**penalty_para)
    thickner_para['random_seed'] = seed
    env_vi = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )
    res1 = OneRoundExp(controller=vi, env=env_vi, max_step=rounds, exp_name=name).run()
    # Report u_iter_times averaged over the run.
    print(name, ':', vi.u_iter_times * 1.0 / rounds)
    return res1
def run_dhp(
        rounds=800,
        seed=random.randint(0, 1000000),
        name='DHP',
        capacity=2,
        predict_round=3000,
        u_optim='adam',
):
    seed = 8312279  # note: overrides the seed argument with a fixed value
    print('seed :', seed)
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
    dhp = new_dhp()
    penalty = Quadratic(**penalty_para)
    env_dhp = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )
    res1 = OneRoundExp(controller=dhp, env=env_dhp, max_step=rounds, exp_name=name).run()
    return res1
def new_dhp():
    capacity = 3
    predict_round = 3000
    gamma = 0.6
    replay_DHP = ReplayBuffer(capacity=capacity)
    env_DHP = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    exploration = No_Exploration()

    print('make new dhp controller')
    dhp = DHP(
        replay_buffer=replay_DHP,
        u_bounds=env_DHP.u_bounds,
        #exploration = None,
        exploration=exploration,
        env=env_DHP,
        predict_training_rounds=predict_round,
        gamma=gamma,
        batch_size=2,
        predict_batch_size=32,
        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.001,
        actor_nn_error_limit=0.0001,  # 0.005
        actor_nn_lr=0.008,
        critic_nn_lr=0.01,
        model_nn_lr=0.01,
        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=12,
        hidden_actor=14,
        predict_epoch=30,
        Na=200,
        Nc=300,
        img_path=EXP_NAME,
    )
    env_DHP.reset()
    dhp.train_identification_model()
    return dhp
def new_ILPL():
    predict_round = 3000
    gamma = 0.6
    replay_ILPL = ReplayBuffer(capacity=4)
    env_ILPL = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    exploration = No_Exploration()

    print('make new ilpl controller')
    ilpl = ILPL(
        replay_buffer=replay_ILPL,
        u_bounds=env_ILPL.u_bounds,
        #exploration = None,
        exploration=exploration,
        env=env_ILPL,
        predict_training_rounds=predict_round,
        gamma=gamma,
        batch_size=2,
        predict_batch_size=32,
        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.1,
        actor_nn_error_limit=0.001,  # 0.005
        actor_nn_lr=0.001,
        critic_nn_lr=0.02,
        model_nn_lr=0.01,
        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=14,
        hidden_actor=14,
        predict_epoch=30,
        Na=220,
        Nc=500,
        img_path=EXP_NAME,
    )
    env_ILPL.reset()
    ilpl.train_identification_model()
    return ilpl
def run_ILPL(rounds=1000, seed=random.randint(0, 1000000), name='ILPL', predict_round=800):
    print('seed :', seed)
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
    ilpl = new_ILPL()
    penalty = Quadratic(**penalty_para)
    env_ILPL = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )
    res1 = OneRoundExp(controller=ilpl, env=env_ILPL, max_step=rounds, exp_name=name).run()
    return res1
def run_hdp(rounds=1000, seed=random.randint(0, 1000000), name='HDP', predict_round=800):
    print('seed :', seed)
    hdp_para = {'gamma': 0.2}  # currently unused; new_hdp() uses its own defaults
    torch.manual_seed(seed)
    hdp = new_hdp()
    penalty = Quadratic(**penalty_para)
    thickner_para['random_seed'] = seed
    env_hdp = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )
    res1 = OneRoundExp(controller=hdp, env=env_hdp, max_step=rounds, exp_name=name).run()
    return res1
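# --- usage sketch (illustrative assumption, not part of the original script) ----
# A minimal example of how the run_* entry points above could be driven together
# for a side-by-side comparison. It relies only on functions defined in this file;
# OneRoundExp.run() results are treated as opaque objects and simply collected.
if __name__ == '__main__':
    shared_seed = random.randint(0, 1000000)  # one seed so runs are comparable
    results = {
        'VI': run_vi(rounds=1000, seed=shared_seed),
        'DHP': run_dhp(rounds=800, seed=shared_seed),   # note: fixes its own seed internally
        'HDP': run_hdp(rounds=1000, seed=shared_seed),
        'ILPL': run_ILPL(rounds=1000, seed=shared_seed),
    }
    for ctrl_name, res in results.items():
        print(ctrl_name, 'finished, result:', res)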