def run_vi(rounds=1000, seed=None, name='VI', capacity=2,
           predict_round=3000, u_optim='adam'):
    # Draw the seed at call time: a random default argument would be
    # evaluated only once, at import, so every call would reuse the same seed.
    if seed is None:
        seed = random.randint(0, 1000000)
    print('seed :', seed)
    torch.manual_seed(seed)
    vi_para = {'gamma': 0.2}  # NOTE: defined but never passed to new_vi()
    # capacity, predict_round and u_optim mirror the other run_* signatures
    # but are unused here.
    vi = new_vi()
    penalty = Quadratic(**penalty_para)
    env_vi = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )
    res1 = OneRoundExp(controller=vi, env=env_vi,
                       max_step=rounds, exp_name=name).run()
    # Average number of u-optimisation iterations per control step.
    print(name, ':', vi.u_iter_times * 1.0 / rounds)
    return res1
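# A minimal usage sketch (hypothetical helper, not in the original scripts):
# run one VI experiment and plot its record with OneRoundEvaluation, the same
# evaluation class that run_adhdp below uses. Assumes OneRoundEvaluation is
# imported at module top, as its use later in this file suggests.
def eval_vi_once(rounds=1000, seed=None):
    res = run_vi(rounds=rounds, seed=seed)
    OneRoundEvaluation(res_list=[res]).plot_all()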
def run_dhp(rounds=800, seed=None, name='DHP', capacity=2,
            predict_round=3000, u_optim='adam'):
    if seed is None:
        seed = random.randint(0, 1000000)
    # seed = 8312279
    print('seed :', seed)
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
    dhp = new_dhp()
    penalty = Quadratic(**penalty_para)
    env_dhp = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )
    res1 = OneRoundExp(controller=dhp, env=env_dhp,
                       max_step=rounds, exp_name=name).run()
    return res1
def run_hdp(rounds=1000, seed=None, name='HDP', capacity=2, batch_size=2,
            predict_round=3000, u_optim='adam'):
    if seed is None:
        seed = random.randint(0, 1000000)
    print('seed :', seed)
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    hdp = new_hdp(capacity=capacity, batch_size=batch_size)
    penalty = Quadratic(**penalty_para)
    env_hdp = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )
    res1 = OneRoundExp(controller=hdp, env=env_hdp,
                       max_step=rounds, exp_name=name).run()
    return res1
def run_hdp(rounds=1000, seed=None, name='HDP', predict_round=800):
    if seed is None:
        seed = random.randint(0, 1000000)
    hdp_para = {'gamma': 0.9}
    print('seed :', seed)
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
    from Control_Exp1001.demo.thickener.hdp_maker import new_hdp
    hdp = new_hdp(predict_round=predict_round, **hdp_para)
    penalty = Quadratic(**penalty_para)
    env_hdp = Thickener(
        penalty_calculator=penalty,
        random_seed=seed,
        **thickner_para,
    )
    res1 = OneRoundExp(controller=hdp, env=env_hdp,
                       max_step=rounds, exp_name=name).run()
    return res1
def run_vi_ub(rounds=1000, seed=None, name='VI_uk', capacity=2,
              predict_round=3000, u_optim='sgd'):
    if seed is None:
        seed = random.randint(0, 1000000)
    print('seed :', seed)
    torch.manual_seed(seed)
    from Control_Exp1001.demo.thickener.vi_ub_maker import new_vi_ub
    vi = new_vi_ub(capacity=capacity, predict_round=predict_round,
                   u_optim=u_optim)
    penalty = Quadratic(**penalty_para)
    env_vi = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )
    # mse_vi_pre is a module-level list assumed to be defined elsewhere in
    # this file; it collects the controller's model-prediction MSE.
    mse_vi_pre.append(vi.con_predict_mse)
    res1 = OneRoundExp(controller=vi, env=env_vi,
                       max_step=rounds, exp_name=name).run()
    print(name, ':', vi.u_iter_times * 1.0 / rounds)
    return res1
def run_adhdp_offpolicy(rounds=1000, seed=None, name='CoQL',
                        train_rounds=100, train_step_in_round=100):
    if seed is None:
        seed = random.randint(0, 1000000)
    print('seed :', seed)
    random.seed(seed)
    np.random.seed(seed)
    from Control_Exp1001.demo.thickener.adhdp_make import new_adhdp
    adhdp = new_adhdp(capacity=4, off_policy=True)
    penalty = Quadratic(**penalty_para)
    env_adhdp = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )
    env_adhdp.reset()
    res1 = OffLineExp(controller=adhdp, env=env_adhdp,
                      max_step=rounds, exp_name=name,
                      train_rounds=train_rounds,
                      train_step_in_round=train_step_in_round).run()
    return res1
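# Sketch of an off-policy vs. on-policy comparison (hypothetical helper, not
# in the original scripts). run_adhdp is defined several times below; Python
# resolves the name at call time to the last definition, whose signature
# accepts `name` and returns the result record.
def compare_adhdp_policies(rounds=1000, seed=None):
    res_off = run_adhdp_offpolicy(rounds=rounds, seed=seed, name='CoQL')
    res_on = run_adhdp(rounds=rounds, seed=seed, name='ADHDP')
    OneRoundEvaluation(res_list=[res_off, res_on]).plot_all()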
def run_dhp_vi(rounds=1000, seed=None, name='DHPVI', capacity=2,
               predict_round=3000, u_optim='adam'):
    if seed is None:
        seed = random.randint(0, 1000000)
    print('seed :', seed)
    torch.manual_seed(seed)
    dhp_vi_para = {
        # 'gamma': 0.2
    }  # NOTE: defined but never passed to new_dhp_vi()
    dhp_vi = new_dhp_vi()
    # Override only the control-weight matrix S for this experiment; the
    # deep copy keeps the shared penalty_para intact for the other runs.
    specific_penalty_para = copy.deepcopy(penalty_para)
    specific_penalty_para['S'] = [0.0001, 0.0008]
    penalty = Quadratic(**specific_penalty_para)
    env_dhp_vi = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )
    res1 = OneRoundExp(controller=dhp_vi, env=env_dhp_vi,
                       max_step=rounds, exp_name=name).run()
    return res1
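# The penalty-override pattern above, factored into a sketch (hypothetical
# helper, not in the original scripts): deep-copy the shared penalty_para,
# swap in a new control-weight matrix S, and build a fresh Quadratic so the
# module-level defaults stay untouched for the other run_* functions.
def make_penalty(S=None):
    para = copy.deepcopy(penalty_para)
    if S is not None:
        para['S'] = S
    return Quadratic(**para)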
def run_hdp(rounds=1000, seed=None, name='HDP', predict_round=800):
    if seed is None:
        seed = random.randint(0, 1000000)
    print('seed :', seed)
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
    hdp_para = {'gamma': 0.2}  # NOTE: defined but never passed to new_hdp()
    hdp = new_hdp()
    penalty = Quadratic(**penalty_para)
    env_hdp = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )
    res1 = OneRoundExp(controller=hdp, env=env_hdp,
                       max_step=rounds, exp_name=name).run()
    return res1
def run_hdp_sample(rounds=1000, seed=None):
    if seed is None:
        seed = random.randint(0, 1000000)
    print('seed :', seed)
    print('hdp_sample')
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
    from Control_Exp1001.demo.thickener.hdp_sample_maker import hdp_sample
    penalty = Quadratic(**penalty_para)
    env_hdp = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )
    res1 = OneRoundExp(controller=hdp_sample, env=env_hdp,
                       max_step=rounds, exp_name='HDP_sample').run()
    return res1
def run_adhdp(rounds=1000, seed=None, name='ADHDP'):
    if seed is None:
        seed = random.randint(0, 1000000)
    print('seed :', seed)
    random.seed(seed)
    np.random.seed(seed)
    from Control_Exp1001.demo.thickener.adhdp_make import new_adhdp
    adhdp = new_adhdp(capacity=9)
    penalty = Quadratic(**penalty_para)
    env_adhdp = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )
    env_adhdp.reset()
    res1 = OneRoundExp(controller=adhdp, env=env_adhdp,
                       max_step=rounds, exp_name=name).run()
    return res1
def run_ILPL(rounds=1000, seed=None, name='ILPL', predict_round=800):
    if seed is None:
        seed = random.randint(0, 1000000)
    print('seed :', seed)
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
    ilpl = new_ILPL()
    penalty = Quadratic(**penalty_para)
    env_ILPL = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )
    res1 = OneRoundExp(controller=ilpl, env=env_ILPL,
                       max_step=rounds, exp_name=name).run()
    return res1
def run_adhdp(rounds=1000, seed=None):
    if seed is None:
        seed = random.randint(0, 1000000)
    print('seed :', seed)
    random.seed(seed)
    np.random.seed(seed)
    # Build the controller locally rather than relying on a module-level
    # `adhdp`; new_adhdp is the same maker the other ADHDP runs use.
    from Control_Exp1001.demo.thickener.adhdp_make import new_adhdp
    adhdp = new_adhdp()
    penalty = Quadratic(**penalty_para)
    env_adhdp = Thickener(
        penalty_calculator=penalty,
        random_seed=seed,
        **thickner_para,
    )
    env_adhdp.reset()
    res1 = OneRoundExp(controller=adhdp, env=env_adhdp,
                       max_step=rounds, exp_name='ADHDP').run()
    eval_res = OneRoundEvaluation(res_list=[res1])
    eval_res.plot_all()
def run_vi_sample(rounds=1000, seed=None, name='VI_sample', capacity=2,
                  predict_round=3000):
    if seed is None:
        seed = random.randint(0, 1000000)
    print('seed :', seed)
    torch.manual_seed(seed)
    from Control_Exp1001.demo.thickener.vi_sample_maker import new_vi_sample
    vi_sample = new_vi_sample(capacity=capacity, predict_round=predict_round)
    penalty = Quadratic(**penalty_para)
    env_vi_sample = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )
    res1 = OneRoundExp(controller=vi_sample, env=env_vi_sample,
                       max_step=rounds, exp_name=name).run()
    # mse_vi_sample_pre is a module-level list assumed defined elsewhere.
    mse_vi_sample_pre.append(vi_sample.con_predict_mse)
    return res1
def run_adhdp(rounds=1000, seed=None, name='ADHDP', predict_round=800,
              random_act=False):
    if seed is None:
        seed = random.randint(0, 1000000)
    print('seed :', seed)
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
    adhdp = new_adhdp(random_act=random_act)
    penalty = Quadratic(**penalty_para)
    env_hdp = Thickener(
        penalty_calculator=penalty,
        random_seed=seed,
        **thickner_para,
    )
    res1 = OneRoundExp(controller=adhdp, env=env_hdp,
                       max_step=rounds, exp_name=name).run()
    return res1
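# Hypothetical driver, not part of the original scripts: run a few of the
# controllers defined above with one shared seed for comparability, then
# overlay their result records in a single evaluation plot. run_hdp resolves
# to its last definition above.
if __name__ == '__main__':
    shared_seed = random.randint(0, 1000000)
    results = [
        run_vi(rounds=1000, seed=shared_seed),
        run_dhp(rounds=800, seed=shared_seed),
        run_hdp(rounds=1000, seed=shared_seed),
    ]
    OneRoundEvaluation(res_list=results).plot_all()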