Example #1
def run_vi(
    rounds=1000,
    seed=random.randint(0, 1000000),
    name='VI',
    capacity=2,
    predict_round=3000,
    u_optim='adam',
):

    print('seed :', seed)
    torch.manual_seed(seed)
    vi_para = {'gamma': 0.2}
    vi = new_vi()
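    # quadratic penalty (built from the shared penalty_para) fed to the thickener environment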
    penalty = Quadratic(**penalty_para)
    env_vi = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )

    res1 = OneRoundExp(controller=vi,
                       env=env_vi,
                       max_step=rounds,
                       exp_name=name).run()
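    # report the average number of u-optimisation iterations per control step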
    print(name, ':', vi.u_iter_times * 1.0 / rounds)

    return res1
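The result object returned by these run_* helpers is what OneRoundEvaluation consumes for plotting (Example #12 below shows this for a single controller). A minimal comparison sketch, assuming run_vi and run_hdp are defined as in these examples and OneRoundEvaluation is already imported:

# sketch only: compare two controllers under the same seed (the values are illustrative)
res_vi = run_vi(rounds=1000, seed=42)
res_hdp = run_hdp(rounds=1000, seed=42)
eval_res = OneRoundEvaluation(res_list=[res_vi, res_hdp])
eval_res.plot_all()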
Example #2
def run_dhp(
    rounds=800,
    seed=random.randint(0, 1000000),
    name='DHP',
    capacity=2,
    predict_round=3000,
    u_optim='adam',
):

    #seed = 8312279
    print('seed :', seed)
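    # seed torch, Python's random module, and NumPy so the run is reproducible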
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
    dhp = new_dhp()
    penalty = Quadratic(**penalty_para)
    env_dhp = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )

    res1 = OneRoundExp(controller=dhp,
                       env=env_dhp,
                       max_step=rounds,
                       exp_name=name).run()
    return res1
Example #3
def run_hdp(
    rounds=1000,
    seed=random.randint(0, 1000000),
    name='HDP',
    capacity=2,
    batch_size=2,
    predict_round=3000,
    u_optim='adam',
):

    print('seed :', seed)
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    hdp = new_hdp(capacity=capacity, batch_size=batch_size)
    penalty = Quadratic(**penalty_para)
    env_hdp = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )

    res1 = OneRoundExp(controller=hdp,
                       env=env_hdp,
                       max_step=rounds,
                       exp_name=name).run()

    return res1
Example #4
def run_hdp(rounds=1000,
            seed=random.randint(0, 1000000),
            name='HDP',
            predict_round=800):

    hdp_para = {'gamma': 0.9}
    print('seed :', seed)
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
    from Control_Exp1001.demo.thickener.hdp_maker import new_hdp
    hdp = new_hdp(predict_round=predict_round, **hdp_para)
    penalty = Quadratic(**penalty_para)
    env_hdp = Thickener(
        penalty_calculator=penalty,
        random_seed=seed,
        **thickner_para,
    )

    res1 = OneRoundExp(controller=hdp,
                       env=env_hdp,
                       max_step=rounds,
                       exp_name=name).run()

    return res1
Example #5
def run_vi_ub(rounds=1000,
              seed=random.randint(0, 1000000),
              name='VI_uk',
              capacity=2,
              predict_round=3000,
              u_optim='sgd'):

    print('seed :', seed)
    torch.manual_seed(seed)
    from Control_Exp1001.demo.thickener.vi_ub_maker import new_vi_ub
    vi = new_vi_ub(capacity=capacity,
                   predict_round=predict_round,
                   u_optim=u_optim)
    penalty = Quadratic(**penalty_para)
    env_vi = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )

    # record the controller's prediction MSE in the module-level list mse_vi_pre
    mse_vi_pre.append(vi.con_predict_mse)
    res1 = OneRoundExp(controller=vi,
                       env=env_vi,
                       max_step=rounds,
                       exp_name=name).run()
    print(name, ':', vi.u_iter_times * 1.0 / rounds)

    return res1
Example #6
def run_adhdp_offpolicy(rounds=1000,
                        seed=random.randint(0, 1000000),
                        name='CoQL',
                        train_rounds=100,
                        train_step_in_round=100):

    print('seed :', seed)
    random.seed(seed)
    np.random.seed(seed)
    from Control_Exp1001.demo.thickener.adhdp_make import new_adhdp
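    # ADHDP controller with replay capacity 4, trained off-policy (hence OffLineExp below)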
    adhdp = new_adhdp(capacity=4, off_policy=True)
    penalty = Quadratic(**penalty_para)
    env_adhdp = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )
    env_adhdp.reset()
    res1 = OffLineExp(controller=adhdp,
                      env=env_adhdp,
                      max_step=rounds,
                      exp_name=name,
                      train_rounds=train_rounds,
                      train_step_in_round=train_step_in_round).run()

    return res1
Example #7
def run_dhp_vi(
    rounds=1000,
    seed=random.randint(0, 1000000),
    name='DHPVI',
    capacity=2,
    predict_round=3000,
    u_optim='adam',
):

    print('seed :', seed)
    torch.manual_seed(seed)
    dhp_vi_para = {
        #'gamma': 0.2
    }
    dhp_vi = new_dhp_vi()
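    # deep-copy the shared penalty_para so only this controller sees the modified S weights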
    specific_penalty_para = copy.deepcopy(penalty_para)
    specific_penalty_para['S'] = [0.0001, 0.0008]
    penalty = Quadratic(**specific_penalty_para)
    env_dhp_vi = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )

    res1 = OneRoundExp(controller=dhp_vi,
                       env=env_dhp_vi,
                       max_step=rounds,
                       exp_name=name).run()
    return res1
Example #8
def run_hdp(rounds=1000,
            seed=random.randint(0, 1000000),
            name='HDP',
            predict_round=800):

    print('seed :', seed)
    hdp_para = {'gamma': 0.2}
    hdp = new_hdp()
    penalty = Quadratic(**penalty_para)
    env_hdp = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )

    res1 = OneRoundExp(controller=hdp,
                       env=env_hdp,
                       max_step=rounds,
                       exp_name=name).run()
    return res1
Example #9
def run_hdp_sample(rounds=1000, seed=random.randint(0, 1000000)):

    print('seed :', seed)
    print('hdp_sample')
    from Control_Exp1001.demo.thickener.hdp_sample_maker import hdp_sample
    penalty = Quadratic(**penalty_para)
    env_hdp = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )

    res1 = OneRoundExp(controller=hdp_sample,
                       env=env_hdp,
                       max_step=rounds,
                       exp_name='HDP_sample').run()

    return res1
Example #10
def run_adhdp(rounds=1000, seed=random.randint(0, 1000000), name='ADHDP'):

    print('seed :', seed)
    random.seed(seed)
    np.random.seed(seed)
    from Control_Exp1001.demo.thickener.adhdp_make import new_adhdp
    adhdp = new_adhdp(capacity=9)
    penalty = Quadratic(**penalty_para)
    env_adhdp = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )
    env_adhdp.reset()
    res1 = OneRoundExp(controller=adhdp,
                       env=env_adhdp,
                       max_step=rounds,
                       exp_name=name).run()

    return res1
Example #11
def run_ILPL(rounds=1000,
             seed=random.randint(0, 1000000),
             name='ILPL',
             predict_round=800):
    print('seed :', seed)
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
    ilpl = new_ILPL()
    penalty = Quadratic(**penalty_para)
    env_ILPL = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )

    res1 = OneRoundExp(controller=ilpl,
                       env=env_ILPL,
                       max_step=rounds,
                       exp_name=name).run()
    return res1
Example #12
def run_adhdp(rounds=1000, seed=random.randint(0, 1000000)):

    print('seed :', seed)
    random.seed(seed)
    np.random.seed(seed)
    # the ADHDP controller is used below but never constructed in this snippet;
    # build it here as in Examples #10 and #14 (the default arguments are an assumption)
    from Control_Exp1001.demo.thickener.adhdp_make import new_adhdp
    adhdp = new_adhdp()
    penalty = Quadratic(**penalty_para)
    env_adhdp = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
        random_seed=seed,
    )

    env_adhdp.reset()
    res1 = OneRoundExp(controller=adhdp,
                       env=env_adhdp,
                       max_step=rounds,
                       exp_name='ADHDP').run()

    eval_res = OneRoundEvaluation(res_list=[res1])
    eval_res.plot_all()
Example #13
def run_vi_sample(rounds=1000,
                  seed=random.randint(0, 1000000),
                  name='VI_sample',
                  capacity=2,
                  predict_round=3000):

    print('seed :', seed)
    torch.manual_seed(seed)
    from Control_Exp1001.demo.thickener.vi_sample_maker import new_vi_sample
    vi_sample = new_vi_sample(capacity=capacity, predict_round=predict_round)
    penalty = Quadratic(**penalty_para)
    env_vi_sample = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )
    res1 = OneRoundExp(controller=vi_sample,
                       env=env_vi_sample,
                       max_step=rounds,
                       exp_name=name).run()

    mse_vi_sample_pre.append(vi_sample.con_predict_mse)
    return res1
Example #14
def run_adhdp(rounds=1000,
              seed=random.randint(0, 1000000),
              name='ADHDP',
              predict_round=800,
              random_act=False):
    print('seed :', seed)

    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
    adhdp = new_adhdp(random_act=random_act)
    penalty = Quadratic(**penalty_para)
    env_hdp = Thickener(
        penalty_calculator=penalty,
        random_seed=seed,
        **thickner_para,
    )

    res1 = OneRoundExp(controller=adhdp,
                       env=env_hdp,
                       max_step=rounds,
                       exp_name=name).run()
    return res1
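The examples above differ in which random-number generators they seed: some call only torch.manual_seed, others also seed random and np.random, and a few seed nothing at all. If fully reproducible runs are wanted, the three calls used in Examples #2-#4 can be grouped into one helper; seed_everything below is only a sketch, not part of Control_Exp1001:

import random

import numpy as np
import torch


def seed_everything(seed):
    # seed the three RNG sources touched by the examples above
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)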