Example 1
def run_adhdp_offpolicy(rounds=1000,
                        seed=random.randint(0, 1000000),
                        name='CoQL',
                        train_rounds=100,
                        train_step_in_round=100):

    print('seed :', seed)
    random.seed(seed)
    np.random.seed(seed)
    from Control_Exp1001.demo.thickener.adhdp_make import new_adhdp
    adhdp = new_adhdp(capacity=4, off_policy=True)
    penalty = Quadratic(**penalty_para)
    env_adhdp = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )
    env_adhdp.reset()
    res1 = OffLineExp(controller=adhdp,
                      env=env_adhdp,
                      max_step=rounds,
                      exp_name=name,
                      train_rounds=train_rounds,
                      train_step_in_round=train_step_in_round).run()

    return res1
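
A minimal sketch of how this off-policy runner could be driven, assuming the OneRoundEvaluation helper used in the other examples also accepts the result returned by OffLineExp.run() (the driver below is illustrative, not part of the original file):

# Hypothetical driver: run the off-policy ADHDP experiment and plot the result.
# Assumption: OneRoundEvaluation can consume the record produced by OffLineExp.run().
if __name__ == '__main__':
    res = run_adhdp_offpolicy(rounds=1000, name='CoQL')
    eval_res = OneRoundEvaluation(res_list=[res])
    eval_res.plot_all()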
Example 2
def run():

    # Define the integral penalty term
    penalty = IntegralPenalty(weight_matrix=[200, 0.02], S=[0.1, 0.1])
    #penalty = IntegralPenalty(weight_matrix=[1,1], S=[0.00001,0.00001])
    # Parameters used to initialize the env object
    env_para = {
        "dt": 1,
        "normalize": False,
        "noise_in": False,
        "penalty_calculator": penalty,
        "y_star": [1.55, 650],
        "y_start": [1.4, 680]
        #"y_star": np.array([17.32, 0.84], dtype=float)
    }

    env = Thickener(**env_para)

    env.reset()
    # Replay buffer capacity is 1 and batch_size is 1
    replaybuff = ReplayBuffer(capacity=1)
    # Parameters follow those given in the paper
    controller = ILPL(env=env,
                      u_bounds=env.u_bounds,
                      replay_buffer=replaybuff,
                      Vm=np.diag([1, 0.01, 0.1, 0.1, 0.1, 0.1]),
                      Lm=np.diag([1, 0.01]),
                      Va=np.diag([1, 0.01, 1, 0.01, 0.1, 0.1]),
                      La=np.diag([1, 1]),
                      Vc=np.diag([1, 0.01, 1, 0.01, 0.1, 0.1]),
                      Lc=np.diag([0.1]),
                      predict_training_rounds=5000,
                      gamma=0.6,
                      batch_size=1,
                      predict_batch_size=32,
                      model_nn_error_limit=0.08,
                      critic_nn_error_limit=0.1,
                      actor_nn_loss=0.6,
                      u_iter=30,
                      u_begin=[80, 38],
                      indice_y=[2, 3],
                      indice_y_star=[0, 1],
                      u_first=[80, 38])
    # Define the experiment block
    exp = OneRoundExp(env=env,
                      controller=controller,
                      max_step=300,
                      exp_name="ILPL")

    res = exp.run()
    eval_res = OneRoundEvaluation(res_list=[res])
    eval_res.plot_all()
Example 3
def new_dhp_vi():
    capacity = 20
    predict_round = 6000
    gamma = 0.6
    replay_DhpVI = ReplayBuffer(capacity=capacity)
    env_DhpVI = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    exploration = No_Exploration()

    print('make new dhp_vi controller')
    dhp_vi = DhpVI(
        replay_buffer=replay_DhpVI,
        u_bounds=env_DhpVI.u_bounds,
        #exploration = None,
        exploration=exploration,
        env=env_DhpVI,
        predict_training_rounds=predict_round,
        gamma=gamma,

        batch_size=20,
        predict_batch_size=32,

        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.01,
        actor_nn_error_limit=0.001,

        # 0.005
        actor_nn_lr=0.005,
        critic_nn_lr=0.001,
        model_nn_lr=0.01,

        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=12,
        hidden_actor=14,
        predict_epoch=30,
        Na=2000,
        Nc=100,
        test_period=3,
        max_u_iters=2000,
        policy_visual_period=400,
        img_path=EXP_NAME
    )
    env_DhpVI.reset()
    dhp_vi.train_identification_model()
    return dhp_vi
Example 4
def new_vi():
    capacity = 2  # replay buffer size; must be greater than or equal to batch_size
    predict_round = 3000
    u_optim = 'sgd'  # gradient-descent optimizer used when searching for u*
    gamma = 0.6
    replay_vi = ReplayBuffer(capacity=capacity)
    # This thickener instance is used to generate data for training the prediction model
    env_VI = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    exploration = No_Exploration()

    print('make new vi controller')
    vi = VI(
        replay_buffer=replay_vi,
        u_bounds=env_VI.u_bounds,
        #exploration = None,
        exploration=exploration,
        env=env_VI,
        predict_training_rounds=predict_round,
        gamma=gamma,

        batch_size=capacity,
        predict_batch_size=32,

        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.001,
        actor_nn_error_limit=0.001,

        actor_nn_lr=0.005,
        critic_nn_lr=0.02,
        model_nn_lr=0.01,

        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=14,
        hidden_actor=14,
        predict_epoch=30,
        Nc=500,
        u_optim=u_optim,
        img_path=EXP_NAME
    )
    env_VI.reset()
    vi.train_identification_model()
    #vi.test_predict_model(test_rounds=100)
    return vi
Example 5
def new_dhp():
    capacity = 1
    predict_round = 6000
    gamma = 0.6
    replay_DHP = ReplayBuffer(capacity=capacity)
    env_DHP = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    exploration = No_Exploration()

    print('make new dhp controller')
    dhp = DHP(
        replay_buffer=replay_DHP,
        u_bounds=env_DHP.u_bounds,
        #exploration = None,
        exploration=exploration,
        env=env_DHP,
        predict_training_rounds=predict_round,
        gamma=gamma,

        batch_size=1,
        predict_batch_size=32,

        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.01,
        actor_nn_error_limit=0.001,

        # 0.005
        actor_nn_lr=0.005,
        critic_nn_lr=0.001,
        model_nn_lr=0.01,

        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=12,
        hidden_actor=14,
        predict_epoch=30,
        Na=220,
        Nc=100,
        test_period=3,
        img_path=EXP_NAME,
    )
    env_DHP.reset()
    dhp.train_identification_model()
    return dhp
Example 6
def run_hdp(rounds=1000,
            seed=random.randint(0, 1000000),
            name='HDP',
            predict_round=800):

    hdp_para = {'gamma': 0.9}
    print('seed :', seed)
    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
    from Control_Exp1001.demo.thickener.hdp_maker import new_hdp
    hdp = new_hdp(predict_round=predict_round, **hdp_para)
    penalty = Quadratic(**penalty_para)
    env_hdp = Thickener(
        penalty_calculator=penalty,
        random_seed=seed,
        **thickner_para,
    )

    res1 = OneRoundExp(controller=hdp,
                       env=env_hdp,
                       max_step=rounds,
                       exp_name=name).run()

    return res1
Example 7
def run_vi_ub(rounds=1000,
              seed=random.randint(0, 1000000),
              name='VI_uk',
              capacity=2,
              predict_round=3000,
              u_optim='sgd'):

    print('seed :', seed)
    torch.manual_seed(seed)
    from Control_Exp1001.demo.thickener.vi_ub_maker import new_vi_ub
    vi = new_vi_ub(capacity=capacity,
                   predict_round=predict_round,
                   u_optim=u_optim)
    penalty = Quadratic(**penalty_para)
    env_vi = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )

    mse_vi_pre.append(vi.con_predict_mse)
    res1 = OneRoundExp(controller=vi,
                       env=env_vi,
                       max_step=rounds,
                       exp_name=name).run()
    print(name, ':', vi.u_iter_times * 1.0 / rounds)

    return res1
Example 8
def run_dhp_vi(
    rounds=1000,
    seed=random.randint(0, 1000000),
    name='DHPVI',
    capacity=2,
    predict_round=3000,
    u_optim='adam',
):

    print('seed :', seed)
    torch.manual_seed(seed)
    dhp_vi_para = {
        #'gamma': 0.2
    }
    dhp_vi = new_dhp_vi()
    specific_penalty_para = copy.deepcopy(penalty_para)
    specific_penalty_para['S'] = [0.0001, 0.0008]
    penalty = Quadratic(**specific_penalty_para)
    env_dhp_vi = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )

    res1 = OneRoundExp(controller=dhp_vi,
                       env=env_dhp_vi,
                       max_step=rounds,
                       exp_name=name).run()
    return res1
Example 9
def new_adhdp(capacity=2, off_policy=False):
    replay_hdp = ReplayBuffer(capacity=capacity)
    env_ADHDP = Thickener()
    #exploration = No_Exploration()
    exploration = EGreedy(env_ADHDP.external_u_bounds,
                          epsilon_start=0.5,
                          epsilon_final=0,
                          epsilon_decay=1000)
    adhdp = ADHDP(
        replay_buffer=replay_hdp,
        u_bounds=env_ADHDP.u_bounds,
        #exploration = None,
        exploration=exploration,
        env=env_ADHDP,
        gamma=0.7,
        batch_size=capacity,
        predict_batch_size=32,
        critic_nn_error_limit=0.02,
        actor_nn_error_limit=0.001,
        actor_nn_lr=0.01,
        critic_nn_lr=0.01,
        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_critic=6,
        hidden_actor=6,
        max_iter_c=50,
        off_policy=off_policy,
    )
    return adhdp
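
For context, the two callers in this collection use this factory in different modes; a short sketch of that wiring, with names taken from Examples 1 and 13:

# Off-policy controller: paired with the OffLineExp runner (Example 1).
adhdp_off = new_adhdp(capacity=4, off_policy=True)
# Default on-policy controller: paired with OneRoundExp (Example 13).
adhdp_on = new_adhdp(capacity=9)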
Example 10
def run_vi(
    rounds=1000,
    seed=random.randint(0, 1000000),
    name='VI',
    capacity=2,
    predict_round=3000,
    u_optim='adam',
):

    print('seed :', seed)
    torch.manual_seed(seed)
    vi_para = {'gamma': 0.2}
    vi = new_vi()
    penalty = Quadratic(**penalty_para)
    env_vi = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )

    res1 = OneRoundExp(controller=vi,
                       env=env_vi,
                       max_step=rounds,
                       exp_name=name).run()
    print(name, ':', vi.u_iter_times * 1.0 / rounds)

    return res1
Example 11
def new_hdp():
    predict_round = 3000
    gamma = 0.6
    replay_hdp = ReplayBuffer(capacity=2)
    env_HDP = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    exploration = No_Exploration()

    print('make new hdp controller')
    hdp = HDP(
        replay_buffer=replay_hdp,
        u_bounds=env_HDP.u_bounds,
        #exploration = None,
        exploration=exploration,
        env=env_HDP,
        predict_training_rounds=predict_round,
        gamma=gamma,

        batch_size=2,
        predict_batch_size=32,

        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.001,
        actor_nn_error_limit=0.001,

        # 0.005
        actor_nn_lr=0.003,
        critic_nn_lr=0.02,
        model_nn_lr=0.01,

        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=14,
        hidden_actor=14,
        predict_epoch=30,
        Na=220,
        Nc=500,
        img_path=EXP_NAME
    )
    env_HDP.reset()
    hdp.train_identification_model()
    return hdp
Example 12
def new_vi_ub():
    capacity = 2
    predict_round = 3000
    u_optim = 'sgd'
    replay_vi = ReplayBuffer(capacity=capacity)
    env_VI = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    exploration = No_Exploration()

    print('make new viuk controller')
    vi = VIub(
        replay_buffer=replay_vi,
        u_bounds=env_VI.u_bounds,
        #exploration = None,
        exploration=exploration,
        env=env_VI,
        predict_training_rounds=predict_round,
        gamma=0.6,

        batch_size=capacity,
        predict_batch_size=32,

        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.001,
        actor_nn_error_limit=0.001,

        actor_nn_lr=0.005,
        critic_nn_lr=0.01,
        model_nn_lr=0.01,

        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=14,
        hidden_actor=14,
        predict_epoch=30,
        u_optim=u_optim,
        find_lr=0.4,
        find_time_max=20
    )
    env_VI.reset()
    vi.train_identification_model()
    return vi
Example 13
def run_adhdp(rounds=1000, seed=random.randint(0, 1000000), name='ADHDP'):

    print('seed :', seed)
    random.seed(seed)
    np.random.seed(seed)
    from Control_Exp1001.demo.thickener.adhdp_make import new_adhdp
    adhdp = new_adhdp(capacity=9)
    penalty = Quadratic(**penalty_para)
    env_adhdp = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )
    env_adhdp.reset()
    res1 = OneRoundExp(controller=adhdp,
                       env=env_adhdp,
                       max_step=rounds,
                       exp_name=name).run()

    return res1
Example 14
def run_adhdp(rounds=1000, seed=random.randint(0, 1000000)):

    print('seed :', seed)
    random.seed(seed)
    np.random.seed(seed)
    penalty = Quadratic(**penalty_para)
    env_adhdp = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
        random_seed=seed,
    )

    env_adhdp.reset()
    res1 = OneRoundExp(controller=adhdp,
                       env=env_adhdp,
                       max_step=rounds,
                       exp_name='ADHDP').run()

    eval_res = OneRoundEvaluation(res_list=[res1])
    eval_res.plot_all()
Example 15
def test_model_hidden():

    env = Thickener(noise_in=True)
    env.reset()
    loss_list = []
    hid_size_list = []
    for hidden_size in range(6, 30, 2):
        controller = HDP(
            replay_buffer=None,
            u_bounds=env.u_bounds,
            env=env,
            predict_training_rounds=10000,
            gamma=0.6,
            batch_size=1,
            predict_batch_size=32,
            model_nn_error_limit=0.00008,
            critic_nn_error_limit=0.9,
            actor_nn_error_limit=0.1,
            actor_nn_lr=0.003,
            critic_nn_lr=0.2,
            model_nn_lr=0.01,
            indice_y=None,
            indice_y_star=None,
            indice_c=None,
            hidden_model=hidden_size,
            hidden_critic=10,
            hidden_actor=10,
            predict_epoch=40,
        )
        hid_size_list.append(hidden_size)
        controller.train_identification_model()
        loss = controller.cal_predict_mse(test_rounds=3000)
        loss_list.append(loss)

    plt.plot(hid_size_list, loss_list)

    plt.legend(['loss in test'])
    plt.show()
Example 16
def run_dhp(rounds=800,
            seed=random.randint(0, 1000000),
            name='DHP',
            capacity=2,
            predict_round=3000,
            u_optim='adam'):

    print('seed :', seed)
    torch.manual_seed(seed)
    dhp = new_dhp()
    penalty = Quadratic(**penalty_para)
    env_dhp = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )

    res1 = OneRoundExp(controller=dhp,
                       env=env_dhp,
                       max_step=rounds,
                       exp_name=name).run()
    return res1
Example 17
def run_hdp(rounds=1000,
            seed=random.randint(0, 1000000),
            name='HDP',
            predict_round=800):

    print('seed :', seed)
    hdp_para = {
        'gamma':0.2
    }
    hdp = new_hdp()
    penalty = Quadratic(**penalty_para)
    env_hdp = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )

    res1 = OneRoundExp(controller=hdp,
                       env=env_hdp,
                       max_step=rounds,
                       exp_name=name).run()
    return res1
Example 18
def new_vi_sample(capacity=2, predict_round=3000):
    replay_vi_sample = ReplayBuffer(capacity=capacity)
    env_VI_sample = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    exploration = No_Exploration()

    print('make new vi_sample controller')
    vi_sample = ViSample(
        replay_buffer=replay_vi_sample,
        u_bounds=env_VI_sample.u_bounds,
        #exploration = None,
        exploration=exploration,
        env=env_VI_sample,
        predict_training_rounds=predict_round,
        gamma=0.4,
        batch_size=capacity,
        predict_batch_size=32,
        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.001,
        actor_nn_error_limit=0.001,
        actor_nn_lr=0.005,
        critic_nn_lr=0.01,
        model_nn_lr=0.01,
        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=14,
        hidden_actor=14,
        predict_epoch=30,
    )
    env_VI_sample.reset()
    vi_sample.train_identification_model()
    vi_sample.test_predict_model(test_rounds=100)
    return vi_sample
Example 19
def run_hdp_sample(rounds=1000, seed=random.randint(0, 1000000)):

    print('seed :', seed)
    print('hdp_sample')
    from Control_Exp1001.demo.thickener.hdp_sample_maker import hdp_sample
    penalty = Quadratic(**penalty_para)
    env_hdp = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )

    res1 = OneRoundExp(controller=hdp_sample,
                       env=env_hdp,
                       max_step=rounds,
                       exp_name='HDP_sample').run()

    return res1
Example 20
def new_adhdp(random_act=False):
    period = 20
    capacity = period
    train_period = period
    batch_size = period
    off_policy = False
    replay_hdp = ReplayBuffer(capacity=capacity)
    env_ADHDP = Thickener()
    #exploration = No_Exploration()
    #exploration = EGreedy(env_ADHDP.external_u_bounds, epsilon_start=0.6,epsilon_final=0,epsilon_decay=10)
    exploration = GaussianExploration(
        action_bounds=env_ADHDP.external_u_bounds,
        min_sigma=0.00,
        max_sigma=0.01,
        decay_period=600)
    if random_act:
        exploration = EGreedy(action_bounds=env_ADHDP.external_u_bounds,
                              epsilon_start=1,
                              epsilon_final=1,
                              epsilon_decay=100)
        train_period = 20
    adhdp = ADHDP(
        replay_buffer=replay_hdp,
        u_bounds=env_ADHDP.u_bounds,
        #exploration = None,
        exploration=exploration,
        env=env_ADHDP,
        gamma=0.8,
        batch_size=batch_size,
        predict_batch_size=32,
        critic_nn_error_limit=0.05,
        actor_nn_error_limit=0.001,
        actor_nn_lr=0.003,
        critic_nn_lr=0.05,
        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_critic=16,
        hidden_actor=20,
        off_policy=off_policy,
        Nc=1000,
        Na=50,
        train_period=train_period,
        test_period=1)
    return adhdp
Example 21
def run_vi_sample(rounds=1000,
                  seed=random.randint(0, 1000000),
                  name='VI_sample',
                  capacity=2,
                  predict_round=3000):

    print('seed :', seed)
    torch.manual_seed(seed)
    from Control_Exp1001.demo.thickener.vi_sample_maker import new_vi_sample
    vi_sample = new_vi_sample(capacity=capacity, predict_round=predict_round)
    penalty = Quadratic(**penalty_para)
    env_vi_sample = Thickener(
        penalty_calculator=penalty,
        **thickner_para,
    )
    res1 = OneRoundExp(controller=vi_sample,
                       env=env_vi_sample,
                       max_step=rounds,
                       exp_name=name).run()

    mse_vi_sample_pre.append(vi_sample.con_predict_mse)
    return res1
Example 22
def run_adhdp(rounds=1000,
              seed=random.randint(0, 1000000),
              name='ADHDP',
              predict_round=800,
              random_act=False):
    print('seed :', seed)

    torch.manual_seed(seed)
    random.seed(seed)
    np.random.seed(seed)
    adhdp = new_adhdp(random_act=random_act)
    penalty = Quadratic(**penalty_para)
    env_hdp = Thickener(
        penalty_calculator=penalty,
        random_seed=seed,
        **thickner_para,
    )

    res1 = OneRoundExp(controller=adhdp,
                       env=env_hdp,
                       max_step=rounds,
                       exp_name=name).run()
    return res1
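
Each run_* helper above returns a single result record; a hedged sketch of a comparison driver, assuming the helpers are importable into one module and that OneRoundEvaluation accepts several entries in res_list (the examples above only ever pass one):

# Hypothetical comparison of several controllers under the same seed.
def compare_controllers(rounds=1000, seed=random.randint(0, 1000000)):
    res_list = [
        run_hdp(rounds=rounds, seed=seed),
        run_vi(rounds=rounds, seed=seed),
        run_adhdp(rounds=rounds, seed=seed),
    ]
    # Assumption: plot_all() overlays the curves from every result in res_list.
    OneRoundEvaluation(res_list=res_list).plot_all()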
Example 23
    def __init__(self,
                 gpu_id=1,
                 replay_buffer=None,
                 u_bounds=None,
                 exploration=None,
                 env=None,
                 predict_training_rounds=10000,
                 Vm=None,
                 Lm=None,
                 Va=None,
                 La=None,
                 Vc=None,
                 Lc=None,
                 gamma=0.6,
                 batch_size=1,
                 predict_batch_size=32,
                 model_nn_error_limit=0.08,
                 critic_nn_error_limit=1,
                 actor_nn_loss=0.1,
                 u_iter=30,
                 u_begin=None,
                 indice_y=None,
                 indice_y_star=None,
                 indice_c=None,
                 u_first=None):
        """

        :param gpu_id:
        :param replay_buffer:
        :param u_bounds:
        :param exploration:
        :param env:
        :param predict_training_rounds:  训练预测模型时使用的真实数据条数
        :param Vm:
        :param Lm:
        :param Va:
        :param La:
        :param Vc:
        :param Lc:
        :param gamma:
        :param batch_size:
        :param predict_batch_size: 训练预测模型时的batch_size
        :param model_nn_error_limit:
        :param critic_nn_error_limit:  critic网络的误差限
        :param actor_nn_loss:
        :param u_iter: 求解u*时的迭代次数
        :param u_begin: 求解u*时,第一次迭代的其实u(k)
        :param indice_y: y在state中的位置
        :param indice_y_star: *在state中的位置
        :param u_first: 第一次控制时的命令
        """
        super(ILPL, self).__init__(gpu_id=gpu_id,
                                   replay_buffer=replay_buffer,
                                   u_bounds=u_bounds,
                                   exploration=exploration)
        if env is None:
            env = Thickener()

        self.env = env
        self.predict_training_rounds = predict_training_rounds

        self.device = None
        self.cuda_device(gpu_id)
        self.batch_size = batch_size
        self.predict_batch_size = predict_batch_size


        self.indice_c = [6, 7]

        self.predict_training_losses = []
        self.model_nn = None
        self.model_nn_error_limit = model_nn_error_limit
        self.critic_nn_error_limit = critic_nn_error_limit
        self.actor_nn_error_limit = actor_nn_loss

        self.u_iter = u_iter

        # Train model neural network
        self.train_identification_model(Vm=Vm, Lm=Lm)
        self.test_predict_model(test_rounds=400)

        # Actor network setup
        self.actor_nn = None
        self.actor_nn_init(Va=Va, La=La)

        # Critic network setup
        self.critic_nn = None
        self.critic_nn_init(Vc=Vc, Lc=Lc)

        self.gamma = gamma
        self.u_begin = u_begin

        if indice_y is None:
            indice_y = [2, 3]
        if indice_y_star is None:
            indice_y_star = [0, 1]
        self.indice_y = indice_y
        self.indice_y_star = indice_y_star

        if u_first is None:
            u_first = np.array([1.8, 19])
        self.u_first = u_first
        self.first_act = True


        # Used for plotting
        self.u0_plt = PltUtil()
        self.u1_plt = PltUtil()
        self.y0_plt = PltUtil()
        self.y1_plt = PltUtil()
        self.wa_plt = PltUtil()
        self.wm_plt = PltUtil()
        self.wc_plt = PltUtil()
Example 24
    def __init__(self,
                 gpu_id=1,
                 replay_buffer=None,
                 u_bounds=None,
                 exploration=None,
                 env=None,
                 predict_training_rounds=10000,
                 gamma=0.6,
                 batch_size=1,
                 predict_batch_size=32,
                 model_nn_error_limit=0.08,
                 critic_nn_error_limit=1,
                 actor_nn_error_limit=0.1,
                 actor_nn_lr=0.01,
                 critic_nn_lr=0.01,
                 model_nn_lr=0.01,
                 indice_y=None,
                 indice_u=None,
                 indice_y_star=None,
                 indice_c=None,
                 hidden_model=10,
                 hidden_critic=10,
                 hidden_actor=10,
                 predict_epoch=35):
        """

        :param gpu_id:
        :param replay_buffer:
        :param u_bounds:
        :param exploration:
        :param env:
        :param predict_training_rounds:  训练预测模型时使用的真实数据条数
        :param Vm:
        :param Lm:
        :param Va:
        :param La:
        :param Vc:
        :param Lc:
        :param gamma:
        :param batch_size:
        :param predict_batch_size: 训练预测模型时的batch_size
        :param model_nn_error_limit:
        :param critic_nn_error_limit:  critic网络的误差限
        :param actor_nn_loss:
        :param u_iter: 求解u*时的迭代次数
        :param u_begin: 求解u*时,第一次迭代的其实u(k)
        :param indice_y: y在state中的位置
        :param indice_y_star: *在state中的位置
        :param u_first: 第一次控制时的命令
        """
        super(HDP_sample, self).__init__(gpu_id=gpu_id,
                                         replay_buffer=replay_buffer,
                                         u_bounds=u_bounds,
                                         exploration=exploration)
        if env is None:
            env = Thickener()

        self.env = env
        self.predict_training_rounds = predict_training_rounds

        self.device = None
        self.cuda_device(gpu_id)
        self.batch_size = batch_size
        self.predict_batch_size = predict_batch_size

        self.predict_training_losses = []
        self.model_nn = None
        self.model_nn_error_limit = model_nn_error_limit
        self.critic_nn_error_limit = critic_nn_error_limit
        self.actor_nn_error_limit = actor_nn_error_limit

        dim_c = env.size_yudc[3]
        dim_y = env.size_yudc[0]
        dim_u = env.size_yudc[1]
        # Train model neural network
        self.model_nn = nn.Sequential(
            nn.Linear(dim_y + dim_u + dim_c, hidden_model), nn.Tanh(),
            nn.Linear(hidden_model, dim_y))
        self.model_nn_optim = torch.optim.Adam(self.model_nn.parameters(),
                                               lr=model_nn_lr)
        #self.train_identification_model()

        #mse = self.test_predict_model(test_rounds=400)

        # Actor network setup
        self.actor_nn = nn.Sequential(
            nn.Linear(2 * dim_y + dim_c, hidden_actor, bias=False),
            nn.Tanh(),
            nn.Linear(hidden_actor, dim_u),
            nn.Tanh(),
            # nn.Linear(dim_u, dim_u)
        )

        self.actor_nn_optim = torch.optim.Adam(self.actor_nn.parameters(),
                                               lr=actor_nn_lr)

        # Critic network setup (HDP)

        self.critic_nn = nn.Sequential(
            nn.Linear(dim_y + dim_y + dim_c, hidden_critic, bias=False),
            nn.Tanh(),
            nn.Linear(hidden_critic, 1),
        )
        self.critic_nn_optim = torch.optim.Adam(self.critic_nn.parameters(),
                                                lr=critic_nn_lr)
        self.critic_criterion = torch.nn.MSELoss()

        self.gamma = gamma

        if indice_y is None:
            indice_y = [2, 3]
        if indice_y_star is None:
            indice_y_star = [0, 1]
        if indice_u is None:
            indice_u = [4, 5]
        self.indice_y = indice_y
        self.indice_y_star = indice_y_star
        self.indice_c = [6, 7]
        self.indice_u = indice_u
        self.predict_epoch = predict_epoch
Example 25
import math
import Control_Exp1001 as CE
import os
import json

from Control_Exp1001.demo.thickener.adhdp import ADHDP
from Control_Exp1001.simulation.thickener import Thickener
from Control_Exp1001.common.penaltys.demo_penalty import DemoPenalty
import matplotlib.pyplot as plt
from Control_Exp1001.demo.thickener.one_round_exp import OneRoundExp
from Control_Exp1001.demo.thickener.one_round_evaluation import OneRoundEvaluation
from Control_Exp1001.common.action_noise.e_greedy import EGreedy
from Control_Exp1001.common.replay.replay_buffer import ReplayBuffer

replay_hdp = ReplayBuffer(capacity=20)
env_ADHDP = Thickener()
exploration = EGreedy(epsilon_start=1,
                      epsilon_final=0.0001,
                      epsilon_decay=300,
                      action_bounds=env_ADHDP.u_bounds)

adhdp = ADHDP(
    replay_buffer=replay_hdp,
    u_bounds=env_ADHDP.u_bounds,
    #exploration = None,
    exploration=exploration,
    env=env_ADHDP,
    gamma=0.1,
    batch_size=10,
    predict_batch_size=32,
    critic_nn_error_limit=0.02,
Example 26
    def __init__(
        self,
        gpu_id=1,
        replay_buffer=None,
        u_bounds=None,
        exploration=None,
        env=None,
        gamma=0.6,
        batch_size=1,
        predict_batch_size=32,
        critic_nn_error_limit=1,
        actor_nn_error_limit=0.1,
        actor_nn_lr=0.01,
        critic_nn_lr=0.01,
        indice_y=None,
        indice_u=None,
        indice_y_star=None,
        indice_c=None,
        hidden_critic=10,
        hidden_actor=10,
        off_policy=False,
        Nc=500,
        Na=500,
        train_period=100,
        test_period=1,
    ):
        """

        :param gpu_id:
        :param replay_buffer:
        :param u_bounds:
        :param exploration:
        :param env:
        :param predict_training_rounds:  训练预测模型时使用的真实数据条数
        :param Vm:
        :param Lm:
        :param Va:
        :param La:
        :param Vc:
        :param Lc:
        :param gamma:
        :param batch_size:
        :param predict_batch_size: 训练预测模型时的batch_size
        :param model_nn_error_limit:
        :param critic_nn_error_limit:  critic网络的误差限
        :param actor_nn_loss:
        :param u_iter: 求解u*时的迭代次数
        :param u_begin: 求解u*时,第一次迭代的其实u(k)
        :param indice_y: y在state中的位置
        :param indice_y_star: *在state中的位置
        :param u_first: 第一次控制时的命令
        """
        super(ADHDP, self).__init__(gpu_id=gpu_id,
                                    replay_buffer=replay_buffer,
                                    u_bounds=u_bounds,
                                    exploration=exploration)
        if env is None:
            env = Thickener()

        self.env = env

        self.device = None
        self.cuda_device(gpu_id)
        self.batch_size = batch_size

        self.critic_nn_error_limit = critic_nn_error_limit
        self.actor_nn_error_limit = actor_nn_error_limit

        dim_c = env.size_yudc[3]
        dim_y = env.size_yudc[0]
        dim_u = env.size_yudc[1]

        # Actor network setup
        self.actor_nn = nn.Sequential(
            nn.Linear(2 * dim_y + dim_c, hidden_actor, bias=False),
            #nn.Tanh(),
            #nn.Sigmoid(),
            nn.ReLU(),
            nn.Linear(hidden_actor, dim_u, bias=False),
            nn.Tanh(),
            # nn.Linear(dim_u, dim_u)
        )

        self.actor_nn_optim = torch.optim.SGD(self.actor_nn.parameters(),
                                              lr=actor_nn_lr)

        # Critic network setup (action-dependent: the control u is part of the critic input)

        self.critic_nn = nn.Sequential(
            nn.Linear(dim_y + dim_y + dim_c + dim_u, hidden_critic,
                      bias=False),
            nn.Tanh(),
            #nn.ReLU(),
            nn.Linear(hidden_critic, 1, bias=False),
        )
        self.critic_nn_optim = torch.optim.SGD(self.critic_nn.parameters(),
                                               lr=critic_nn_lr)
        self.critic_criterion = torch.nn.MSELoss()

        self.gamma = gamma

        if indice_y is None:
            indice_y = [2, 3]
        if indice_y_star is None:
            indice_y_star = [0, 1]
        if indice_u is None:
            indice_u = [4, 5]
        self.indice_y = indice_y
        self.indice_y_star = indice_y_star
        self.indice_c = [6, 7]
        self.indice_u = indice_u
        self.off_policy = off_policy
        self.Nc = Nc
        self.Na = Na
        self.train_period = train_period
        self.test_period = test_period
Example 27
import math
import Control_Exp1001 as CE
import os
import json

from Control_Exp1001.demo.thickener.hdp_sample import HDP_sample
from Control_Exp1001.simulation.thickener import Thickener
from Control_Exp1001.common.penaltys.demo_penalty import DemoPenalty
import matplotlib.pyplot as plt
from Control_Exp1001.demo.thickener.one_round_exp import OneRoundExp
from Control_Exp1001.demo.thickener.one_round_evaluation import OneRoundEvaluation
from Control_Exp1001.common.action_noise.e_greedy import EGreedy
from Control_Exp1001.common.replay.replay_buffer import ReplayBuffer

replay_hdp_sample = ReplayBuffer(capacity=30)
env_HDP_sample = Thickener(noise_p=0.01, noise_in=True)
exploration = EGreedy(epsilon_start=0.0,
                      epsilon_final=0.0000,
                      epsilon_decay=100,
                      action_bounds=env_HDP_sample.u_bounds)

hdp_sample = HDP_sample(
    replay_buffer=replay_hdp_sample,
    u_bounds=env_HDP_sample.u_bounds,
    #exploration = None,
    exploration=exploration,
    env=env_HDP_sample,
    predict_training_rounds=3000,
    gamma=0.1,
    batch_size=10,
    predict_batch_size=32,