Example #1
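# NOTE: this example assumes ReplayBuffer, Thickener, No_Exploration, VI, DHP,
# ILPL and EXP_NAME are imported from the surrounding project; the exact module
# paths are not shown in the snippet.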
def new_vi(capacity=2, batch_size=2):
    """Build a VI controller on a fresh Thickener environment and pre-train its model."""
    predict_round = 3000
    u_optim = 'adam'
    gamma = 0.6
    replay_vi = ReplayBuffer(capacity=capacity)
    env_VI = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    exploration = No_Exploration()

    print('make new vi controller')
    vi = VI(
        replay_buffer=replay_vi,
        u_bounds=env_VI.u_bounds,
        # exploration=None,
        exploration=exploration,
        env=env_VI,
        predict_training_rounds=predict_round,
        gamma=gamma,

        batch_size=batch_size,
        predict_batch_size=32,

        # training-error limits
        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.001,
        actor_nn_error_limit=0.001,

        # learning rates
        actor_nn_lr=0.005,
        critic_nn_lr=0.01,
        model_nn_lr=0.01,

        indice_y=None,
        indice_y_star=None,
        indice_c=None,

        # hidden-layer sizes
        hidden_model=10,
        hidden_critic=14,
        hidden_actor=14,
        predict_epoch=30,
        Nc=1000,
        u_optim=u_optim,
        img_path=EXP_NAME,
    )
    env_VI.reset()
    vi.train_identification_model()
    #vi.test_predict_model(test_rounds=100)
    return vi


def new_dhp():
    """Build a DHP controller on a fresh Thickener environment and pre-train its model."""
    capacity = 3
    predict_round = 3000
    gamma = 0.6
    replay_DHP = ReplayBuffer(capacity=capacity)
    env_DHP = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    exploration = No_Exploration()

    print('make new dhp controller')
    dhp = DHP(
        replay_buffer=replay_DHP,
        u_bounds=env_DHP.u_bounds,
        #exploration = None,
        exploration=exploration,
        env=env_DHP,
        predict_training_rounds=predict_round,
        gamma=gamma,
        batch_size=2,
        predict_batch_size=32,
        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.001,
        actor_nn_error_limit=0.0001,

        # 0.005
        actor_nn_lr=0.008,
        critic_nn_lr=0.01,
        model_nn_lr=0.01,
        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=12,
        hidden_actor=14,
        predict_epoch=30,
        Na=200,
        Nc=300,
        img_path=EXP_NAME)
    env_DHP.reset()
    dhp.train_identification_model()
    return dhp


def new_ILPL():
    """Build an ILPL controller on a fresh Thickener environment and pre-train its model."""
    predict_round = 3000
    gamma = 0.6
    replay_ILPL = ReplayBuffer(capacity=4)
    env_ILPL = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    exploration = No_Exploration()

    print('make new ilpl controller')
    ilpl = ILPL(
        replay_buffer=replay_ILPL,
        u_bounds=env_ILPL.u_bounds,
        #exploration = None,
        exploration=exploration,
        env=env_ILPL,
        predict_training_rounds=predict_round,
        gamma=gamma,
        batch_size=2,
        predict_batch_size=32,
        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.1,
        actor_nn_error_limit=0.001,

        # 0.005
        actor_nn_lr=0.001,
        critic_nn_lr=0.02,
        model_nn_lr=0.01,
        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=14,
        hidden_actor=14,
        predict_epoch=30,
        Na=220,
        Nc=500,
        img_path=EXP_NAME)
    env_ILPL.reset()
    ilpl.train_identification_model()
    return ilpl
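

# Usage sketch (hypothetical driver code, not part of the original example):
# each factory returns a controller that has already pre-trained its
# identification model; how the controllers are subsequently run against the
# Thickener environment depends on the surrounding project.
if __name__ == '__main__':
    vi = new_vi(capacity=2, batch_size=2)
    dhp = new_dhp()
    ilpl = new_ILPL()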