Example #1
def new_vi(capacity=2, batch_size=2):
    # Assumes ReplayBuffer, Thickener, No_Exploration, VI and EXP_NAME are
    # imported from the surrounding project.
    predict_round = 3000
    u_optim = 'adam'
    gamma = 0.6

    replay_vi = ReplayBuffer(capacity=capacity)
    # Simulated thickener environment with input noise enabled.
    env_VI = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    exploration = No_Exploration()

    print('make new vi controller')
    vi = VI(
        replay_buffer=replay_vi,
        u_bounds=env_VI.u_bounds,
        exploration=exploration,
        env=env_VI,
        predict_training_rounds=predict_round,
        gamma=gamma,

        batch_size=batch_size,
        predict_batch_size=32,

        # Training-error thresholds for the model, critic and actor networks.
        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.001,
        actor_nn_error_limit=0.001,

        # Learning rates.
        actor_nn_lr=0.005,
        critic_nn_lr=0.01,
        model_nn_lr=0.01,

        # Index selectors; None presumably defers to the controller's defaults.
        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=14,
        hidden_actor=14,
        predict_epoch=30,
        Nc=1000,
        u_optim=u_optim,
        img_path=EXP_NAME,
    )
    env_VI.reset()
    # Pre-train the system-identification model before returning the controller.
    vi.train_identification_model()
    # vi.test_predict_model(test_rounds=100)
    return vi
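
A minimal usage sketch for the factory above; the call itself is grounded in the example, while any surrounding training or evaluation loop is project-specific and omitted:

# Build a VI controller backed by a 2-transition replay buffer; the returned
# object already has its identification model pre-trained (see the function body).
vi = new_vi(capacity=2, batch_size=2)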
Example #2
def new_dhp_vi():
    capacity = 20
    predict_round = 6000
    gamma = 0.6

    replay_DhpVI = ReplayBuffer(capacity=capacity)
    env_DhpVI = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    exploration = No_Exploration()

    print('make new dhp_vi controller')
    dhp_vi = DhpVI(
        replay_buffer=replay_DhpVI,
        u_bounds=env_DhpVI.u_bounds,
        exploration=exploration,
        env=env_DhpVI,
        predict_training_rounds=predict_round,
        gamma=gamma,

        batch_size=20,  # matches the replay-buffer capacity above
        predict_batch_size=32,

        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.01,
        actor_nn_error_limit=0.001,

        actor_nn_lr=0.005,
        critic_nn_lr=0.001,
        model_nn_lr=0.01,

        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=12,
        hidden_actor=14,
        predict_epoch=30,
        Na=2000,
        Nc=100,
        test_period=3,
        max_u_iters=2000,
        policy_visual_period=400,
        img_path=EXP_NAME,
    )
    env_DhpVI.reset()
    dhp_vi.train_identification_model()
    return dhp_vi
Example #3
def new_dhp():
    capacity = 1
    predict_round = 6000
    gamma = 0.6

    replay_DHP = ReplayBuffer(capacity=capacity)
    env_DHP = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    exploration = No_Exploration()

    print('make new dhp controller')
    dhp = DHP(
        replay_buffer=replay_DHP,
        u_bounds=env_DHP.u_bounds,
        exploration=exploration,
        env=env_DHP,
        predict_training_rounds=predict_round,
        gamma=gamma,

        batch_size=1,  # single-transition updates; the buffer holds one sample
        predict_batch_size=32,

        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.01,
        actor_nn_error_limit=0.001,

        actor_nn_lr=0.005,
        critic_nn_lr=0.001,
        model_nn_lr=0.01,

        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=12,
        hidden_actor=14,
        predict_epoch=30,
        Na=220,
        Nc=100,
        test_period=3,
        img_path=EXP_NAME,
    )
    env_DHP.reset()
    dhp.train_identification_model()
    return dhp
Example #4
def new_vi_ub():
    capacity = 2
    predict_round = 3000
    u_optim = 'sgd'

    replay_vi = ReplayBuffer(capacity=capacity)
    env_VI = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    exploration = No_Exploration()

    print('make new vi_ub controller')
    vi = VIub(
        replay_buffer=replay_vi,
        u_bounds=env_VI.u_bounds,
        exploration=exploration,
        env=env_VI,
        predict_training_rounds=predict_round,
        gamma=0.6,

        batch_size=capacity,  # sample the whole buffer each update
        predict_batch_size=32,

        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.001,
        actor_nn_error_limit=0.001,

        actor_nn_lr=0.005,
        critic_nn_lr=0.01,
        model_nn_lr=0.01,

        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=14,
        hidden_actor=14,
        predict_epoch=30,
        u_optim=u_optim,
        find_lr=0.4,
        find_time_max=20,
    )
    env_VI.reset()
    vi.train_identification_model()
    return vi
Example #5
def new_hdp():
    predict_round = 3000
    gamma = 0.6

    replay_hdp = ReplayBuffer(capacity=2)
    env_HDP = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    exploration = No_Exploration()

    print('make new hdp controller')
    hdp = HDP(
        replay_buffer=replay_hdp,
        u_bounds=env_HDP.u_bounds,
        exploration=exploration,
        env=env_HDP,
        predict_training_rounds=predict_round,
        gamma=gamma,

        batch_size=2,
        predict_batch_size=32,

        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.001,
        actor_nn_error_limit=0.001,

        actor_nn_lr=0.003,
        critic_nn_lr=0.02,
        model_nn_lr=0.01,

        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=14,
        hidden_actor=14,
        predict_epoch=30,
        Na=220,
        Nc=500,
        img_path=EXP_NAME,
    )
    env_HDP.reset()
    hdp.train_identification_model()
    return hdp
Example #6
def new_vi_sample(capacity=2, predict_round=3000):
    replay_vi_sample = ReplayBuffer(capacity=capacity)
    env_VI_sample = Thickener(
        noise_p=0.03,
        noise_in=True,
    )
    exploration = No_Exploration()

    print('make new vi_sample controller')
    vi_sample = ViSample(
        replay_buffer=replay_vi_sample,
        u_bounds=env_VI_sample.u_bounds,
        exploration=exploration,
        env=env_VI_sample,
        predict_training_rounds=predict_round,
        gamma=0.4,
        batch_size=capacity,
        predict_batch_size=32,
        model_nn_error_limit=0.0008,
        critic_nn_error_limit=0.001,
        actor_nn_error_limit=0.001,
        actor_nn_lr=0.005,
        critic_nn_lr=0.01,
        model_nn_lr=0.01,
        indice_y=None,
        indice_y_star=None,
        indice_c=None,
        hidden_model=10,
        hidden_critic=14,
        hidden_actor=14,
        predict_epoch=30,
    )
    env_VI_sample.reset()
    vi_sample.train_identification_model()
    # Unlike the other factories here, this one also validates the learned model.
    vi_sample.test_predict_model(test_rounds=100)
    return vi_sample
Example #7
# ... (truncated in the source: the enclosing function and the construction
# of env1/env2 are elided)
               y_low=[-15, -15],
               y_high=[15, 15],
               u_high=[2, 2],
               u_low=[-2, -2],
               reward_calculator=reward3,
               normalize=False)
    replay_buffer1 = ReplayBuffer(1000)
    replay_buffer2 = ReplayBuffer(100)

    exploration_noise1 = EGreedy(
        action_bounds=env1.u_bounds,
        epsilon_start=0.5,
        epsilon_final=0.4,
        epsilon_decay=100000,
    )
    # The EGreedy instance above is immediately discarded by the rebinding
    # below, so controller1 runs without exploration noise.
    exploration_noise1 = No_Exploration()

    exploration_noise2 = GaussianExploration(
        action_bounds=env2.external_u_bounds,
        min_sigma=1.0,
        max_sigma=1.01,
        decay_period=100000)

    controller1 = Td3(gpu_id=1,
                      num_inputs=env1.observation_size(),
                      num_actions=2,
                      act_hidden_size=16,
                      val_hidden_size=16,
                      replay_buffer=replay_buffer1,
                      u_bounds=env1.u_bounds,
                      exploration=exploration_noise1,