Example #1
device = torch.device("cuda:0")
# device = torch.device(c.CPU)

action_repeat = 1
num_frames = 1

aux_reward = MountainCarContinuousAuxiliaryReward()
num_tasks = 1 + aux_reward.num_auxiliary_rewards  # main task reward plus one channel per auxiliary reward

experiment_setting = {
    # Auxiliary Tasks
    c.AUXILIARY_TASKS: {},

    # Buffer
    c.BUFFER_PREPROCESSING: gt.AsType(),
    c.BUFFER_SETTING: {
        c.KWARGS: {
            c.MEMORY_SIZE: 50000 // action_repeat,
            c.OBS_DIM: (obs_dim, ),
            c.H_STATE_DIM: (1, ),
            c.ACTION_DIM: (action_dim, ),
            c.REWARD_DIM: (num_tasks, ),
            c.INFOS: {
                c.MEAN: ((action_dim, ), np.float32),
                c.VARIANCE: ((action_dim, ), np.float32),
                c.ENTROPY: ((action_dim, ), np.float32),
                c.LOG_PROB: ((1, ), np.float32),
                c.VALUE: ((1, ), np.float32),
                c.DISCOUNTING: ((1, ), np.float32)
            },
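
Example #1 is an excerpt: it assumes numpy (np), torch, the framework's constants module (c), and transforms module (gt) are imported, and that obs_dim and action_dim were set from the environment earlier in the file. To show roughly what a buffer built from these kwargs would hold, here is a minimal sketch; the class name, field names, and array-per-field layout are illustrative assumptions, not the framework's actual buffer implementation.

# Minimal sketch of what a replay buffer might preallocate from the kwargs
# above (memory_size, obs_dim, h_state_dim, action_dim, reward_dim, infos).
# The layout and names are assumptions for illustration only.
import numpy as np

class SketchBuffer:
    def __init__(self, memory_size, obs_dim, h_state_dim, action_dim, reward_dim, infos):
        self.memory_size = memory_size
        self.obs = np.zeros((memory_size, *obs_dim), dtype=np.float32)
        self.h_states = np.zeros((memory_size, *h_state_dim), dtype=np.float32)
        self.actions = np.zeros((memory_size, *action_dim), dtype=np.float32)
        self.rewards = np.zeros((memory_size, *reward_dim), dtype=np.float32)
        self.dones = np.zeros((memory_size, 1), dtype=bool)
        # One extra array per info field, e.g. the policy's mean/variance/log_prob.
        self.infos = {key: np.zeros((memory_size, *shape), dtype=dtype)
                      for key, (shape, dtype) in infos.items()}
        self._idx = 0
        self._full = False

    def push(self, obs, h_state, action, reward, done, info):
        i = self._idx
        self.obs[i], self.h_states[i] = obs, h_state
        self.actions[i], self.rewards[i], self.dones[i] = action, reward, done
        for key in self.infos:
            self.infos[key][i] = info[key]
        self._idx = (i + 1) % self.memory_size  # overwrite the oldest entry when full
        self._full = self._full or self._idx == 0

# e.g. buf = SketchBuffer(50000, (2,), (1,), (1,), (3,), {"mean": ((1,), np.float32)})
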
Example #2
min_action = -np.ones(action_dim)
max_action = np.ones(action_dim)
# device = torch.device(c.CPU)
device = torch.device("cuda:0")

action_repeat = 1
num_frames = 1

memory_size = max_total_steps = 1000000 // action_repeat  # buffer large enough to hold every transition of the run

experiment_setting = {
    # Auxiliary Tasks
    c.AUXILIARY_TASKS: {},

    # Buffer
    c.BUFFER_PREPROCESSING: gt.AsType(),
    c.BUFFER_SETTING: {
        c.KWARGS: {
            c.MEMORY_SIZE: memory_size,
            c.OBS_DIM: (obs_dim,),
            c.H_STATE_DIM: (1,),
            c.ACTION_DIM: (action_dim,),
            c.REWARD_DIM: (1,),
            c.INFOS: {c.MEAN: ((action_dim,), np.float32),
                      c.VARIANCE: ((action_dim,), np.float32),
                      c.ENTROPY: ((action_dim,), np.float32),
                      c.LOG_PROB: ((1,), np.float32),
                      c.VALUE: ((1,), np.float32),
                      c.DISCOUNTING: ((1,), np.float32)},
            c.CHECKPOINT_INTERVAL: 0,
            c.CHECKPOINT_PATH: None,
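
Example #2 reserves INFOS slots for the policy's distribution statistics (mean, variance, entropy, log_prob) alongside value and discounting. A hedged sketch of how such an info dict might be filled from a diagonal Gaussian policy follows; the helper name, the plain-string keys standing in for the c.* constants, and the summing of log-probs over action dimensions are assumptions.

# Hypothetical helper showing how the INFOS fields above might be produced
# by a diagonal Gaussian policy. String keys stand in for the c.* constants.
import numpy as np
import torch

def gaussian_action_info(mean, std):
    """mean, std: tensors of shape (action_dim,). Returns a sampled action
    plus an info dict whose shapes match the (shape, dtype) spec in c.INFOS."""
    dist = torch.distributions.Normal(mean, std)
    action = dist.sample()
    info = {
        "mean": mean.numpy().astype(np.float32),               # (action_dim,)
        "variance": dist.variance.numpy().astype(np.float32),  # (action_dim,)
        "entropy": dist.entropy().numpy().astype(np.float32),  # (action_dim,)
        # Joint log-probability of the whole action vector, stored with shape (1,).
        "log_prob": dist.log_prob(action).sum(dim=-1, keepdim=True).numpy().astype(np.float32),
    }
    # c.VALUE and c.DISCOUNTING would come from the critic and the discount
    # schedule, which are outside this sketch.
    return action, info

# e.g., for a 2-dimensional action space:
# action, info = gaussian_action_info(torch.zeros(2), torch.ones(2))
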
Example #3
# device = torch.device(c.CPU)

action_repeat = 1
num_frames = 1
hidden_state_dim = 128

memory_size = 1000000
max_total_steps = 1000000

experiment_setting = {
    # Auxiliary Tasks
    c.AUXILIARY_TASKS: {},

    # Buffer
    c.BUFFER_PREPROCESSING: gt.Compose([gt.AsType()]),
    c.BUFFER_SETTING: {
        c.KWARGS: {
            c.MEMORY_SIZE: memory_size,
            c.OBS_DIM: (obs_dim, ),
            c.H_STATE_DIM: (hidden_state_dim * 2, ),
            c.ACTION_DIM: (action_dim, ),
            c.REWARD_DIM: (1, ),
            c.INFOS: {
                c.MEAN: ((action_dim, ), np.float32),
                c.VARIANCE: ((action_dim, ), np.float32),
                c.ENTROPY: ((action_dim, ), np.float32),
                c.LOG_PROB: ((1, ), np.float32),
                c.VALUE: ((1, ), np.float32),
                c.DISCOUNTING: ((1, ), np.float32)
            },
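
Example #3 reserves a hidden-state slot of hidden_state_dim * 2 per transition. One plausible reading (an assumption, since the model code is not shown here) is that a recurrent policy stores an LSTM's hidden and cell states concatenated; the sketch below illustrates that layout with torch.nn.LSTMCell, using a placeholder obs_dim of 8.

# Illustration of why H_STATE_DIM might be hidden_state_dim * 2: an LSTM keeps
# a hidden state h and a cell state c of hidden_state_dim each, and a flat
# buffer slot can store their concatenation. This is an assumption about the
# model, not code from the framework.
import torch
import torch.nn as nn

obs_dim, hidden_state_dim = 8, 128   # obs_dim is a placeholder value
cell = nn.LSTMCell(obs_dim, hidden_state_dim)

obs = torch.zeros(1, obs_dim)
h = torch.zeros(1, hidden_state_dim)
c_state = torch.zeros(1, hidden_state_dim)
h, c_state = cell(obs, (h, c_state))

flat_h_state = torch.cat([h, c_state], dim=-1)   # shape (1, 256) == hidden_state_dim * 2
assert flat_h_state.shape[-1] == hidden_state_dim * 2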