device = torch.device("cuda:0") # device = torch.device(c.CPU) action_repeat = 1 num_frames = 1 aux_reward = MountainCarContinuousAuxiliaryReward() num_tasks = 1 + aux_reward.num_auxiliary_rewards experiment_setting = { # Auxiliary Tasks c.AUXILIARY_TASKS: {}, # Buffer c.BUFFER_PREPROCESSING: gt.AsType(), c.BUFFER_SETTING: { c.KWARGS: { c.MEMORY_SIZE: 50000 // action_repeat, c.OBS_DIM: (obs_dim, ), c.H_STATE_DIM: (1, ), c.ACTION_DIM: (action_dim, ), c.REWARD_DIM: (num_tasks, ), c.INFOS: { c.MEAN: ((action_dim, ), np.float32), c.VARIANCE: ((action_dim, ), np.float32), c.ENTROPY: ((action_dim, ), np.float32), c.LOG_PROB: ((1, ), np.float32), c.VALUE: ((1, ), np.float32), c.DISCOUNTING: ((1, ), np.float32) },
min_action = -np.ones(action_dim) max_action = np.ones(action_dim) # device = torch.device(c.CPU) device = torch.device("cuda:0") action_repeat = 1 num_frames = 1 memory_size = max_total_steps = 1000000 // action_repeat experiment_setting = { # Auxiliary Tasks c.AUXILIARY_TASKS: {}, # Buffer c.BUFFER_PREPROCESSING: gt.AsType(), c.BUFFER_SETTING: { c.KWARGS: { c.MEMORY_SIZE: memory_size, c.OBS_DIM: (obs_dim,), c.H_STATE_DIM: (1,), c.ACTION_DIM: (action_dim,), c.REWARD_DIM: (1,), c.INFOS: {c.MEAN: ((action_dim,), np.float32), c.VARIANCE: ((action_dim,), np.float32), c.ENTROPY: ((action_dim,), np.float32), c.LOG_PROB: ((1,), np.float32), c.VALUE: ((1,), np.float32), c.DISCOUNTING: ((1,), np.float32)}, c.CHECKPOINT_INTERVAL: 0, c.CHECKPOINT_PATH: None,
# device = torch.device(c.CPU) action_repeat = 1 num_frames = 1 hidden_state_dim = 128 memory_size = 1000000 max_total_steps = 1000000 experiment_setting = { # Auxiliary Tasks c.AUXILIARY_TASKS: {}, # Buffer c.BUFFER_PREPROCESSING: gt.Compose([gt.AsType()]), c.BUFFER_SETTING: { c.KWARGS: { c.MEMORY_SIZE: memory_size, c.OBS_DIM: (obs_dim, ), c.H_STATE_DIM: (hidden_state_dim * 2, ), c.ACTION_DIM: (action_dim, ), c.REWARD_DIM: (1, ), c.INFOS: { c.MEAN: ((action_dim, ), np.float32), c.VARIANCE: ((action_dim, ), np.float32), c.ENTROPY: ((action_dim, ), np.float32), c.LOG_PROB: ((1, ), np.float32), c.VALUE: ((1, ), np.float32), c.DISCOUNTING: ((1, ), np.float32) },