# NOTE(review): this fragment is collapsed onto a single physical line. Because
# it begins with '#', Python treats the entire line -- the scalar settings and
# the experiment_setting dict alike -- as one comment, so none of it executes.
# The dict literal is also cut off before its closing braces, so the original
# line breaks must be restored from the full script, not from this line alone.
#
# Visible content, in order: a disabled CPU device selection; action_repeat = 1,
# num_frames = 1, hidden_state_dim = 128; memory_size and max_total_steps both
# 1000000; then experiment_setting with empty auxiliary tasks, a buffer
# preprocessing pipeline of gt.Compose([gt.AsType()]), and buffer kwargs whose
# H_STATE_DIM is (hidden_state_dim * 2, ) and whose INFOS entries each pair a
# tuple with np.float32 (presumably shape and dtype -- TODO confirm).
# device = torch.device(c.CPU) action_repeat = 1 num_frames = 1 hidden_state_dim = 128 memory_size = 1000000 max_total_steps = 1000000 experiment_setting = { # Auxiliary Tasks c.AUXILIARY_TASKS: {}, # Buffer c.BUFFER_PREPROCESSING: gt.Compose([gt.AsType()]), c.BUFFER_SETTING: { c.KWARGS: { c.MEMORY_SIZE: memory_size, c.OBS_DIM: (obs_dim, ), c.H_STATE_DIM: (hidden_state_dim * 2, ), c.ACTION_DIM: (action_dim, ), c.REWARD_DIM: (1, ), c.INFOS: { c.MEAN: ((action_dim, ), np.float32), c.VARIANCE: ((action_dim, ), np.float32), c.ENTROPY: ((action_dim, ), np.float32), c.LOG_PROB: ((1, ), np.float32), c.VALUE: ((1, ), np.float32), c.DISCOUNTING: ((1, ), np.float32) },
obs_dim = int(np.product(raw_img_dim) + scalar_feature_dim) latent_dim = 50 memory_size = max_total_steps = 1000000 // action_repeat device = torch.device("cuda:0") # device = torch.device(c.CPU) experiment_setting = { # Auxiliary Tasks c.AUXILIARY_TASKS: {}, # Buffer c.BUFFER_PREPROCESSING: gt.Compose([ gt.Transpose((0, 3, 1, 2)), gt.Reshape(), ]), c.BUFFER_SETTING: { c.KWARGS: { c.MEMORY_SIZE: memory_size, c.OBS_DIM: (obs_dim, ), c.H_STATE_DIM: (1, ), c.ACTION_DIM: (action_dim, ), c.REWARD_DIM: (1, ), c.INFOS: { c.MEAN: ((action_dim, ), np.float32), c.VARIANCE: ((action_dim, ), np.float32), c.ENTROPY: ((action_dim, ), np.float32), c.LOG_PROB: ((1, ), np.float32), c.VALUE: ((1, ), np.float32), c.DISCOUNTING: ((1, ), np.float32)
# NOTE(review): this fragment is collapsed onto a single physical line: the
# statements run together without separators, and the '#' before the CPU device
# alternative comments out everything after it, including experiment_setting.
# The dict literal is also cut off before its closing braces. Restore the
# original line breaks before relying on this configuration.
#
# Visible content, in order: latent_dim = 50; memory_size and max_total_steps
# both 100000 // action_repeat; device "cuda:0"; then experiment_setting with
# empty auxiliary tasks, a buffer preprocessing pipeline of
# gt.Transpose((0, 3, 1, 2)) -> it.NumPyCenterCrop(raw_img_dim,
# height=processed_h, width=processed_w) -> gt.Reshape(), and buffer kwargs
# whose H_STATE_DIM is (1, ) and whose INFOS entries each pair a tuple with
# np.float32 (presumably shape and dtype -- TODO confirm).
latent_dim = 50 memory_size = max_total_steps = 100000 // action_repeat device = torch.device("cuda:0") # device = torch.device(c.CPU) experiment_setting = { # Auxiliary Tasks c.AUXILIARY_TASKS: {}, # Buffer c.BUFFER_PREPROCESSING: gt.Compose([ gt.Transpose((0, 3, 1, 2)), it.NumPyCenterCrop(raw_img_dim, height=processed_h, width=processed_w), gt.Reshape(), ]), c.BUFFER_SETTING: { c.KWARGS: { c.MEMORY_SIZE: memory_size, c.OBS_DIM: (obs_dim, ), c.H_STATE_DIM: (1, ), c.ACTION_DIM: (action_dim, ), c.REWARD_DIM: (1, ), c.INFOS: { c.MEAN: ((action_dim, ), np.float32), c.VARIANCE: ((action_dim, ), np.float32), c.ENTROPY: ((action_dim, ), np.float32), c.LOG_PROB: ((1, ), np.float32), c.VALUE: ((1, ), np.float32),