Ejemplo n.º 1
0
# device = torch.device(c.CPU)

# Scalar hyperparameters for this experiment.
num_frames = 1
action_repeat = 1
hidden_state_dim = 128

# memory_size is handed to the buffer as c.MEMORY_SIZE below; it is kept equal
# to max_total_steps (presumably the overall step budget - confirm with caller).
max_total_steps = 1000000
memory_size = max_total_steps

experiment_setting = {
    # Auxiliary Tasks
    c.AUXILIARY_TASKS: {},

    # Buffer
    c.BUFFER_PREPROCESSING:
    gt.Compose([gt.AsType()]),
    c.BUFFER_SETTING: {
        c.KWARGS: {
            c.MEMORY_SIZE: memory_size,
            c.OBS_DIM: (obs_dim, ),
            c.H_STATE_DIM: (hidden_state_dim * 2, ),
            c.ACTION_DIM: (action_dim, ),
            c.REWARD_DIM: (1, ),
            c.INFOS: {
                c.MEAN: ((action_dim, ), np.float32),
                c.VARIANCE: ((action_dim, ), np.float32),
                c.ENTROPY: ((action_dim, ), np.float32),
                c.LOG_PROB: ((1, ), np.float32),
                c.VALUE: ((1, ), np.float32),
                c.DISCOUNTING: ((1, ), np.float32)
            },
Ejemplo n.º 2
0
# Flat observation length: the product of the entries of raw_img_dim plus
# scalar_feature_dim. np.prod is used instead of the np.product alias, which
# was deprecated in NumPy 1.25 and removed in NumPy 2.0.
obs_dim = int(np.prod(raw_img_dim) + scalar_feature_dim)
latent_dim = 50

# Buffer capacity and total step count share one value, divided by
# action_repeat (integer division).
memory_size = max_total_steps = 1000000 // action_repeat

# Active device; swap with the commented line below to run on CPU instead.
device = torch.device("cuda:0")
# device = torch.device(c.CPU)

experiment_setting = {
    # Auxiliary Tasks
    c.AUXILIARY_TASKS: {},

    # Buffer
    c.BUFFER_PREPROCESSING:
    gt.Compose([
        gt.Transpose((0, 3, 1, 2)),
        gt.Reshape(),
    ]),
    c.BUFFER_SETTING: {
        c.KWARGS: {
            c.MEMORY_SIZE: memory_size,
            c.OBS_DIM: (obs_dim, ),
            c.H_STATE_DIM: (1, ),
            c.ACTION_DIM: (action_dim, ),
            c.REWARD_DIM: (1, ),
            c.INFOS: {
                c.MEAN: ((action_dim, ), np.float32),
                c.VARIANCE: ((action_dim, ), np.float32),
                c.ENTROPY: ((action_dim, ), np.float32),
                c.LOG_PROB: ((1, ), np.float32),
                c.VALUE: ((1, ), np.float32),
                c.DISCOUNTING: ((1, ), np.float32)
Ejemplo n.º 3
0
latent_dim = 50

# Total step count is 100000 divided by action_repeat (integer division);
# the buffer capacity is set to the same value.
max_total_steps = 100000 // action_repeat
memory_size = max_total_steps

# Device selection: the CUDA line is active, the CPU line is the alternative.
# device = torch.device(c.CPU)
device = torch.device("cuda:0")

experiment_setting = {
    # Auxiliary Tasks
    c.AUXILIARY_TASKS: {},

    # Buffer
    c.BUFFER_PREPROCESSING:
    gt.Compose([
        gt.Transpose((0, 3, 1, 2)),
        it.NumPyCenterCrop(raw_img_dim, height=processed_h, width=processed_w),
        gt.Reshape(),
    ]),
    c.BUFFER_SETTING: {
        c.KWARGS: {
            c.MEMORY_SIZE: memory_size,
            c.OBS_DIM: (obs_dim, ),
            c.H_STATE_DIM: (1, ),
            c.ACTION_DIM: (action_dim, ),
            c.REWARD_DIM: (1, ),
            c.INFOS: {
                c.MEAN: ((action_dim, ), np.float32),
                c.VARIANCE: ((action_dim, ), np.float32),
                c.ENTROPY: ((action_dim, ), np.float32),
                c.LOG_PROB: ((1, ), np.float32),
                c.VALUE: ((1, ), np.float32),