Example #1
 def _default_hparams(self):
     default_dict = ParamDict({
         'policy': ClosedLoopBlockStackDemoPolicy,  # policy class
         'env_params': None,  # parameters containing info about env -> set automatically
     })
     return super()._default_hparams().overwrite(default_dict)
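
All of the examples on this page follow the same pattern: collect new defaults in a ParamDict and merge them into the parent class defaults via overwrite(). The snippet below is a minimal sketch of that mechanism, assuming ParamDict is a dict subclass whose overwrite() updates in place and returns itself (the real class in the codebase may differ); BaseAgent and MyAgent are made-up names for illustration.

    class ParamDict(dict):
        """Minimal stand-in: overwrite() updates in place and returns self so calls chain."""
        def overwrite(self, new_params):
            self.update(new_params)
            return self


    class BaseAgent:
        def _default_hparams(self):
            return ParamDict({'device': None, 'batch_size': 64})


    class MyAgent(BaseAgent):
        def _default_hparams(self):
            # defaults added by the subclass take precedence over the parent's defaults
            return super()._default_hparams().overwrite(ParamDict({'batch_size': 128}))


    print(MyAgent()._default_hparams())  # {'device': None, 'batch_size': 128}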
Example #2
 def _default_hparams(self):
     default_dict = ParamDict({
         'action_dim': 1,    # dimensionality of the action space
         'normalization': 'none',        # normalization used in policy network ['none', 'batch']
         'action_input': True,       # forward takes actions as second argument if set to True
     })
     return default_dict
Example #3
 def _default_hparams(self):
     default_dict = ParamDict({
         'device': None,  # pytorch device
         'discount_factor': 0.99,  # discount factor for RL update
         'optimizer': 'adam',  # supported: 'adam', 'radam', 'rmsprop', 'sgd'
         'gradient_clip': None,  # max grad norm, if None no clipping
         'momentum': 0,  # momentum in RMSProp / SGD optimizer
         'adam_beta': 0.9,  # beta1 param in Adam
         'update_iterations': 1,  # number of iteration steps per one call to 'update(...)'
         'target_network_update_factor': 5e-3,  # percentage of new weights that are carried over
         'batch_size': 64,  # size of the experience batch used for updates
         'obs_normalizer': DummyNormalizer,  # observation normalization class
         'obs_normalizer_params': {},  # parameters for the observation normalizer class
         'obs_norm_log_groups': {},  # (optional) dict defining separation of state space for obsNormLog
         'log_videos': True,  # whether to log videos during logging
         'log_video_caption': False,  # whether to add captions to video
         'num_workers': None,  # number of independent workers --> whether grads need sync
     })
     return default_dict
Example #4
 def _default_hparams(self):
     default_dict = ParamDict({
         'policy': None,  # policy class
         'policy_params': None,  # parameters for the policy class
         'policy_lr': 3e-4,  # learning rate for policy update
     })
     return super()._default_hparams().overwrite(default_dict)
Example #5
 def _default_hparams(self):
     default_dict = ParamDict({
         'unused_obs_size': None,  # dimensionality of split off observation part
         'discard_part': 'back',  # which part of observation to discard ['front', 'back']
     })
     return super()._default_hparams().overwrite(default_dict)
Example #6
 def _default_hparams(self):
     default_dict = ParamDict({
         'prior_input_res': 32,      # input resolution of prior images
         'encoder_ngf': 8,           # number of feature maps in shallowest level of encoder
         'n_input_frames': 1,        # number of prior input frames
     })
     # add new params to parent params
     return super()._default_hparams().overwrite(default_dict)
Example #7
 def _default_hparams(self):
     default_dict = ParamDict({
         'input_dim': 32,    # dimensionality of the observation input
         'n_layers': 3,      # number of policy network layers
         'nz_mid': 64,       # size of the intermediate network layers
         'output_dim': 1,    # number of outputs, can be >1 for discrete action spaces
     })
     return super()._default_hparams().overwrite(default_dict)
Example #8
 def _default_hparams(self):
     default_dict = ParamDict({
         'input_res': 32,                  # resolution of the image input
         'input_nc': 3,                    # number of input channels
         'ngf': 8,                         # number of channels in shallowest layer of image encoder
         'nz_enc': 64,                     # number of dimensions in encoder-latent space
     })
     return super()._default_hparams().overwrite(default_dict)
Example #9
 def _default_hparams(self):
     default_dict = ParamDict({
         'omega_schedule': ConstantSchedule,  # schedule used for omega param
         'omega_schedule_params': AttrDict(   # parameters for omega schedule
             p = 0.1,
         ),
     })
     return super()._default_hparams().overwrite(default_dict)
Example #10
 def _default_hparams(self):
     default_dict = ParamDict({
         'device': None,  # device that all tensors should get transferred to
         'screen_width': 400,  # width of rendered images
         'screen_height': 400,  # height of rendered images
     })
     return default_dict
Example #11
 def _default_hparams(self):
     default_dict = ParamDict({
         'action_dim': 1,  # dimensionality of the action space
         'max_action_range': 1.,  # for cont. actions this defines a symmetric action range [-x, x]
         'squash_output_dist': True,  # do not tanh adjust log prob if set to False
     })
     return default_dict
Example #12
 def _default_hparams(self):
     default_dict = ParamDict({
         'alpha_min': None,                # minimum value alpha is clipped to, no clipping if None
         'td_schedule': ConstantSchedule,  # schedule used for target divergence param
         'td_schedule_params': AttrDict(   # parameters for target divergence schedule
             p = 1.,
         ),
     })
     return super()._default_hparams().overwrite(default_dict)
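
Examples #9 and #12 hand a schedule class plus an AttrDict of parameters to the agent. Assuming AttrDict is a dict with attribute-style access and that schedules are constructed from such a parameter dict (both assumptions, not taken from the examples), consuming these hyperparameters could look roughly like this:

    class AttrDict(dict):
        """Minimal stand-in: dictionary entries are also reachable as attributes."""
        __getattr__ = dict.__getitem__
        __setattr__ = dict.__setitem__


    class ConstantSchedule:
        """Assumed interface: always returns the same value, ignoring the step."""
        def __init__(self, params):
            self._p = params.p

        def __call__(self, step):
            return self._p


    hp = AttrDict(td_schedule=ConstantSchedule, td_schedule_params=AttrDict(p=1.))
    target_divergence = hp.td_schedule(hp.td_schedule_params)
    print(target_divergence(step=0))  # 1.0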
Example #13
 def _default_hparams(self):
     default_dict = ParamDict({
         'prior_model': None,              # prior model class
         'prior_model_params': None,       # parameters for the prior model
         'prior_model_checkpoint': None,   # checkpoint path of the prior model
         'prior_model_epoch': 'latest',    # epoch that checkpoint should be loaded for (defaults to latest)
         'load_weights': True,             # optionally allows to *not* load the weights (i.e. train from scratch)
     })
     return super()._default_hparams().overwrite(default_dict)
Example #14
 def _default_hparams(self):
     default_dict = ParamDict({
         'model': None,  # policy class
         'model_params': None,  # parameters for the policy class
         'model_checkpoint': None,  # checkpoint path of the model
         'model_epoch': 'latest',  # epoch that checkpoint should be loaded for (defaults to latest)
     })
     return super()._default_hparams().overwrite(default_dict)
Example #15
 def _default_hparams(self):
     default_dict = ParamDict({
         'input_dim': 32,  # dimensionality of the observation input
         'n_layers': 3,  # number of policy network layers
         'nz_mid': 64,  # size of the intermediate network layers
         'normalization': 'none',  # normalization used in policy network ['none', 'batch']
     })
     return super()._default_hparams().overwrite(default_dict)
Example #16
 def _default_hparams(self):
     default_dict = ParamDict({
         'clip_raw_obs': np.array(float("Inf")),  # symmetric value maximum for raw observation
         'clip_norm_obs': np.array(float("Inf")),  # symmetric value maximum for normalized observation
         'update_horizon': 1e7,  # number of values for which statistics get updated
     })
     return default_dict
Example #17
    def _default_hparams(self):
        default_dict = ParamDict({
            'name': None,  # name of openai/gym environment
            'reward_norm': 1.,  # reward normalization factor
            'punish_reward': -100,  # reward used when action leads to simulation crash
            'unwrap_time': True,  # removes time limit wrapper from envs so that done is not set on timeout
        })

        return super()._default_hparams().overwrite(default_dict)
Example #18
 def _default_hparams(self):
     default_dict = ParamDict({
         'prior_model': None,              # prior model class
         'prior_model_params': None,       # parameters for the prior model
         'prior_model_checkpoint': None,   # checkpoint path of the prior model
         'prior_model_epoch': 'latest',    # epoch that checkpoint should be loaded for (defaults to latest)
         'prior_batch_size': -1,           # optional: use separate batch size for prior network
         'reverse_KL': False,              # if True, computes KL[q||p] instead of KL[p||q] (can be more stable to optimize)
         'analytic_KL': False,             # if True, computes KL divergence analytically, otherwise sampling-based
         'num_mc_samples': 10,             # number of samples for Monte Carlo KL estimate
     })
     return super()._default_hparams().overwrite(default_dict)
Example #19
 def _default_hparams(self):
     default_dict = ParamDict({
         'hl_agent': None,                         # high-level agent class
         'hl_agent_params': None,                  # parameters of the high-level agent
         'll_agent': None,                         # low-level agent class
         'll_agent_params': None,                  # parameters of the low-level agent(s)
         'update_hl': True,                        # whether to update high-level agent
         'update_ll': True,                        # whether to update low-level agent(s)
         'll_subgoal_reaching_reward': False,      # whether to count ll subgoal reaching reward in training
         'll_subgoal_reaching_reward_weight': 1e3, # weight for the subgoal reaching reward
     })
     return super()._default_hparams().overwrite(default_dict)
Example #20
 def _default_hparams(self):
     # put new parameters in here:
     return super()._default_hparams().overwrite(ParamDict({
         'use_convs': False,
         'device': None,
         'state_dim': 1,             # dimensionality of the state space
         'action_dim': 1,            # dimensionality of the action space
         'nz_mid': 128,              # number of dimensions for internal feature spaces
         'n_processing_layers': 5,   # number of layers in MLPs
         'output_type': 'gauss',     # distribution type for learned prior, ['gauss', 'gmm', 'flow']
         'n_gmm_prior_components': 5,    # number of Gaussian components for GMM learned prior
     }))
Example #21
 def _default_hparams(self):
     default_dict = ParamDict({
         'critic': None,  # critic class
         'critic_params': None,  # parameters for the critic class
         'replay': None,  # replay buffer class
         'replay_params': None,  # parameters for replay buffer
         'critic_lr': 3e-4,  # learning rate for critic update
         'reward_scale': 1.0,  # SAC reward scale
         'clip_q_target': False,  # if True, clips Q target
         'target_entropy': None,  # target value for automatic entropy tuning, if None uses -action_dim
     })
     return super()._default_hparams().overwrite(default_dict)
Example #22
    def _default_hparams(self):
        # put new parameters in here:
        default_dict = ParamDict({
            'use_convs': False,
            'device': None,
            'n_rollout_steps': 10,  # number of decoding steps
            'cond_decode': False,  # if True, conditions decoder on prior inputs
        })

        # Network size
        default_dict.update({
            'state_dim': 1,  # dimensionality of the state space
            'action_dim': 1,  # dimensionality of the action space
            'nz_enc': 32,  # number of dimensions in encoder-latent space
            'nz_vae': 10,  # number of dimensions in vae-latent space
            'nz_mid': 32,  # number of dimensions for internal feature spaces
            'nz_mid_lstm': 128,  # size of middle LSTM layers
            'n_lstm_layers': 1,  # number of LSTM layers
            'n_processing_layers': 3,  # number of layers in MLPs
        })

        # Learned prior
        default_dict.update({
            'n_prior_nets': 1,  # number of prior networks in ensemble
            'num_prior_net_layers': 6,  # number of layers of the learned prior MLP
            'nz_mid_prior': 128,  # dimensionality of internal feature spaces for prior net
            'nll_prior_train': True,  # if True, trains learned prior by maximizing NLL
            'learned_prior_type': 'gauss',  # distribution type for learned prior, ['gauss', 'gmm', 'flow']
            'n_gmm_prior_components': 5,  # number of Gaussian components for GMM learned prior
        })

        # Loss weights
        default_dict.update({
            'reconstruction_mse_weight': 1.,  # weight of MSE reconstruction loss
            'kl_div_weight': 1.,  # weight of KL divergence loss
        })

        # add new params to parent params
        parent_params = super()._default_hparams()
        parent_params.overwrite(default_dict)
        return parent_params
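
Example #22 assembles its defaults in several update() calls and merges them into the parent parameters explicitly at the end. Assuming the minimal ParamDict behavior sketched earlier (overwrite() updates in place and returns itself), the explicit three-line ending is equivalent to the chained one-liner used in most other examples; the Base/Model classes below are placeholders, not classes from the codebase.

    class ParamDict(dict):
        def overwrite(self, new_params):
            self.update(new_params)
            return self


    class Base:
        def _default_hparams(self):
            return ParamDict({'use_convs': False, 'nz_mid': 32})


    class Model(Base):
        def _default_hparams(self):
            default_dict = ParamDict({'n_rollout_steps': 10})
            default_dict.update({'nz_vae': 10})  # plain dict.update merges a section in place
            # explicit merge, equivalent to:
            #   return super()._default_hparams().overwrite(default_dict)
            parent_params = super()._default_hparams()
            parent_params.overwrite(default_dict)
            return parent_params


    print(Model()._default_hparams())
    # {'use_convs': False, 'nz_mid': 32, 'n_rollout_steps': 10, 'nz_vae': 10}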
Example #23
 def _default_hparams(self):
     default_dict = ParamDict({
         'policy_model': None,  # policy model class
         'policy_model_params': None,  # parameters for the policy model
         'policy_model_checkpoint': None,  # checkpoint path of the policy model
         'policy_model_epoch': 'latest',  # epoch that checkpoint should be loaded for (defaults to latest)
         'load_weights': True,  # optionally allows to *not* load the weights (i.e. train from scratch)
         'initial_log_sigma': -50,  # initial log sigma of policy dist (since model is deterministic)
     })
     return super()._default_hparams().overwrite(default_dict)
Example #24
 def _default_hparams(self):
     return ParamDict({
         'input_dim': None,          # dimensionality of the vector input
         'input_res': None,          # resolution of image input
         'output_dim': None,         # dimensionality of output tensor
         'input_nc': 3,              # number of input channels
         'ngf': 8,                   # number of channels in shallowest layer of image encoder
         'nz_enc': 32,               # number of dimensions in encoder-latent space
         'nz_mid': 32,               # number of dimensions for internal feature spaces
         'n_layers': 3,              # number of layers in MLPs
         'normalization': 'none',    # normalization used in encoder network ['none', 'batch']
         'use_convs': False,
         'device': None,
     })
Example #25
 def _default_hparams(self):
     default_dict = ParamDict({
         'seed': None,
         'agent': None,
         'data_dir': None,  # directory the dataset is in
         'environment': None,
         'sampler': Sampler,     # sampler type used
         'exp_path': None,  # Path to the folder with experiments
         'num_epochs': 200,
         'max_rollout_len': 1000,  # maximum length of the performed rollout
         'n_steps_per_update': 1,     # number of env steps collected per policy update
         'n_steps_per_epoch': 20000,       # number of env steps per epoch
         'log_output_per_epoch': 100,  # log the non-image/video outputs N times per epoch
         'log_images_per_epoch': 4,    # log images/videos N times per epoch
         'logging_target': 'wandb',    # where to log results to
         'n_warmup_steps': 0,    # steps of warmup experience collection before training
     })
     return default_dict
Example #26
    def _default_hparams(self):
        # Data Dimensions
        default_dict = ParamDict({
            'batch_size': -1,
        })
        
        # Network params
        default_dict.update({
            'normalization': 'batch',
        })

        # Misc params
        default_dict.update({
        })

        return default_dict
Example #27
 def _default_hparams(self):
     default_dict = ParamDict({
         'model': None,
         'model_test': None,
         'logger': None,
         'logger_test': None,
         'evaluator': None,
         'data_dir': None,  # directory the dataset is in
         'batch_size': 16,
         'exp_path': None,  # Path to the folder with experiments
         'num_epochs': 200,
         'epoch_cycles_train': 1,
         'optimizer': 'radam',  # supported: 'adam', 'radam', 'rmsprop', 'sgd'
         'lr': 1e-3,
         'gradient_clip': None,
         'momentum': 0,  # momentum in RMSProp / SGD optimizer
         'adam_beta': 0.9,  # beta1 param in Adam
         'top_of_n_eval': 1,  # number of samples used at eval time
         'top_comp_metric': None,  # metric that is used for comparison at eval time (e.g. 'mse')
     })
     return default_dict
Example #28
    def _default_hparams(self):
        return BaseEnvironment._default_hparams(self).overwrite(ParamDict({
            'n_blocks': 5,                 # number of blocks in env
            'block_size': 0.04,            # size of a block
            'block_color': 'white',        # color of the block
            'rotate_blocks': False,        # no block rotation if set to False
            'allow_rotate': False,         # if False, disallow gripper rotation
            'table_size': (1.2, 1.2, 0.8), # size of table
            'dimension': 2,                # dimensionality for the task
            'camera_name': 'frontview',    # name of camera to render
            'gripper_width': 0.02,         # thickness of gripper to consider during placement
            'task_generator': None,        # task generator for generating HL plans
            'task_params': AttrDict({}),   # parameters for task generator
            'perturb_actions': False,      # if True, perturb action and init block placement
            'perturb_prob': 0.3,           # action perturb probability
            'perturb_scale': 0.03,         # action perturb scale
            'n_steps': None,               # number of steps in the task, default n_blocks - 1
            'friction': 1,                 # friction for the boxes
            'rand_task': False,            # if True, randomizes the task in every reset (i.e. multi-task env)
            'rand_init_pos': False,        # if False, keeps initial position of blocks constant
            'rand_init_gripper': False,    # if True, randomizes gripper xy position at each episode reset
            'include_quat': False,         # if True, include quaternions in observation
            'include_vel': True,           # if True, include velocity of the gripper
            'include_2d_rotation': False,  # if True, adds 2D rotation representation for blocks to obs (sin+cos)
            'clip_obs': 2.0,               # if not None, clip observation values
            'seed': None,                  # seed for generating block placements
            'relative_shaped_reward': False,  # if True, computes shaping reward as relative change towards the goal
            'action_penalty_weight': 0.,   # penalty for action magnitude
            'reward_density': 'dense',     # string defining how dense the reward is ['dense', 'sparse']
            'number_blocks': False,        # if True, print number on blocks
            'fixed_task': None,            # (optional) if provided is used as fixed task
            'fixed_block_pos': None,       # (optional) if provided is used as fixed block position
            'add_boundary_walls': True,    # if True, adds invisible walls that constrain movement
            'reset_with_boundary': False,  # if True, resets episode once agent leaves allowed region

            'reward_scale': 1.0,           # scale of the reward
        }))
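
Example #28 calls the parent implementation explicitly as BaseEnvironment._default_hparams(self) instead of going through super(). Assuming BaseEnvironment is the direct base class, both spellings produce the same merged ParamDict; the explicit form simply pins which parent implementation is used. A small sketch of the equivalence, with a placeholder subclass name and the same minimal ParamDict stand-in as above:

    class ParamDict(dict):
        def overwrite(self, new_params):
            self.update(new_params)
            return self


    class BaseEnvironment:
        def _default_hparams(self):
            return ParamDict({'screen_width': 400, 'screen_height': 400})


    class BlockStackEnv(BaseEnvironment):  # hypothetical subclass, for illustration only
        def _default_hparams(self):
            explicit = BaseEnvironment._default_hparams(self).overwrite(ParamDict({'n_blocks': 5}))
            chained = super()._default_hparams().overwrite(ParamDict({'n_blocks': 5}))
            assert explicit == chained  # same result for a single level of inheritance
            return explicit


    print(BlockStackEnv()._default_hparams())
    # {'screen_width': 400, 'screen_height': 400, 'n_blocks': 5}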
Example #29
 def _default_hparams(self):
     return super()._default_hparams().overwrite(
         ParamDict({
             'obs_res': 64,  # resolution of image observation
         }))
Example #30
 def _default_hparams(self):
     return super()._default_hparams().overwrite(
         ParamDict({
             'name': "Widow250OfficeFixed-v0",
         }))