def _default_hparams(self):
    default_dict = ParamDict({
        'policy': ClosedLoopBlockStackDemoPolicy,   # policy class
        'env_params': None,                         # parameters containing info about env -> set automatically
    })
    return super()._default_hparams().overwrite(default_dict)

def _default_hparams(self):
    default_dict = ParamDict({
        'action_dim': 1,            # dimensionality of the action space
        'normalization': 'none',    # normalization used in policy network ['none', 'batch']
        'action_input': True,       # forward takes actions as second argument if set to True
    })
    return default_dict

def _default_hparams(self):
    default_dict = ParamDict({
        'device': None,                         # pytorch device
        'discount_factor': 0.99,                # discount factor for RL update
        'optimizer': 'adam',                    # supported: 'adam', 'radam', 'rmsprop', 'sgd'
        'gradient_clip': None,                  # max grad norm, if None no clipping
        'momentum': 0,                          # momentum in RMSProp / SGD optimizer
        'adam_beta': 0.9,                       # beta1 param in Adam
        'update_iterations': 1,                 # number of iteration steps per one call to 'update(...)'
        'target_network_update_factor': 5e-3,   # percentage of new weights that are carried over
        'batch_size': 64,                       # size of the experience batch used for updates
        'obs_normalizer': DummyNormalizer,      # observation normalization class
        'obs_normalizer_params': {},            # parameters for observation normalization class
        'obs_norm_log_groups': {},              # (optional) dict defining separation of state space for obsNormLog
        'log_videos': True,                     # whether to log videos during logging
        'log_video_caption': False,             # whether to add captions to videos
        'num_workers': None,                    # number of independent workers --> whether grads need sync
    })
    return default_dict

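# The 'target_network_update_factor' above is the Polyak-averaging coefficient tau:
# after each update, roughly a fraction tau of the online network's weights is mixed
# into the target network. A minimal sketch, assuming PyTorch modules ('soft_update'
# is a hypothetical helper name, not necessarily this repo's own):
import torch

def soft_update(target, source, tau=5e-3):
    """Mix a fraction tau of the online network's weights into the target network."""
    with torch.no_grad():
        for t_param, s_param in zip(target.parameters(), source.parameters()):
            t_param.mul_(1.0 - tau).add_(tau * s_param)
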
def _default_hparams(self):
    default_dict = ParamDict({
        'policy': None,             # policy class
        'policy_params': None,      # parameters for the policy class
        'policy_lr': 3e-4,          # learning rate for policy update
    })
    return super()._default_hparams().overwrite(default_dict)

def _default_hparams(self):
    default_dict = ParamDict({
        'unused_obs_size': None,    # dimensionality of the split-off observation part
        'discard_part': 'back',     # which part of observation to discard ['front', 'back']
    })
    return super()._default_hparams().overwrite(default_dict)

def _default_hparams(self):
    default_dict = ParamDict({
        'prior_input_res': 32,      # input resolution of prior images
        'encoder_ngf': 8,           # number of feature maps in shallowest level of encoder
        'n_input_frames': 1,        # number of prior input frames
    })
    # add new params to parent params
    return super()._default_hparams().overwrite(default_dict)

def _default_hparams(self):
    default_dict = ParamDict({
        'input_dim': 32,    # dimensionality of the observation input
        'n_layers': 3,      # number of policy network layers
        'nz_mid': 64,       # size of the intermediate network layers
        'output_dim': 1,    # number of outputs, can be >1 for discrete action spaces
    })
    return super()._default_hparams().overwrite(default_dict)

def _default_hparams(self):
    default_dict = ParamDict({
        'input_res': 32,    # resolution of the image input
        'input_nc': 3,      # number of input channels
        'ngf': 8,           # number of channels in shallowest layer of image encoder
        'nz_enc': 64,       # number of dimensions in encoder-latent space
    })
    return super()._default_hparams().overwrite(default_dict)

def _default_hparams(self):
    default_dict = ParamDict({
        'omega_schedule': ConstantSchedule,     # schedule used for omega param
        'omega_schedule_params': AttrDict(      # parameters for omega schedule
            p=0.1,
        ),
    })
    return super()._default_hparams().overwrite(default_dict)

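# 'omega_schedule' is instantiated with 'omega_schedule_params' and queried during
# training. A minimal sketch of what a constant schedule could look like, assuming
# a __call__(step) interface (the actual ConstantSchedule API in this repo may
# differ; the class name here is illustrative):
class ConstantScheduleSketch:
    def __init__(self, p):
        self._p = p         # constant value returned at every step

    def __call__(self, step):
        return self._p      # independent of the training step
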
def _default_hparams(self):
    default_dict = ParamDict({
        'device': None,         # device that all tensors should get transferred to
        'screen_width': 400,    # width of rendered images
        'screen_height': 400,   # height of rendered images
    })
    return default_dict

def _default_hparams(self):
    default_dict = ParamDict({
        'action_dim': 1,                # dimensionality of the action space
        'max_action_range': 1.,         # for cont. actions this defines a symmetric action range [-x, x]
        'squash_output_dist': True,     # if False, no tanh squashing / log-prob adjustment of the output dist
    })
    return default_dict

def _default_hparams(self):
    default_dict = ParamDict({
        'alpha_min': None,                  # minimum value alpha is clipped to, no clipping if None
        'td_schedule': ConstantSchedule,    # schedule used for target divergence param
        'td_schedule_params': AttrDict(     # parameters for target divergence schedule
            p=1.,
        ),
    })
    return super()._default_hparams().overwrite(default_dict)

def _default_hparams(self):
    default_dict = ParamDict({
        'prior_model': None,                # prior model class
        'prior_model_params': None,         # parameters for the prior model
        'prior_model_checkpoint': None,     # checkpoint path of the prior model
        'prior_model_epoch': 'latest',      # epoch that checkpoint should be loaded for (defaults to latest)
        'load_weights': True,               # optionally allows to *not* load the weights (i.e. train from scratch)
    })
    return super()._default_hparams().overwrite(default_dict)

def _default_hparams(self):
    default_dict = ParamDict({
        'model': None,              # model class
        'model_params': None,       # parameters for the model class
        'model_checkpoint': None,   # checkpoint path of the model
        'model_epoch': 'latest',    # epoch that checkpoint should be loaded for (defaults to latest)
    })
    return super()._default_hparams().overwrite(default_dict)

def _default_hparams(self):
    default_dict = ParamDict({
        'input_dim': 32,            # dimensionality of the observation input
        'n_layers': 3,              # number of policy network layers
        'nz_mid': 64,               # size of the intermediate network layers
        'normalization': 'none',    # normalization used in policy network ['none', 'batch']
    })
    return super()._default_hparams().overwrite(default_dict)

def _default_hparams(self):
    default_dict = ParamDict({
        'clip_raw_obs': np.array(float("Inf")),     # symmetric value maximum for raw observation
        'clip_norm_obs': np.array(float("Inf")),    # symmetric value maximum for normalized observation
        'update_horizon': 1e7,                      # number of values for which statistics get updated
    })
    return default_dict

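# 'clip_raw_obs' / 'clip_norm_obs' define symmetric clipping ranges (the default of
# Inf disables clipping). A minimal sketch of how a normalizer could apply them
# ('clip_symmetric' is a hypothetical helper, not this repo's DummyNormalizer):
import numpy as np

def clip_symmetric(obs, clip_value):
    """Clip observation values to the symmetric range [-clip_value, clip_value]."""
    return np.clip(obs, -clip_value, clip_value)
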
def _default_hparams(self):
    default_dict = ParamDict({
        'name': None,           # name of the OpenAI Gym environment
        'reward_norm': 1.,      # reward normalization factor
        'punish_reward': -100,  # reward used when action leads to simulation crash
        'unwrap_time': True,    # removes time limit wrapper from envs so that done is not set on timeout
    })
    return super()._default_hparams().overwrite(default_dict)

def _default_hparams(self):
    default_dict = ParamDict({
        'prior_model': None,            # prior model class
        'prior_model_params': None,     # parameters for the prior model
        'prior_model_checkpoint': None, # checkpoint path of the prior model
        'prior_model_epoch': 'latest',  # epoch that checkpoint should be loaded for (defaults to latest)
        'prior_batch_size': -1,         # optional: use separate batch size for prior network
        'reverse_KL': False,            # if True, computes KL[q||p] instead of KL[p||q] (can be more stable to opt.)
        'analytic_KL': False,           # if True, computes KL divergence analytically, otherwise sampling-based
        'num_mc_samples': 10,           # number of samples for Monte-Carlo KL estimate
    })
    return super()._default_hparams().overwrite(default_dict)

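# With 'analytic_KL' set to False, the KL term is estimated from samples. A minimal
# sketch of a Monte-Carlo estimate of KL[a||b] using torch.distributions ('mc_kl' is
# an illustrative helper, not this repo's code; pass the two distributions as (p, q)
# or (q, p) depending on the 'reverse_KL' setting):
import torch

def mc_kl(a, b, num_mc_samples=10):
    """Monte-Carlo estimate of KL[a || b] from num_mc_samples samples of a."""
    z = a.rsample((num_mc_samples,))                # reparameterized samples from a
    return (a.log_prob(z) - b.log_prob(z)).mean()
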
def _default_hparams(self):
    default_dict = ParamDict({
        'hl_agent': None,                           # high-level agent class
        'hl_agent_params': None,                    # parameters of the high-level agent
        'll_agent': None,                           # low-level agent class
        'll_agent_params': None,                    # parameters of the low-level agent(s)
        'update_hl': True,                          # whether to update high-level agent
        'update_ll': True,                          # whether to update low-level agent(s)
        'll_subgoal_reaching_reward': False,        # whether to count ll subgoal reaching reward in training
        'll_subgoal_reaching_reward_weight': 1e3,   # weight for the subgoal reaching reward
    })
    return super()._default_hparams().overwrite(default_dict)

def _default_hparams(self):
    # put new parameters in here:
    return super()._default_hparams().overwrite(ParamDict({
        'use_convs': False,
        'device': None,
        'state_dim': 1,                 # dimensionality of the state space
        'action_dim': 1,                # dimensionality of the action space
        'nz_mid': 128,                  # number of dimensions for internal feature spaces
        'n_processing_layers': 5,       # number of layers in MLPs
        'output_type': 'gauss',         # distribution type for learned prior, ['gauss', 'gmm', 'flow']
        'n_gmm_prior_components': 5,    # number of Gaussian components for GMM learned prior
    }))

def _default_hparams(self):
    default_dict = ParamDict({
        'critic': None,             # critic class
        'critic_params': None,      # parameters for the critic class
        'replay': None,             # replay buffer class
        'replay_params': None,      # parameters for replay buffer
        'critic_lr': 3e-4,          # learning rate for critic update
        'reward_scale': 1.0,        # SAC reward scale
        'clip_q_target': False,     # if True, clips Q target
        'target_entropy': None,     # target value for automatic entropy tuning, if None uses -action_dim
    })
    return super()._default_hparams().overwrite(default_dict)

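# 'target_entropy' defaulting to -action_dim is the common SAC heuristic for
# automatic temperature tuning. A minimal sketch of the temperature loss it drives
# (the standard SAC formulation, not necessarily this repo's exact implementation):
import torch

def alpha_loss(log_alpha, log_prob, target_entropy):
    """Temperature loss: raise alpha when policy entropy falls below the target."""
    return -(log_alpha * (log_prob + target_entropy).detach()).mean()
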
def _default_hparams(self):
    # put new parameters in here:
    default_dict = ParamDict({
        'use_convs': False,
        'device': None,
        'n_rollout_steps': 10,      # number of decoding steps
        'cond_decode': False,       # if True, conditions decoder on prior inputs
    })

    # Network size
    default_dict.update({
        'state_dim': 1,             # dimensionality of the state space
        'action_dim': 1,            # dimensionality of the action space
        'nz_enc': 32,               # number of dimensions in encoder-latent space
        'nz_vae': 10,               # number of dimensions in vae-latent space
        'nz_mid': 32,               # number of dimensions for internal feature spaces
        'nz_mid_lstm': 128,         # size of middle LSTM layers
        'n_lstm_layers': 1,         # number of LSTM layers
        'n_processing_layers': 3,   # number of layers in MLPs
    })

    # Learned prior
    default_dict.update({
        'n_prior_nets': 1,              # number of prior networks in ensemble
        'num_prior_net_layers': 6,      # number of layers of the learned prior MLP
        'nz_mid_prior': 128,            # dimensionality of internal feature spaces for prior net
        'nll_prior_train': True,        # if True, trains learned prior by maximizing NLL
        'learned_prior_type': 'gauss',  # distribution type for learned prior, ['gauss', 'gmm', 'flow']
        'n_gmm_prior_components': 5,    # number of Gaussian components for GMM learned prior
    })

    # Loss weights
    default_dict.update({
        'reconstruction_mse_weight': 1.,    # weight of MSE reconstruction loss
        'kl_div_weight': 1.,                # weight of KL divergence loss
    })

    # add new params to parent params
    parent_params = super()._default_hparams()
    parent_params.overwrite(default_dict)
    return parent_params

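# 'reconstruction_mse_weight' and 'kl_div_weight' weight the two terms of the VAE
# objective configured above. A minimal sketch of how the total loss could be
# assembled from the per-term values (illustrative names, not this repo's code):
def total_loss(mse, kl, reconstruction_mse_weight=1., kl_div_weight=1.):
    """Weighted sum of reconstruction and KL terms, matching the hparams above."""
    return reconstruction_mse_weight * mse + kl_div_weight * kl
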
def _default_hparams(self):
    default_dict = ParamDict({
        'policy_model': None,               # policy model class
        'policy_model_params': None,        # parameters for the policy model
        'policy_model_checkpoint': None,    # checkpoint path of the policy model
        'policy_model_epoch': 'latest',     # epoch that checkpoint should be loaded for (defaults to latest)
        'load_weights': True,               # optionally allows to *not* load the weights (i.e. train from scratch)
        'initial_log_sigma': -50,           # initial log sigma of policy dist (since model is deterministic)
    })
    return super()._default_hparams().overwrite(default_dict)

def _default_hparams(self):
    return ParamDict({
        'input_dim': None,          # dimensionality of the vector input
        'input_res': None,          # resolution of image input
        'output_dim': None,         # dimensionality of output tensor
        'input_nc': 3,              # number of input channels
        'ngf': 8,                   # number of channels in shallowest layer of image encoder
        'nz_enc': 32,               # number of dimensions in encoder-latent space
        'nz_mid': 32,               # number of dimensions for internal feature spaces
        'n_layers': 3,              # number of layers in MLPs
        'normalization': 'none',    # normalization used in encoder network ['none', 'batch']
        'use_convs': False,
        'device': None,
    })

def _default_hparams(self):
    default_dict = ParamDict({
        'seed': None,
        'agent': None,
        'data_dir': None,               # directory the dataset is in
        'environment': None,
        'sampler': Sampler,             # sampler type used
        'exp_path': None,               # path to the folder with experiments
        'num_epochs': 200,
        'max_rollout_len': 1000,        # maximum length of the performed rollout
        'n_steps_per_update': 1,        # number of env steps collected per policy update
        'n_steps_per_epoch': 20000,     # number of env steps per epoch
        'log_output_per_epoch': 100,    # log the non-image/video outputs N times per epoch
        'log_images_per_epoch': 4,      # log images/videos N times per epoch
        'logging_target': 'wandb',      # where to log results to
        'n_warmup_steps': 0,            # steps of warmup experience collection before training
    })
    return default_dict

def _default_hparams(self):
    # Data Dimensions
    default_dict = ParamDict({
        'batch_size': -1,
    })

    # Network params
    default_dict.update({
        'normalization': 'batch',
    })

    # Misc params
    default_dict.update({
    })
    return default_dict

def _default_hparams(self):
    default_dict = ParamDict({
        'model': None,
        'model_test': None,
        'logger': None,
        'logger_test': None,
        'evaluator': None,
        'data_dir': None,           # directory the dataset is in
        'batch_size': 16,
        'exp_path': None,           # path to the folder with experiments
        'num_epochs': 200,
        'epoch_cycles_train': 1,
        'optimizer': 'radam',       # supported: 'adam', 'radam', 'rmsprop', 'sgd'
        'lr': 1e-3,
        'gradient_clip': None,
        'momentum': 0,              # momentum in RMSProp / SGD optimizer
        'adam_beta': 0.9,           # beta1 param in Adam
        'top_of_n_eval': 1,         # number of samples used at eval time
        'top_comp_metric': None,    # metric that is used for comparison at eval time (e.g. 'mse')
    })
    return default_dict

def _default_hparams(self):
    return BaseEnvironment._default_hparams(self).overwrite(ParamDict({
        'n_blocks': 5,                      # number of blocks in env
        'block_size': 0.04,                 # size of a block
        'block_color': 'white',             # color of the blocks
        'rotate_blocks': False,             # no block rotation if set to False
        'allow_rotate': False,              # if False, disallow gripper rotation
        'table_size': (1.2, 1.2, 0.8),      # size of table
        'dimension': 2,                     # dimensionality of the task
        'camera_name': 'frontview',         # name of camera to render
        'gripper_width': 0.02,              # thickness of gripper to consider during placement
        'task_generator': None,             # task generator for generating HL plans
        'task_params': AttrDict({}),        # parameters for task generator
        'perturb_actions': False,           # if True, perturb actions and initial block placement
        'perturb_prob': 0.3,                # action perturbation probability
        'perturb_scale': 0.03,              # action perturbation scale
        'n_steps': None,                    # number of steps in the task, defaults to n_blocks - 1
        'friction': 1,                      # friction for the boxes
        'rand_task': False,                 # if True, randomizes the task on every reset (i.e. multi-task env)
        'rand_init_pos': False,             # if False, keeps initial position of blocks constant
        'rand_init_gripper': False,         # if True, randomizes gripper xy position at each episode reset
        'include_quat': False,              # if True, include quaternions in observation
        'include_vel': True,                # if True, include velocity of the gripper
        'include_2d_rotation': False,       # if True, adds 2D rotation representation for blocks to obs (sin+cos)
        'clip_obs': 2.0,                    # if not None, clip observation values
        'seed': None,                       # seed for generating block placements
        'relative_shaped_reward': False,    # if True, computes shaping reward as relative change towards the goal
        'action_penalty_weight': 0.,        # penalty for action magnitude
        'reward_density': 'dense',          # how dense the reward is ['dense', 'sparse']
        'number_blocks': False,             # if True, print numbers on blocks
        'fixed_task': None,                 # (optional) if provided, used as fixed task
        'fixed_block_pos': None,            # (optional) if provided, used as fixed block positions
        'add_boundary_walls': True,         # if True, adds invisible walls that constrain movement
        'reset_with_boundary': False,       # if True, resets episode once agent leaves allowed region
        'reward_scale': 1.0,                # scale of the reward
    }))

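# 'perturb_actions' applies noise to a fraction 'perturb_prob' of actions with
# magnitude 'perturb_scale'. A minimal sketch of one plausible perturbation scheme
# ('perturb_action' is a hypothetical helper; the env's actual perturbation logic
# may differ):
import numpy as np

def perturb_action(action, perturb_prob=0.3, perturb_scale=0.03, rng=np.random):
    """With probability perturb_prob, add Gaussian noise of std perturb_scale."""
    if rng.rand() < perturb_prob:
        action = action + rng.normal(scale=perturb_scale, size=np.shape(action))
    return action
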
def _default_hparams(self):
    return super()._default_hparams().overwrite(ParamDict({
        'obs_res': 64,      # resolution of image observation
    }))

def _default_hparams(self):
    return super()._default_hparams().overwrite(ParamDict({
        'name': "Widow250OfficeFixed-v0",
    }))