def __init__(self, hyperparams):
    """Build the algorithm from the ALG_BADMM defaults plus overrides.

    A deep copy of ALG_BADMM is updated with ``hyperparams`` and handed to
    ``Algorithm.__init__``. Each of the ``self.M`` entries of ``self.cur``
    then receives a ``PolicyInfo`` carrying its own policy prior, and the
    policy optimizer is constructed from the 'policy_opt' entry.

    Args:
        hyperparams: Settings passed to ``update()`` on the copied
            defaults, so its entries win over ALG_BADMM.
    """
    # Reference defaults as noted by the original author (verify against
    # the module that defines ALG_BADMM):
    # ALG_BADMM = {
    #     'inner_iterations': 4,
    #     'policy_dual_rate': 0.1,
    #     'policy_dual_rate_covar': 0.0,
    #     'fixed_lg_step': 0,
    #     'lg_step_schedule': 10.0,
    #     'ent_reg_schedule': 0.0,
    #     'init_pol_wt': 0.01,
    #     'policy_sample_mode': 'add',
    #     'exp_step_increase': 2.0,
    #     'exp_step_decrease': 0.5,
    #     'exp_step_upper': 0.5,
    #     'exp_step_lower': 1.0,
    # }
    # Work on a deep copy so the shared ALG_BADMM object is never mutated;
    # update() then overlays the caller's key/value pairs.
    settings = copy.deepcopy(ALG_BADMM)
    settings.update(hyperparams)

    # Base-class initialisation (noted by the author as algorithm.py).
    Algorithm.__init__(self, settings)

    # Example 'policy_prior' entry from the original annotation:
    # algorithm['policy_prior'] = {
    #     'type': PolicyPriorGMM,
    #     'max_clusters': 20,
    #     'min_samples_per_cluster': 40,
    #     'max_samples': 40,
    # }
    prior_cfg = self._hyperparams['policy_prior']

    # NOTE(review): per the original annotation, the base class sets
    #   self._cond_idx = hyperparams['train_conditions']
    #   self.M = hyperparams['conditions']  (2 in the author's example)
    #   self.cur = [IterationData() for _ in range(self.M)]
    # -- confirm in Algorithm.__init__.
    for cond in range(self.M):
        slot = self.cur[cond]
        # Per-condition policy information, each with a freshly built prior.
        slot.pol_info = PolicyInfo(self._hyperparams)
        slot.pol_info.policy_prior = prior_cfg['type'](prior_cfg)

    # Example 'policy_opt' entry from the original annotation:
    # algorithm['policy_opt'] = {
    #     'type': PolicyOptTf,
    #     'network_params': {
    #         'obs_include': [JOINT_ANGLES, JOINT_VELOCITIES],
    #         'obs_vector_data': [JOINT_ANGLES, JOINT_VELOCITIES],
    #         'sensor_dims': SENSOR_DIMS,
    #     },
    #     'network_model': tf_network,
    #     'iterations': 1000,
    #     'weights_file_prefix': EXP_DIR + 'policy',
    # }
    # The 'type' entry is called with its own config plus self.dO/self.dU.
    opt_cfg = self._hyperparams['policy_opt']
    self.policy_opt = opt_cfg['type'](opt_cfg, self.dO, self.dU)
def __init__(self, hyperparams):
    """Initialise from ALG_BADMM defaults overridden by ``hyperparams``.

    After the base-class setup, every one of the ``self.M`` entries in
    ``self.cur`` gets a ``PolicyInfo`` with its own policy prior, and the
    policy optimizer is created from the 'policy_opt' configuration.

    Args:
        hyperparams: Settings applied on top of a deep copy of ALG_BADMM
            via ``update()``.
    """
    # Deep copy keeps the shared ALG_BADMM object untouched.
    settings = copy.deepcopy(ALG_BADMM)
    settings.update(hyperparams)
    Algorithm.__init__(self, settings)

    prior_cfg = self._hyperparams['policy_prior']
    for cond in range(self.M):
        slot = self.cur[cond]
        slot.pol_info = PolicyInfo(self._hyperparams)
        # The prior's 'type' entry is called with the whole prior config.
        slot.pol_info.policy_prior = prior_cfg['type'](prior_cfg)

    # The optimizer's 'type' entry receives its config and self.dO/self.dU.
    opt_cfg = self._hyperparams['policy_opt']
    self.policy_opt = opt_cfg['type'](opt_cfg, self.dO, self.dU)
def __init__(self, hyperparams):
    """Initialise from ALG_MDGPS defaults overridden by ``hyperparams``.

    Runs the base-class setup with the merged settings, attaches a
    ``PolicyInfo`` (with its own policy prior) to each of the ``self.M``
    entries of ``self.cur``, and builds the policy optimizer.

    Args:
        hyperparams: Settings applied on top of a deep copy of ALG_MDGPS
            via ``update()``.
    """
    # Deep copy keeps the shared ALG_MDGPS object untouched.
    merged = copy.deepcopy(ALG_MDGPS)
    merged.update(hyperparams)
    Algorithm.__init__(self, merged)

    for cond in range(self.M):
        entry = self.cur[cond]
        entry.pol_info = PolicyInfo(self._hyperparams)
        # The prior config is looked up inside the loop, so the
        # 'policy_prior' key is only required when self.M > 0.
        prior_cfg = self._hyperparams['policy_prior']
        entry.pol_info.policy_prior = prior_cfg['type'](prior_cfg)

    # The optimizer's 'type' entry receives its config and self.dO/self.dU.
    opt_cfg = self._hyperparams['policy_opt']
    self.policy_opt = opt_cfg['type'](opt_cfg, self.dO, self.dU)
def __init__(self, hyperparams):
    """Initialise from ALG_OLGPS defaults overridden by ``hyperparams``.

    Runs the base-class setup, builds the policy optimizer, clears
    ``flag_reset``, and gives each of the ``self.M`` entries of
    ``self.cur`` a ``last_pol`` ``PolicyInfo`` with its own policy prior.

    Args:
        hyperparams: Settings applied on top of a deep copy of ALG_OLGPS
            via ``update()``.
    """
    # Deep copy keeps the shared ALG_OLGPS object untouched.
    merged = copy.deepcopy(ALG_OLGPS)
    merged.update(hyperparams)
    Algorithm.__init__(self, merged)

    # The optimizer is created before the per-condition policy info.
    opt_cfg = self._hyperparams['policy_opt']
    self.policy_opt = opt_cfg['type'](opt_cfg, self.dO, self.dU)

    self.flag_reset = False

    prior_cfg = self._hyperparams['policy_prior']
    for cond in range(self.M):
        entry = self.cur[cond]
        # Stored under `last_pol` in this variant.
        entry.last_pol = PolicyInfo(self._hyperparams)
        entry.last_pol.policy_prior = prior_cfg['type'](prior_cfg)
def __init__(self, hyperparams):
    """Initialise from ALG_PI2 defaults overridden by ``hyperparams``.

    Args:
        hyperparams: Settings applied on top of a deep copy of ALG_PI2
            via ``update()``; the merged result goes to
            ``Algorithm.__init__``.
    """
    # Deep copy keeps the shared ALG_PI2 object untouched.
    merged = copy.deepcopy(ALG_PI2)
    merged.update(hyperparams)
    Algorithm.__init__(self, merged)
def __init__(self, hyperparams):
    """Delegate initialisation to ``Algorithm.__init__``.

    Args:
        hyperparams: Forwarded unchanged to the base-class initialiser;
            no defaults are merged in here.
    """
    Algorithm.__init__(self, hyperparams)
def __init__(self, hyperparams):
    """Run the base-class setup, then build the policy optimizer.

    Args:
        hyperparams: Forwarded unchanged to ``Algorithm.__init__``; no
            defaults are merged in here.
    """
    Algorithm.__init__(self, hyperparams)

    # The optimizer's 'type' entry receives its config and self.dO/self.dU.
    opt_cfg = self._hyperparams['policy_opt']
    self.policy_opt = opt_cfg['type'](opt_cfg, self.dO, self.dU)