def _advance_iteration_variables(self):
    """Move all 'cur' variables to 'prev', and advance iteration counter."""
    self.iteration_count += 1
    itr = self.iteration_count
    self.prev = copy.deepcopy(self.cur)
    self.cur = [IterationData() for _ in range(self.M)]
    # Open fresh per-iteration history slots for this iteration index.
    for log in (self.traj_distr, self.traj_info,
                self.kl_div, self.dists_to_target):
        log[itr] = []
    self.previous_cost = []
    ioc_enabled = self._hyperparams['ioc']
    for cond in range(self.M):
        fresh = self.cur[cond]
        carried = self.prev[cond]
        fresh.traj_info = TrajectoryInfo()
        fresh.traj_info.dynamics = copy.deepcopy(carried.traj_info.dynamics)
        fresh.step_mult = carried.step_mult
        fresh.eta = carried.eta
        fresh.traj_distr = self.new_traj_distr[cond]
        self.traj_distr[itr].append(self.new_traj_distr[cond])
        self.traj_info[itr].append(fresh.traj_info)
        if ioc_enabled:
            fresh.prevcost_traj_info = TrajectoryInfo()
            self.previous_cost.append(self.cost[cond].copy())
    delattr(self, 'new_traj_distr')
def __init__(self, hyperparams):
    """
    Initialize algorithm state from a hyperparameter dictionary.

    Merges ``hyperparams`` over the ALG defaults, caches the time horizon
    and dimensions from the agent, then builds per-condition iteration
    data, the trajectory optimizer, and one cost object per condition.

    Args:
        hyperparams: Algorithm hyperparameter dict; must contain an
            'agent' entry (removed after use so the object stays
            picklable).
    """
    config = copy.deepcopy(ALG)
    config.update(hyperparams)
    self._hyperparams = config

    # Either train on an explicit subset of conditions, or on all of them.
    if 'train_conditions' in hyperparams:
        self._cond_idx = hyperparams['train_conditions']
        self.M = len(self._cond_idx)
    else:
        self.M = hyperparams['conditions']
        self._cond_idx = range(self.M)
        self._hyperparams['train_conditions'] = self._cond_idx
        self._hyperparams['test_conditions'] = self._cond_idx
    self.iteration_count = 0

    # Grab a few values from the agent.
    agent = self._hyperparams['agent']
    self.T = self._hyperparams['T'] = agent.T
    self.dU = self._hyperparams['dU'] = agent.dU
    self.dX = self._hyperparams['dX'] = agent.dX
    self.dO = self._hyperparams['dO'] = agent.dO

    init_traj_distr = config['init_traj_distr']
    init_traj_distr['x0'] = agent.x0
    init_traj_distr['dX'] = agent.dX
    init_traj_distr['dU'] = agent.dU
    del self._hyperparams['agent']  # Don't want to pickle this.

    # IterationData objects for each condition.
    self.cur = [IterationData() for _ in range(self.M)]
    self.prev = [IterationData() for _ in range(self.M)]

    if self._hyperparams['fit_dynamics']:
        dynamics = self._hyperparams['dynamics']

    for m in range(self.M):
        self.cur[m].traj_info = TrajectoryInfo()
        if self._hyperparams['fit_dynamics']:
            self.cur[m].traj_info.dynamics = dynamics['type'](dynamics)
        init_traj_distr = extract_condition(
            self._hyperparams['init_traj_distr'], self._cond_idx[m]
        )
        self.cur[m].traj_distr = init_traj_distr['type'](init_traj_distr)

    self.traj_opt = hyperparams['traj_opt']['type'](
        hyperparams['traj_opt']
    )
    # The cost spec is either one dict shared by all conditions or a list
    # with one dict per condition. isinstance (not `type(...) ==`) so list
    # subclasses are accepted too.
    if isinstance(hyperparams['cost'], list):
        self.cost = [
            hyperparams['cost'][i]['type'](hyperparams['cost'][i])
            for i in range(self.M)
        ]
    else:
        self.cost = [
            hyperparams['cost']['type'](hyperparams['cost'])
            for _ in range(self.M)
        ]
    self.base_kl_step = self._hyperparams['kl_step']
    self.mpc = []  # For initialize
def _advance_iteration_variables(self):
    """
    Move all 'cur' variables to 'prev', reinitialize 'cur' variables, and
    advance iteration counter.
    """
    self.iteration_count += 1
    # NOTE(review): this is an alias, not a copy — prev shares the old
    # IterationData objects with what was cur.
    self.prev = self.cur
    # TODO: change IterationData to reflect new stuff better
    for m in range(self.M):
        self.prev[m].new_traj_distr = self.new_traj_distr[m]
    self.cur = [IterationData() for _ in range(self.M)]
    for m in range(self.M):
        self.cur[m].traj_info = TrajectoryInfo()
        # Detach the dynamics prior before deep-copying so the (presumably
        # large) prior object is moved by reference rather than duplicated.
        # prev[m]'s dynamics.prior is left as None afterwards — apparently
        # intentional; verify nothing downstream reads prev's prior.
        cur_dynamics_prior = self.prev[m].traj_info.dynamics.prior
        self.prev[m].traj_info.dynamics.prior = None
        self.cur[m].traj_info.dynamics = copy.deepcopy(
            self.prev[m].traj_info.dynamics)
        self.cur[m].traj_info.dynamics.prior = cur_dynamics_prior
        self.cur[m].step_mult = self.prev[m].step_mult
        self.cur[m].eta = self.prev[m].eta
        self.cur[m].traj_distr = self.new_traj_distr[m]
    delattr(self, 'new_traj_distr')
    for m in range(self.M):
        self.cur[m].traj_info.last_kl_step = \
            self.prev[m].traj_info.last_kl_step
        # Same move-don't-copy trick for the policy prior: deepcopy pol_info
        # without its prior, then re-attach the original prior to cur.
        cur_policy_prior = self.prev[m].pol_info.policy_prior
        self.prev[m].pol_info.policy_prior = None
        self.cur[m].pol_info = copy.deepcopy(self.prev[m].pol_info)
        self.cur[m].pol_info.policy_prior = cur_policy_prior
def __init__(self, hyperparams):
    """
    Build algorithm state: merge hyperparams over ALG defaults, cache agent
    dimensions, and create per-condition iteration data plus per-iteration
    history containers.
    """
    config = copy.deepcopy(ALG)
    config.update(hyperparams)
    self._hyperparams = config

    if 'train_conditions' in hyperparams:
        self._cond_idx = hyperparams['train_conditions']
        self.M = len(self._cond_idx)
    else:
        self.M = hyperparams['conditions']
        self._cond_idx = range(self.M)
    self.iteration_count = 0

    # Cache a few values from the agent, then drop it from the
    # hyperparams so they remain picklable.
    agent = self._hyperparams['agent']
    self.T = self._hyperparams['T'] = agent.T
    self.dU = self._hyperparams['dU'] = agent.dU
    self.dX = self._hyperparams['dX'] = agent.dX
    self.dO = self._hyperparams['dO'] = agent.dO

    init_traj_distr = config['init_traj_distr']
    init_traj_distr['x0'] = agent.x0
    init_traj_distr['dX'] = agent.dX
    init_traj_distr['dU'] = agent.dU
    del self._hyperparams['agent']  # Don't want to pickle this.

    # IterationData per condition plus keyed-by-iteration history dicts.
    self.cur = [IterationData() for _ in range(self.M)]
    self.prev = [IterationData() for _ in range(self.M)]
    self.traj_distr = {self.iteration_count: []}
    self.traj_info = {self.iteration_count: []}
    self.kl_div = {self.iteration_count: []}
    self.dists_to_target = {self.iteration_count: []}
    self.sample_list = {i: SampleList([]) for i in range(self.M)}

    for cond in range(self.M):
        data = self.cur[cond]
        data.traj_info = TrajectoryInfo()
        dynamics = self._hyperparams['dynamics']
        data.traj_info.dynamics = dynamics['type'](dynamics)
        cond_distr = extract_condition(
            self._hyperparams['init_traj_distr'], self._cond_idx[cond])
        data.traj_distr = cond_distr['type'](cond_distr)
        self.traj_distr[self.iteration_count].append(data.traj_distr)
        self.traj_info[self.iteration_count].append(data.traj_info)

    self.traj_opt = hyperparams['traj_opt']['type'](
        hyperparams['traj_opt'])
    self.cost = [hyperparams['cost']['type'](hyperparams['cost'])
                 for _ in range(self.M)]
    if self._hyperparams['ioc']:
        # Ground-truth costs are only built when running IOC.
        self.gt_cost = [hyperparams['gt_cost']['type'](hyperparams['gt_cost'])
                        for _ in range(self.M)]
    self.base_kl_step = self._hyperparams['kl_step']
def __init__(self, hyperparams):
    """
    Build algorithm state; the condition count may come from an explicit
    train set, a 'tac' cluster count, or the plain 'conditions' entry.
    """
    config = copy.deepcopy(ALG)
    config.update(hyperparams)
    self._hyperparams = config
    self.timers = OrderedDict()

    if 'train_conditions' in hyperparams:
        self._cond_idx = hyperparams['train_conditions']
        self.M = len(self._cond_idx)
    else:
        if 'tac' in hyperparams:
            self.M = hyperparams['tac']['clusters']
        else:
            self.M = hyperparams['conditions']
        self._cond_idx = range(self.M)
        self._hyperparams['train_conditions'] = self._cond_idx
        self._hyperparams['test_conditions'] = self._cond_idx
    self.iteration_count = 0

    # Grab a few values from the agent.
    agent = self._hyperparams['agent']
    self.T = self._hyperparams['T'] = agent.T
    self.dU = self._hyperparams['dU'] = agent.dU
    self.dX = self._hyperparams['dX'] = agent.dX
    self.dO = self._hyperparams['dO'] = agent.dO

    init_traj_distr = config['init_traj_distr']
    init_traj_distr['x0'] = agent.x0
    init_traj_distr['dX'] = agent.dX
    init_traj_distr['dU'] = agent.dU
    del self._hyperparams['agent']  # Don't want to pickle this.

    # IterationData per condition plus slots for updated trajectory moments.
    self.cur = [IterationData() for _ in range(self.M)]
    self.prev = [IterationData() for _ in range(self.M)]
    self.new_mu = [None] * self.M
    self.new_sigma = [None] * self.M

    dynamics = self._hyperparams['dynamics']
    for cond in range(self.M):
        data = self.cur[cond]
        data.traj_info = TrajectoryInfo()
        if dynamics is not None:
            data.traj_info.dynamics = dynamics['type'](dynamics)
        # Condition 0 is used for every distribution (global-x0 TODO carried
        # over from the original).
        cond_distr = extract_condition(
            self._hyperparams['init_traj_distr'], self._cond_idx[0])
        data.traj_distr = cond_distr['type'](cond_distr)

    # Trajectory-optimizer construction is intentionally disabled in this
    # variant.
    self.cost = [hyperparams['cost']['type'](hyperparams['cost'])
                 for _ in range(self.M)]
    self.base_kl_step = self._hyperparams['kl_step']
def _advance_iteration_variables(self):
    """Move all 'cur' variables to 'prev', and advance iteration counter."""
    self.iteration_count += 1
    # Snapshot the finished iteration, then start a clean one.
    self.prev = copy.deepcopy(self.cur)
    fresh = [IterationData() for _ in range(self.M)]
    for data in fresh:
        data.traj_info = TrajectoryInfo()
    self.cur = fresh
def _advance_iteration_variables(self):
    """Move all 'cur' variables to 'prev', and advance iteration counter."""
    self.iteration_count += 1
    self.prev = self.cur  # alias, not a copy
    self.cur = [IterationData() for _ in range(self.M)]
    for cond, carried in enumerate(self.prev):
        fresh = self.cur[cond]
        fresh.traj_info = TrajectoryInfo()
        # NOTE(review): the dynamics object is shared with prev (no copy) —
        # presumably intentional to avoid a deepcopy; confirm.
        fresh.traj_info.dynamics = carried.traj_info.dynamics
        fresh.step_mult = carried.step_mult
        fresh.eta = carried.eta
        fresh.traj_distr = self.new_traj_distr[cond]
    delattr(self, 'new_traj_distr')
def _advance_iteration_variables(self):
    """Move all 'cur' variables to 'prev', and advance iteration counter."""
    self.iteration_count += 1
    self.prev = copy.deepcopy(self.cur)
    # TODO: change IterationData to reflect new stuff better
    for cond in range(self.M):
        self.prev[cond].new_traj_distr = self.new_traj_distr[cond]
    self.cur = [IterationData() for _ in range(self.M)]
    for cond in range(self.M):
        carried = self.prev[cond]
        fresh = self.cur[cond]
        fresh.traj_info = TrajectoryInfo()
        fresh.traj_info.dynamics = copy.deepcopy(carried.traj_info.dynamics)
        fresh.step_mult = carried.step_mult
        fresh.eta = carried.eta
        fresh.traj_distr = self.new_traj_distr[cond]
    delattr(self, 'new_traj_distr')
def __init__(self, hyperparams):
    """
    Configure base algorithm state: ALG defaults overridden by hyperparams,
    agent dimensions cached, one IterationData per condition.
    """
    config = copy.deepcopy(ALG)
    config.update(hyperparams)
    self._hyperparams = config
    self.M = hyperparams['conditions']
    self.iteration_count = 0

    # Grab a few values from the agent.
    agent = self._hyperparams['agent']
    self.T = self._hyperparams['T'] = agent.T
    self.dU = self._hyperparams['dU'] = agent.dU
    self.dX = self._hyperparams['dX'] = agent.dX
    self.dO = self._hyperparams['dO'] = agent.dO

    init_traj_distr = config['init_traj_distr']
    init_traj_distr['x0'] = agent.x0
    init_traj_distr['dX'] = agent.dX
    init_traj_distr['dU'] = agent.dU
    del self._hyperparams['agent']  # Don't want to pickle this.

    # IterationData objects for each condition.
    self.cur = [IterationData() for _ in range(self.M)]
    self.prev = [IterationData() for _ in range(self.M)]
    for cond in range(self.M):
        data = self.cur[cond]
        data.traj_info = TrajectoryInfo()
        dyn = self._hyperparams['dynamics']
        data.traj_info.dynamics = dyn['type'](dyn)
        cond_distr = extract_condition(
            self._hyperparams['init_traj_distr'], cond)
        data.traj_distr = cond_distr['type'](cond_distr)

    self.traj_opt = hyperparams['traj_opt']['type'](
        hyperparams['traj_opt'])
    self.cost = [hyperparams['cost']['type'](hyperparams['cost'])
                 for _ in range(self.M)]
    self.base_kl_step = self._hyperparams['kl_step']
def reset_alg(self):
    """
    Reset the algorithm to its initial state.

    Reinitializes per-condition traj_distr and traj_info (and rebuilds the
    trajectory optimizer and cost objects) while preserving each
    condition's last policy; policy_opt is untouched.
    """
    # Preserve each condition's last policy across the reset.
    last_pol = [copy.deepcopy(self.cur[m].last_pol) for m in range(self.M)]
    self.cur = [IterationData() for _ in range(self.M)]
    if self._hyperparams['fit_dynamics']:
        dynamics = self._hyperparams['dynamics']
    for m in range(self.M):
        self.cur[m].last_pol = copy.deepcopy(last_pol[m])
        self.cur[m].traj_info = TrajectoryInfo()
        if self._hyperparams['fit_dynamics']:
            self.cur[m].traj_info.dynamics = dynamics['type'](dynamics)
        init_traj_distr = extract_condition(
            self._hyperparams['init_traj_distr'], self._cond_idx[m]
        )
        self.cur[m].traj_distr = init_traj_distr['type'](init_traj_distr)
    self.traj_opt = self._hyperparams['traj_opt']['type'](
        self._hyperparams['traj_opt']
    )
    # The cost spec is either one dict shared by all conditions or a list
    # with one dict per condition. isinstance (not `type(...) ==`) so list
    # subclasses are accepted too.
    if isinstance(self._hyperparams['cost'], list):
        self.cost = [
            self._hyperparams['cost'][i]['type'](self._hyperparams['cost'][i])
            for i in range(self.M)
        ]
    else:
        self.cost = [
            self._hyperparams['cost']['type'](self._hyperparams['cost'])
            for _ in range(self.M)
        ]
    self.base_kl_step = self._hyperparams['kl_step']
def __init__(self, hyperparams):
    """
    Initialize algorithm state from a hyperparameter dictionary.

    Merges ``hyperparams`` over the ALG defaults, caches the time horizon
    and dimensions from the agent, then builds per-condition iteration
    data, the trajectory optimizer, and one cost object per condition.

    Args:
        hyperparams: Algorithm hyperparameter dict; must contain an
            'agent' entry (removed after use so the object stays
            picklable).
    """
    config = copy.deepcopy(ALG)
    config.update(hyperparams)
    self._hyperparams = config

    # Either train on an explicit subset of conditions, or on all of them.
    if 'train_conditions' in hyperparams:
        self._cond_idx = hyperparams['train_conditions']
        self.M = len(self._cond_idx)
    else:
        self.M = hyperparams['conditions']
        self._cond_idx = range(self.M)
        self._hyperparams['train_conditions'] = self._cond_idx
        self._hyperparams['test_conditions'] = self._cond_idx
    self.iteration_count = 0

    # Grab a few values from the agent.
    agent = self._hyperparams['agent']
    self.T = self._hyperparams['T'] = agent.T
    self.dU = self._hyperparams['dU'] = agent.dU
    self.dX = self._hyperparams['dX'] = agent.dX
    self.dO = self._hyperparams['dO'] = agent.dO

    init_traj_distr = config['init_traj_distr']
    init_traj_distr['x0'] = agent.x0
    init_traj_distr['dX'] = agent.dX
    init_traj_distr['dU'] = agent.dU
    del self._hyperparams['agent']  # Don't want to pickle this.

    # IterationData objects for each condition.
    self.cur = [IterationData() for _ in range(self.M)]
    self.prev = [IterationData() for _ in range(self.M)]

    if self._hyperparams['fit_dynamics']:
        dynamics = self._hyperparams['dynamics']

    for m in range(self.M):
        self.cur[m].traj_info = TrajectoryInfo()
        if self._hyperparams['fit_dynamics']:
            self.cur[m].traj_info.dynamics = dynamics['type'](dynamics)
        # extract_condition pulls the per-condition value out of any
        # list-valued hyperparameter.
        init_traj_distr = extract_condition(
            self._hyperparams['init_traj_distr'], self._cond_idx[m])
        self.cur[m].traj_distr = init_traj_distr['type'](init_traj_distr)

    self.traj_opt = hyperparams['traj_opt']['type'](
        hyperparams['traj_opt'])

    # The cost spec is either one dict shared by all conditions or a list
    # with one dict per condition. isinstance (not `type(...) ==`) so list
    # subclasses are accepted too.
    if isinstance(hyperparams['cost'], list):
        self.cost = [
            hyperparams['cost'][i]['type'](hyperparams['cost'][i])
            for i in range(self.M)
        ]
    else:
        self.cost = [
            hyperparams['cost']['type'](hyperparams['cost'])
            for _ in range(self.M)
        ]

    self.base_kl_step = self._hyperparams['kl_step']
def __init__(self, hyperparams):
    """
    Build per-condition state, per-condition cost objects, policy info,
    and the policy optimizer.
    """
    config = copy.deepcopy(ALG)
    config.update(hyperparams)
    self._hyperparams = config

    if 'train_conditions' in hyperparams:
        self._cond_idx = hyperparams['train_conditions']
        self.M = len(self._cond_idx)
    else:
        self.M = hyperparams['conditions']
        self._cond_idx = range(self.M)
        self._hyperparams['train_conditions'] = self._cond_idx
        self._hyperparams['test_conditions'] = self._cond_idx
    self.iteration_count = 0

    # Keep the agent itself on self.agent, but strip it from the
    # hyperparams so they remain picklable.
    agent = self._hyperparams['agent']
    self.agent = agent
    self.T = self._hyperparams['T'] = agent.T
    self.dU = self._hyperparams['dU'] = agent.dU
    self.dX = self._hyperparams['dX'] = agent.dX
    self.dO = self._hyperparams['dO'] = agent.dO

    init_traj_distr = config['init_traj_distr']
    init_traj_distr['x0'] = agent.x0
    init_traj_distr['dX'] = agent.dX
    init_traj_distr['dU'] = agent.dU
    del self._hyperparams['agent']  # Don't want to pickle this.

    # IterationData objects for each condition.
    self.cur = [IterationData() for _ in range(self.M)]
    self.prev = [IterationData() for _ in range(self.M)]

    dynamics = self._hyperparams['dynamics']
    for cond in range(self.M):
        data = self.cur[cond]
        data.traj_info = TrajectoryInfo()
        data.traj_info.dynamics = dynamics['type'](dynamics)
        distr_params = extract_condition(init_traj_distr,
                                         self._cond_idx[cond])
        distr_params['cur_cond_idx'] = self._cond_idx[cond]
        data.traj_distr = distr_params['type'](distr_params, agent)

    self.traj_opt = hyperparams['traj_opt']['type'](
        hyperparams['traj_opt'])

    # One cost object per condition, each tagged with its condition index.
    self.cost = []
    for cond in range(self.M):
        cost_params = hyperparams['cost'].copy()
        cost_params['cur_cond_idx'] = self._cond_idx[cond]
        self.cost.append(hyperparams['cost']['type'](cost_params))

    self.base_kl_step = self._hyperparams['kl_step']

    policy_prior = self._hyperparams['policy_prior']
    for cond in range(self.M):
        self.cur[cond].pol_info = PolicyInfo(self._hyperparams)
        self.cur[cond].pol_info.policy_prior = \
            policy_prior['type'](policy_prior)
    self.policy_opt = self._hyperparams['policy_opt']['type'](
        self._hyperparams['policy_opt'], self.dO, self.dU)