def get_value(self, plan, penalty_coeff=1e0):
    # Build a fresh Gurobi model over the plan and evaluate the penalty-SQP
    # merit value of the current trajectory without solving.
    model = grb.Model()
    model.params.OutputFlag = 0
    self._prob = Prob(model)
    self._spawn_parameter_to_ll_mapping(model, plan)
    model.update()
    self._bexpr_to_pred = {}
    obj_bexprs = self._get_trajopt_obj(plan)
    self._add_obj_bexprs(obj_bexprs)
    self._add_all_timesteps_of_actions(plan, priority=1, add_nonlin=True,
                                       verbose=False)
    return self._prob.get_value(penalty_coeff)
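# Hypothetical usage sketch (an assumption, not part of this file): get_value
# can score a fully instantiated plan by its penalty-SQP merit value, e.g. to
# rank candidate plans. `solver`, `plan_a`, and `plan_b` are assumed names.
#
#   cost_a = solver.get_value(plan_a)  # trajectory cost + penalized violations
#   cost_b = solver.get_value(plan_b)
#   best = plan_a if cost_a <= cost_b else plan_b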
def monitor_update(self, plan, update_values, callback=None, n_resamples=5,
                   active_ts=None, verbose=False):
    print 'Resolving after environment update...\n'
    if callback is not None:
        viewer = callback()
    if active_ts is None:
        active_ts = (0, plan.horizon - 1)
    plan.save_free_attrs()
    model = grb.Model()
    model.params.OutputFlag = 0
    self._prob = Prob(model, callback=callback)
    # _free_attrs are respected when the low-level mapping is spawned here
    self._spawn_parameter_to_ll_mapping(model, plan, active_ts)
    model.update()
    self._bexpr_to_pred = {}
    tol = 1e-3

    obj_bexprs = []
    rs_obj = self._update(plan, update_values)
    obj_bexprs.extend(self._get_transfer_obj(plan, self.transfer_norm))
    self._add_all_timesteps_of_actions(plan, priority=MAX_PRIORITY,
                                       add_nonlin=True, active_ts=active_ts,
                                       verbose=verbose)
    obj_bexprs.extend(rs_obj)
    self._add_obj_bexprs(obj_bexprs)

    solv = Solver()
    solv.initial_trust_region_size = 1e-2
    solv.initial_penalty_coeff = self.init_penalty_coeff
    solv.max_merit_coeff_increases = self.max_merit_coeff_increases
    success = solv.solve(self._prob, method='penalty_sqp', tol=tol,
                         verbose=verbose)
    self._update_ll_params()
    if DEBUG:
        assert not plan.has_nan(active_ts)
    plan.restore_free_attrs()
    self.reset_variable()
    print "monitor_update\n"
    return success
def _solve_opt_prob(self, plan, priority, callback=None, init=True,
                    active_ts=None, verbose=False, resample=False,
                    smoothing=False):
    if callback is not None:
        viewer = callback()
    self.plan = plan
    vehicle = plan.params['vehicle']
    if active_ts is None:
        active_ts = (0, plan.horizon - 1)
    plan.save_free_attrs()
    model = grb.Model()
    model.params.OutputFlag = 0
    self._prob = Prob(model, callback=callback)
    self._spawn_parameter_to_ll_mapping(model, plan, active_ts)
    model.update()
    initial_trust_region_size = self.initial_trust_region_size

    if resample:
        tol = 1e-3

        def variable_helper():
            # Debugging helper: check that every Gurobi variable value agrees
            # with the parameter attribute entry it was spawned from. Variable
            # names have the form "(param-attr-(index))".
            error_bin = []
            for sco_var in self._prob._vars:
                for grb_var, val in zip(sco_var.get_grb_vars(),
                                        sco_var.get_value()):
                    grb_name = grb_var[0].VarName
                    one, two = grb_name.find('-'), grb_name.find('-(')
                    param_name = grb_name[1:one]
                    attr = grb_name[one+1:two]
                    index = eval(grb_name[two+1:-1])
                    param = plan.params[param_name]
                    if not np.allclose(val, getattr(param, attr)[index]):
                        error_bin.append((grb_name, val,
                                          getattr(param, attr)[index]))
            if len(error_bin) != 0:
                print "variable values disagree with parameter attributes"
                if DEBUG:
                    import ipdb; ipdb.set_trace()

        """
        When the optimization fails, resample new values for certain
        timesteps of the trajectory and solve again, using the resampled
        values as the initialization.
        """
        obj_bexprs = []
        ## this objective places a high value on matching the resampled values
        failed_preds = plan.get_failed_preds(active_ts=active_ts,
                                             priority=priority, tol=tol)
        rs_obj = self._resample(plan, failed_preds, sample_all=True)
        # _get_transfer_obj returns an expression saying the current
        # trajectory should be close to its previous trajectory.
        obj_bexprs.extend(self._get_transfer_obj(plan, self.transfer_norm))
        self._add_all_timesteps_of_actions(plan, priority=priority,
                                           add_nonlin=False,
                                           active_ts=active_ts,
                                           verbose=verbose)
        obj_bexprs.extend(rs_obj)
        self._add_obj_bexprs(obj_bexprs)
        initial_trust_region_size = 1e3
    else:
        self._bexpr_to_pred = {}
        if priority == -2:
            """
            Initialize a linear trajectory: enforce only the linear
            constraints at the first and last timesteps of each action, so
            the intermediate steps are free to interpolate linearly.
            """
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_first_and_last_timesteps_of_actions(
                plan, priority=MAX_PRIORITY, active_ts=active_ts,
                verbose=verbose, add_nonlin=False)
            tol = 1e-1
            initial_trust_region_size = 1e3
        elif priority == -1:
            """
            Solve the optimization while enforcing all constraints,
            including nonlinear ones, at the first and last timesteps of
            each action.
            """
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_first_and_last_timesteps_of_actions(
                plan, priority=MAX_PRIORITY, active_ts=active_ts,
                verbose=verbose, add_nonlin=True)
            tol = 1e-1
        elif priority >= 0:
            obj_bexprs = self._get_trajopt_obj(plan, active_ts)
            self._add_obj_bexprs(obj_bexprs)
            self._add_all_timesteps_of_actions(plan, priority=priority,
                                               add_nonlin=True,
                                               active_ts=active_ts,
                                               verbose=verbose)
            tol = 1e-3

    solv = Solver()
    solv.initial_trust_region_size = initial_trust_region_size
    if smoothing:
        solv.initial_penalty_coeff = self.smooth_penalty_coeff
    else:
        solv.initial_penalty_coeff = self.init_penalty_coeff
    solv.max_merit_coeff_increases = self.max_merit_coeff_increases
    success = solv.solve(self._prob, method='penalty_sqp', tol=tol,
                         verbose=verbose)
    if priority == MAX_PRIORITY:
        success = success or len(plan.get_failed_preds(
            tol=tol, active_ts=active_ts, priority=priority)) == 0
    self._update_ll_params()
    if DEBUG:
        assert not plan.has_nan(active_ts)

    if resample:
        # During resampling phases, the most recent entry of sampling_trace
        # must be assigned a reward: the number of actions completed before
        # the earliest remaining failure (all actions if none fail).
        if len(plan.sampling_trace) > 0 and 'reward' not in plan.sampling_trace[-1]:
            reward = 0
            if len(plan.get_failed_preds(active_ts=active_ts,
                                         priority=priority)) == 0:
                reward = len(plan.actions)
            else:
                failed_t = plan.get_failed_pred(active_ts=(0, active_ts[1]),
                                                priority=priority)[2]
                for i in range(len(plan.actions)):
                    if failed_t > plan.actions[i].active_timesteps[1]:
                        reward += 1
            plan.sampling_trace[-1]['reward'] = reward
    ## restore free_attrs values
    plan.restore_free_attrs()
    self.reset_variable()
    print "priority: {}\n".format(priority)
    return success
def _solve_opt_prob(self, plan, priority, callback=None, active_ts=None,
                    verbose=False, resample=True):
    ## active_ts is the inclusive range of timesteps to include
    ## in the optimization
    if active_ts is None:
        active_ts = (0, plan.horizon - 1)
    model = grb.Model()
    model.params.OutputFlag = 0
    self._prob = Prob(model, callback=callback)
    self._spawn_parameter_to_ll_mapping(model, plan, active_ts)
    model.update()
    self._bexpr_to_pred = {}

    if priority == -1:
        obj_bexprs = self._get_trajopt_obj(plan, active_ts)
        self._add_obj_bexprs(obj_bexprs)
        self._add_first_and_last_timesteps_of_actions(plan, priority=-1,
                                                      active_ts=active_ts,
                                                      verbose=verbose)
        tol = 1e-2
    elif priority == 0:
        ## this should only get called with a full plan for now
        assert active_ts == (0, plan.horizon - 1)
        obj_bexprs = []
        if resample:
            failed_preds = plan.get_failed_preds()
            ## this objective places a high value on matching the
            ## resampled values
            obj_bexprs.extend(self._resample(plan, failed_preds))
        ## solve a movement-primitive optimization to transfer
        ## the current trajectories
        obj_bexprs.extend(self._get_transfer_obj(plan, self.transfer_norm))
        self._add_obj_bexprs(obj_bexprs)
        self._add_first_and_last_timesteps_of_actions(plan, priority=-1,
                                                      add_nonlin=True,
                                                      verbose=verbose)
        self._add_all_timesteps_of_actions(plan, priority=0,
                                           add_nonlin=False, verbose=verbose)
        tol = 1e-2
    elif priority == 1:
        obj_bexprs = self._get_trajopt_obj(plan, active_ts)
        self._add_obj_bexprs(obj_bexprs)
        self._add_all_timesteps_of_actions(plan, priority=1, add_nonlin=True,
                                           active_ts=active_ts,
                                           verbose=verbose)
        tol = 1e-4

    solv = Solver()
    solv.initial_penalty_coeff = self.init_penalty_coeff
    success = solv.solve(self._prob, method='penalty_sqp', tol=tol,
                         verbose=verbose)
    self._update_ll_params()
    self._failed_groups = self._prob.nonconverged_groups
    return success
def _solve_opt_prob(self, plan, priority, callback=None, init=True,
                    active_ts=None, verbose=False):
    ## active_ts is the inclusive range of timesteps to include
    ## in the optimization
    if active_ts is None:
        active_ts = (0, plan.horizon - 1)
    model = grb.Model()
    model.params.OutputFlag = 0
    self._prob = Prob(model, callback=callback)
    self._spawn_parameter_to_ll_mapping(model, plan, active_ts)
    model.update()
    self._bexpr_to_pred = {}

    if priority == -2:
        obj_bexprs = self._get_trajopt_obj(plan, active_ts)
        self._add_obj_bexprs(obj_bexprs)
        self._add_first_and_last_timesteps_of_actions(
            plan, priority=MAX_PRIORITY, active_ts=active_ts,
            verbose=verbose, add_nonlin=False)
        tol = 1e-1
    elif priority == -1:
        obj_bexprs = self._get_trajopt_obj(plan, active_ts)
        self._add_obj_bexprs(obj_bexprs)
        self._add_first_and_last_timesteps_of_actions(
            plan, priority=MAX_PRIORITY, active_ts=active_ts,
            verbose=verbose, add_nonlin=True)
        tol = 1e-1
    elif priority == 0:
        ## this should only get called with a full plan for now
        assert active_ts == (0, plan.horizon - 1)
        failed_preds = plan.get_failed_preds()
        ## this objective places a high value on matching the resampled values
        obj_bexprs = []
        obj_bexprs.extend(self._resample(plan, failed_preds))
        ## keep the trajectory-smoothness objective while transferring
        ## the current trajectories
        obj_bexprs.extend(self._get_trajopt_obj(plan, active_ts))
        self._add_obj_bexprs(obj_bexprs)
        self._add_all_timesteps_of_actions(plan, priority=1, add_nonlin=True,
                                           verbose=verbose)
        tol = 1e-1
    elif priority >= 1:
        obj_bexprs = self._get_trajopt_obj(plan, active_ts)
        self._add_obj_bexprs(obj_bexprs)
        self._add_all_timesteps_of_actions(plan, priority=priority,
                                           add_nonlin=True,
                                           active_ts=active_ts,
                                           verbose=verbose)
        tol = 1e-3

    solv = Solver()
    solv.initial_trust_region_size = self.initial_trust_region_size
    solv.initial_penalty_coeff = self.init_penalty_coeff
    solv.max_merit_coeff_increases = self.max_merit_coeff_increases
    success = solv.solve(self._prob, method='penalty_sqp', tol=tol,
                         verbose=verbose)
    self._update_ll_params()
    print "priority: {}".format(priority)
    return success
def _solve_opt_prob(self, plan, priority, callback=None, init=True,
                    active_ts=None, verbose=False):
    robot = plan.params['baxter']
    body = plan.env.GetRobot("baxter")
    if callback is not None:
        viewer = callback()

        def draw(t):
            viewer.draw_plan_ts(plan, t)

    ## active_ts is the inclusive range of timesteps to include
    ## in the optimization
    if active_ts is None:
        active_ts = (0, plan.horizon - 1)
    plan.save_free_attrs()
    model = grb.Model()
    model.params.OutputFlag = 0
    self._prob = Prob(model, callback=callback)
    # _free_attrs are respected when the low-level mapping is spawned here
    self._spawn_parameter_to_ll_mapping(model, plan, active_ts)
    model.update()
    self._bexpr_to_pred = {}

    if priority == -2:
        """
        Initialize a linear trajectory: enforce only the linear constraints
        at the first and last timesteps of each action, so the intermediate
        steps are free to interpolate linearly.
        """
        obj_bexprs = self._get_trajopt_obj(plan, active_ts)
        self._add_obj_bexprs(obj_bexprs)
        self._add_first_and_last_timesteps_of_actions(
            plan, priority=MAX_PRIORITY, active_ts=active_ts,
            verbose=verbose, add_nonlin=False)
        tol = 1e-1
    elif priority == -1:
        """
        Solve the optimization while enforcing all constraints, including
        nonlinear ones, at the first and last timesteps of each action.
        """
        obj_bexprs = self._get_trajopt_obj(plan, active_ts)
        self._add_obj_bexprs(obj_bexprs)
        self._add_first_and_last_timesteps_of_actions(
            plan, priority=MAX_PRIORITY, active_ts=active_ts,
            verbose=verbose, add_nonlin=True)
        tol = 1e-1
    elif priority == 0:
        """
        When the optimization fails, resample new values for certain
        timesteps of the trajectory and solve again, using the resampled
        values as the initialization.
        """
        ## this should only get called with a full plan for now
        # assert active_ts == (0, plan.horizon - 1)
        failed_preds = plan.get_failed_preds()
        ## this objective places a high value on matching the resampled values
        obj_bexprs = []
        rs_obj = self._resample(plan, failed_preds)
        obj_bexprs.extend(rs_obj)
        # _get_transfer_obj returns an expression saying the current
        # trajectory should be close to its previous trajectory.
        obj_bexprs.extend(self._get_transfer_obj(plan, self.transfer_norm))
        self._add_obj_bexprs(obj_bexprs)
        self._add_all_timesteps_of_actions(plan, priority=1, add_nonlin=True,
                                           active_ts=active_ts,
                                           verbose=verbose)
        tol = 1e-3
    elif priority >= 1:
        obj_bexprs = self._get_trajopt_obj(plan, active_ts)
        self._add_obj_bexprs(obj_bexprs)
        self._add_all_timesteps_of_actions(plan, priority=priority,
                                           add_nonlin=True,
                                           active_ts=active_ts,
                                           verbose=verbose)
        tol = 1e-3

    solv = Solver()
    solv.initial_trust_region_size = self.initial_trust_region_size
    solv.initial_penalty_coeff = self.init_penalty_coeff
    solv.max_merit_coeff_increases = self.max_merit_coeff_increases
    success = solv.solve(self._prob, method='penalty_sqp', tol=tol,
                         verbose=verbose)
    self._update_ll_params()
    print "priority: {}".format(priority)

    if priority == 0:
        # During resampling phases, the most recent entry of sampling_trace
        # must be assigned a reward: the number of actions completed before
        # the earliest remaining failure (all actions if none fail).
        if len(plan.sampling_trace) > 0 and 'reward' not in plan.sampling_trace[-1]:
            reward = 0
            if len(plan.get_failed_preds()) == 0:
                reward = len(plan.actions)
            else:
                failed_t = plan.get_failed_pred()[2]
                for i in range(len(plan.actions)):
                    if failed_t > plan.actions[i].active_timesteps[1]:
                        reward += 1
            plan.sampling_trace[-1]['reward'] = reward
    ## restore free_attrs values
    plan.restore_free_attrs()
    return success