def get_value(self, plan, penalty_coeff=1e0): model = grb.Model() model.params.OutputFlag = 0 self._prob = Prob(model) # param_to_ll_old = self._param_to_ll.copy() self._spawn_parameter_to_ll_mapping(model, plan) model.update() self._bexpr_to_pred = {} obj_bexprs = self._get_trajopt_obj(plan) self._add_obj_bexprs(obj_bexprs) self._add_all_timesteps_of_actions(plan, priority=1, add_nonlin=True, verbose=False) return self._prob.get_value(penalty_coeff)
def _solve_opt_prob(self, plan, priority, callback=None, active_ts=None, verbose=False, resample=True): ## active_ts is the inclusive timesteps to include ## in the optimization if active_ts==None: active_ts = (0, plan.horizon-1) model = grb.Model() model.params.OutputFlag = 0 self._prob = Prob(model, callback=callback) # param_to_ll_old = self._param_to_ll.copy() self._spawn_parameter_to_ll_mapping(model, plan, active_ts) model.update() self._bexpr_to_pred = {} if priority == -1: obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_first_and_last_timesteps_of_actions(plan, priority=-1, active_ts=active_ts, verbose=verbose) tol = 1e-2 elif priority == 0: ## this should only get called with a full plan for now assert active_ts == (0, plan.horizon-1) obj_bexprs = [] if resample: failed_preds = plan.get_failed_preds() ## this is an objective that places ## a high value on matching the resampled values obj_bexprs.extend(self._resample(plan, failed_preds)) ## solve an optimization movement primitive to ## transfer current trajectories obj_bexprs.extend(self._get_transfer_obj(plan, self.transfer_norm)) self._add_obj_bexprs(obj_bexprs) # self._add_first_and_last_timesteps_of_actions( # plan, priority=0, add_nonlin=False) self._add_first_and_last_timesteps_of_actions( plan, priority=-1, add_nonlin=True, verbose=verbose) self._add_all_timesteps_of_actions( plan, priority=0, add_nonlin=False, verbose=verbose) tol = 1e-2 elif priority == 1: obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_all_timesteps_of_actions(plan, priority=1, add_nonlin=True, active_ts=active_ts, verbose=verbose) tol=1e-4 solv = Solver() solv.initial_penalty_coeff = self.init_penalty_coeff success = solv.solve(self._prob, method='penalty_sqp', tol=tol, verbose=verbose) self._update_ll_params() self._failed_groups = self._prob.nonconverged_groups return success
def monitor_update(self, plan, update_values, callback=None, n_resamples=5, active_ts=None, verbose=False): print 'Resolving after environment update...\n' if callback is not None: viewer = callback() if active_ts==None: active_ts = (0, plan.horizon-1) plan.save_free_attrs() model = grb.Model() model.params.OutputFlag = 0 self._prob = Prob(model, callback=callback) # _free_attrs is paied attentioned in here self._spawn_parameter_to_ll_mapping(model, plan, active_ts) model.update() self._bexpr_to_pred = {} tol = 1e-3 obj_bexprs = [] rs_obj = self._update(plan, update_values) obj_bexprs.extend(self._get_transfer_obj(plan, self.transfer_norm)) self._add_all_timesteps_of_actions(plan, priority=MAX_PRIORITY, add_nonlin=True, active_ts= active_ts, verbose=verbose) obj_bexprs.extend(rs_obj) self._add_obj_bexprs(obj_bexprs) initial_trust_region_size = 1e-2 solv = Solver() solv.initial_trust_region_size = initial_trust_region_size solv.initial_penalty_coeff = self.init_penalty_coeff solv.max_merit_coeff_increases = self.max_merit_coeff_increases success = solv.solve(self._prob, method='penalty_sqp', tol=tol, verbose=verbose) self._update_ll_params() if DEBUG: assert not plan.has_nan(active_ts) plan.restore_free_attrs() self.reset_variable() print "monitor_update\n" return success
class NAMOSolver(LLSolver): def __init__(self, early_converge=True, transfer_norm='min-vel'): self.transfer_coeff = 1e1 self.rs_coeff = 1e6 self.init_penalty_coeff = 1e2 self.child_solver = None self._param_to_ll = {} self._failed_groups = [] self.early_converge=early_converge self.transfer_norm = transfer_norm def backtrack_solve(self, plan, callback=None, verbose=False): plan.save_free_attrs() success = self._backtrack_solve(plan, callback, anum=0, verbose=verbose) plan.restore_free_attrs() return success def _backtrack_solve(self, plan, callback=None, anum=0, verbose=False): if anum > len(plan.actions) - 1: return True a = plan.actions[anum] active_ts = a.active_timesteps inits = {} if a.name == 'moveto': ## find possible values for the final pose rs_param = a.params[2] elif a.name == 'movetoholding': ## find possible values for the final pose rs_param = a.params[2] elif a.name == 'grasp': ## sample the grasp/grasp_pose rs_param = a.params[4] elif a.name == 'putdown': ## sample the end pose rs_param = a.params[4] else: raise NotImplemented def recursive_solve(): ## don't optimize over any params that are already set old_params_free = {} for p in plan.params.itervalues(): if p.is_symbol(): if p not in a.params: continue old_params_free[p] = p._free_attrs['value'].copy() p._free_attrs['value'][:] = 0 else: old_params_free[p] = p._free_attrs['pose'][:, active_ts[1]].copy() p._free_attrs['pose'][:, active_ts[1]] = 0 self.child_solver = NAMOSolver() if self.child_solver._backtrack_solve(plan, callback=callback, anum=anum+1, verbose=verbose): return True ## reset free_attrs for p in a.params: if p.is_symbol(): if p not in a.params: continue p._free_attrs['value'] = old_params_free[p] else: p._free_attrs['pose'][:, active_ts[1]] = old_params_free[p] return False if not np.all(rs_param._free_attrs['value']): ## this parameter is fixed if callback is not None: callback_a = lambda: callback(a) else: callback_a = None self.child_solver = NAMOSolver() success = self.child_solver.solve(plan, callback=callback_a, n_resamples=0, active_ts = active_ts, verbose=verbose, force_init=True) if not success: ## if planning fails we're done return False ## no other options, so just return here return recursive_solve() ## so that this won't be optimized over rs_free = rs_param._free_attrs['value'].copy() rs_param._free_attrs['value'][:] = 0 targets = plan.get_param('InContact', 2, {1:rs_param}, negated=False) if len(targets) > 1: import pdb; pdb.set_trace() if callback is not None: callback_a = lambda: callback(a) else: callback_a = None robot_poses = [] if len(targets) == 0 or np.all(targets[0]._free_attrs['value']): ## sample 4 possible poses coords = list(itertools.product(range(WIDTH), range(HEIGHT))) random.shuffle(coords) robot_poses = [np.array(x)[:, None] for x in coords[:4]] elif np.any(targets[0]._free_attrs['value']): ## there shouldn't be only some free_attrs set raise NotImplementedError else: grasp_dirs = [np.array([0, -1]), np.array([1, 0]), np.array([0, 1]), np.array([-1, 0])] grasp_len = plan.params['pr2'].geom.radius + targets[0].geom.radius - dsafe for g_dir in grasp_dirs: grasp = (g_dir*grasp_len).reshape((2, 1)) robot_poses.append(targets[0].value + grasp) for rp in robot_poses: rs_param.value = rp success = False self.child_solver = NAMOSolver() success = self.child_solver.solve(plan, callback=callback_a, n_resamples=0, active_ts = active_ts, verbose=verbose, force_init=True) if success: if recursive_solve(): break else: success = False rs_param._free_attrs['value'] = rs_free return success def solve(self, plan, callback=None, n_resamples=5, active_ts=None, verbose=False, force_init=False): success = False if force_init or not plan.initialized: ## solve at priority -1 to get an initial value for the parameters self._solve_opt_prob(plan, priority=-1, callback=callback, active_ts=active_ts, verbose=verbose) plan.initialized=True success = self._solve_opt_prob(plan, priority=1, callback=callback, active_ts=active_ts, verbose=verbose) success = plan.satisfied(active_ts) if success: return success for _ in range(n_resamples): ## refinement loop ## priority 0 resamples the first failed predicate in the plan ## and then solves a transfer optimization that only includes linear constraints self._solve_opt_prob(plan, priority=0, callback=callback, active_ts=active_ts, verbose=verbose) success = self._solve_opt_prob(plan, priority=1, callback=callback, active_ts=active_ts, verbose=verbose) success = plan.satisfied(active_ts) if success: return success return success # @profile def _solve_opt_prob(self, plan, priority, callback=None, active_ts=None, verbose=False, resample=True): ## active_ts is the inclusive timesteps to include ## in the optimization if active_ts==None: active_ts = (0, plan.horizon-1) model = grb.Model() model.params.OutputFlag = 0 self._prob = Prob(model, callback=callback) # param_to_ll_old = self._param_to_ll.copy() self._spawn_parameter_to_ll_mapping(model, plan, active_ts) model.update() self._bexpr_to_pred = {} if priority == -1: obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_first_and_last_timesteps_of_actions(plan, priority=-1, active_ts=active_ts, verbose=verbose) tol = 1e-2 elif priority == 0: ## this should only get called with a full plan for now assert active_ts == (0, plan.horizon-1) obj_bexprs = [] if resample: failed_preds = plan.get_failed_preds() ## this is an objective that places ## a high value on matching the resampled values obj_bexprs.extend(self._resample(plan, failed_preds)) ## solve an optimization movement primitive to ## transfer current trajectories obj_bexprs.extend(self._get_transfer_obj(plan, self.transfer_norm)) self._add_obj_bexprs(obj_bexprs) # self._add_first_and_last_timesteps_of_actions( # plan, priority=0, add_nonlin=False) self._add_first_and_last_timesteps_of_actions( plan, priority=-1, add_nonlin=True, verbose=verbose) self._add_all_timesteps_of_actions( plan, priority=0, add_nonlin=False, verbose=verbose) tol = 1e-2 elif priority == 1: obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_all_timesteps_of_actions(plan, priority=1, add_nonlin=True, active_ts=active_ts, verbose=verbose) tol=1e-4 solv = Solver() solv.initial_penalty_coeff = self.init_penalty_coeff success = solv.solve(self._prob, method='penalty_sqp', tol=tol, verbose=verbose) self._update_ll_params() self._failed_groups = self._prob.nonconverged_groups return success def get_value(self, plan, penalty_coeff=1e0): model = grb.Model() model.params.OutputFlag = 0 self._prob = Prob(model) # param_to_ll_old = self._param_to_ll.copy() self._spawn_parameter_to_ll_mapping(model, plan) model.update() self._bexpr_to_pred = {} obj_bexprs = self._get_trajopt_obj(plan) self._add_obj_bexprs(obj_bexprs) self._add_all_timesteps_of_actions(plan, priority=1, add_nonlin=True, verbose=False) return self._prob.get_value(penalty_coeff) def _get_transfer_obj(self, plan, norm): transfer_objs = [] if norm in ['min-vel', 'l2']: for param in plan.params.values(): # if param._type in ['Robot', 'Can']: K = 2 if param.is_symbol(): T = 1 pose = param.value else: T = plan.horizon pose = param.pose assert (K, T) == pose.shape KT = K*T if norm == 'min-vel' and not param.is_symbol(): v = -1 * np.ones((KT - K, 1)) d = np.vstack((np.ones((KT - K, 1)), np.zeros((K, 1)))) # [:,0] allows numpy to see v and d as one-dimensional so # that numpy will create a diagonal matrix with v and d as a diagonal P = np.diag(v[:, 0], K) + np.diag(d[:, 0]) Q = np.dot(np.transpose(P), P) else: ## l2 Q = np.eye(KT) cur_pose = pose.reshape((KT, 1), order='F') A = -2*cur_pose.T.dot(Q) b = cur_pose.T.dot(Q.dot(cur_pose)) # QuadExpr is 0.5*x^Tx + Ax + b quad_expr = QuadExpr(2*self.transfer_coeff*Q, self.transfer_coeff*A, self.transfer_coeff*b) ll_param = self._param_to_ll[param] if param.is_symbol(): ll_grb_vars = ll_param.value.reshape((KT, 1), order='F') else: ll_grb_vars = ll_param.pose.reshape((KT, 1), order='F') bexpr = BoundExpr(quad_expr, Variable(ll_grb_vars, cur_pose)) transfer_objs.append(bexpr) elif norm == 'straightline': return self._get_trajopt_obj(plan) else: raise NotImplementedError return transfer_objs def _resample(self, plan, preds): val, attr_inds = None, None for negated, pred, t in preds: ## returns a vector of new values and an ## attr_inds (OrderedDict) that gives the mapping ## to parameter attributes if (self.early_converge and self._failed_groups != ['all'] and self._failed_groups != [] and not np.any([param.name in self._failed_groups for param in pred.params])): ## using early converge and one group didn't converge ## but no parameter from this pred in that group continue val, attr_inds = pred.resample(negated, t, plan) ## no resample defined for that pred if val is not None: break if val is None: # import pdb; pdb.set_trace() return [] t_local = t bexprs = [] i = 0 for p in attr_inds: ## get the ll_param for p and gurobi variables ll_p = self._param_to_ll[p] if p.is_symbol(): t_local = 0 n_vals = 0 grb_vars = [] for attr, ind_arr in attr_inds[p]: n_vals += len(ind_arr) grb_vars.extend( list(getattr(ll_p, attr)[ind_arr, t_local])) for j, grb_var in enumerate(grb_vars): ## create an objective saying stay close to this value ## e(x) = x^2 - 2*val[i+j]*x + val[i+j]^2 Q = np.eye(1) A = -2*val[i+j]*np.ones((1, 1)) b = np.ones((1, 1))*np.power(val[i+j], 2) # QuadExpr is 0.5*x^Tx + Ax + b quad_expr = QuadExpr(2*Q*self.rs_coeff, A*self.rs_coeff, b*self.rs_coeff) v_arr = np.array([grb_var]).reshape((1, 1), order='F') init_val = np.ones((1, 1))*val[i+j] bexpr = BoundExpr(quad_expr, Variable(v_arr, val[i+j].reshape((1, 1)))) bexprs.append(bexpr) i += n_vals return bexprs def _add_pred_dict(self, pred_dict, effective_timesteps, add_nonlin=True, priority=MAX_PRIORITY, verbose=False): # verbose=True ## for debugging ignore_preds = [] priority = np.maximum(priority, 0) if not pred_dict['hl_info'] == "hl_state": start, end = pred_dict['active_timesteps'] active_range = range(start, end+1) if verbose: print "pred being added: ", pred_dict print active_range, effective_timesteps negated = pred_dict['negated'] pred = pred_dict['pred'] if pred.get_type() in ignore_preds: return if pred.priority > priority: return assert isinstance(pred, common_predicates.ExprPredicate) expr = pred.get_expr(negated) for t in effective_timesteps: if t in active_range: if expr is not None: if add_nonlin or isinstance(expr.expr, AffExpr): if verbose: print "expr being added at time ", t var = self._spawn_sco_var_for_pred(pred, t) bexpr = BoundExpr(expr, var) self._bexpr_to_pred[bexpr] = (negated, pred, t) groups = ['all'] if self.early_converge: ## this will check for convergence per parameter ## this is good if e.g., a single trajectory quickly ## gets stuck groups.extend([param.name for param in pred.params]) self._prob.add_cnt_expr(bexpr, groups) def _add_first_and_last_timesteps_of_actions(self, plan, priority = MAX_PRIORITY, add_nonlin=False, active_ts=None, verbose=False): if active_ts==None: active_ts = (0, plan.horizon-1) for action in plan.actions: action_start, action_end = action.active_timesteps ## only add an action if action_start >= active_ts[1] and action_start > active_ts[0]: continue if action_end < active_ts[0]: continue for pred_dict in action.preds: if action_start >= active_ts[0]: self._add_pred_dict(pred_dict, [action_start], priority=priority, add_nonlin=add_nonlin, verbose=verbose) if action_end <= active_ts[1]: self._add_pred_dict(pred_dict, [action_end], priority=priority, add_nonlin=add_nonlin, verbose=verbose) ## add all of the linear ineqs timesteps = range(max(action_start+1, active_ts[0]), min(action_end, active_ts[1])) for pred_dict in action.preds: self._add_pred_dict(pred_dict, timesteps, add_nonlin=False, priority=priority, verbose=verbose) def _add_all_timesteps_of_actions(self, plan, priority=MAX_PRIORITY, add_nonlin=True, active_ts=None, verbose=False): if active_ts==None: active_ts = (0, plan.horizon-1) for action in plan.actions: action_start, action_end = action.active_timesteps if action_start >= active_ts[1] and action_start > active_ts[0]: continue if action_end < active_ts[0]: continue timesteps = range(max(action_start, active_ts[0]), min(action_end, active_ts[1])+1) for pred_dict in action.preds: self._add_pred_dict(pred_dict, timesteps, priority=priority, add_nonlin=add_nonlin, verbose=verbose) def _update_ll_params(self): for ll_param in self._param_to_ll.values(): ll_param.update_param() if self.child_solver: self.child_solver._update_ll_params() def _spawn_parameter_to_ll_mapping(self, model, plan, active_ts=None): if active_ts == None: active_ts=(0, plan.horizon-1) horizon = active_ts[1] - active_ts[0] + 1 self._param_to_ll = {} self.ll_start = active_ts[0] for param in plan.params.values(): ll_param = LLParam(model, param, horizon, active_ts) ll_param.create_grb_vars() self._param_to_ll[param] = ll_param model.update() for ll_param in self._param_to_ll.values(): ll_param.batch_add_cnts() def _add_obj_bexprs(self, obj_bexprs): for bexpr in obj_bexprs: self._prob.add_obj_expr(bexpr) def _get_trajopt_obj(self, plan, active_ts=None): if active_ts == None: active_ts = (0, plan.horizon-1) start, end = active_ts traj_objs = [] for param in plan.params.values(): if param not in self._param_to_ll: continue if param._type in ['Robot', 'Can']: T = end - start + 1 K = 2 pose = param.pose KT = K*T v = -1 * np.ones((KT - K, 1)) d = np.vstack((np.ones((KT - K, 1)), np.zeros((K, 1)))) # [:,0] allows numpy to see v and d as one-dimensional so # that numpy will create a diagonal matrix with v and d as a diagonal P = np.diag(v[:, 0], K) + np.diag(d[:, 0]) Q = np.dot(np.transpose(P), P) Q *= TRAJOPT_COEFF quad_expr = QuadExpr(Q, np.zeros((1,KT)), np.zeros((1,1))) robot_ll = self._param_to_ll[param] robot_ll_grb_vars = robot_ll.pose.reshape((KT, 1), order='F') init_val = param.pose[:, start:end+1].reshape((KT, 1), order='F') bexpr = BoundExpr(quad_expr, Variable(robot_ll_grb_vars, init_val)) traj_objs.append(bexpr) return traj_objs
class DrivingSolver(LLSolver): def __init__(self, early_converge=False, transfer_norm='min-vel'): # To avoid numerical difficulties during optimization, try keep # range of coefficeint within 1e9 # (largest_coefficient/smallest_coefficient < 1e9) self.transfer_coeff = 1e0 self.rs_coeff = 5e1 self.trajopt_coeff = 1e0 self.initial_trust_region_size = 1e-2 self.init_penalty_coeff = 4e3 self.smooth_penalty_coeff = 7e4 self.max_merit_coeff_increases = 5 self._param_to_ll = {} self.early_converge=early_converge self.child_solver = None self.solve_priorities = [0, 1, 2, 3] self.transfer_norm = transfer_norm self.grb_init_mapping = {} self.var_list = [] self._grb_to_var_ind = {} self.tol = 1e-3 def _solve_helper(self, plan, callback, active_ts, verbose): # certain constraints should be solved first success = False for priority in self.solve_priorities: success = self._solve_opt_prob(plan, priority=priority, callback=callback, active_ts=active_ts, verbose=verbose) return success def backtrack_solve(self, plan, callback=None, verbose=False, n_resamples=5): plan.save_free_attrs() success = self._backtrack_solve(plan, callback, anum=0, verbose=verbose, n_resamples=n_resamples) # plan.restore_free_attrs() return success #@profile def _backtrack_solve(self, plan, callback=None, anum=0, verbose=False, amax = None, n_resamples=5): # if anum == 2: # import ipdb; ipdb.set_trace() if amax is None: amax = len(plan.actions) - 1 if anum > amax: return True a = plan.actions[anum] print "backtracking Solve on {}".format(a.name) active_ts = a.active_timesteps inits = {} if a.name == 'drive_down_road': rs_param = a.params[3] else: raise NotImplemented def recursive_solve(): # import ipdb; ipdb.set_trace() ## don't optimize over any params that are already set old_params_free = {} for p in plan.params.itervalues(): if p.is_symbol(): if p not in a.params: continue old_params_free[p] = p._free_attrs p._free_attrs = {} for attr in old_params_free[p].keys(): p._free_attrs[attr] = np.zeros(old_params_free[p][attr].shape) else: p_attrs = {} old_params_free[p] = p_attrs for attr in p._free_attrs: p_attrs[attr] = p._free_attrs[attr][:, active_ts[1]].copy() p._free_attrs[attr][:, active_ts[1]] = 0 self.child_solver = DrivingSolver() success = self.child_solver._backtrack_solve(plan, callback=callback, anum=anum+1, verbose=verbose, amax = amax) # reset free_attrs for p in plan.params.itervalues(): if p.is_symbol(): if p not in a.params: continue p._free_attrs = old_params_free[p] else: for attr in p._free_attrs: p._free_attrs[attr][:, active_ts[1]] = old_params_free[p][attr] return success # if there is no parameter to resample or some part of rs_param is fixed, then go ahead optimize over this action if rs_param is None or sum([not np.all(rs_param._free_attrs[attr]) for attr in rs_param._free_attrs.keys() ]): ## this parameter is fixed if callback is not None: callback_a = lambda: callback(a) else: callback_a = None self.child_solver = DrivingSolver() success = self.child_solver.solve(plan, callback=callback_a, n_resamples=n_resamples, active_ts = active_ts, verbose=verbose, force_init=True) if not success: ## if planning fails we're done return False ## no other options, so just return here return recursive_solve() ## so that this won't be optimized over rs_free = rs_param._free_attrs rs_param._free_attrs = {} for attr in rs_free.keys(): rs_param._free_attrs[attr] = np.zeros(rs_free[attr].shape) """ sampler_begin """ vehicle_poses = self.obj_pose_suggester(plan, anum, resample_size=3) if not vehicle_poses: success = False # print "Using Random Poses" # robot_poses = self.random_pose_suggester(plan, anum, resample_size = 5) """ sampler end """ if callback is not None: callback_a = lambda: callback(a) else: callback_a = None for rp in vehicle_poses: for attr, val in rp.iteritems(): setattr(rs_param, attr, val) success = False self.child_solver = DrivingSolver() success = self.child_solver.solve(plan, callback=callback_a, n_resamples=n_resamples, active_ts = active_ts, verbose=verbose, force_init=True) if success: if recursive_solve(): break else: success = False rs_param._free_attrs = rs_free return success #@profile def random_pose_suggester(self, plan, anum, resample_size=5): pass #@profile def obj_pose_suggester(self, plan, anum, resample_size=20): vehicle_poses = [] assert anum + 1 <= len(plan.actions) if anum + 1 < len(plan.actions): act, next_act = plan.actions[anum], plan.actions[anum+1] else: act, next_act = plan.actions[anum], None vehicle = act.params[0] start_ts, end_ts = act.active_timesteps for i in range(resample_size): if act.name == "drive_down_road": road = act.params[1] init_pos = act.params[2] direction = road.geom.direction dist = road.geom.length / 2 x = road.geom.x y = road.geom.y final_xy = x + dist * np.cos(direction), y + dist * np.sin(direction) vehicle_poses.append({'xy': final_xy, 'theta': direction, 'vel': 0, 'phi': 0, 'u1': 0, 'u2': 0, 'value': 0}) else: raise NotImplementedError if not vehicle_poses: print "Unable to find IK" # import ipdb; ipdb.set_trace() return vehicle_poses #@profile def solve(self, plan, callback=None, n_resamples=5, active_ts=None, verbose=False, force_init=False): success = False if callback is not None: viewer = callback() if force_init or not plan.initialized: self._solve_opt_prob(plan, priority=-2, callback=callback, active_ts=active_ts, verbose=verbose) # self._solve_opt_prob(plan, priority=-1, callback=callback, # active_ts=active_ts, verbose=verbose) plan.initialized=True if success or len(plan.get_failed_preds(active_ts = active_ts)) == 0: return True for priority in self.solve_priorities: for attempt in range(n_resamples): ## refinement loop success = self._solve_opt_prob(plan, priority=priority, callback=callback, active_ts=active_ts, verbose=verbose) try: if DEBUG: plan.check_cnt_violation(active_ts = active_ts, priority = priority, tol = 1e-3) except: print "error in predicate checking" if success: break self._solve_opt_prob(plan, priority=priority, callback=callback, active_ts=active_ts, verbose=verbose, resample = True) # if len(plan.get_failed_preds(active_ts=active_ts, tol=1e-3)) > 9: # break print "resample attempt: {}".format(attempt) try: if DEBUG: plan.check_cnt_violation(active_ts = active_ts, priority = priority, tol = 1e-3) except: print "error in predicate checking" assert not (success and not len(plan.get_failed_preds(active_ts = active_ts, priority = priority, tol = 1e-3)) == 0) if not success: return False return success #@profile def _solve_opt_prob(self, plan, priority, callback=None, init=True, active_ts=None, verbose=False, resample=False, smoothing = False): if callback is not None: viewer = callback() self.plan = plan vehicle = plan.params['vehicle'] if active_ts==None: active_ts = (0, plan.horizon-1) plan.save_free_attrs() model = grb.Model() model.params.OutputFlag = 0 self._prob = Prob(model, callback=callback) self._spawn_parameter_to_ll_mapping(model, plan, active_ts) model.update() initial_trust_region_size = self.initial_trust_region_size if resample: tol = 1e-3 def variable_helper(): error_bin = [] for sco_var in self._prob._vars: for grb_var, val in zip(sco_var.get_grb_vars(), sco_var.get_value()): grb_name = grb_var[0].VarName one, two = grb_name.find('-'), grb_name.find('-(') param_name = grb_name[1:one] attr = grb_name[one+1:two] index = eval(grb_name[two+1:-1]) param = plan.params[param_name] if not np.allclose(val, getattr(param, attr)[index]): error_bin.append((grb_name, val, getattr(param, attr)[index])) if len(error_bin) != 0: print "something wrong" if DEBUG: import ipdb; ipdb.set_trace() """ When Optimization fails, resample new values for certain timesteps of the trajectory and solver as initialization """ obj_bexprs = [] ## this is an objective that places ## a high value on matching the resampled values failed_preds = plan.get_failed_preds(active_ts = active_ts, priority=priority, tol = tol) rs_obj = self._resample(plan, failed_preds, sample_all = True) # import ipdb; ipdb.set_trace() # _get_transfer_obj returns the expression saying the current trajectory should be close to it's previous trajectory. # obj_bexprs.extend(self._get_trajopt_obj(plan, active_ts)) obj_bexprs.extend(self._get_transfer_obj(plan, self.transfer_norm)) self._add_all_timesteps_of_actions(plan, priority=priority, add_nonlin=False, active_ts= active_ts, verbose=verbose) obj_bexprs.extend(rs_obj) self._add_obj_bexprs(obj_bexprs) initial_trust_region_size = 1e3 # import ipdb; ipdb.set_trace() else: self._bexpr_to_pred = {} if priority == -2: """ Initialize an linear trajectory while enforceing the linear constraints in the intermediate step. """ obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_first_and_last_timesteps_of_actions(plan, priority=MAX_PRIORITY, active_ts=active_ts, verbose=verbose, add_nonlin=False) tol = 1e-1 initial_trust_region_size = 1e3 elif priority == -1: """ Solve the optimization problem while enforcing every constraints. """ obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_first_and_last_timesteps_of_actions(plan, priority=MAX_PRIORITY, active_ts=active_ts, verbose=verbose, add_nonlin=True) tol = 1e-1 elif priority >= 0: obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_all_timesteps_of_actions(plan, priority=priority, add_nonlin=True, active_ts=active_ts, verbose=verbose) tol=1e-3 solv = Solver() solv.initial_trust_region_size = initial_trust_region_size if smoothing: solv.initial_penalty_coeff = self.smooth_penalty_coeff else: solv.initial_penalty_coeff = self.init_penalty_coeff solv.max_merit_coeff_increases = self.max_merit_coeff_increases success = solv.solve(self._prob, method='penalty_sqp', tol=tol, verbose=verbose) if priority == MAX_PRIORITY: success = success or len(plan.get_failed_preds(tol=tol, active_ts=active_ts, priority=priority)) == 0 self._update_ll_params() if DEBUG: assert not plan.has_nan(active_ts) if resample: # During resampling phases, there must be changes added to sampling_trace if len(plan.sampling_trace) > 0 and 'reward' not in plan.sampling_trace[-1]: reward = 0 if len(plan.get_failed_preds(active_ts = active_ts, priority=priority)) == 0: reward = len(plan.actions) else: failed_t = plan.get_failed_pred(active_ts=(0,active_ts[1]), priority=priority)[2] for i in range(len(plan.actions)): if failed_t > plan.actions[i].active_timesteps[1]: reward += 1 plan.sampling_trace[-1]['reward'] = reward ##Restore free_attrs values plan.restore_free_attrs() self.reset_variable() print "priority: {}\n".format(priority) return success #@profile def traj_smoother(self, plan, callback=None, n_resamples=5, verbose=False): # plan.save_free_attrs() a_num = 0 success = True while a_num < len(plan.actions) - 1: act_1 = plan.actions[a_num] act_2 = plan.actions[a_num+1] active_ts = (act_1.active_timesteps[0], act_2.active_timesteps[1]) # print active_ts old_params_free = {} for p in plan.params.itervalues(): if p.is_symbol(): if p in act_1.params or p in act_2.params: continue old_params_free[p] = p._free_attrs p._free_attrs = {} for attr in old_params_free[p].keys(): p._free_attrs[attr] = np.zeros(old_params_free[p][attr].shape) else: p_attrs = {} old_params_free[p] = p_attrs for attr in p._free_attrs: p_attrs[attr] = [p._free_attrs[attr][:, :active_ts[0]].copy(), p._free_attrs[attr][:, active_ts[1]:].copy()] p._free_attrs[attr][:, active_ts[1]:] = 0 p._free_attrs[attr][:, :active_ts[0]] = 0 success = self._traj_smoother(plan, callback, n_resamples, active_ts, verbose) # reset free_attrs for p in plan.params.itervalues(): if p.is_symbol(): if p in act_1.params or p in act_2.params: continue p._free_attrs = old_params_free[p] else: for attr in p._free_attrs: p._free_attrs[attr][:, :active_ts[0]] = old_params_free[p][attr][0] p._free_attrs[attr][:, active_ts[1]:] = old_params_free[p][attr][1] if not success: return success print 'Actions: {} and {}'.format(plan.actions[a_num].name, plan.actions[a_num+1].name) a_num += 1 # try: # success = self._traj_smoother(plan, callback, n_resamples, active_ts, verbose) # except: # print "Error occured during planning, but not catched" # return False # plan.restore_free_attrs() return success # @profile def _traj_smoother(self, plan, callback=None, n_resamples=5, active_ts=None, verbose=False): print "Smoothing Trajectory..." priority = MAX_PRIORITY for attempt in range(n_resamples): # refinement loop print 'Smoother iteration #: {}\n'.format(attempt) success = self._solve_opt_prob(plan, priority=priority, callback=callback, active_ts=active_ts, verbose=verbose, resample = False, smoothing = True) if success: break plan.check_cnt_violation(tol = 1e-3) self._solve_opt_prob(plan, priority=priority, callback=callback, active_ts=active_ts, verbose=verbose, resample = True, smoothing = True) return success #@profile def _get_transfer_obj(self, plan, norm): """ This function returns the expression e(x) = P|x - cur|^2 Which says the optimized trajectory should be close to the previous trajectory. Where P is the KT x KT matrix, where Px is the difference of parameter's attributes' current value and parameter's next timestep value """ transfer_objs = [] if norm == 'min-vel': for param in plan.params.values(): for attr_name in param.__dict__.iterkeys(): attr_type = param.get_attr_type(attr_name) if issubclass(attr_type, Vector): param_ll = self._param_to_ll[param] if param.is_symbol(): T = 1 attr_val = getattr(param, attr_name) else: T = param_ll._horizon attr_val = getattr(param, attr_name)[:, param_ll.active_ts[0]:param_ll.active_ts[1]+1] K = attr_type.dim # pose = param.pose if DEBUG: assert (K, T) == attr_val.shape KT = K*T v = -1 * np.ones((KT - K, 1)) d = np.vstack((np.ones((KT - K, 1)), np.zeros((K, 1)))) # [:,0] allows numpy to see v and d as one-dimensional so # that numpy will create a diagonal matrix with v and d as a diagonal P = np.diag(v[:, 0], K) + np.diag(d[:, 0]) # P = np.eye(KT) Q = np.dot(np.transpose(P), P) if not param.is_symbol() else np.eye(KT) cur_val = attr_val.reshape((KT, 1), order='F') A = -2*cur_val.T.dot(Q) b = cur_val.T.dot(Q.dot(cur_val)) transfer_coeff = self.transfer_coeff/float(plan.horizon) # QuadExpr is 0.5*x^Tx + Ax + b quad_expr = QuadExpr(2*transfer_coeff*Q, transfer_coeff*A, transfer_coeff*b) ll_attr_val = getattr(param_ll, attr_name) param_ll_grb_vars = ll_attr_val.reshape((KT, 1), order='F') sco_var = self.create_variable(param_ll_grb_vars, cur_val) bexpr = BoundExpr(quad_expr, sco_var) transfer_objs.append(bexpr) else: raise NotImplemented return transfer_objs #@profile def create_variable(self, grb_vars, init_vals, save=False): """ if save is Ture Update the grb_init_mapping so that each grb_var is mapped to the right initial values. Then find the sco variables that includes the grb variables we are updating and change the corresponding initial values inside of it. if save is False Iterate the var_list and use the last initial value used for each gurobi, and construct the sco variables """ sco_var, grb_val_map, ret_val = None, {}, [] for grb, v in zip(grb_vars.flatten(), init_vals.flatten()): grb_name = grb.VarName if save: self.grb_init_mapping[grb_name] = v grb_val_map[grb_name] = self.grb_init_mapping.get(grb_name, v) ret_val.append(grb_val_map[grb_name]) if grb_name in self._grb_to_var_ind.keys(): for var, i in self._grb_to_var_ind[grb_name]: var._value[i] = grb_val_map[grb_name] if np.all(var._grb_vars is grb_vars): sco_var = var if sco_var is None: sco_var = Variable(grb_vars, np.array(ret_val).reshape((len(ret_val), 1))) self.var_list.append(sco_var) for i, grb in enumerate(grb_vars.flatten()): index_val_list = self._grb_to_var_ind.get(grb.VarName, []) index_val_list.append((sco_var, i)) self._grb_to_var_ind[grb.VarName] = index_val_list if DEBUG: self.check_sync() return sco_var #@profile def check_grb_sync(self, grb_name): for var, i in self._grb_to_var_ind[grb_name]: print var._grb_vars[i][0].VarName, var._value[i] #@profile def check_sync(self): """ This function checks whether all sco variable are synchronized """ grb_val_map = {} correctness = True for grb_name in self._grb_to_var_ind.keys(): for var, i in self._grb_to_var_ind[grb_name]: try: correctness = np.allclose(grb_val_map[grb_name], var._value[i], equal_nan=True) except KeyError: grb_val_map[grb_name] = var._value[i] except: print "something went wrong" import ipdb; ipdb.set_trace() if not correctness: import ipdb; ipdb.set_trace() def reset_variable(self): self.grb_init_mapping = {} self.var_list = [] self._grb_to_var_ind = {} def monitor_update(self, plan, update_values, callback=None, n_resamples=5, active_ts=None, verbose=False): print 'Resolving after environment update...\n' if callback is not None: viewer = callback() if active_ts==None: active_ts = (0, plan.horizon-1) plan.save_free_attrs() model = grb.Model() model.params.OutputFlag = 0 self._prob = Prob(model, callback=callback) # _free_attrs is paied attentioned in here self._spawn_parameter_to_ll_mapping(model, plan, active_ts) model.update() self._bexpr_to_pred = {} tol = 1e-3 obj_bexprs = [] rs_obj = self._update(plan, update_values) obj_bexprs.extend(self._get_transfer_obj(plan, self.transfer_norm)) self._add_all_timesteps_of_actions(plan, priority=MAX_PRIORITY, add_nonlin=True, active_ts= active_ts, verbose=verbose) obj_bexprs.extend(rs_obj) self._add_obj_bexprs(obj_bexprs) initial_trust_region_size = 1e-2 solv = Solver() solv.initial_trust_region_size = initial_trust_region_size solv.initial_penalty_coeff = self.init_penalty_coeff solv.max_merit_coeff_increases = self.max_merit_coeff_increases success = solv.solve(self._prob, method='penalty_sqp', tol=tol, verbose=verbose) self._update_ll_params() if DEBUG: assert not plan.has_nan(active_ts) plan.restore_free_attrs() self.reset_variable() print "monitor_update\n" return success def _update(self, plan, update_values): bexprs = [] for val, attr_inds in update_values: if val is not None: for p in attr_inds: ## get the ll_param for p and gurobi variables ll_p = self._param_to_ll[p] n_vals, i = 0, 0 grb_vars = [] for attr, ind_arr, t in attr_inds[p]: for j, grb_var in enumerate(getattr(ll_p, attr)[ind_arr, t-ll_p.active_ts[0]].flatten()): Q = np.eye(1) A = -2*val[p][i+j]*np.ones((1, 1)) b = np.ones((1, 1))*np.power(val[p][i+j], 2) resample_coeff = self.rs_coeff/float(plan.horizon) # QuadExpr is 0.5*x^Tx + Ax + b quad_expr = QuadExpr(2*Q*resample_coeff, A*resample_coeff, b*resample_coeff) v_arr = np.array([grb_var]).reshape((1, 1), order='F') init_val = np.ones((1, 1))*val[p][i+j] sco_var = self.create_variable(v_arr, np.array([val[p][i+j]]).reshape((1, 1)), save = True) bexpr = BoundExpr(quad_expr, sco_var) bexprs.append(bexpr) i += len(ind_arr) return bexprs #@profile def _resample(self, plan, preds, sample_all = False): """ This function first calls fail predicate's resample function, then, uses the resampled value to create a square difference cost function e(x) = |x - rs_val|^2 that will be minimized later. rs_val is the resampled value """ bexprs = [] val, attr_inds = None, None pred_type = {} for negated, pred, t in preds: ## returns a vector of new values and an ## attr_inds (OrderedDict) that gives the mapping ## to parameter attributes if pred_type.get(pred.get_type, False): continue val, attr_inds = pred.resample(negated, t, plan) if val is not None: pred_type[pred.get_type] = True ## if no resample defined for that pred, continue if val is not None: for p in attr_inds: ## get the ll_param for p and gurobi variables ll_p = self._param_to_ll[p] n_vals, i = 0, 0 grb_vars = [] for attr, ind_arr, t in attr_inds[p]: for j, grb_var in enumerate(getattr(ll_p, attr)[ind_arr, t-ll_p.active_ts[0]].flatten()): Q = np.eye(1) A = -2*val[p][i+j]*np.ones((1, 1)) b = np.ones((1, 1))*np.power(val[p][i+j], 2) resample_coeff = self.rs_coeff/float(plan.horizon) # QuadExpr is 0.5*x^Tx + Ax + b quad_expr = QuadExpr(2*Q*resample_coeff, A*resample_coeff, b*resample_coeff) v_arr = np.array([grb_var]).reshape((1, 1), order='F') init_val = np.ones((1, 1))*val[p][i+j] sco_var = self.create_variable(v_arr, np.array([val[p][i+j]]).reshape((1, 1)), save = True) bexpr = BoundExpr(quad_expr, sco_var) bexprs.append(bexpr) i += len(ind_arr) if not sample_all: break return bexprs #@profile def _add_pred_dict(self, pred_dict, effective_timesteps, add_nonlin=True, priority=MAX_PRIORITY, verbose=False): """ This function creates constraints for the predicate and added to Prob class in sco. """ ## for debugging ignore_preds = [] priority = np.maximum(priority, 0) if not pred_dict['hl_info'] == "hl_state": start, end = pred_dict['active_timesteps'] active_range = range(start, end+1) if verbose: print "pred being added: ", pred_dict print active_range, effective_timesteps negated = pred_dict['negated'] pred = pred_dict['pred'] if pred.get_type() in ignore_preds: return if pred.priority > priority: return if DEBUG: assert isinstance(pred, common_predicates.ExprPredicate) expr = pred.get_expr(negated) if expr is not None: if add_nonlin or isinstance(expr.expr, AffExpr): for t in effective_timesteps: if t in active_range: if verbose: print "expr being added at time ", t var = self._spawn_sco_var_for_pred(pred, t) bexpr = BoundExpr(expr, var) # TODO: REMOVE line below, for tracing back predicate for debugging. if DEBUG: bexpr.source = (negated, pred, t) self._bexpr_to_pred[bexpr] = (negated, pred, t) groups = ['all'] if self.early_converge: ## this will check for convergence per parameter ## this is good if e.g., a single trajectory quickly ## gets stuck groups.extend([param.name for param in pred.params]) self._prob.add_cnt_expr(bexpr, groups) #@profile def _add_first_and_last_timesteps_of_actions(self, plan, priority = MAX_PRIORITY, add_nonlin=False, active_ts=None, verbose=False): """ Adding only non-linear constraints on the first and last timesteps of each action. """ if active_ts is None: active_ts = (0, plan.horizon-1) for action in plan.actions: action_start, action_end = action.active_timesteps ## only add an action if action_start >= active_ts[1] and action_start > active_ts[0]: continue if action_end < active_ts[0]: continue for pred_dict in action.preds: if action_start >= active_ts[0]: self._add_pred_dict(pred_dict, [action_start], priority=priority, add_nonlin=add_nonlin, verbose=verbose) if action_end <= active_ts[1]: self._add_pred_dict(pred_dict, [action_end], priority=priority, add_nonlin=add_nonlin, verbose=verbose) ## add all of the linear ineqs timesteps = range(max(action_start+1, active_ts[0]), min(action_end, active_ts[1])) for pred_dict in action.preds: self._add_pred_dict(pred_dict, timesteps, add_nonlin=False, priority=priority, verbose=verbose) #@profile def _add_all_timesteps_of_actions(self, plan, priority=MAX_PRIORITY, add_nonlin=True, active_ts=None, verbose=False): """ This function adds both linear and non-linear predicates from actions that are active within the range of active_ts. """ if active_ts==None: active_ts = (0, plan.horizon-1) for action in plan.actions: action_start, action_end = action.active_timesteps if action_start >= active_ts[1] and action_start > active_ts[0]: continue if action_end < active_ts[0]: continue timesteps = range(max(action_start, active_ts[0]), min(action_end, active_ts[1])+1) for pred_dict in action.preds: self._add_pred_dict(pred_dict, timesteps, priority=priority, add_nonlin=add_nonlin, verbose=verbose) #@profile def _update_ll_params(self): """ update plan's parameters from low level grb_vars. expected to be called after each optimization. """ for ll_param in self._param_to_ll.values(): ll_param.update_param() if self.child_solver: self.child_solver._update_ll_params() #@profile def _spawn_parameter_to_ll_mapping(self, model, plan, active_ts=None): """ This function creates low level parameters for each parameter in the plan, initialized he corresponding grb_vars for each attributes in each timestep, update the grb models adds in equality constraints, construct a dictionary as param-to-ll_param mapping. """ if active_ts == None: active_ts=(0, plan.horizon-1) horizon = active_ts[1] - active_ts[0] + 1 self._param_to_ll = {} self.ll_start = active_ts[0] for param in plan.params.values(): ll_param = LLParam(model, param, horizon, active_ts) ll_param.create_grb_vars() self._param_to_ll[param] = ll_param model.update() for ll_param in self._param_to_ll.values(): ll_param.batch_add_cnts() #@profile def _add_obj_bexprs(self, obj_bexprs): """ This function adds objective bounded expressions to the Prob class in sco. """ for bexpr in obj_bexprs: self._prob.add_obj_expr(bexpr) #@profile def _get_trajopt_obj(self, plan, active_ts=None): """ This function selects parameter of type Robot and Can and returns the expression e(x) = |Px|^2 Which optimize trajectory so that robot and can's attributes in current timestep is close to that of next timestep. forming a straight line between each end points. Where P is the KT x KT matrix, where Px is the difference of value in current timestep compare to next timestep """ if active_ts == None: active_ts = (0, plan.horizon-1) start, end = active_ts traj_objs = [] for param in plan.params.values(): if param not in self._param_to_ll: continue if isinstance(param, Object): for attr_name in param.__dict__.iterkeys(): attr_type = param.get_attr_type(attr_name) if issubclass(attr_type, Vector): T = end - start + 1 K = attr_type.dim attr_val = getattr(param, attr_name) KT = K*T v = -1 * np.ones((KT - K, 1)) d = np.vstack((np.ones((KT - K, 1)), np.zeros((K, 1)))) # [:,0] allows numpy to see v and d as one-dimensional so # that numpy will create a diagonal matrix with v and d as a diagonal P = np.diag(v[:, 0], K) + np.diag(d[:, 0]) Q = np.dot(np.transpose(P), P) Q *= self.trajopt_coeff/float(plan.horizon) quad_expr = None quad_expr = QuadExpr(Q, np.zeros((1,KT)), np.zeros((1,1))) param_ll = self._param_to_ll[param] ll_attr_val = getattr(param_ll, attr_name) param_ll_grb_vars = ll_attr_val.reshape((KT, 1), order='F') attr_val = getattr(param, attr_name) init_val = attr_val[:, start:end+1].reshape((KT, 1), order='F') sco_var = self.create_variable(param_ll_grb_vars, init_val) bexpr = BoundExpr(quad_expr, sco_var) traj_objs.append(bexpr) return traj_objs
def _solve_opt_prob(self, plan, priority, callback=None, init=True, active_ts=None, verbose=False, resample=False, smoothing = False): if callback is not None: viewer = callback() self.plan = plan vehicle = plan.params['vehicle'] if active_ts==None: active_ts = (0, plan.horizon-1) plan.save_free_attrs() model = grb.Model() model.params.OutputFlag = 0 self._prob = Prob(model, callback=callback) self._spawn_parameter_to_ll_mapping(model, plan, active_ts) model.update() initial_trust_region_size = self.initial_trust_region_size if resample: tol = 1e-3 def variable_helper(): error_bin = [] for sco_var in self._prob._vars: for grb_var, val in zip(sco_var.get_grb_vars(), sco_var.get_value()): grb_name = grb_var[0].VarName one, two = grb_name.find('-'), grb_name.find('-(') param_name = grb_name[1:one] attr = grb_name[one+1:two] index = eval(grb_name[two+1:-1]) param = plan.params[param_name] if not np.allclose(val, getattr(param, attr)[index]): error_bin.append((grb_name, val, getattr(param, attr)[index])) if len(error_bin) != 0: print "something wrong" if DEBUG: import ipdb; ipdb.set_trace() """ When Optimization fails, resample new values for certain timesteps of the trajectory and solver as initialization """ obj_bexprs = [] ## this is an objective that places ## a high value on matching the resampled values failed_preds = plan.get_failed_preds(active_ts = active_ts, priority=priority, tol = tol) rs_obj = self._resample(plan, failed_preds, sample_all = True) # import ipdb; ipdb.set_trace() # _get_transfer_obj returns the expression saying the current trajectory should be close to it's previous trajectory. # obj_bexprs.extend(self._get_trajopt_obj(plan, active_ts)) obj_bexprs.extend(self._get_transfer_obj(plan, self.transfer_norm)) self._add_all_timesteps_of_actions(plan, priority=priority, add_nonlin=False, active_ts= active_ts, verbose=verbose) obj_bexprs.extend(rs_obj) self._add_obj_bexprs(obj_bexprs) initial_trust_region_size = 1e3 # import ipdb; ipdb.set_trace() else: self._bexpr_to_pred = {} if priority == -2: """ Initialize an linear trajectory while enforceing the linear constraints in the intermediate step. """ obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_first_and_last_timesteps_of_actions(plan, priority=MAX_PRIORITY, active_ts=active_ts, verbose=verbose, add_nonlin=False) tol = 1e-1 initial_trust_region_size = 1e3 elif priority == -1: """ Solve the optimization problem while enforcing every constraints. """ obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_first_and_last_timesteps_of_actions(plan, priority=MAX_PRIORITY, active_ts=active_ts, verbose=verbose, add_nonlin=True) tol = 1e-1 elif priority >= 0: obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_all_timesteps_of_actions(plan, priority=priority, add_nonlin=True, active_ts=active_ts, verbose=verbose) tol=1e-3 solv = Solver() solv.initial_trust_region_size = initial_trust_region_size if smoothing: solv.initial_penalty_coeff = self.smooth_penalty_coeff else: solv.initial_penalty_coeff = self.init_penalty_coeff solv.max_merit_coeff_increases = self.max_merit_coeff_increases success = solv.solve(self._prob, method='penalty_sqp', tol=tol, verbose=verbose) if priority == MAX_PRIORITY: success = success or len(plan.get_failed_preds(tol=tol, active_ts=active_ts, priority=priority)) == 0 self._update_ll_params() if DEBUG: assert not plan.has_nan(active_ts) if resample: # During resampling phases, there must be changes added to sampling_trace if len(plan.sampling_trace) > 0 and 'reward' not in plan.sampling_trace[-1]: reward = 0 if len(plan.get_failed_preds(active_ts = active_ts, priority=priority)) == 0: reward = len(plan.actions) else: failed_t = plan.get_failed_pred(active_ts=(0,active_ts[1]), priority=priority)[2] for i in range(len(plan.actions)): if failed_t > plan.actions[i].active_timesteps[1]: reward += 1 plan.sampling_trace[-1]['reward'] = reward ##Restore free_attrs values plan.restore_free_attrs() self.reset_variable() print "priority: {}\n".format(priority) return success
class CanSolver(LLSolver): def __init__(self, early_converge=False): self.transfer_coeff = 1e1 self.rs_coeff = 1e6 self.initial_trust_region_size = 1e-1 self.init_penalty_coeff = 1e1 # self.init_penalty_coeff = 1e5 self.max_merit_coeff_increases = 5 self._param_to_ll = {} self.early_converge = early_converge self.child_solver = None def backtrack_solve(self, plan, callback=None, anum=0, verbose=False): if anum > len(plan.actions) - 1: return True a = plan.actions[anum] active_ts = a.active_timesteps inits = {} # Moveto -> Grasp -> Movetoholding -> Putdown if a.name == "moveto": ## moveto: (?robot - Robot ?start - RobotPose ?end - RobotPose) ## find possible values for the final pose rs_param = a.params[2] elif a.name == "movetoholding": ## movetoholding: (?robot - Robot ?start - RobotPose ?end - RobotPose ?c - Can) ## find possible values for the final pose rs_param = a.params[3] elif a.name == "grasp": ## grasp: (?robot - Robot ?can - Can ?target - Target ?sp - RobotPose ?ee - EEPose ?ep - RobotPose) ## sample the ee_pose rs_param = a.params[5] elif a.name == "putdown": ## putdown: (?robot - Robot ?can - Can ?target - Target ?sp - RobotPose ?ee - EEPose ?ep - RobotPose) rs_param = a.params[4] else: raise NotImplemented def recursive_solve(): ## don't optimize over any params that are already set old_params_free = {} for p in plan.params.itervalues(): old_param_map = {} if p.is_symbol(): if p not in a.params: continue for attr in attr_map[getattr(p, "_type")]: old_param_map[attr] = p._free_attrs[attr].copy() p._free_attrs[attr][:] = 0 else: for attr in attr_map[getattr(p, "_type")]: old_param_map[attr] = p._free_attrs[attr][:, active_ts[1]].copy() p._free_attrs[attr][:, active_ts[1]] = 0 old_params_free[p] = old_param_map self.child_solver = CanSolver() if self.child_solver.backtrack_solve(plan, callback=callback, anum=anum + 1, verbose=verbose): return True ## reset free_attrs for p in a.params: if p.is_symbol(): if p not in a.params: continue for attr in attr_map[getattr(p, "_type")]: p._free_attrs[attr] = old_params_free[p][attr] else: for attr in attr_map[getattr(p, "_type")]: p._free_attrs[attr][:, active_ts[1]] = old_params_free[p][attr] return False if rs_param.is_fixed(attr_map[getattr(rs_param, "_type")]): ## this parameter is fixed if callback is not None: callback_a = lambda: callback(a) else: callback_a = None self.child_solver = CanSolver() success = self.child_solver.solve( plan, callback=callback_a, n_resamples=0, active_ts=active_ts, verbose=verbose, force_init=True ) if not success: ## if planning fails we're done return False ## no other options, so just return here return recursive_solve() ## so that this won't be optimized over rs_free = {} for attr in attr_map[getattr(rs_param, "_type")]: rs_free[attr] = rs_param._free_attrs[attr].copy() rs_param._free_attrs[attr][:] = 0 # Target is needed to sample the ee_pose, ee_pose is needed to sample the robot_pose # InContact, Robot, EEPose, Target targets = plan.get_param("InContact", 2, {1: rs_param}, negated=False) # EEReachable, Robot, RobotPose, EEPose robot_pose = plan.get_param("EEReachable", 1, {2: rs_param}, negated=False) if len(targets) > 1: import pdb pdb.set_trace() if callback is not None: callback_a = lambda: callback(a) else: callback_a = None robot_poses = [] if len(targets) == 0 or np.all([targets[0]._free_attrs[attr] for attr in attr_map[targets[0]]]): ## sample 4 possible poses coords = list(itertools.product(range(WIDTH), range(HEIGHT))) random.shuffle(coords) robot_poses = [np.array(x)[:, None] for x in coords[:4]] elif np.any([targets[0]._free_attrs[attr] for attr in attr_map[targets[0]]]): ## there shouldn't be only some free_attrs set raise NotImplementedError else: grasp_dirs = [np.array([0, -1]), np.array([1, 0]), np.array([0, 1]), np.array([-1, 0])] grasp_len = plan.params["pr2"].geom.radius + targets[0].geom.radius for g_dir in grasp_dirs: grasp = (g_dir * grasp_len).reshape((2, 1)) robot_poses.append(targets[0].value + grasp) for rp in robot_poses: rs_param.value = rp success = False self.child_solver = CanSolver() success = self.child_solver.solve( plan, callback=callback_a, n_resamples=0, active_ts=active_ts, verbose=verbose, force_init=True ) if success: if recursive_solve(): break else: success = False for attr in attr_map[getattr(rs_param, "_type")]: rs_param._free_attrs[attr] = rs_free[attr] return success def sample_ee_from_target(self, target): """ Sample all possible EE Pose that pr2 can grasp with target: parameter of type Target return: list of tuple in the format of (ee_pos, ee_rot) """ possible_ee_poses = [] targ_pos, targ_rot = target.value.flatten(), target.rotation.flatten() ee_pos = targ_pos.copy() target_trans = OpenRAVEBody.transform_from_obj_pose(targ_pos, targ_rot) # rotate can's local z-axis by the amount of linear spacing between 0 to 2pi angle_range = np.linspace(0, np.pi * 2, num=SAMPLE_SIZE) for rot in angle_range: target_trans = OpenRAVEBody.transform_from_obj_pose(targ_pos, targ_rot) # rotate new ee_pose around can's rotation axis rot_mat = matrixFromAxisAngle([0, 0, rot]) ee_trans = target_trans.dot(rot_mat) ee_rot = OpenRAVEBody.obj_pose_from_transform(ee_trans)[3:] possible_ee_poses.append((ee_pos, ee_rot)) return possible_ee_poses def sample_rp_from_ee(self, env, rs_param, ee_pose): """ Using Inverse Kinematics to solve all possible robot armpose and basepose that reaches this ee_pose robot_pose: list of full robot pose with format of(basepose, backHeight, lArmPose, lGripper, rArmPose, rGripper) """ ee_pos, ee_rot = ee_pose[0], ee_pose[1] base_raidus = np.linspace(-1, 1, num=BASE_SAMPLE_SIZE) poses = list(itertools.product(base_raidus, base_raidus)) # sample dummy_body = OpenRAVEBody(env, rs_param.name, PR2()) possible_robot_pose = [] for pos in poses: bp = pos + ee_pos[:2] vec = ee_pos[:2] - bp vec = vec / np.linalg.norm(vec) theta = math.atan2(vec[1], vec[0]) bp = np.array([bp[0], bp[1], theta]) dummy_body.set_pose(bp) arm_poses = robot_body.ik_arm_pose(ee_pos, ee_rot) for arm_pose in arm_poses: possible_robot_pose.append( (bp, ee_arm_poses[0], rs_param.lArmPose, rs_param.lGripper, ee_arm_poses[1:], rs_param.rGripper) ) import ipdb ipdb.set_trace() dummy_body.delete() return possible_robot_pose def closest_arm_pose(self, arm_poses, cur_arm_poses): pass def solve(self, plan, callback=None, n_resamples=5, active_ts=None, verbose=False, force_init=False): success = False if force_init or not plan.initialized: ## solve at priority -1 to get an initial value for the parameters self._solve_opt_prob(plan, priority=-1, callback=callback, active_ts=active_ts, verbose=verbose) plan.initialized = True success = self._solve_opt_prob(plan, priority=1, callback=callback, active_ts=active_ts, verbose=verbose) if success: return success for _ in range(n_resamples): ## refinement loop ## priority 0 resamples the first failed predicate in the plan ## and then solves a transfer optimization that only includes linear constraints self._solve_opt_prob(plan, priority=0, callback=callback, active_ts=active_ts, verbose=verbose) success = self._solve_opt_prob(plan, priority=1, callback=callback, active_ts=active_ts, verbose=verbose) if success: return success return success def _solve_opt_prob( self, plan, priority, callback=None, init=True, init_from_prev=False, active_ts=None, verbose=False ): ## active_ts is the inclusive timesteps to include ## in the optimization if active_ts == None: active_ts = (0, plan.horizon - 1) model = grb.Model() model.params.OutputFlag = 0 self._prob = Prob(model, callback=callback) # param_to_ll_old = self._param_to_ll.copy() self._spawn_parameter_to_ll_mapping(model, plan, active_ts) model.update() self._bexpr_to_pred = {} if priority == -1: obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_first_and_last_timesteps_of_actions(plan, priority=-1, active_ts=active_ts, verbose=verbose) tol = 1e-1 elif priority == 0: ## this should only get called with a full plan for now assert active_ts == (0, plan.horizon - 1) failed_preds = plan.get_failed_preds() ## this is an objective that places ## a high value on matching the resampled values obj_bexprs = self._resample(plan, failed_preds) ## solve an optimization movement primitive to ## transfer current trajectories obj_bexprs.extend(self._get_transfer_obj(plan, "min-vel")) self._add_obj_bexprs(obj_bexprs) # self._add_first_and_last_timesteps_of_actions( # plan, priority=0, add_nonlin=False) self._add_first_and_last_timesteps_of_actions(plan, priority=-1, add_nonlin=True, verbose=verbose) self._add_all_timesteps_of_actions(plan, priority=0, add_nonlin=False, verbose=verbose) tol = 1e-1 elif priority == 1: obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_all_timesteps_of_actions(plan, priority=1, add_nonlin=True, active_ts=active_ts, verbose=verbose) tol = 1e-4 solv = Solver() solv.initial_trust_region_size = self.initial_trust_region_size solv.initial_penalty_coeff = self.init_penalty_coeff solv.max_merit_coeff_increases = self.max_merit_coeff_increases success = solv.solve(self._prob, method="penalty_sqp", tol=tol, verbose=True) import ipdb ipdb.set_trace() self._update_ll_params() return success def _get_transfer_obj(self, plan, norm): transfer_objs = [] if norm == "min-vel": for param in plan.params.values(): if param._type in ["Robot", "Can"]: T = plan.horizon K = 2 pose = param.pose assert (K, T) == pose.shape KT = K * T v = -1 * np.ones((KT - K, 1)) d = np.vstack((np.ones((KT - K, 1)), np.zeros((K, 1)))) # [:,0] allows numpy to see v and d as one-dimensional so # that numpy will create a diagonal matrix with v and d as a diagonal P = np.diag(v[:, 0], K) + np.diag(d[:, 0]) Q = np.dot(np.transpose(P), P) cur_pose = pose.reshape((KT, 1), order="F") A = -2 * cur_pose.T.dot(Q) b = cur_pose.T.dot(Q.dot(cur_pose)) # QuadExpr is 0.5*x^Tx + Ax + b quad_expr = QuadExpr(2 * self.transfer_coeff * Q, self.transfer_coeff * A, self.transfer_coeff * b) ll_param = self._param_to_ll[param] ll_grb_vars = ll_param.pose.reshape((KT, 1), order="F") bexpr = BoundExpr(quad_expr, Variable(ll_grb_vars, cur_pose)) transfer_objs.append(bexpr) else: raise NotImplemented return transfer_objs def _resample(self, plan, preds): val, attr_inds = None, None for negated, pred, t in preds: ## returns a vector of new values and an ## attr_inds (OrderedDict) that gives the mapping ## to parameter attributes val, attr_inds = pred.resample(negated, t, plan) ## no resample defined for that pred if val is not None: break if val is None: return None t_local = t bexprs = [] i = 0 for p in attr_inds: ## get the ll_param for p and gurobi variables ll_p = self._param_to_ll[p] if p.is_symbol(): t_local = 0 n_vals = 0 grb_vars = [] for attr, ind_arr in attr_inds[p]: n_vals += len(ind_arr) grb_vars.extend(list(getattr(ll_p, attr)[ind_arr, t_local])) for j, grb_var in enumerate(grb_vars): ## create an objective saying stay close to this value ## e(x) = x^2 - 2*val[i+j]*x + val[i+j]^2 Q = np.eye(1) A = -2 * val[i + j] * np.ones((1, 1)) b = np.ones((1, 1)) * np.power(val[i + j], 2) # QuadExpr is 0.5*x^Tx + Ax + b quad_expr = QuadExpr(2 * Q * self.rs_coeff, A * self.rs_coeff, b * self.rs_coeff) v_arr = np.array([grb_var]).reshape((1, 1), order="F") init_val = np.ones((1, 1)) * val[i + j] bexpr = BoundExpr(quad_expr, Variable(v_arr, val[i + j].reshape((1, 1)))) bexprs.append(bexpr) i += n_vals return bexprs def _add_pred_dict(self, pred_dict, effective_timesteps, add_nonlin=True, priority=MAX_PRIORITY, verbose=False): ## for debugging ignore_preds = [] priority = np.maximum(priority, 0) if not pred_dict["hl_info"] == "hl_state": start, end = pred_dict["active_timesteps"] active_range = range(start, end + 1) if verbose: print "pred being added: ", pred_dict print active_range, effective_timesteps negated = pred_dict["negated"] pred = pred_dict["pred"] if pred.get_type() in ignore_preds: return if pred.priority > priority: return assert isinstance(pred, common_predicates.ExprPredicate) expr = pred.get_expr(negated) for t in effective_timesteps: if t in active_range: if expr is not None: if add_nonlin or isinstance(expr.expr, AffExpr): if verbose: print "expr being added at time ", t var = self._spawn_sco_var_for_pred(pred, t) bexpr = BoundExpr(expr, var) # TODO: REMOVE line below, for tracing back predicate for debugging. bexpr.pred = pred self._bexpr_to_pred[bexpr] = (negated, pred, t) groups = ["all"] if self.early_converge: ## this will check for convergence per parameter ## this is good if e.g., a single trajectory quickly ## gets stuck groups.extend([param.name for param in pred.params]) self._prob.add_cnt_expr(bexpr, groups) def _add_first_and_last_timesteps_of_actions( self, plan, priority=MAX_PRIORITY, add_nonlin=False, active_ts=None, verbose=False ): if active_ts == None: active_ts = (0, plan.horizon - 1) for action in plan.actions: action_start, action_end = action.active_timesteps ## only add an action if action_start >= active_ts[1]: continue if action_end < active_ts[0]: continue for pred_dict in action.preds: if action_start >= active_ts[0]: self._add_pred_dict( pred_dict, [action_start], priority=priority, add_nonlin=add_nonlin, verbose=verbose ) if action_end <= active_ts[1]: self._add_pred_dict( pred_dict, [action_end], priority=priority, add_nonlin=add_nonlin, verbose=verbose ) ## add all of the linear ineqs timesteps = range(max(action_start + 1, active_ts[0]), min(action_end, active_ts[1])) for pred_dict in action.preds: self._add_pred_dict(pred_dict, timesteps, add_nonlin=False, priority=priority, verbose=verbose) def _add_all_timesteps_of_actions( self, plan, priority=MAX_PRIORITY, add_nonlin=True, active_ts=None, verbose=False ): if active_ts == None: active_ts = (0, plan.horizon - 1) for action in plan.actions: action_start, action_end = action.active_timesteps if action_start > active_ts[1]: continue if action_end < active_ts[0]: continue timesteps = range(max(action_start, active_ts[0]), min(action_end, active_ts[1]) + 1) for pred_dict in action.preds: self._add_pred_dict(pred_dict, timesteps, priority=priority, add_nonlin=add_nonlin, verbose=verbose) def _update_ll_params(self): for ll_param in self._param_to_ll.values(): ll_param.update_param() if self.child_solver: self.child_solver._update_ll_params() def _spawn_parameter_to_ll_mapping(self, model, plan, active_ts=None): if active_ts == None: active_ts = (0, plan.horizon - 1) horizon = active_ts[1] - active_ts[0] + 1 self._param_to_ll = {} self.ll_start = active_ts[0] for param in plan.params.values(): ll_param = LLParam(model, param, horizon, active_ts) ll_param.create_grb_vars() self._param_to_ll[param] = ll_param model.update() for ll_param in self._param_to_ll.values(): ll_param.batch_add_cnts() def _add_obj_bexprs(self, obj_bexprs): for bexpr in obj_bexprs: self._prob.add_obj_expr(bexpr) def _get_trajopt_obj(self, plan, active_ts=None): if active_ts == None: active_ts = (0, plan.horizon - 1) start, end = active_ts traj_objs = [] for param in plan.params.values(): if param not in self._param_to_ll: continue if param._type in ["Robot", "Can"]: for attr_name in param.__dict__.iterkeys(): attr_type = param.get_attr_type(attr_name) if issubclass(attr_type, Vector): T = end - start + 1 K = attr_type.dim attr_val = getattr(param, attr_name) KT = K * T v = -1 * np.ones((KT - K, 1)) d = np.vstack((np.ones((KT - K, 1)), np.zeros((K, 1)))) # [:,0] allows numpy to see v and d as one-dimensional so # that numpy will create a diagonal matrix with v and d as a diagonal P = np.diag(v[:, 0], K) + np.diag(d[:, 0]) Q = np.dot(np.transpose(P), P) quad_expr = QuadExpr(Q, np.zeros((1, KT)), np.zeros((1, 1))) robot_ll = self._param_to_ll[param] ll_attr_val = getattr(robot_ll, attr_name) robot_ll_grb_vars = ll_attr_val.reshape((KT, 1), order="F") # init_val = attr_val[:, start:end+1].reshape((KT, 1), order='F') # bexpr = BoundExpr(quad_expr, Variable(robot_ll_grb_vars, init_val)) bexpr = BoundExpr(quad_expr, Variable(robot_ll_grb_vars)) traj_objs.append(bexpr) return traj_objs def _spawn_sco_var_for_pred(self, pred, t): i = 0 x = np.empty(pred.x_dim, dtype=object) v = np.empty(pred.x_dim) for p in pred.attr_inds: for attr, ind_arr in pred.attr_inds[p]: n_vals = len(ind_arr) ll_p = self._param_to_ll[p] if p.is_symbol(): x[i : i + n_vals] = getattr(ll_p, attr)[ind_arr, 0] v[i : i + n_vals] = getattr(p, attr)[ind_arr, 0] else: x[i : i + n_vals] = getattr(ll_p, attr)[ind_arr, t - self.ll_start] v[i : i + n_vals] = getattr(p, attr)[ind_arr, t] i += n_vals if pred.dynamic: ## include the variables from the next time step for p in pred.attr_inds: for attr, ind_arr in pred.attr_inds[p]: n_vals = len(ind_arr) ll_p = self._param_to_ll[p] if p.is_symbol(): x[i : i + n_vals] = getattr(ll_p, attr)[ind_arr, 0] v[i : i + n_vals] = getattr(p, attr)[ind_arr, 0] else: x[i : i + n_vals] = getattr(ll_p, attr)[ind_arr, t + 1 - self.ll_start] v[i : i + n_vals] = getattr(p, attr)[ind_arr, t] i += n_vals assert i >= pred.x_dim x = x.reshape((pred.x_dim, 1)) v = v.reshape((pred.x_dim, 1)) return Variable(x, v)
def _solve_opt_prob(self, plan, priority, callback=None, init=True, active_ts=None, verbose=False): ## active_ts is the inclusive timesteps to include ## in the optimization if active_ts==None: active_ts = (0, plan.horizon-1) model = grb.Model() model.params.OutputFlag = 0 self._prob = Prob(model, callback=callback) # param_to_ll_old = self._param_to_ll.copy() self._spawn_parameter_to_ll_mapping(model, plan, active_ts) model.update() self._bexpr_to_pred = {} if priority == -2: obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_first_and_last_timesteps_of_actions(plan, priority=MAX_PRIORITY, active_ts=active_ts, verbose=verbose, add_nonlin=False) # self._add_all_timesteps_of_actions(plan, priority=1, add_nonlin=False, verbose=verbose) tol = 1e-1 elif priority == -1: obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_first_and_last_timesteps_of_actions(plan, priority=MAX_PRIORITY, active_ts=active_ts, verbose=verbose, add_nonlin=True) tol = 1e-1 elif priority == 0: ## this should only get called with a full plan for now assert active_ts == (0, plan.horizon-1) failed_preds = plan.get_failed_preds() ## this is an objective that places ## a high value on matching the resampled values obj_bexprs = [] obj_bexprs.extend(self._resample(plan, failed_preds)) ## solve an optimization movement primitive to ## transfer current trajectories obj_bexprs.extend(self._get_trajopt_obj(plan, active_ts)) # obj_bexprs.extend(self._get_transfer_obj(plan, 'min-vel')) self._add_obj_bexprs(obj_bexprs) # self._add_first_and_last_timesteps_of_actions( # plan, priority=0, add_nonlin=False) # self._add_first_and_last_timesteps_of_actions( # plan, priority=-1, add_nonlin=True, verbose=verbose) # self._add_all_timesteps_of_actions( # plan, priority=0, add_nonlin=False, verbose=verbose) self._add_all_timesteps_of_actions( plan, priority=1, add_nonlin=True, verbose=verbose) # self._add_first_and_last_timesteps_of_actions( # plan, priority=1, add_nonlin=True, verbose=verbose) # self._add_all_timesteps_of_actions( # plan, priority=0, add_nonlin=False, verbose=verbose) tol = 1e-1 elif priority >= 1: obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_all_timesteps_of_actions(plan, priority=priority, add_nonlin=True, active_ts=active_ts, verbose=verbose) tol=1e-3 solv = Solver() solv.initial_trust_region_size = self.initial_trust_region_size solv.initial_penalty_coeff = self.init_penalty_coeff solv.max_merit_coeff_increases = self.max_merit_coeff_increases success = solv.solve(self._prob, method='penalty_sqp', tol=tol, verbose=True) self._update_ll_params() print "priority: {}".format(priority) # if callback is not None: callback(True) return success
class NAMOSolver(LLSolver): def __init__(self, early_converge=True, transfer_norm='min-vel'): self.transfer_coeff = 1e1 self.rs_coeff = 1e6 self.init_penalty_coeff = 1e2 self.child_solver = None self._param_to_ll = {} self._failed_groups = [] self.early_converge = early_converge self.transfer_norm = transfer_norm def backtrack_solve(self, plan, callback=None, verbose=False): plan.save_free_attrs() success = self._backtrack_solve(plan, callback, anum=0, verbose=verbose) plan.restore_free_attrs() return success def _backtrack_solve(self, plan, callback=None, anum=0, verbose=False): if anum > len(plan.actions) - 1: return True a = plan.actions[anum] active_ts = a.active_timesteps inits = {} if a.name == 'moveto': ## find possible values for the final pose rs_param = a.params[2] elif a.name == 'movetoholding': ## find possible values for the final pose rs_param = a.params[2] elif a.name == 'grasp': ## sample the grasp/grasp_pose rs_param = a.params[4] elif a.name == 'putdown': ## sample the end pose rs_param = a.params[4] else: raise NotImplemented def recursive_solve(): ## don't optimize over any params that are already set old_params_free = {} for p in plan.params.itervalues(): if p.is_symbol(): if p not in a.params: continue old_params_free[p] = p._free_attrs['value'].copy() p._free_attrs['value'][:] = 0 else: old_params_free[p] = p._free_attrs[ 'pose'][:, active_ts[1]].copy() p._free_attrs['pose'][:, active_ts[1]] = 0 self.child_solver = NAMOSolver() if self.child_solver._backtrack_solve(plan, callback=callback, anum=anum + 1, verbose=verbose): return True ## reset free_attrs for p in a.params: if p.is_symbol(): if p not in a.params: continue p._free_attrs['value'] = old_params_free[p] else: p._free_attrs['pose'][:, active_ts[1]] = old_params_free[p] return False if not np.all(rs_param._free_attrs['value']): ## this parameter is fixed if callback is not None: callback_a = lambda: callback(a) else: callback_a = None self.child_solver = NAMOSolver() success = self.child_solver.solve(plan, callback=callback_a, n_resamples=0, active_ts=active_ts, verbose=verbose, force_init=True) if not success: ## if planning fails we're done return False ## no other options, so just return here return recursive_solve() ## so that this won't be optimized over rs_free = rs_param._free_attrs['value'].copy() rs_param._free_attrs['value'][:] = 0 targets = plan.get_param('InContact', 2, {1: rs_param}, negated=False) if len(targets) > 1: import pdb pdb.set_trace() if callback is not None: callback_a = lambda: callback(a) else: callback_a = None robot_poses = [] if len(targets) == 0 or np.all(targets[0]._free_attrs['value']): ## sample 4 possible poses coords = list(itertools.product(range(WIDTH), range(HEIGHT))) random.shuffle(coords) robot_poses = [np.array(x)[:, None] for x in coords[:4]] elif np.any(targets[0]._free_attrs['value']): ## there shouldn't be only some free_attrs set raise NotImplementedError else: grasp_dirs = [ np.array([0, -1]), np.array([1, 0]), np.array([0, 1]), np.array([-1, 0]) ] grasp_len = plan.params['pr2'].geom.radius + targets[ 0].geom.radius - dsafe for g_dir in grasp_dirs: grasp = (g_dir * grasp_len).reshape((2, 1)) robot_poses.append(targets[0].value + grasp) for rp in robot_poses: rs_param.value = rp success = False self.child_solver = NAMOSolver() success = self.child_solver.solve(plan, callback=callback_a, n_resamples=0, active_ts=active_ts, verbose=verbose, force_init=True) if success: if recursive_solve(): break else: success = False rs_param._free_attrs['value'] = rs_free return success def solve(self, plan, callback=None, n_resamples=5, active_ts=None, verbose=False, force_init=False): success = False if force_init or not plan.initialized: ## solve at priority -1 to get an initial value for the parameters self._solve_opt_prob(plan, priority=-1, callback=callback, active_ts=active_ts, verbose=verbose) plan.initialized = True success = self._solve_opt_prob(plan, priority=1, callback=callback, active_ts=active_ts, verbose=verbose) success = plan.satisfied(active_ts) if success: return success for _ in range(n_resamples): ## refinement loop ## priority 0 resamples the first failed predicate in the plan ## and then solves a transfer optimization that only includes linear constraints self._solve_opt_prob(plan, priority=0, callback=callback, active_ts=active_ts, verbose=verbose) success = self._solve_opt_prob(plan, priority=1, callback=callback, active_ts=active_ts, verbose=verbose) success = plan.satisfied(active_ts) if success: return success return success # @profile def _solve_opt_prob(self, plan, priority, callback=None, active_ts=None, verbose=False, resample=True): ## active_ts is the inclusive timesteps to include ## in the optimization if active_ts == None: active_ts = (0, plan.horizon - 1) model = grb.Model() model.params.OutputFlag = 0 self._prob = Prob(model, callback=callback) # param_to_ll_old = self._param_to_ll.copy() self._spawn_parameter_to_ll_mapping(model, plan, active_ts) model.update() self._bexpr_to_pred = {} if priority == -1: obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_first_and_last_timesteps_of_actions(plan, priority=-1, active_ts=active_ts, verbose=verbose) tol = 1e-2 elif priority == 0: ## this should only get called with a full plan for now assert active_ts == (0, plan.horizon - 1) obj_bexprs = [] if resample: failed_preds = plan.get_failed_preds() ## this is an objective that places ## a high value on matching the resampled values obj_bexprs.extend(self._resample(plan, failed_preds)) ## solve an optimization movement primitive to ## transfer current trajectories obj_bexprs.extend(self._get_transfer_obj(plan, self.transfer_norm)) self._add_obj_bexprs(obj_bexprs) # self._add_first_and_last_timesteps_of_actions( # plan, priority=0, add_nonlin=False) self._add_first_and_last_timesteps_of_actions(plan, priority=-1, add_nonlin=True, verbose=verbose) self._add_all_timesteps_of_actions(plan, priority=0, add_nonlin=False, verbose=verbose) tol = 1e-2 elif priority == 1: obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_all_timesteps_of_actions(plan, priority=1, add_nonlin=True, active_ts=active_ts, verbose=verbose) tol = 1e-4 solv = Solver() solv.initial_penalty_coeff = self.init_penalty_coeff success = solv.solve(self._prob, method='penalty_sqp', tol=tol, verbose=verbose) self._update_ll_params() self._failed_groups = self._prob.nonconverged_groups return success def get_value(self, plan, penalty_coeff=1e0): model = grb.Model() model.params.OutputFlag = 0 self._prob = Prob(model) # param_to_ll_old = self._param_to_ll.copy() self._spawn_parameter_to_ll_mapping(model, plan) model.update() self._bexpr_to_pred = {} obj_bexprs = self._get_trajopt_obj(plan) self._add_obj_bexprs(obj_bexprs) self._add_all_timesteps_of_actions(plan, priority=1, add_nonlin=True, verbose=False) return self._prob.get_value(penalty_coeff) def _get_transfer_obj(self, plan, norm): transfer_objs = [] if norm in ['min-vel', 'l2']: for param in plan.params.values(): # if param._type in ['Robot', 'Can']: K = 2 if param.is_symbol(): T = 1 pose = param.value else: T = plan.horizon pose = param.pose assert (K, T) == pose.shape KT = K * T if norm == 'min-vel' and not param.is_symbol(): v = -1 * np.ones((KT - K, 1)) d = np.vstack((np.ones((KT - K, 1)), np.zeros((K, 1)))) # [:,0] allows numpy to see v and d as one-dimensional so # that numpy will create a diagonal matrix with v and d as a diagonal P = np.diag(v[:, 0], K) + np.diag(d[:, 0]) Q = np.dot(np.transpose(P), P) else: ## l2 Q = np.eye(KT) cur_pose = pose.reshape((KT, 1), order='F') A = -2 * cur_pose.T.dot(Q) b = cur_pose.T.dot(Q.dot(cur_pose)) # QuadExpr is 0.5*x^Tx + Ax + b quad_expr = QuadExpr(2 * self.transfer_coeff * Q, self.transfer_coeff * A, self.transfer_coeff * b) ll_param = self._param_to_ll[param] if param.is_symbol(): ll_grb_vars = ll_param.value.reshape((KT, 1), order='F') else: ll_grb_vars = ll_param.pose.reshape((KT, 1), order='F') bexpr = BoundExpr(quad_expr, Variable(ll_grb_vars, cur_pose)) transfer_objs.append(bexpr) elif norm == 'straightline': return self._get_trajopt_obj(plan) else: raise NotImplementedError return transfer_objs def _resample(self, plan, preds): val, attr_inds = None, None for negated, pred, t in preds: ## returns a vector of new values and an ## attr_inds (OrderedDict) that gives the mapping ## to parameter attributes if (self.early_converge and self._failed_groups != ['all'] and self._failed_groups != [] and not np.any([ param.name in self._failed_groups for param in pred.params ])): ## using early converge and one group didn't converge ## but no parameter from this pred in that group continue val, attr_inds = pred.resample(negated, t, plan) ## no resample defined for that pred if val is not None: break if val is None: # import pdb; pdb.set_trace() return [] t_local = t bexprs = [] i = 0 for p in attr_inds: ## get the ll_param for p and gurobi variables ll_p = self._param_to_ll[p] if p.is_symbol(): t_local = 0 n_vals = 0 grb_vars = [] for attr, ind_arr in attr_inds[p]: n_vals += len(ind_arr) grb_vars.extend(list(getattr(ll_p, attr)[ind_arr, t_local])) for j, grb_var in enumerate(grb_vars): ## create an objective saying stay close to this value ## e(x) = x^2 - 2*val[i+j]*x + val[i+j]^2 Q = np.eye(1) A = -2 * val[i + j] * np.ones((1, 1)) b = np.ones((1, 1)) * np.power(val[i + j], 2) # QuadExpr is 0.5*x^Tx + Ax + b quad_expr = QuadExpr(2 * Q * self.rs_coeff, A * self.rs_coeff, b * self.rs_coeff) v_arr = np.array([grb_var]).reshape((1, 1), order='F') init_val = np.ones((1, 1)) * val[i + j] bexpr = BoundExpr(quad_expr, Variable(v_arr, val[i + j].reshape((1, 1)))) bexprs.append(bexpr) i += n_vals return bexprs def _add_pred_dict(self, pred_dict, effective_timesteps, add_nonlin=True, priority=MAX_PRIORITY, verbose=False): # verbose=True ## for debugging ignore_preds = [] priority = np.maximum(priority, 0) if not pred_dict['hl_info'] == "hl_state": start, end = pred_dict['active_timesteps'] active_range = range(start, end + 1) if verbose: print "pred being added: ", pred_dict print active_range, effective_timesteps negated = pred_dict['negated'] pred = pred_dict['pred'] if pred.get_type() in ignore_preds: return if pred.priority > priority: return assert isinstance(pred, common_predicates.ExprPredicate) expr = pred.get_expr(negated) for t in effective_timesteps: if t in active_range: if expr is not None: if add_nonlin or isinstance(expr.expr, AffExpr): if verbose: print "expr being added at time ", t var = self._spawn_sco_var_for_pred(pred, t) bexpr = BoundExpr(expr, var) self._bexpr_to_pred[bexpr] = (negated, pred, t) groups = ['all'] if self.early_converge: ## this will check for convergence per parameter ## this is good if e.g., a single trajectory quickly ## gets stuck groups.extend( [param.name for param in pred.params]) self._prob.add_cnt_expr(bexpr, groups) def _add_first_and_last_timesteps_of_actions(self, plan, priority=MAX_PRIORITY, add_nonlin=False, active_ts=None, verbose=False): if active_ts == None: active_ts = (0, plan.horizon - 1) for action in plan.actions: action_start, action_end = action.active_timesteps ## only add an action if action_start >= active_ts[1] and action_start > active_ts[0]: continue if action_end < active_ts[0]: continue for pred_dict in action.preds: if action_start >= active_ts[0]: self._add_pred_dict(pred_dict, [action_start], priority=priority, add_nonlin=add_nonlin, verbose=verbose) if action_end <= active_ts[1]: self._add_pred_dict(pred_dict, [action_end], priority=priority, add_nonlin=add_nonlin, verbose=verbose) ## add all of the linear ineqs timesteps = range(max(action_start + 1, active_ts[0]), min(action_end, active_ts[1])) for pred_dict in action.preds: self._add_pred_dict(pred_dict, timesteps, add_nonlin=False, priority=priority, verbose=verbose) def _add_all_timesteps_of_actions(self, plan, priority=MAX_PRIORITY, add_nonlin=True, active_ts=None, verbose=False): if active_ts == None: active_ts = (0, plan.horizon - 1) for action in plan.actions: action_start, action_end = action.active_timesteps if action_start >= active_ts[1] and action_start > active_ts[0]: continue if action_end < active_ts[0]: continue timesteps = range(max(action_start, active_ts[0]), min(action_end, active_ts[1]) + 1) for pred_dict in action.preds: self._add_pred_dict(pred_dict, timesteps, priority=priority, add_nonlin=add_nonlin, verbose=verbose) def _update_ll_params(self): for ll_param in self._param_to_ll.values(): ll_param.update_param() if self.child_solver: self.child_solver._update_ll_params() def _spawn_parameter_to_ll_mapping(self, model, plan, active_ts=None): if active_ts == None: active_ts = (0, plan.horizon - 1) horizon = active_ts[1] - active_ts[0] + 1 self._param_to_ll = {} self.ll_start = active_ts[0] for param in plan.params.values(): ll_param = LLParam(model, param, horizon, active_ts) ll_param.create_grb_vars() self._param_to_ll[param] = ll_param model.update() for ll_param in self._param_to_ll.values(): ll_param.batch_add_cnts() def _add_obj_bexprs(self, obj_bexprs): for bexpr in obj_bexprs: self._prob.add_obj_expr(bexpr) def _get_trajopt_obj(self, plan, active_ts=None): if active_ts == None: active_ts = (0, plan.horizon - 1) start, end = active_ts traj_objs = [] for param in plan.params.values(): if param not in self._param_to_ll: continue if param._type in ['Robot', 'Can']: T = end - start + 1 K = 2 pose = param.pose KT = K * T v = -1 * np.ones((KT - K, 1)) d = np.vstack((np.ones((KT - K, 1)), np.zeros((K, 1)))) # [:,0] allows numpy to see v and d as one-dimensional so # that numpy will create a diagonal matrix with v and d as a diagonal P = np.diag(v[:, 0], K) + np.diag(d[:, 0]) Q = np.dot(np.transpose(P), P) Q *= TRAJOPT_COEFF quad_expr = QuadExpr(Q, np.zeros((1, KT)), np.zeros((1, 1))) robot_ll = self._param_to_ll[param] robot_ll_grb_vars = robot_ll.pose.reshape((KT, 1), order='F') init_val = param.pose[:, start:end + 1].reshape((KT, 1), order='F') bexpr = BoundExpr(quad_expr, Variable(robot_ll_grb_vars, init_val)) traj_objs.append(bexpr) return traj_objs def _spawn_sco_var_for_pred(self, pred, t): i = 0 x = np.empty(pred.x_dim, dtype=object) v = np.empty(pred.x_dim) for p in pred.attr_inds: for attr, ind_arr in pred.attr_inds[p]: n_vals = len(ind_arr) ll_p = self._param_to_ll[p] if p.is_symbol(): x[i:i + n_vals] = getattr(ll_p, attr)[ind_arr, 0] v[i:i + n_vals] = getattr(p, attr)[ind_arr, 0] else: x[i:i + n_vals] = getattr(ll_p, attr)[ind_arr, t - self.ll_start] v[i:i + n_vals] = getattr(p, attr)[ind_arr, t] i += n_vals if pred.dynamic: ## include the variables from the next time step for p in pred.attr_inds: for attr, ind_arr in pred.attr_inds[p]: n_vals = len(ind_arr) ll_p = self._param_to_ll[p] if p.is_symbol(): x[i:i + n_vals] = getattr(ll_p, attr)[ind_arr, 0] v[i:i + n_vals] = getattr(p, attr)[ind_arr, 0] else: x[i:i + n_vals] = getattr(ll_p, attr)[ind_arr, t + 1 - self.ll_start] v[i:i + n_vals] = getattr(p, attr)[ind_arr, t + 1] i += n_vals assert i >= pred.x_dim x = x.reshape((pred.x_dim, 1)) v = v.reshape((pred.x_dim, 1)) return Variable(x, v)
def _solve_opt_prob(self, plan, priority, callback=None, active_ts=None, verbose=False, resample=True): ## active_ts is the inclusive timesteps to include ## in the optimization if active_ts == None: active_ts = (0, plan.horizon - 1) model = grb.Model() model.params.OutputFlag = 0 self._prob = Prob(model, callback=callback) # param_to_ll_old = self._param_to_ll.copy() self._spawn_parameter_to_ll_mapping(model, plan, active_ts) model.update() self._bexpr_to_pred = {} if priority == -1: obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_first_and_last_timesteps_of_actions(plan, priority=-1, active_ts=active_ts, verbose=verbose) tol = 1e-2 elif priority == 0: ## this should only get called with a full plan for now assert active_ts == (0, plan.horizon - 1) obj_bexprs = [] if resample: failed_preds = plan.get_failed_preds() ## this is an objective that places ## a high value on matching the resampled values obj_bexprs.extend(self._resample(plan, failed_preds)) ## solve an optimization movement primitive to ## transfer current trajectories obj_bexprs.extend(self._get_transfer_obj(plan, self.transfer_norm)) self._add_obj_bexprs(obj_bexprs) # self._add_first_and_last_timesteps_of_actions( # plan, priority=0, add_nonlin=False) self._add_first_and_last_timesteps_of_actions(plan, priority=-1, add_nonlin=True, verbose=verbose) self._add_all_timesteps_of_actions(plan, priority=0, add_nonlin=False, verbose=verbose) tol = 1e-2 elif priority == 1: obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_all_timesteps_of_actions(plan, priority=1, add_nonlin=True, active_ts=active_ts, verbose=verbose) tol = 1e-4 solv = Solver() solv.initial_penalty_coeff = self.init_penalty_coeff success = solv.solve(self._prob, method='penalty_sqp', tol=tol, verbose=verbose) self._update_ll_params() self._failed_groups = self._prob.nonconverged_groups return success
class CanSolver(LLSolver): def __init__(self, early_converge=False): self.transfer_coeff = 1e1 self.rs_coeff = 1e10 self.initial_trust_region_size = 1e-2 self.init_penalty_coeff = 1e1 # self.init_penalty_coeff = 1e5 self.max_merit_coeff_increases = 5 self._param_to_ll = {} self.early_converge = early_converge self.child_solver = None self.solve_priorities = [2] def _solve_helper(self, plan, callback, active_ts, verbose): # certain constraints should be solved first success = False for priority in self.solve_priorities: success = self._solve_opt_prob(plan, priority=priority, callback=callback, active_ts=active_ts, verbose=verbose) # if not success: # return success # return True return success def backtrack_solve(self, plan, callback=None, anum=0, verbose=False): if anum > len(plan.actions) - 1: return True a = plan.actions[anum] active_ts = a.active_timesteps inits = {} # Moveto -> Grasp -> Movetoholding -> Putdown robot = a.params[0] lookup_preds = 'EEReachable' if a.name == 'moveto': ## moveto: (?robot - Robot ?start - RobotPose ?end - RobotPose) ## sample ?end - RobotPose rs_param = a.params[2] elif a.name == 'movetoholding': ## movetoholding: (?robot - Robot ?start - RobotPose ?end - RobotPose ?c - Can) ## sample ?end - RobotPose rs_param = a.params[2] elif a.name == 'grasp': ## grasp: (?robot - Robot ?can - Can ?target - Target ?sp - RobotPose ?ee - EEPose ?ep - RobotPose) ## sample ?ee - EEPose rs_param = a.params[4] lookup_preds = 'InContact' elif a.name == 'putdown': ## putdown: (?robot - Robot ?can - Can ?target - Target ?sp - RobotPose ?ee - EEPose ?ep - RobotPose) ## sample ?ep - RobotPose rs_param = a.params[5] else: raise NotImplemented def recursive_solve(): ## don't optimize over any params that are already set old_params_free = {} for p in plan.params.itervalues(): old_param_map = {} if p.is_symbol(): if p not in a.params: continue for attr in attr_map[getattr(p, '_type')]: old_param_map[attr] = p._free_attrs[attr].copy() p._free_attrs[attr][:] = 0 else: for attr in attr_map[getattr(p, '_type')]: old_param_map[attr] = p._free_attrs[ attr][:, active_ts[1]].copy() p._free_attrs[attr][:, active_ts[1]] = 0 old_params_free[p] = old_param_map self.child_solver = CanSolver() if self.child_solver.backtrack_solve(plan, callback=callback, anum=anum + 1, verbose=verbose): return True ## reset free_attrs for p in a.params: if p.is_symbol(): if p not in a.params: continue for attr in attr_map[getattr(p, '_type')]: p._free_attrs[attr] = old_params_free[p][attr] else: for attr in attr_map[getattr(p, '_type')]: p._free_attrs[attr][:, active_ts[1]] = old_params_free[ p][attr] return False if rs_param.is_fixed(attr_map[getattr(rs_param, '_type')]): ## this parameter is fixed if callback is not None: callback_a = lambda: callback(a) else: callback_a = None self.child_solver = CanSolver() success = self.child_solver.solve(plan, callback=callback_a, n_resamples=0, active_ts=active_ts, verbose=verbose, force_init=True) if not success: ## if planning fails we're done return False ## no other options, so just return here return recursive_solve() ## so that this won't be optimized over rs_free = {} for attr in attr_map[getattr(rs_param, '_type')]: rs_free[attr] = rs_param._free_attrs[attr].copy() rs_param._free_attrs[attr][:] = 0 if callback is not None: callback_a = lambda: callback(a) else: callback_a = None ###################################################################### # Want robot_poses to be a list of dict, mapping from attr to value robot_poses = [] if a.name == 'moveto': #rs_param is ?end - RobotPose ee_poses = plan.get_param(lookup_preds, 2, {1: rs_param}) # get a collision free base_pose, with all other pose stay the same elif a.name == 'movetoholding': #rs_param is ?end - RobotPose ee_poses = plan.get_param(lookup_preds, 2, {1: rs_param}) targets = plan.get_param('InContact', 2, {1: ee_poses}) # sample ee_pose associated to this target and sample robot_pose for it elif a.name == 'grasp': #rs_param is ?ee - EEPose, robot_pose ee_poses = [rs_param] targets = plan.get_param(lookup_preds, 2, {1: rs_param}) # sample ee_pose associated to this target and sample robot_pose for it elif a.name == 'putdown': ee_poses = plan.get_param(lookup_preds, 2, {1: rs_param}) targets = [] # get a collision free base_pose, with all other pose stay the same else: raise NotImplemented # import ipdb; ipdb.set_trace() if len(ee_poses) > 1: import pdb pdb.set_trace() if len(ee_poses) == 0 or np.all([ ee_poses[0]._free_attrs[attr] for attr in attr_map[ee_poses[0]._type] ]): # Currently just sample reasonable robot end poses for the moveto action t = active_ts[0] ee_pose = ee_poses[0] targets = plan.get_param('InContact', 2, {1: ee_pose}) if len(targets) == 0 or len(targets) > 1: import ipdb ipdb.set_trace() sampled_ee_poses = sampling.get_ee_from_target( targets[0].value, targets[0].rotation) cur_robot_pose = { 'backHeight': robot.backHeight[:, t], 'lArmPose': robot.lArmPose[:, t], 'lGripper': robot.lGripper[:, t], 'rArmPose': robot.rArmPose[:, t], 'rGripper': robot.rGripper[:, t] } for ee_pose in sampled_ee_poses: possible_base = sampling.get_base_poses_around_pos( t, robot, ee_pose[0], BASE_SAMPLE_SIZE, 0.6) robot_base = sampling.closest_base_poses( possible_base, robot.pose[:, t]) robot_pose = cur_robot_pose.copy() robot_pose['value'] = robot_base robot_poses.append(robot_pose) elif np.any([ ee_poses[0]._free_attrs[attr] for attr in attr_map[ee_poses[0]._type] ]): ## there shouldn't be only some free_attrs set raise NotImplementedError else: ee_pose = ee_poses[0] targets = plan.get_param('InContact', 2, {1: ee_pose}) possible_ee_poses = sampling.get_ee_from_target( targets[0].value, targets[0].rotation) for ee_pos, ee_rot in possible_ee_poses: robot_poses.append({'value': ee_pos, 'rotation': ee_rot}) # import ipdb; ipdb.set_trace() ########################################################################## for rp in robot_poses: for attr in attr_map[rs_param._type]: dim = len(rp[attr]) setattr(rs_param, attr, rp[attr].reshape((dim, 1))) success = False self.child_solver = CanSolver() success = self.child_solver.solve(plan, callback=callback_a, n_resamples=0, active_ts=active_ts, verbose=verbose, force_init=True) if success: if recursive_solve(): break else: success = False for attr in attr_map[getattr(rs_param, '_type')]: rs_param._free_attrs[attr] = rs_free[attr] return success def sample_rp_from_ee(t, plan, ee_poses, robot): target_pose = target.value[:, 0] dummy_body = OpenRAVEBody(plan.env, robot.name + '_dummy', robot.geom) possible_rps = [] for ee_pose in ee_poses: base_pose = sampling.get_col_free_base_pose_around_target( t, plan, ee_pose.value, robot).reshape((1, 3)) ik_arm_poses = dummy_body.ik_arm_pose(ee_pose[0], ee_pose[1]) cur_arm_poses = np.c_[robot.backHeight, robot.rArmPose].reshape( (8, )) chosen_pose = sampling.closest_arm_pose(ik_arm_poses, cur_arm_poses) torso_pose, arm_pose = chosen_pose[0], chosen_pose[1:] possible_rp = np.hstack( (base_pose, torso_pose, robot.lArmPose[:, t], robot.lGripper[:, t], rArmPose, robot.rGripper[:, t])) possible_rps.append(possible_rp) dummy_body.delete() if len(possible_rps) <= 0: print "something went wrong" # import ipdb; ipdb.set_trace() return possible_rps def solve(self, plan, callback=None, n_resamples=5, active_ts=None, verbose=False, force_init=False): success = False if force_init or not plan.initialized: ## solve at priority -1 to get an initial value for the parameters self._solve_opt_prob(plan, priority=-2, callback=callback, active_ts=active_ts, verbose=verbose) self._solve_opt_prob(plan, priority=-1, callback=callback, active_ts=active_ts, verbose=verbose) plan.initialized = True success = self._solve_helper(plan, callback=callback, active_ts=active_ts, verbose=verbose) # fp = plan.get_failed_preds() # if len(fp) == 0: # return True if success: return success for _ in range(n_resamples): ## refinement loop ## priority 0 resamples the first failed predicate in the plan ## and then solves a transfer optimization that only includes linear constraints self._solve_opt_prob(plan, priority=0, callback=callback, active_ts=active_ts, verbose=verbose) # self._solve_opt_prob(plan, priority=1, callback=callback, active_ts=active_ts, verbose=verbose) success = self._solve_opt_prob(plan, priority=2, callback=callback, active_ts=active_ts, verbose=verbose) # fp = plan.get_failed_preds() # if len(fp) == 0: # return True if success: return success return success def _solve_opt_prob(self, plan, priority, callback=None, init=True, active_ts=None, verbose=False): ## active_ts is the inclusive timesteps to include ## in the optimization if active_ts == None: active_ts = (0, plan.horizon - 1) model = grb.Model() model.params.OutputFlag = 0 self._prob = Prob(model, callback=callback) # param_to_ll_old = self._param_to_ll.copy() self._spawn_parameter_to_ll_mapping(model, plan, active_ts) model.update() self._bexpr_to_pred = {} if priority == -2: obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_first_and_last_timesteps_of_actions( plan, priority=MAX_PRIORITY, active_ts=active_ts, verbose=verbose, add_nonlin=False) # self._add_all_timesteps_of_actions(plan, priority=1, add_nonlin=False, verbose=verbose) tol = 1e-1 elif priority == -1: obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_first_and_last_timesteps_of_actions( plan, priority=MAX_PRIORITY, active_ts=active_ts, verbose=verbose, add_nonlin=True) tol = 1e-1 elif priority == 0: ## this should only get called with a full plan for now assert active_ts == (0, plan.horizon - 1) failed_preds = plan.get_failed_preds() ## this is an objective that places ## a high value on matching the resampled values obj_bexprs = [] obj_bexprs.extend(self._resample(plan, failed_preds)) ## solve an optimization movement primitive to ## transfer current trajectories obj_bexprs.extend(self._get_trajopt_obj(plan, active_ts)) # obj_bexprs.extend(self._get_transfer_obj(plan, 'min-vel')) self._add_obj_bexprs(obj_bexprs) # self._add_first_and_last_timesteps_of_actions( # plan, priority=0, add_nonlin=False) # self._add_first_and_last_timesteps_of_actions( # plan, priority=-1, add_nonlin=True, verbose=verbose) # self._add_all_timesteps_of_actions( # plan, priority=0, add_nonlin=False, verbose=verbose) self._add_all_timesteps_of_actions(plan, priority=1, add_nonlin=True, verbose=verbose) # self._add_first_and_last_timesteps_of_actions( # plan, priority=1, add_nonlin=True, verbose=verbose) # self._add_all_timesteps_of_actions( # plan, priority=0, add_nonlin=False, verbose=verbose) tol = 1e-1 elif priority >= 1: obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_all_timesteps_of_actions(plan, priority=priority, add_nonlin=True, active_ts=active_ts, verbose=verbose) tol = 1e-3 solv = Solver() solv.initial_trust_region_size = self.initial_trust_region_size solv.initial_penalty_coeff = self.init_penalty_coeff solv.max_merit_coeff_increases = self.max_merit_coeff_increases success = solv.solve(self._prob, method='penalty_sqp', tol=tol, verbose=True) self._update_ll_params() print "priority: {}".format(priority) # if callback is not None: callback(True) return success def _get_transfer_obj(self, plan, norm): transfer_objs = [] if norm == 'min-vel': for param in plan.params.values(): # if param._type in ['Robot', 'Can', 'EEPose']: for attr_name in param.__dict__.iterkeys(): attr_type = param.get_attr_type(attr_name) if issubclass(attr_type, Vector): if param.is_symbol(): T = 1 else: T = plan.horizon K = attr_type.dim attr_val = getattr(param, attr_name) # pose = param.pose assert (K, T) == attr_val.shape KT = K * T v = -1 * np.ones((KT - K, 1)) d = np.vstack((np.ones((KT - K, 1)), np.zeros((K, 1)))) # [:,0] allows numpy to see v and d as one-dimensional so # that numpy will create a diagonal matrix with v and d as a diagonal P = np.diag(v[:, 0], K) + np.diag(d[:, 0]) Q = np.dot(np.transpose(P), P) cur_val = attr_val.reshape((KT, 1), order='F') A = -2 * cur_val.T.dot(Q) b = cur_val.T.dot(Q.dot(cur_val)) # QuadExpr is 0.5*x^Tx + Ax + b quad_expr = QuadExpr(2 * self.transfer_coeff * Q, self.transfer_coeff * A, self.transfer_coeff * b) param_ll = self._param_to_ll[param] ll_attr_val = getattr(param_ll, attr_name) param_ll_grb_vars = ll_attr_val.reshape((KT, 1), order='F') bexpr = BoundExpr(quad_expr, Variable(param_ll_grb_vars, cur_val)) transfer_objs.append(bexpr) else: raise NotImplemented return transfer_objs def _resample(self, plan, preds): val, attr_inds = None, None for negated, pred, t in preds: ## returns a vector of new values and an ## attr_inds (OrderedDict) that gives the mapping ## to parameter attributes val, attr_inds = pred.resample(negated, t, plan) ## no resample defined for that pred if val is not None: break if val is None: return [] bexprs = [] i = 0 for p in attr_inds: ## get the ll_param for p and gurobi variables ll_p = self._param_to_ll[p] n_vals = 0 grb_vars = [] for attr, ind_arr, t in attr_inds[p]: n_vals += len(ind_arr) grb_vars.extend(list( getattr(ll_p, attr)[ind_arr, t].flatten())) for j, grb_var in enumerate(grb_vars): ## create an objective saying stay close to this value ## e(x) = x^2 - 2*val[i+j]*x + val[i+j]^2 Q = np.eye(1) A = -2 * val[i + j] * np.ones((1, 1)) b = np.ones((1, 1)) * np.power(val[i + j], 2) # QuadExpr is 0.5*x^Tx + Ax + b quad_expr = QuadExpr(2 * Q * self.rs_coeff, A * self.rs_coeff, b * self.rs_coeff) v_arr = np.array([grb_var]).reshape((1, 1), order='F') init_val = np.ones((1, 1)) * val[i + j] bexpr = BoundExpr(quad_expr, Variable(v_arr, val[i + j].reshape((1, 1)))) bexprs.append(bexpr) i += n_vals return bexprs def _add_pred_dict(self, pred_dict, effective_timesteps, add_nonlin=True, priority=MAX_PRIORITY, verbose=False): ## for debugging ignore_preds = [] priority = np.maximum(priority, 0) if not pred_dict['hl_info'] == "hl_state": start, end = pred_dict['active_timesteps'] active_range = range(start, end + 1) if verbose: print "pred being added: ", pred_dict print active_range, effective_timesteps negated = pred_dict['negated'] pred = pred_dict['pred'] if pred.get_type() in ignore_preds: return if pred.priority > priority: return assert isinstance(pred, common_predicates.ExprPredicate) expr = pred.get_expr(negated) for t in effective_timesteps: if t in active_range: if expr is not None: if add_nonlin or isinstance(expr.expr, AffExpr): if verbose: print "expr being added at time ", t var = self._spawn_sco_var_for_pred(pred, t) bexpr = BoundExpr(expr, var) # TODO: REMOVE line below, for tracing back predicate for debugging. bexpr.pred = pred self._bexpr_to_pred[bexpr] = (negated, pred, t) groups = ['all'] if self.early_converge: ## this will check for convergence per parameter ## this is good if e.g., a single trajectory quickly ## gets stuck groups.extend( [param.name for param in pred.params]) self._prob.add_cnt_expr(bexpr, groups) def _add_first_and_last_timesteps_of_actions(self, plan, priority=MAX_PRIORITY, add_nonlin=False, active_ts=None, verbose=False): if active_ts == None: active_ts = (0, plan.horizon - 1) for action in plan.actions: action_start, action_end = action.active_timesteps ## only add an action if action_start >= active_ts[1] and action_start > active_ts[0]: continue if action_end < active_ts[0]: continue for pred_dict in action.preds: if action_start >= active_ts[0]: self._add_pred_dict(pred_dict, [action_start], priority=priority, add_nonlin=add_nonlin, verbose=verbose) if action_end <= active_ts[1]: self._add_pred_dict(pred_dict, [action_end], priority=priority, add_nonlin=add_nonlin, verbose=verbose) ## add all of the linear ineqs timesteps = range(max(action_start + 1, active_ts[0]), min(action_end, active_ts[1])) for pred_dict in action.preds: self._add_pred_dict(pred_dict, timesteps, add_nonlin=False, priority=priority, verbose=verbose) def _add_all_timesteps_of_actions(self, plan, priority=MAX_PRIORITY, add_nonlin=True, active_ts=None, verbose=False): if active_ts == None: active_ts = (0, plan.horizon - 1) for action in plan.actions: action_start, action_end = action.active_timesteps if action_start >= active_ts[1] and action_start > active_ts[0]: continue if action_end < active_ts[0]: continue timesteps = range(max(action_start, active_ts[0]), min(action_end, active_ts[1]) + 1) for pred_dict in action.preds: self._add_pred_dict(pred_dict, timesteps, priority=priority, add_nonlin=add_nonlin, verbose=verbose) def _update_ll_params(self): for ll_param in self._param_to_ll.values(): ll_param.update_param() if self.child_solver: self.child_solver._update_ll_params() def _spawn_parameter_to_ll_mapping(self, model, plan, active_ts=None): if active_ts == None: active_ts = (0, plan.horizon - 1) horizon = active_ts[1] - active_ts[0] + 1 self._param_to_ll = {} self.ll_start = active_ts[0] for param in plan.params.values(): ll_param = LLParam(model, param, horizon, active_ts) ll_param.create_grb_vars() self._param_to_ll[param] = ll_param model.update() for ll_param in self._param_to_ll.values(): ll_param.batch_add_cnts() def _add_obj_bexprs(self, obj_bexprs): for bexpr in obj_bexprs: self._prob.add_obj_expr(bexpr) def _get_trajopt_obj(self, plan, active_ts=None): if active_ts == None: active_ts = (0, plan.horizon - 1) start, end = active_ts traj_objs = [] for param in plan.params.values(): if param not in self._param_to_ll: continue if param._type in ['Robot', 'Can']: for attr_name in param.__dict__.iterkeys(): attr_type = param.get_attr_type(attr_name) if issubclass(attr_type, Vector): T = end - start + 1 K = attr_type.dim attr_val = getattr(param, attr_name) KT = K * T v = -1 * np.ones((KT - K, 1)) d = np.vstack((np.ones((KT - K, 1)), np.zeros((K, 1)))) # [:,0] allows numpy to see v and d as one-dimensional so # that numpy will create a diagonal matrix with v and d as a diagonal P = np.diag(v[:, 0], K) + np.diag(d[:, 0]) Q = np.dot(np.transpose(P), P) Q *= TRAJOPT_COEFF quad_expr = None if attr_name == 'pose' and param._type == 'Robot': quad_expr = QuadExpr(BASE_MOVE_COEFF * Q, np.zeros((1, KT)), np.zeros((1, 1))) else: quad_expr = QuadExpr(Q, np.zeros((1, KT)), np.zeros((1, 1))) param_ll = self._param_to_ll[param] ll_attr_val = getattr(param_ll, attr_name) param_ll_grb_vars = ll_attr_val.reshape((KT, 1), order='F') attr_val = getattr(param, attr_name) init_val = attr_val[:, start:end + 1].reshape( (KT, 1), order='F') bexpr = BoundExpr( quad_expr, Variable(param_ll_grb_vars, init_val)) # bexpr = BoundExpr(quad_expr, Variable(param_ll_grb_vars)) traj_objs.append(bexpr) return traj_objs
def _solve_opt_prob(self, plan, priority, callback=None, init=True, active_ts=None, verbose=False): ## active_ts is the inclusive timesteps to include ## in the optimization if active_ts == None: active_ts = (0, plan.horizon - 1) model = grb.Model() model.params.OutputFlag = 0 self._prob = Prob(model, callback=callback) # param_to_ll_old = self._param_to_ll.copy() self._spawn_parameter_to_ll_mapping(model, plan, active_ts) model.update() self._bexpr_to_pred = {} if priority == -2: obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_first_and_last_timesteps_of_actions( plan, priority=MAX_PRIORITY, active_ts=active_ts, verbose=verbose, add_nonlin=False) # self._add_all_timesteps_of_actions(plan, priority=1, add_nonlin=False, verbose=verbose) tol = 1e-1 elif priority == -1: obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_first_and_last_timesteps_of_actions( plan, priority=MAX_PRIORITY, active_ts=active_ts, verbose=verbose, add_nonlin=True) tol = 1e-1 elif priority == 0: ## this should only get called with a full plan for now assert active_ts == (0, plan.horizon - 1) failed_preds = plan.get_failed_preds() ## this is an objective that places ## a high value on matching the resampled values obj_bexprs = [] obj_bexprs.extend(self._resample(plan, failed_preds)) ## solve an optimization movement primitive to ## transfer current trajectories obj_bexprs.extend(self._get_trajopt_obj(plan, active_ts)) # obj_bexprs.extend(self._get_transfer_obj(plan, 'min-vel')) self._add_obj_bexprs(obj_bexprs) # self._add_first_and_last_timesteps_of_actions( # plan, priority=0, add_nonlin=False) # self._add_first_and_last_timesteps_of_actions( # plan, priority=-1, add_nonlin=True, verbose=verbose) # self._add_all_timesteps_of_actions( # plan, priority=0, add_nonlin=False, verbose=verbose) self._add_all_timesteps_of_actions(plan, priority=1, add_nonlin=True, verbose=verbose) # self._add_first_and_last_timesteps_of_actions( # plan, priority=1, add_nonlin=True, verbose=verbose) # self._add_all_timesteps_of_actions( # plan, priority=0, add_nonlin=False, verbose=verbose) tol = 1e-1 elif priority >= 1: obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_all_timesteps_of_actions(plan, priority=priority, add_nonlin=True, active_ts=active_ts, verbose=verbose) tol = 1e-3 solv = Solver() solv.initial_trust_region_size = self.initial_trust_region_size solv.initial_penalty_coeff = self.init_penalty_coeff solv.max_merit_coeff_increases = self.max_merit_coeff_increases success = solv.solve(self._prob, method='penalty_sqp', tol=tol, verbose=True) self._update_ll_params() print "priority: {}".format(priority) # if callback is not None: callback(True) return success
class CanSolver(LLSolver): def __init__(self, early_converge=False): self.transfer_coeff = 1e1 self.rs_coeff = 1e6 self.initial_trust_region_size = 1e-1 self.init_penalty_coeff = 1e1 # self.init_penalty_coeff = 1e5 self.max_merit_coeff_increases = 5 self._param_to_ll = {} self.early_converge = early_converge self.child_solver = None def backtrack_solve(self, plan, callback=None, anum=0, verbose=False): if anum > len(plan.actions) - 1: return True a = plan.actions[anum] active_ts = a.active_timesteps inits = {} # Moveto -> Grasp -> Movetoholding -> Putdown if a.name == 'moveto': ## moveto: (?robot - Robot ?start - RobotPose ?end - RobotPose) ## find possible values for the final pose rs_param = a.params[2] elif a.name == 'movetoholding': ## movetoholding: (?robot - Robot ?start - RobotPose ?end - RobotPose ?c - Can) ## find possible values for the final pose rs_param = a.params[3] elif a.name == 'grasp': ## grasp: (?robot - Robot ?can - Can ?target - Target ?sp - RobotPose ?ee - EEPose ?ep - RobotPose) ## sample the ee_pose rs_param = a.params[5] elif a.name == 'putdown': ## putdown: (?robot - Robot ?can - Can ?target - Target ?sp - RobotPose ?ee - EEPose ?ep - RobotPose) rs_param = a.params[4] else: raise NotImplemented def recursive_solve(): ## don't optimize over any params that are already set old_params_free = {} for p in plan.params.itervalues(): old_param_map = {} if p.is_symbol(): if p not in a.params: continue for attr in attr_map[getattr(p, '_type')]: old_param_map[attr] = p._free_attrs[attr].copy() p._free_attrs[attr][:] = 0 else: for attr in attr_map[getattr(p, '_type')]: old_param_map[attr] = p._free_attrs[ attr][:, active_ts[1]].copy() p._free_attrs[attr][:, active_ts[1]] = 0 old_params_free[p] = old_param_map self.child_solver = CanSolver() if self.child_solver.backtrack_solve(plan, callback=callback, anum=anum + 1, verbose=verbose): return True ## reset free_attrs for p in a.params: if p.is_symbol(): if p not in a.params: continue for attr in attr_map[getattr(p, '_type')]: p._free_attrs[attr] = old_params_free[p][attr] else: for attr in attr_map[getattr(p, '_type')]: p._free_attrs[attr][:, active_ts[1]] = old_params_free[ p][attr] return False if rs_param.is_fixed(attr_map[getattr(rs_param, '_type')]): ## this parameter is fixed if callback is not None: callback_a = lambda: callback(a) else: callback_a = None self.child_solver = CanSolver() success = self.child_solver.solve(plan, callback=callback_a, n_resamples=0, active_ts=active_ts, verbose=verbose, force_init=True) if not success: ## if planning fails we're done return False ## no other options, so just return here return recursive_solve() ## so that this won't be optimized over rs_free = {} for attr in attr_map[getattr(rs_param, '_type')]: rs_free[attr] = rs_param._free_attrs[attr].copy() rs_param._free_attrs[attr][:] = 0 # Target is needed to sample the ee_pose, ee_pose is needed to sample the robot_pose # InContact, Robot, EEPose, Target targets = plan.get_param('InContact', 2, {1: rs_param}, negated=False) # EEReachable, Robot, RobotPose, EEPose robot_pose = plan.get_param('EEReachable', 1, {2: rs_param}, negated=False) if len(targets) > 1: import pdb pdb.set_trace() if callback is not None: callback_a = lambda: callback(a) else: callback_a = None robot_poses = [] if len(targets) == 0 or np.all( [targets[0]._free_attrs[attr] for attr in attr_map[targets[0]]]): ## sample 4 possible poses coords = list(itertools.product(range(WIDTH), range(HEIGHT))) random.shuffle(coords) robot_poses = [np.array(x)[:, None] for x in coords[:4]] elif np.any( [targets[0]._free_attrs[attr] for attr in attr_map[targets[0]]]): ## there shouldn't be only some free_attrs set raise NotImplementedError else: grasp_dirs = [ np.array([0, -1]), np.array([1, 0]), np.array([0, 1]), np.array([-1, 0]) ] grasp_len = plan.params['pr2'].geom.radius + targets[0].geom.radius for g_dir in grasp_dirs: grasp = (g_dir * grasp_len).reshape((2, 1)) robot_poses.append(targets[0].value + grasp) for rp in robot_poses: rs_param.value = rp success = False self.child_solver = CanSolver() success = self.child_solver.solve(plan, callback=callback_a, n_resamples=0, active_ts=active_ts, verbose=verbose, force_init=True) if success: if recursive_solve(): break else: success = False for attr in attr_map[getattr(rs_param, '_type')]: rs_param._free_attrs[attr] = rs_free[attr] return success def sample_ee_from_target(self, target): """ Sample all possible EE Pose that pr2 can grasp with target: parameter of type Target return: list of tuple in the format of (ee_pos, ee_rot) """ possible_ee_poses = [] targ_pos, targ_rot = target.value.flatten(), target.rotation.flatten() ee_pos = targ_pos.copy() target_trans = OpenRAVEBody.transform_from_obj_pose(targ_pos, targ_rot) # rotate can's local z-axis by the amount of linear spacing between 0 to 2pi angle_range = np.linspace(0, np.pi * 2, num=SAMPLE_SIZE) for rot in angle_range: target_trans = OpenRAVEBody.transform_from_obj_pose( targ_pos, targ_rot) # rotate new ee_pose around can's rotation axis rot_mat = matrixFromAxisAngle([0, 0, rot]) ee_trans = target_trans.dot(rot_mat) ee_rot = OpenRAVEBody.obj_pose_from_transform(ee_trans)[3:] possible_ee_poses.append((ee_pos, ee_rot)) return possible_ee_poses def sample_rp_from_ee(self, env, rs_param, ee_pose): """ Using Inverse Kinematics to solve all possible robot armpose and basepose that reaches this ee_pose robot_pose: list of full robot pose with format of(basepose, backHeight, lArmPose, lGripper, rArmPose, rGripper) """ ee_pos, ee_rot = ee_pose[0], ee_pose[1] base_raidus = np.linspace(-1, 1, num=BASE_SAMPLE_SIZE) poses = list(itertools.product(base_raidus, base_raidus)) # sample dummy_body = OpenRAVEBody(env, rs_param.name, PR2()) possible_robot_pose = [] for pos in poses: bp = pos + ee_pos[:2] vec = ee_pos[:2] - bp vec = vec / np.linalg.norm(vec) theta = math.atan2(vec[1], vec[0]) bp = np.array([bp[0], bp[1], theta]) dummy_body.set_pose(bp) arm_poses = robot_body.ik_arm_pose(ee_pos, ee_rot) for arm_pose in arm_poses: possible_robot_pose.append( (bp, ee_arm_poses[0], rs_param.lArmPose, rs_param.lGripper, ee_arm_poses[1:], rs_param.rGripper)) import ipdb ipdb.set_trace() dummy_body.delete() return possible_robot_pose def closest_arm_pose(self, arm_poses, cur_arm_poses): pass def solve(self, plan, callback=None, n_resamples=5, active_ts=None, verbose=False, force_init=False): success = False if force_init or not plan.initialized: ## solve at priority -1 to get an initial value for the parameters self._solve_opt_prob(plan, priority=-1, callback=callback, active_ts=active_ts, verbose=verbose) plan.initialized = True success = self._solve_opt_prob(plan, priority=1, callback=callback, active_ts=active_ts, verbose=verbose) if success: return success for _ in range(n_resamples): ## refinement loop ## priority 0 resamples the first failed predicate in the plan ## and then solves a transfer optimization that only includes linear constraints self._solve_opt_prob(plan, priority=0, callback=callback, active_ts=active_ts, verbose=verbose) success = self._solve_opt_prob(plan, priority=1, callback=callback, active_ts=active_ts, verbose=verbose) if success: return success return success def _solve_opt_prob(self, plan, priority, callback=None, init=True, init_from_prev=False, active_ts=None, verbose=False): ## active_ts is the inclusive timesteps to include ## in the optimization if active_ts == None: active_ts = (0, plan.horizon - 1) model = grb.Model() model.params.OutputFlag = 0 self._prob = Prob(model, callback=callback) # param_to_ll_old = self._param_to_ll.copy() self._spawn_parameter_to_ll_mapping(model, plan, active_ts) model.update() self._bexpr_to_pred = {} if priority == -1: obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_first_and_last_timesteps_of_actions(plan, priority=-1, active_ts=active_ts, verbose=verbose) tol = 1e-1 elif priority == 0: ## this should only get called with a full plan for now assert active_ts == (0, plan.horizon - 1) failed_preds = plan.get_failed_preds() ## this is an objective that places ## a high value on matching the resampled values obj_bexprs = self._resample(plan, failed_preds) ## solve an optimization movement primitive to ## transfer current trajectories obj_bexprs.extend(self._get_transfer_obj(plan, 'min-vel')) self._add_obj_bexprs(obj_bexprs) # self._add_first_and_last_timesteps_of_actions( # plan, priority=0, add_nonlin=False) self._add_first_and_last_timesteps_of_actions(plan, priority=-1, add_nonlin=True, verbose=verbose) self._add_all_timesteps_of_actions(plan, priority=0, add_nonlin=False, verbose=verbose) tol = 1e-1 elif priority == 1: obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_all_timesteps_of_actions(plan, priority=1, add_nonlin=True, active_ts=active_ts, verbose=verbose) tol = 1e-4 solv = Solver() solv.initial_trust_region_size = self.initial_trust_region_size solv.initial_penalty_coeff = self.init_penalty_coeff solv.max_merit_coeff_increases = self.max_merit_coeff_increases success = solv.solve(self._prob, method='penalty_sqp', tol=tol, verbose=True) import ipdb ipdb.set_trace() self._update_ll_params() return success def _get_transfer_obj(self, plan, norm): transfer_objs = [] if norm == 'min-vel': for param in plan.params.values(): if param._type in ['Robot', 'Can']: T = plan.horizon K = 2 pose = param.pose assert (K, T) == pose.shape KT = K * T v = -1 * np.ones((KT - K, 1)) d = np.vstack((np.ones((KT - K, 1)), np.zeros((K, 1)))) # [:,0] allows numpy to see v and d as one-dimensional so # that numpy will create a diagonal matrix with v and d as a diagonal P = np.diag(v[:, 0], K) + np.diag(d[:, 0]) Q = np.dot(np.transpose(P), P) cur_pose = pose.reshape((KT, 1), order='F') A = -2 * cur_pose.T.dot(Q) b = cur_pose.T.dot(Q.dot(cur_pose)) # QuadExpr is 0.5*x^Tx + Ax + b quad_expr = QuadExpr(2 * self.transfer_coeff * Q, self.transfer_coeff * A, self.transfer_coeff * b) ll_param = self._param_to_ll[param] ll_grb_vars = ll_param.pose.reshape((KT, 1), order='F') bexpr = BoundExpr(quad_expr, Variable(ll_grb_vars, cur_pose)) transfer_objs.append(bexpr) else: raise NotImplemented return transfer_objs def _resample(self, plan, preds): val, attr_inds = None, None for negated, pred, t in preds: ## returns a vector of new values and an ## attr_inds (OrderedDict) that gives the mapping ## to parameter attributes val, attr_inds = pred.resample(negated, t, plan) ## no resample defined for that pred if val is not None: break if val is None: return None t_local = t bexprs = [] i = 0 for p in attr_inds: ## get the ll_param for p and gurobi variables ll_p = self._param_to_ll[p] if p.is_symbol(): t_local = 0 n_vals = 0 grb_vars = [] for attr, ind_arr in attr_inds[p]: n_vals += len(ind_arr) grb_vars.extend(list(getattr(ll_p, attr)[ind_arr, t_local])) for j, grb_var in enumerate(grb_vars): ## create an objective saying stay close to this value ## e(x) = x^2 - 2*val[i+j]*x + val[i+j]^2 Q = np.eye(1) A = -2 * val[i + j] * np.ones((1, 1)) b = np.ones((1, 1)) * np.power(val[i + j], 2) # QuadExpr is 0.5*x^Tx + Ax + b quad_expr = QuadExpr(2 * Q * self.rs_coeff, A * self.rs_coeff, b * self.rs_coeff) v_arr = np.array([grb_var]).reshape((1, 1), order='F') init_val = np.ones((1, 1)) * val[i + j] bexpr = BoundExpr(quad_expr, Variable(v_arr, val[i + j].reshape((1, 1)))) bexprs.append(bexpr) i += n_vals return bexprs def _add_pred_dict(self, pred_dict, effective_timesteps, add_nonlin=True, priority=MAX_PRIORITY, verbose=False): ## for debugging ignore_preds = [] priority = np.maximum(priority, 0) if not pred_dict['hl_info'] == "hl_state": start, end = pred_dict['active_timesteps'] active_range = range(start, end + 1) if verbose: print "pred being added: ", pred_dict print active_range, effective_timesteps negated = pred_dict['negated'] pred = pred_dict['pred'] if pred.get_type() in ignore_preds: return if pred.priority > priority: return assert isinstance(pred, common_predicates.ExprPredicate) expr = pred.get_expr(negated) for t in effective_timesteps: if t in active_range: if expr is not None: if add_nonlin or isinstance(expr.expr, AffExpr): if verbose: print "expr being added at time ", t var = self._spawn_sco_var_for_pred(pred, t) bexpr = BoundExpr(expr, var) # TODO: REMOVE line below, for tracing back predicate for debugging. bexpr.pred = pred self._bexpr_to_pred[bexpr] = (negated, pred, t) groups = ['all'] if self.early_converge: ## this will check for convergence per parameter ## this is good if e.g., a single trajectory quickly ## gets stuck groups.extend( [param.name for param in pred.params]) self._prob.add_cnt_expr(bexpr, groups) def _add_first_and_last_timesteps_of_actions(self, plan, priority=MAX_PRIORITY, add_nonlin=False, active_ts=None, verbose=False): if active_ts == None: active_ts = (0, plan.horizon - 1) for action in plan.actions: action_start, action_end = action.active_timesteps ## only add an action if action_start >= active_ts[1]: continue if action_end < active_ts[0]: continue for pred_dict in action.preds: if action_start >= active_ts[0]: self._add_pred_dict(pred_dict, [action_start], priority=priority, add_nonlin=add_nonlin, verbose=verbose) if action_end <= active_ts[1]: self._add_pred_dict(pred_dict, [action_end], priority=priority, add_nonlin=add_nonlin, verbose=verbose) ## add all of the linear ineqs timesteps = range(max(action_start + 1, active_ts[0]), min(action_end, active_ts[1])) for pred_dict in action.preds: self._add_pred_dict(pred_dict, timesteps, add_nonlin=False, priority=priority, verbose=verbose) def _add_all_timesteps_of_actions(self, plan, priority=MAX_PRIORITY, add_nonlin=True, active_ts=None, verbose=False): if active_ts == None: active_ts = (0, plan.horizon - 1) for action in plan.actions: action_start, action_end = action.active_timesteps if action_start > active_ts[1]: continue if action_end < active_ts[0]: continue timesteps = range(max(action_start, active_ts[0]), min(action_end, active_ts[1]) + 1) for pred_dict in action.preds: self._add_pred_dict(pred_dict, timesteps, priority=priority, add_nonlin=add_nonlin, verbose=verbose) def _update_ll_params(self): for ll_param in self._param_to_ll.values(): ll_param.update_param() if self.child_solver: self.child_solver._update_ll_params() def _spawn_parameter_to_ll_mapping(self, model, plan, active_ts=None): if active_ts == None: active_ts = (0, plan.horizon - 1) horizon = active_ts[1] - active_ts[0] + 1 self._param_to_ll = {} self.ll_start = active_ts[0] for param in plan.params.values(): ll_param = LLParam(model, param, horizon, active_ts) ll_param.create_grb_vars() self._param_to_ll[param] = ll_param model.update() for ll_param in self._param_to_ll.values(): ll_param.batch_add_cnts() def _add_obj_bexprs(self, obj_bexprs): for bexpr in obj_bexprs: self._prob.add_obj_expr(bexpr) def _get_trajopt_obj(self, plan, active_ts=None): if active_ts == None: active_ts = (0, plan.horizon - 1) start, end = active_ts traj_objs = [] for param in plan.params.values(): if param not in self._param_to_ll: continue if param._type in ['Robot', 'Can']: for attr_name in param.__dict__.iterkeys(): attr_type = param.get_attr_type(attr_name) if issubclass(attr_type, Vector): T = end - start + 1 K = attr_type.dim attr_val = getattr(param, attr_name) KT = K * T v = -1 * np.ones((KT - K, 1)) d = np.vstack((np.ones((KT - K, 1)), np.zeros((K, 1)))) # [:,0] allows numpy to see v and d as one-dimensional so # that numpy will create a diagonal matrix with v and d as a diagonal P = np.diag(v[:, 0], K) + np.diag(d[:, 0]) Q = np.dot(np.transpose(P), P) quad_expr = QuadExpr(Q, np.zeros((1, KT)), np.zeros((1, 1))) robot_ll = self._param_to_ll[param] ll_attr_val = getattr(robot_ll, attr_name) robot_ll_grb_vars = ll_attr_val.reshape((KT, 1), order='F') # init_val = attr_val[:, start:end+1].reshape((KT, 1), order='F') # bexpr = BoundExpr(quad_expr, Variable(robot_ll_grb_vars, init_val)) bexpr = BoundExpr(quad_expr, Variable(robot_ll_grb_vars)) traj_objs.append(bexpr) return traj_objs def _spawn_sco_var_for_pred(self, pred, t): i = 0 x = np.empty(pred.x_dim, dtype=object) v = np.empty(pred.x_dim) for p in pred.attr_inds: for attr, ind_arr in pred.attr_inds[p]: n_vals = len(ind_arr) ll_p = self._param_to_ll[p] if p.is_symbol(): x[i:i + n_vals] = getattr(ll_p, attr)[ind_arr, 0] v[i:i + n_vals] = getattr(p, attr)[ind_arr, 0] else: x[i:i + n_vals] = getattr(ll_p, attr)[ind_arr, t - self.ll_start] v[i:i + n_vals] = getattr(p, attr)[ind_arr, t] i += n_vals if pred.dynamic: ## include the variables from the next time step for p in pred.attr_inds: for attr, ind_arr in pred.attr_inds[p]: n_vals = len(ind_arr) ll_p = self._param_to_ll[p] if p.is_symbol(): x[i:i + n_vals] = getattr(ll_p, attr)[ind_arr, 0] v[i:i + n_vals] = getattr(p, attr)[ind_arr, 0] else: x[i:i + n_vals] = getattr(ll_p, attr)[ind_arr, t + 1 - self.ll_start] v[i:i + n_vals] = getattr(p, attr)[ind_arr, t] i += n_vals assert i >= pred.x_dim x = x.reshape((pred.x_dim, 1)) v = v.reshape((pred.x_dim, 1)) return Variable(x, v)
def _solve_opt_prob(self, plan, priority, callback=None, init=True, active_ts=None, verbose=False): robot = plan.params['baxter'] body = plan.env.GetRobot("baxter") if callback is not None: viewer = callback() def draw(t): viewer.draw_plan_ts(plan, t) ## active_ts is the inclusive timesteps to include ## in the optimization if active_ts==None: active_ts = (0, plan.horizon-1) plan.save_free_attrs() model = grb.Model() model.params.OutputFlag = 0 self._prob = Prob(model, callback=callback) # _free_attrs is paied attentioned in here self._spawn_parameter_to_ll_mapping(model, plan, active_ts) model.update() self._bexpr_to_pred = {} if priority == -2: """ Initialize an linear trajectory while enforceing the linear constraints in the intermediate step. """ obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_first_and_last_timesteps_of_actions(plan, priority=MAX_PRIORITY, active_ts=active_ts, verbose=verbose, add_nonlin=False) tol = 1e-1 elif priority == -1: """ Solve the optimization problem while enforcing every constraints. """ obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_first_and_last_timesteps_of_actions(plan, priority=MAX_PRIORITY, active_ts=active_ts, verbose=verbose, add_nonlin=True) tol = 1e-1 elif priority == 0: """ When Optimization fails, resample new values for certain timesteps of the trajectory and solver as initialization """ ## this should only get called with a full plan for now # assert active_ts == (0, plan.horizon-1) failed_preds = plan.get_failed_preds() # print "{} predicates fails, resampling process begin...\n \ # Checking {}".format(len(failed_preds), failed_preds[0]) ## this is an objective that places ## a high value on matching the resampled values obj_bexprs = [] rs_obj = self._resample(plan, failed_preds) obj_bexprs.extend(rs_obj) # _get_transfer_obj returns the expression saying the current trajectory should be close to it's previous trajectory. # obj_bexprs.extend(self._get_trajopt_obj(plan, active_ts)) obj_bexprs.extend(self._get_transfer_obj(plan, self.transfer_norm)) self._add_obj_bexprs(obj_bexprs) self._add_all_timesteps_of_actions(plan, priority=1, add_nonlin=True, active_ts= active_ts, verbose=verbose) tol = 1e-3 elif priority >= 1: obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_all_timesteps_of_actions(plan, priority=priority, add_nonlin=True, active_ts=active_ts, verbose=verbose) tol=1e-3 solv = Solver() solv.initial_trust_region_size = self.initial_trust_region_size solv.initial_penalty_coeff = self.init_penalty_coeff solv.max_merit_coeff_increases = self.max_merit_coeff_increases success = solv.solve(self._prob, method='penalty_sqp', tol=tol, verbose=True) self._update_ll_params() print "priority: {}".format(priority) if priority == 0: # During resampling phases, there must be changes added to sampling_trace if len(plan.sampling_trace) > 0 and 'reward' not in plan.sampling_trace[-1]: reward = 0 if len(plan.get_failed_preds()) == 0: reward = len(plan.actions) else: failed_t = plan.get_failed_pred()[2] for i in range(len(plan.actions)): if failed_t > plan.actions[i].active_timesteps[1]: reward += 1 plan.sampling_trace[-1]['reward'] = reward ##Restore free_attrs values plan.restore_free_attrs() return success
class RobotLLSolver(LLSolver): def __init__(self, early_converge=False, transfer_norm='min-vel'): self.transfer_coeff = 1e1 self.rs_coeff = 1e10 self.initial_trust_region_size = 1e-2 self.init_penalty_coeff = 1e1 # self.init_penalty_coeff = 1e5 self.max_merit_coeff_increases = 5 self._param_to_ll = {} self.early_converge=early_converge self.child_solver = None self.solve_priorities = [2] self.transfer_norm = transfer_norm def _solve_helper(self, plan, callback, active_ts, verbose): # certain constraints should be solved first success = False for priority in self.solve_priorities: success = self._solve_opt_prob(plan, priority=priority, callback=callback, active_ts=active_ts, verbose=verbose) # if not success: # return success # return True return success def solve(self, plan, callback=None, n_resamples=20, active_ts=None, verbose=False, force_init=False): success = False if force_init or not plan.initialized: ## solve at priority -1 to get an initial value for the parameters self._solve_opt_prob(plan, priority=-2, callback=callback, active_ts=active_ts, verbose=verbose) self._solve_opt_prob(plan, priority=-1, callback=callback, active_ts=active_ts, verbose=verbose) plan.initialized=True success = self._solve_helper(plan, callback=callback, active_ts=active_ts, verbose=verbose) # self.saver = PlanSerializer() # self.saver.write_plan_to_hdf5("temp_plan.hdf5", plan) if success or len(plan.get_failed_preds()) == 0: return True # return success for _ in range(n_resamples): ## refinement loop ## priority 0 resamples the first failed predicate in the plan ## and then solves a transfer optimization that only includes linear constraints self._solve_opt_prob(plan, priority=0, callback=callback, active_ts=active_ts, verbose=verbose) success = self._solve_opt_prob(plan, priority=1, callback=callback, active_ts=active_ts, verbose=verbose) if success or len(plan.get_failed_preds()) == 0: print "Optimization success after {} resampling.".format(_) return _ return success def _solve_opt_prob(self, plan, priority, callback=None, init=True, active_ts=None, verbose=False): robot = plan.params['baxter'] body = plan.env.GetRobot("baxter") if callback is not None: viewer = callback() def draw(t): viewer.draw_plan_ts(plan, t) ## active_ts is the inclusive timesteps to include ## in the optimization if active_ts==None: active_ts = (0, plan.horizon-1) plan.save_free_attrs() model = grb.Model() model.params.OutputFlag = 0 self._prob = Prob(model, callback=callback) # _free_attrs is paied attentioned in here self._spawn_parameter_to_ll_mapping(model, plan, active_ts) model.update() self._bexpr_to_pred = {} if priority == -2: """ Initialize an linear trajectory while enforceing the linear constraints in the intermediate step. """ obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_first_and_last_timesteps_of_actions(plan, priority=MAX_PRIORITY, active_ts=active_ts, verbose=verbose, add_nonlin=False) tol = 1e-1 elif priority == -1: """ Solve the optimization problem while enforcing every constraints. """ obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_first_and_last_timesteps_of_actions(plan, priority=MAX_PRIORITY, active_ts=active_ts, verbose=verbose, add_nonlin=True) tol = 1e-1 elif priority == 0: """ When Optimization fails, resample new values for certain timesteps of the trajectory and solver as initialization """ ## this should only get called with a full plan for now # assert active_ts == (0, plan.horizon-1) failed_preds = plan.get_failed_preds() # print "{} predicates fails, resampling process begin...\n \ # Checking {}".format(len(failed_preds), failed_preds[0]) ## this is an objective that places ## a high value on matching the resampled values obj_bexprs = [] rs_obj = self._resample(plan, failed_preds) obj_bexprs.extend(rs_obj) # _get_transfer_obj returns the expression saying the current trajectory should be close to it's previous trajectory. # obj_bexprs.extend(self._get_trajopt_obj(plan, active_ts)) obj_bexprs.extend(self._get_transfer_obj(plan, self.transfer_norm)) self._add_obj_bexprs(obj_bexprs) self._add_all_timesteps_of_actions(plan, priority=1, add_nonlin=True, active_ts= active_ts, verbose=verbose) tol = 1e-3 elif priority >= 1: obj_bexprs = self._get_trajopt_obj(plan, active_ts) self._add_obj_bexprs(obj_bexprs) self._add_all_timesteps_of_actions(plan, priority=priority, add_nonlin=True, active_ts=active_ts, verbose=verbose) tol=1e-3 solv = Solver() solv.initial_trust_region_size = self.initial_trust_region_size solv.initial_penalty_coeff = self.init_penalty_coeff solv.max_merit_coeff_increases = self.max_merit_coeff_increases success = solv.solve(self._prob, method='penalty_sqp', tol=tol, verbose=True) self._update_ll_params() print "priority: {}".format(priority) if priority == 0: # During resampling phases, there must be changes added to sampling_trace if len(plan.sampling_trace) > 0 and 'reward' not in plan.sampling_trace[-1]: reward = 0 if len(plan.get_failed_preds()) == 0: reward = len(plan.actions) else: failed_t = plan.get_failed_pred()[2] for i in range(len(plan.actions)): if failed_t > plan.actions[i].active_timesteps[1]: reward += 1 plan.sampling_trace[-1]['reward'] = reward ##Restore free_attrs values plan.restore_free_attrs() return success def _get_transfer_obj(self, plan, norm): """ This function returns the expression e(x) = P|x - cur|^2 Which says the optimized trajectory should be close to the previous trajectory. Where P is the KT x KT matrix, where Px is the difference of parameter's attributes' current value and parameter's next timestep value """ transfer_objs = [] if norm == 'min-vel': for param in plan.params.values(): # if param._type in ['Robot', 'Can', 'EEPose']: for attr_name in param.__dict__.iterkeys(): attr_type = param.get_attr_type(attr_name) if issubclass(attr_type, Vector): if param.is_symbol(): T = 1 else: T = plan.horizon K = attr_type.dim attr_val = getattr(param, attr_name) # pose = param.pose assert (K, T) == attr_val.shape KT = K*T v = -1 * np.ones((KT - K, 1)) d = np.vstack((np.ones((KT - K, 1)), np.zeros((K, 1)))) # [:,0] allows numpy to see v and d as one-dimensional so # that numpy will create a diagonal matrix with v and d as a diagonal P = np.diag(v[:, 0], K) + np.diag(d[:, 0]) # P = np.eye(KT) Q = np.dot(np.transpose(P), P) cur_val = attr_val.reshape((KT, 1), order='F') A = -2*cur_val.T.dot(Q) b = cur_val.T.dot(Q.dot(cur_val)) # QuadExpr is 0.5*x^Tx + Ax + b quad_expr = QuadExpr(2*self.transfer_coeff*Q, self.transfer_coeff*A, self.transfer_coeff*b) param_ll = self._param_to_ll[param] ll_attr_val = getattr(param_ll, attr_name) param_ll_grb_vars = ll_attr_val.reshape((KT, 1), order='F') bexpr = BoundExpr(quad_expr, Variable(param_ll_grb_vars, cur_val)) transfer_objs.append(bexpr) else: raise NotImplemented return transfer_objs def _resample(self, plan, preds): """ This function first calls fail predicate's resample function, then, uses the resampled value to create a square difference cost function e(x) = |x - rs_val|^2 that will be minimized later. rs_val is the resampled value """ val, attr_inds = None, None for negated, pred, t in preds: ## returns a vector of new values and an ## attr_inds (OrderedDict) that gives the mapping ## to parameter attributes val, attr_inds = pred.resample(negated, t, plan) ## if no resample defined for that pred, continue if val is not None: break if val is None: return [] bexprs, i = [], 0 for p in attr_inds: ## get the ll_param for p and gurobi variables ll_p = self._param_to_ll[p] n_vals = 0 grb_vars = [] for attr, ind_arr, t in attr_inds[p]: n_vals += len(ind_arr) grb_vars.extend( list(getattr(ll_p, attr)[ind_arr, t].flatten())) for j, grb_var in enumerate(grb_vars): ## create an objective saying stay close to the resampled value ## e(x) = (x - val[i+j])**2 ## e(x) = x^2 - 2*val[i+j]*x + val[i+j]^2 Q = np.eye(1) A = -2*val[i+j]*np.ones((1, 1)) b = np.ones((1, 1))*np.power(val[i+j], 2) # QuadExpr is 0.5*x^Tx + Ax + b quad_expr = QuadExpr(2*Q*self.rs_coeff, A*self.rs_coeff, b*self.rs_coeff) v_arr = np.array([grb_var]).reshape((1, 1), order='F') init_val = np.ones((1, 1))*val[i+j] bexpr = BoundExpr(quad_expr, Variable(v_arr, val[i+j].reshape((1, 1)))) bexprs.append(bexpr) i += n_vals return bexprs def _add_pred_dict(self, pred_dict, effective_timesteps, add_nonlin=True, priority=MAX_PRIORITY, verbose=False): """ This function creates constraints for the predicate and added to Prob class in sco. """ ## for debugging ignore_preds = [] priority = np.maximum(priority, 0) if not pred_dict['hl_info'] == "hl_state": start, end = pred_dict['active_timesteps'] active_range = range(start, end+1) if verbose: print "pred being added: ", pred_dict print active_range, effective_timesteps negated = pred_dict['negated'] pred = pred_dict['pred'] if pred.get_type() in ignore_preds: return if pred.priority > priority: return assert isinstance(pred, common_predicates.ExprPredicate) expr = pred.get_expr(negated) for t in effective_timesteps: if t in active_range: if expr is not None: if add_nonlin or isinstance(expr.expr, AffExpr): if verbose: print "expr being added at time ", t var = self._spawn_sco_var_for_pred(pred, t) bexpr = BoundExpr(expr, var) # TODO: REMOVE line below, for tracing back predicate for debugging. bexpr.pred = pred self._bexpr_to_pred[bexpr] = (negated, pred, t) groups = ['all'] if self.early_converge: ## this will check for convergence per parameter ## this is good if e.g., a single trajectory quickly ## gets stuck groups.extend([param.name for param in pred.params]) self._prob.add_cnt_expr(bexpr, groups) def _add_first_and_last_timesteps_of_actions(self, plan, priority = MAX_PRIORITY, add_nonlin=False, active_ts=None, verbose=False): """ This function adds all linear predicates and first and last timestep non-linear predicates from actions that are active within the range of active_ts. """ if active_ts==None: active_ts = (0, plan.horizon-1) for action in plan.actions: action_start, action_end = action.active_timesteps ## only add an action if action_start >= active_ts[1] and action_start > active_ts[0]: continue if action_end < active_ts[0]: continue for pred_dict in action.preds: if action_start >= active_ts[0]: self._add_pred_dict(pred_dict, [action_start], priority=priority, add_nonlin=add_nonlin, verbose=verbose) if action_end <= active_ts[1]: self._add_pred_dict(pred_dict, [action_end], priority=priority, add_nonlin=add_nonlin, verbose=verbose) ## add all of the linear ineqs timesteps = range(max(action_start+1, active_ts[0]), min(action_end, active_ts[1])) for pred_dict in action.preds: self._add_pred_dict(pred_dict, timesteps, add_nonlin=False, priority=priority, verbose=verbose) def _add_all_timesteps_of_actions(self, plan, priority=MAX_PRIORITY, add_nonlin=True, active_ts=None, verbose=False): """ This function adds both linear and non-linear predicates from actions that are active within the range of active_ts. """ if active_ts==None: active_ts = (0, plan.horizon-1) for action in plan.actions: action_start, action_end = action.active_timesteps if action_start >= active_ts[1] and action_start > active_ts[0]: continue if action_end < active_ts[0]: continue timesteps = range(max(action_start, active_ts[0]), min(action_end, active_ts[1])+1) for pred_dict in action.preds: self._add_pred_dict(pred_dict, timesteps, priority=priority, add_nonlin=add_nonlin, verbose=verbose) def _update_ll_params(self): """ update plan's parameters from low level grb_vars. expected to be called after each optimization. """ for ll_param in self._param_to_ll.values(): ll_param.update_param() if self.child_solver: self.child_solver._update_ll_params() def _spawn_parameter_to_ll_mapping(self, model, plan, active_ts=None): """ This function creates low level parameters for each parameter in the plan, initialized he corresponding grb_vars for each attributes in each timestep, update the grb models adds in equality constraints, construct a dictionary as param-to-ll_param mapping. """ if active_ts == None: active_ts=(0, plan.horizon-1) horizon = active_ts[1] - active_ts[0] + 1 self._param_to_ll = {} self.ll_start = active_ts[0] for param in plan.params.values(): ll_param = LLParam(model, param, horizon, active_ts) ll_param.create_grb_vars() self._param_to_ll[param] = ll_param model.update() for ll_param in self._param_to_ll.values(): ll_param.batch_add_cnts() def _add_obj_bexprs(self, obj_bexprs): """ This function adds objective bounded expressions to the Prob class in sco. """ for bexpr in obj_bexprs: self._prob.add_obj_expr(bexpr) def _get_trajopt_obj(self, plan, active_ts=None): """ This function selects parameter of type Robot and Can and returns the expression e(x) = |Px|^2 Which optimize trajectory so that robot and can's attributes in current timestep is close to that of next timestep. forming a straight line between each end points. Where P is the KT x KT matrix, where Px is the difference of value in current timestep compare to next timestep """ if active_ts == None: active_ts = (0, plan.horizon-1) start, end = active_ts traj_objs = [] for param in plan.params.values(): if param not in self._param_to_ll: continue if param._type in ['Robot', 'Can']: for attr_name in param.__dict__.iterkeys(): attr_type = param.get_attr_type(attr_name) if issubclass(attr_type, Vector): T = end - start + 1 K = attr_type.dim attr_val = getattr(param, attr_name) KT = K*T v = -1 * np.ones((KT - K, 1)) d = np.vstack((np.ones((KT - K, 1)), np.zeros((K, 1)))) # [:,0] allows numpy to see v and d as one-dimensional so # that numpy will create a diagonal matrix with v and d as a diagonal P = np.diag(v[:, 0], K) + np.diag(d[:, 0]) Q = np.dot(np.transpose(P), P) Q *= TRAJOPT_COEFF quad_expr = None if attr_name == 'pose' and param._type == 'Robot': quad_expr = QuadExpr(BASE_MOVE_COEFF*Q, np.zeros((1,KT)), np.zeros((1,1))) else: quad_expr = QuadExpr(Q, np.zeros((1,KT)), np.zeros((1,1))) param_ll = self._param_to_ll[param] ll_attr_val = getattr(param_ll, attr_name) param_ll_grb_vars = ll_attr_val.reshape((KT, 1), order='F') attr_val = getattr(param, attr_name) init_val = attr_val[:, start:end+1].reshape((KT, 1), order='F') bexpr = BoundExpr(quad_expr, Variable(param_ll_grb_vars, init_val)) # bexpr = BoundExpr(quad_expr, Variable(param_ll_grb_vars)) traj_objs.append(bexpr) return traj_objs